ilo: clean up ilo_blitter_pipe_begin()
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
32
33 #include "ilo_context.h"
34 #include "ilo_cp.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
40
41 /**
42 * Translate winsys tiling to hardware tiling.
43 */
44 int
45 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
46 {
47 switch (tiling) {
48 case INTEL_TILING_NONE:
49 return 0;
50 case INTEL_TILING_X:
51 return BRW_SURFACE_TILED;
52 case INTEL_TILING_Y:
53 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
54 default:
55 assert(!"unknown tiling");
56 return 0;
57 }
58 }
59
60 /**
61 * Translate a pipe primitive type to the matching hardware primitive type.
62 */
63 int
64 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
65 {
66 static const int prim_mapping[PIPE_PRIM_MAX] = {
67 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
68 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
69 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
70 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
71 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
72 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
73 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
74 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
75 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
76 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
77 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
78 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
79 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
80 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
81 };
82
83 assert(prim_mapping[prim]);
84
85 return prim_mapping[prim];
86 }
87
88 /**
89 * Translate a pipe texture target to the matching hardware surface type.
90 */
91 int
92 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
93 {
94 switch (target) {
95 case PIPE_BUFFER:
96 return BRW_SURFACE_BUFFER;
97 case PIPE_TEXTURE_1D:
98 case PIPE_TEXTURE_1D_ARRAY:
99 return BRW_SURFACE_1D;
100 case PIPE_TEXTURE_2D:
101 case PIPE_TEXTURE_RECT:
102 case PIPE_TEXTURE_2D_ARRAY:
103 return BRW_SURFACE_2D;
104 case PIPE_TEXTURE_3D:
105 return BRW_SURFACE_3D;
106 case PIPE_TEXTURE_CUBE:
107 case PIPE_TEXTURE_CUBE_ARRAY:
108 return BRW_SURFACE_CUBE;
109 default:
110 assert(!"unknown texture target");
111 return BRW_SURFACE_BUFFER;
112 }
113 }
114
115 /**
116 * Translate a depth/stencil pipe format to the matching hardware
117 * format. Return -1 on errors.
118 */
119 static int
120 gen6_translate_depth_format(enum pipe_format format)
121 {
122 switch (format) {
123 case PIPE_FORMAT_Z16_UNORM:
124 return BRW_DEPTHFORMAT_D16_UNORM;
125 case PIPE_FORMAT_Z32_FLOAT:
126 return BRW_DEPTHFORMAT_D32_FLOAT;
127 case PIPE_FORMAT_Z24X8_UNORM:
128 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
130 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
132 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
133 default:
134 return -1;
135 }
136 }
137
138 /**
139 * Translate a pipe logicop to the matching hardware logicop.
140 */
141 static int
142 gen6_translate_pipe_logicop(unsigned logicop)
143 {
144 switch (logicop) {
145 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
146 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
147 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
148 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
149 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
150 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
151 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
152 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
153 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
154 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
155 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
156 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
157 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
158 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
159 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
160 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
161 default:
162 assert(!"unknown logicop function");
163 return BRW_LOGICOPFUNCTION_CLEAR;
164 }
165 }
166
167 /**
168 * Translate a pipe blend function to the matching hardware blend function.
169 */
170 static int
171 gen6_translate_pipe_blend(unsigned blend)
172 {
173 switch (blend) {
174 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
175 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
176 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
177 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
178 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
179 default:
180 assert(!"unknown blend function");
181 return BRW_BLENDFUNCTION_ADD;
182 };
183 }
184
185 /**
186 * Translate a pipe blend factor to the matching hardware blend factor.
187 */
188 static int
189 gen6_translate_pipe_blendfactor(unsigned blendfactor)
190 {
191 switch (blendfactor) {
192 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
193 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
194 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
195 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
196 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
198 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
199 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
200 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
201 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
202 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
203 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
204 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
205 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
206 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
207 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
208 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
209 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
210 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
211 default:
212 assert(!"unknown blend factor");
213 return BRW_BLENDFACTOR_ONE;
214 };
215 }
216
217 /**
218 * Translate a pipe stencil op to the matching hardware stencil op.
219 */
220 static int
221 gen6_translate_pipe_stencil_op(unsigned stencil_op)
222 {
223 switch (stencil_op) {
224 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
225 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
226 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
227 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
228 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
229 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
230 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
231 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
232 default:
233 assert(!"unknown stencil op");
234 return BRW_STENCILOP_KEEP;
235 }
236 }
237
238 /**
239 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
240 */
241 static int
242 gen6_translate_tex_mipfilter(unsigned filter)
243 {
244 switch (filter) {
245 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
246 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
247 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
248 default:
249 assert(!"unknown mipfilter");
250 return BRW_MIPFILTER_NONE;
251 }
252 }
253
254 /**
255 * Translate a pipe texture filter to the matching hardware mapfilter.
256 */
257 static int
258 gen6_translate_tex_filter(unsigned filter)
259 {
260 switch (filter) {
261 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
262 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
263 default:
264 assert(!"unknown sampler filter");
265 return BRW_MAPFILTER_NEAREST;
266 }
267 }
268
269 /**
270 * Translate a pipe texture coordinate wrapping mode to the matching hardware
271 * wrapping mode.
272 */
273 static int
274 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
275 {
276 /* clamp to edge or border? */
277 if (wrap == PIPE_TEX_WRAP_CLAMP) {
278 wrap = (clamp_to_edge) ?
279 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
280 }
281
282 switch (wrap) {
283 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
284 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
285 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
286 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
287 case PIPE_TEX_WRAP_CLAMP:
288 case PIPE_TEX_WRAP_MIRROR_CLAMP:
289 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
290 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
291 default:
292 assert(!"unknown sampler wrap mode");
293 return BRW_TEXCOORDMODE_WRAP;
294 }
295 }
296
297 /**
298 * Translate a pipe DSA test function to the matching hardware compare
299 * function.
300 */
301 static int
302 gen6_translate_dsa_func(unsigned func)
303 {
304 switch (func) {
305 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
306 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
307 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
308 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
309 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
310 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
311 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
312 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
313 default:
314 assert(!"unknown depth/stencil/alpha test function");
315 return BRW_COMPAREFUNCTION_NEVER;
316 }
317 }
318
319 /**
320 * Translate a pipe shadow compare function to the matching hardware shadow
321 * function.
322 */
323 static int
324 gen6_translate_shadow_func(unsigned func)
325 {
326 /*
327 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
328 * comparison, and 1.0 is returned when the comparison is true.
329 *
330 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
331 * the comparison, and 0.0 is returned when the comparison is true.
332 */
333 switch (func) {
334 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
335 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
336 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
337 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
338 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
339 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
340 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
341 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
342 default:
343 assert(!"unknown shadow compare function");
344 return BRW_PREFILTER_NEVER;
345 }
346 }
347
348 /**
349 * Translate an index size to the matching hardware index format.
350 */
351 static int
352 gen6_translate_index_size(int size)
353 {
354 switch (size) {
355 case 4: return BRW_INDEX_DWORD;
356 case 2: return BRW_INDEX_WORD;
357 case 1: return BRW_INDEX_BYTE;
358 default:
359 assert(!"unknown index size");
360 return BRW_INDEX_BYTE;
361 }
362 }
363
364 static void
365 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
366 struct intel_bo *general_state_bo,
367 struct intel_bo *surface_state_bo,
368 struct intel_bo *dynamic_state_bo,
369 struct intel_bo *indirect_object_bo,
370 struct intel_bo *instruction_bo,
371 uint32_t general_state_size,
372 uint32_t dynamic_state_size,
373 uint32_t indirect_object_size,
374 uint32_t instruction_size,
375 struct ilo_cp *cp)
376 {
377 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
378 const uint8_t cmd_len = 10;
379
380 ILO_GPE_VALID_GEN(dev, 6, 7);
381
382 /* 4K-page aligned */
383 assert(((general_state_size | dynamic_state_size |
384 indirect_object_size | instruction_size) & 0xfff) == 0);
385
386 ilo_cp_begin(cp, cmd_len);
387 ilo_cp_write(cp, cmd | (cmd_len - 2));
388
389 ilo_cp_write_bo(cp, 1, general_state_bo,
390 INTEL_DOMAIN_RENDER,
391 0);
392 ilo_cp_write_bo(cp, 1, surface_state_bo,
393 INTEL_DOMAIN_SAMPLER,
394 0);
395 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
396 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
397 0);
398 ilo_cp_write_bo(cp, 1, indirect_object_bo,
399 0,
400 0);
401 ilo_cp_write_bo(cp, 1, instruction_bo,
402 INTEL_DOMAIN_INSTRUCTION,
403 0);
404
405 if (general_state_size) {
406 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
407 INTEL_DOMAIN_RENDER,
408 0);
409 }
410 else {
411 /* skip range check */
412 ilo_cp_write(cp, 1);
413 }
414
415 if (dynamic_state_size) {
416 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
417 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
418 0);
419 }
420 else {
421 /* skip range check */
422 ilo_cp_write(cp, 0xfffff000 + 1);
423 }
424
425 if (indirect_object_size) {
426 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
427 0,
428 0);
429 }
430 else {
431 /* skip range check */
432 ilo_cp_write(cp, 0xfffff000 + 1);
433 }
434
435 if (instruction_size) {
436 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
437 INTEL_DOMAIN_INSTRUCTION,
438 0);
439 }
440 else {
441 /* skip range check */
442 ilo_cp_write(cp, 1);
443 }
444
445 ilo_cp_end(cp);
446 }
447
/**
 * Emit STATE_SIP (2 dwords): the command header followed by \p sip.
 *
 * The dword-length field (cmd_len - 2) belongs in the header dword, and
 * ilo_cp_begin() takes the plain command length, as in every other emitter
 * in this file.
 */
static void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
463
/**
 * Emit 3DSTATE_VF_STATISTICS, a single-dword command whose bit 0 carries
 * \p enable.
 */
static void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint8_t num_dwords = 1;
   const uint32_t dw0 = ILO_GPE_CMD(0x1, 0x0, 0x0b) | enable;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, dw0);
   ilo_cp_end(cp);
}
478
/**
 * Emit PIPELINE_SELECT, a single-dword command whose low bits carry
 * \p pipeline.
 *
 * \param pipeline  0x0 or 0x1 (3D or media, per the assertion below)
 */
static void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   /* uint32_t, like the command header of every other emitter here */
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
496
/**
 * Emit MEDIA_VFE_STATE (8 dwords, GEN6 only).
 *
 * DW2 packs (max_threads - 1) at bit 16 and num_urb_entries at bit 8, and
 * sets bits 7 and 6 (Reset Gateway Timer, Bypass Gateway Control).  DW4
 * packs urb_entry_size at bit 16 with a fixed CURBE allocation size of 480.
 * The scratch, MBZ and scoreboard dwords are written as zero.
 */
static void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint8_t num_dwords = 8;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x00) | (num_dwords - 2);
   const uint32_t threads_and_urb =
      ((max_threads - 1) << 16) |
      (num_urb_entries << 8) |
      (1 << 7) |                 /* Reset Gateway Timer */
      (1 << 6);                  /* Bypass Gateway Control */
   const uint32_t alloc_sizes =
      (urb_entry_size << 16) |   /* URB Entry Allocation Size */
      480;                       /* CURBE Allocation Size */

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0);                /* scratch */
   ilo_cp_write(cp, threads_and_urb);
   ilo_cp_write(cp, 0);                /* MBZ */
   ilo_cp_write(cp, alloc_sizes);
   ilo_cp_write(cp, 0);                /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
528
/**
 * Emit MEDIA_CURBE_LOAD (4 dwords, GEN6 only).
 *
 * \param buf   value written to the last dword; must be a multiple of 32
 * \param size  rounded up to a multiple of 32 before being written
 */
static void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint8_t num_dwords = 4;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x01) | (num_dwords - 2);
   int aligned_size;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);

   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   aligned_size = align(size, 32);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, aligned_size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
550
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (4 dwords, GEN6 only).
 *
 * \param offset   value written to the last dword; must be a multiple of 32
 * \param num_ids  number of interface descriptors; the length dword is
 *                 num_ids * 8 * 4 because every descriptor has 8 dwords
 */
static void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint8_t num_dwords = 4;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x02) | (num_dwords - 2);
   /* every ID has 8 DWords */
   const int total_bytes = num_ids * 8 * 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, total_bytes);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
571
/**
 * Emit MEDIA_GATEWAY_STATE (2 dwords, GEN6 only).
 *
 * DW1 packs \p id at bit 16, \p byte at bit 8 and \p thread_count in the
 * low bits.
 */
static void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x03) | (num_dwords - 2);
   const uint32_t gateway = (id << 16) | (byte << 8) | thread_count;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, gateway);
   ilo_cp_end(cp);
}
592
/**
 * Emit MEDIA_STATE_FLUSH (2 dwords, GEN6 only).
 *
 * DW1 packs \p thread_count_water_mark at bit 16 and \p barrier_mask in the
 * low bits.
 */
static void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x04) | (num_dwords - 2);
   const uint32_t flush = (thread_count_water_mark << 16) | barrier_mask;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, flush);
   ilo_cp_end(cp);
}
613
/**
 * Placeholder for MEDIA_OBJECT_WALKER.  Nothing is emitted; calling this
 * function trips an assertion.
 */
static void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   /* both parameters are intentionally unused */
   (void) dev;
   (void) cp;

   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
620
621 static void
622 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
623 uint32_t vs_binding_table,
624 uint32_t gs_binding_table,
625 uint32_t ps_binding_table,
626 struct ilo_cp *cp)
627 {
628 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
629 const uint8_t cmd_len = 4;
630
631 ILO_GPE_VALID_GEN(dev, 6, 6);
632
633 ilo_cp_begin(cp, cmd_len);
634 ilo_cp_write(cp, cmd | (cmd_len - 2) |
635 GEN6_BINDING_TABLE_MODIFY_VS |
636 GEN6_BINDING_TABLE_MODIFY_GS |
637 GEN6_BINDING_TABLE_MODIFY_PS);
638 ilo_cp_write(cp, vs_binding_table);
639 ilo_cp_write(cp, gs_binding_table);
640 ilo_cp_write(cp, ps_binding_table);
641 ilo_cp_end(cp);
642 }
643
644 static void
645 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
646 uint32_t vs_sampler_state,
647 uint32_t gs_sampler_state,
648 uint32_t ps_sampler_state,
649 struct ilo_cp *cp)
650 {
651 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
652 const uint8_t cmd_len = 4;
653
654 ILO_GPE_VALID_GEN(dev, 6, 6);
655
656 ilo_cp_begin(cp, cmd_len);
657 ilo_cp_write(cp, cmd | (cmd_len - 2) |
658 VS_SAMPLER_STATE_CHANGE |
659 GS_SAMPLER_STATE_CHANGE |
660 PS_SAMPLER_STATE_CHANGE);
661 ilo_cp_write(cp, vs_sampler_state);
662 ilo_cp_write(cp, gs_sampler_state);
663 ilo_cp_write(cp, ps_sampler_state);
664 ilo_cp_end(cp);
665 }
666
667 static void
668 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
669 int vs_total_size, int gs_total_size,
670 int vs_entry_size, int gs_entry_size,
671 struct ilo_cp *cp)
672 {
673 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
674 const uint8_t cmd_len = 3;
675 const int row_size = 128; /* 1024 bits */
676 int vs_alloc_size, gs_alloc_size;
677 int vs_num_entries, gs_num_entries;
678
679 ILO_GPE_VALID_GEN(dev, 6, 6);
680
681 /* in 1024-bit URB rows */
682 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
683 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
684
685 /* the valid range is [1, 5] */
686 if (!vs_alloc_size)
687 vs_alloc_size = 1;
688 if (!gs_alloc_size)
689 gs_alloc_size = 1;
690 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
691
692 /* the valid range is [24, 256] in multiples of 4 */
693 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
694 if (vs_num_entries > 256)
695 vs_num_entries = 256;
696 assert(vs_num_entries >= 24);
697
698 /* the valid range is [0, 256] in multiples of 4 */
699 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
700 if (gs_num_entries > 256)
701 gs_num_entries = 256;
702
703 ilo_cp_begin(cp, cmd_len);
704 ilo_cp_write(cp, cmd | (cmd_len - 2));
705 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
706 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
707 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
708 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
709 ilo_cp_end(cp);
710 }
711
712 static void
713 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
714 const struct pipe_vertex_buffer *vbuffers,
715 uint64_t vbuffer_mask,
716 const struct ilo_ve_state *ve,
717 struct ilo_cp *cp)
718 {
719 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
720 uint8_t cmd_len;
721 unsigned hw_idx;
722
723 ILO_GPE_VALID_GEN(dev, 6, 7);
724
725 /*
726 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
727 *
728 * "From 1 to 33 VBs can be specified..."
729 */
730 assert(vbuffer_mask <= (1UL << 33));
731
732 if (!vbuffer_mask)
733 return;
734
735 cmd_len = 1;
736
737 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
738 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
739
740 if (vbuffer_mask & (1 << pipe_idx))
741 cmd_len += 4;
742 }
743
744 ilo_cp_begin(cp, cmd_len);
745 ilo_cp_write(cp, cmd | (cmd_len - 2));
746
747 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
748 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
749 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
750 const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
751 uint32_t dw;
752
753 if (!(vbuffer_mask & (1 << pipe_idx)))
754 continue;
755
756 dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
757
758 if (instance_divisor)
759 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
760 else
761 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
762
763 if (dev->gen >= ILO_GEN(7))
764 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
765
766 /* use null vb if there is no buffer or the stride is out of range */
767 if (vb->buffer && vb->stride <= 2048) {
768 const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
769 const uint32_t start_offset = vb->buffer_offset;
770 /*
771 * As noted in ilo_translate_format(), we treat some 3-component
772 * formats as 4-component formats to work around hardware
773 * limitations. Imagine the case where the vertex buffer holds a
774 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
775 * The hardware would not be able to fetch it because the vertex
776 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
777 * and that takes at least 8 bytes.
778 *
779 * For the workaround to work, we query the physical size, which is
780 * page aligned, to calculate end_offset so that the last vertex has
781 * a better chance to be fetched.
782 */
783 const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
784
785 dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
786
787 ilo_cp_write(cp, dw);
788 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
789 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
790 ilo_cp_write(cp, instance_divisor);
791 }
792 else {
793 dw |= 1 << 13;
794
795 ilo_cp_write(cp, dw);
796 ilo_cp_write(cp, 0);
797 ilo_cp_write(cp, 0);
798 ilo_cp_write(cp, instance_divisor);
799 }
800 }
801
802 ilo_cp_end(cp);
803 }
804
805 static void
806 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
807 struct ilo_ve_cso *cso)
808 {
809 int format;
810
811 ILO_GPE_VALID_GEN(dev, 6, 7);
812
813 /*
814 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
815 *
816 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
817 * valid VERTEX_ELEMENT structure.
818 *
819 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
820 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
821 *
822 * - The Source Element Format must be set to the UINT format.
823 *
824 * - [DevSNB]: Edge Flags are not supported for QUADLIST
825 * primitives. Software may elect to convert QUADLIST primitives
826 * to some set of corresponding edge-flag-supported primitive
827 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
828 */
829
830 cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
831 cso->payload[1] =
832 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
833 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
834 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
835 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
836
837 /*
838 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
839 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
840 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
841 *
842 * Since all the hardware cares about is whether the flags are zero or not,
843 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
844 */
845 format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
846 if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
847 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
848 BRW_SURFACEFORMAT_R32_FLOAT - 1);
849
850 cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
851 }
852 else {
853 assert(format == BRW_SURFACEFORMAT_R8_UINT);
854 }
855 }
856
857 static void
858 ve_init_cso_with_components(const struct ilo_dev_info *dev,
859 int comp0, int comp1, int comp2, int comp3,
860 struct ilo_ve_cso *cso)
861 {
862 ILO_GPE_VALID_GEN(dev, 6, 7);
863
864 STATIC_ASSERT(Elements(cso->payload) >= 2);
865 cso->payload[0] = GEN6_VE0_VALID;
866 cso->payload[1] =
867 comp0 << BRW_VE1_COMPONENT_0_SHIFT |
868 comp1 << BRW_VE1_COMPONENT_1_SHIFT |
869 comp2 << BRW_VE1_COMPONENT_2_SHIFT |
870 comp3 << BRW_VE1_COMPONENT_3_SHIFT;
871 }
872
873 static void
874 ve_init_cso(const struct ilo_dev_info *dev,
875 const struct pipe_vertex_element *state,
876 unsigned vb_index,
877 struct ilo_ve_cso *cso)
878 {
879 int comp[4] = {
880 BRW_VE1_COMPONENT_STORE_SRC,
881 BRW_VE1_COMPONENT_STORE_SRC,
882 BRW_VE1_COMPONENT_STORE_SRC,
883 BRW_VE1_COMPONENT_STORE_SRC,
884 };
885 int format;
886
887 ILO_GPE_VALID_GEN(dev, 6, 7);
888
889 switch (util_format_get_nr_components(state->src_format)) {
890 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
891 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
892 case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
893 BRW_VE1_COMPONENT_STORE_1_INT :
894 BRW_VE1_COMPONENT_STORE_1_FLT;
895 }
896
897 format = ilo_translate_vertex_format(state->src_format);
898
899 STATIC_ASSERT(Elements(cso->payload) >= 2);
900 cso->payload[0] =
901 vb_index << GEN6_VE0_INDEX_SHIFT |
902 GEN6_VE0_VALID |
903 format << BRW_VE0_FORMAT_SHIFT |
904 state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
905
906 cso->payload[1] =
907 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
908 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
909 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
910 comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
911 }
912
913 void
914 ilo_gpe_init_ve(const struct ilo_dev_info *dev,
915 unsigned num_states,
916 const struct pipe_vertex_element *states,
917 struct ilo_ve_state *ve)
918 {
919 unsigned i;
920
921 ILO_GPE_VALID_GEN(dev, 6, 7);
922
923 ve->count = num_states;
924 ve->vb_count = 0;
925
926 for (i = 0; i < num_states; i++) {
927 const unsigned pipe_idx = states[i].vertex_buffer_index;
928 const unsigned instance_divisor = states[i].instance_divisor;
929 unsigned hw_idx;
930
931 /*
932 * map the pipe vb to the hardware vb, which has a fixed instance
933 * divisor
934 */
935 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
936 if (ve->vb_mapping[hw_idx] == pipe_idx &&
937 ve->instance_divisors[hw_idx] == instance_divisor)
938 break;
939 }
940
941 /* create one if there is no matching hardware vb */
942 if (hw_idx >= ve->vb_count) {
943 hw_idx = ve->vb_count++;
944
945 ve->vb_mapping[hw_idx] = pipe_idx;
946 ve->instance_divisors[hw_idx] = instance_divisor;
947 }
948
949 ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
950 }
951 }
952
953 static void
954 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
955 const struct ilo_ve_state *ve,
956 bool last_velement_edgeflag,
957 bool prepend_generated_ids,
958 struct ilo_cp *cp)
959 {
960 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
961 uint8_t cmd_len;
962 unsigned i;
963
964 ILO_GPE_VALID_GEN(dev, 6, 7);
965
966 /*
967 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
968 *
969 * "Up to 34 (DevSNB+) vertex elements are supported."
970 */
971 assert(ve->count + prepend_generated_ids <= 34);
972
973 if (!ve->count && !prepend_generated_ids) {
974 struct ilo_ve_cso dummy;
975
976 ve_init_cso_with_components(dev,
977 BRW_VE1_COMPONENT_STORE_0,
978 BRW_VE1_COMPONENT_STORE_0,
979 BRW_VE1_COMPONENT_STORE_0,
980 BRW_VE1_COMPONENT_STORE_1_FLT,
981 &dummy);
982
983 cmd_len = 3;
984 ilo_cp_begin(cp, cmd_len);
985 ilo_cp_write(cp, cmd | (cmd_len - 2));
986 ilo_cp_write_multi(cp, dummy.payload, 2);
987 ilo_cp_end(cp);
988
989 return;
990 }
991
992 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
993
994 ilo_cp_begin(cp, cmd_len);
995 ilo_cp_write(cp, cmd | (cmd_len - 2));
996
997 if (prepend_generated_ids) {
998 struct ilo_ve_cso gen_ids;
999
1000 ve_init_cso_with_components(dev,
1001 BRW_VE1_COMPONENT_STORE_VID,
1002 BRW_VE1_COMPONENT_STORE_IID,
1003 BRW_VE1_COMPONENT_NOSTORE,
1004 BRW_VE1_COMPONENT_NOSTORE,
1005 &gen_ids);
1006
1007 ilo_cp_write_multi(cp, gen_ids.payload, 2);
1008 }
1009
1010 if (last_velement_edgeflag) {
1011 struct ilo_ve_cso edgeflag;
1012
1013 for (i = 0; i < ve->count - 1; i++)
1014 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
1015
1016 edgeflag = ve->cso[i];
1017 ve_set_cso_edgeflag(dev, &edgeflag);
1018 ilo_cp_write_multi(cp, edgeflag.payload, 2);
1019 }
1020 else {
1021 for (i = 0; i < ve->count; i++)
1022 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
1023 }
1024
1025 ilo_cp_end(cp);
1026 }
1027
1028 static void
1029 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
1030 const struct ilo_ib_state *ib,
1031 bool enable_cut_index,
1032 struct ilo_cp *cp)
1033 {
1034 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
1035 const uint8_t cmd_len = 3;
1036 const struct ilo_buffer *buf = ilo_buffer(ib->resource);
1037 uint32_t start_offset, end_offset;
1038 int format;
1039
1040 ILO_GPE_VALID_GEN(dev, 6, 7);
1041
1042 if (!buf)
1043 return;
1044
1045 format = gen6_translate_index_size(ib->state.index_size);
1046
1047 /*
1048 * set start_offset to 0 here and adjust pipe_draw_info::start with
1049 * ib->draw_start_offset in 3DPRIMITIVE
1050 */
1051 start_offset = 0;
1052 end_offset = buf->bo_size;
1053
1054 /* end_offset must also be aligned and is inclusive */
1055 end_offset -= (end_offset % ib->state.index_size);
1056 end_offset--;
1057
1058 ilo_cp_begin(cp, cmd_len);
1059 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1060 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
1061 format << 8);
1062 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
1063 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
1064 ilo_cp_end(cp);
1065 }
1066
1067 static void
1068 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
1069 uint32_t clip_viewport,
1070 uint32_t sf_viewport,
1071 uint32_t cc_viewport,
1072 struct ilo_cp *cp)
1073 {
1074 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
1075 const uint8_t cmd_len = 4;
1076
1077 ILO_GPE_VALID_GEN(dev, 6, 6);
1078
1079 ilo_cp_begin(cp, cmd_len);
1080 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1081 GEN6_CLIP_VIEWPORT_MODIFY |
1082 GEN6_SF_VIEWPORT_MODIFY |
1083 GEN6_CC_VIEWPORT_MODIFY);
1084 ilo_cp_write(cp, clip_viewport);
1085 ilo_cp_write(cp, sf_viewport);
1086 ilo_cp_write(cp, cc_viewport);
1087 ilo_cp_end(cp);
1088 }
1089
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (GEN6 only).
 *
 * Each of the three state offsets is written with bit 0 set.
 * NOTE(review): bit 0 is presumably the per-pointer "modify enable" flag;
 * confirm against the PRM.
 */
static void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x0e);
   const uint8_t num_dwords = 4;
   const uint32_t dw1 = blend_state | 1;
   const uint32_t dw2 = depth_stencil_state | 1;
   const uint32_t dw3 = color_calc_state | 1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, dw3);
   ilo_cp_end(cp);
}
1109
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS: a header dword followed by the
 * scissor rectangle state offset, written unmodified.
 */
static void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x0f);
   const uint8_t num_dwords = 2;
   const uint32_t dw0 = opcode | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1125
1126 void
1127 ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
1128 const struct ilo_shader_state *vs,
1129 struct ilo_shader_cso *cso)
1130 {
1131 int start_grf, vue_read_len, max_threads;
1132 uint32_t dw2, dw4, dw5;
1133
1134 ILO_GPE_VALID_GEN(dev, 6, 7);
1135
1136 start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
1137 vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
1138
1139 /*
1140 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
1141 *
1142 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
1143 * 128-bit vertex elements to be passed into the payload for each
1144 * vertex."
1145 *
1146 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
1147 * data to be read and passed to the thread."
1148 */
1149 vue_read_len = (vue_read_len + 1) / 2;
1150 if (!vue_read_len)
1151 vue_read_len = 1;
1152
1153 switch (dev->gen) {
1154 case ILO_GEN(6):
1155 /*
1156 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
1157 *
1158 * "Device # of EUs #Threads/EU
1159 * SNB GT2 12 5
1160 * SNB GT1 6 4"
1161 */
1162 max_threads = (dev->gt == 2) ? 60 : 24;
1163 break;
1164 case ILO_GEN(7):
1165 /*
1166 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
1167 *
1168 * "Device # of EUs #Threads/EU
1169 * Ivy Bridge (GT2) 16 8
1170 * Ivy Bridge (GT1) 6 6"
1171 */
1172 max_threads = (dev->gt == 2) ? 128 : 36;
1173 break;
1174 case ILO_GEN(7.5):
1175 /* see brwCreateContext() */
1176 max_threads = (dev->gt == 2) ? 280 : 70;
1177 break;
1178 default:
1179 max_threads = 1;
1180 break;
1181 }
1182
1183 dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;
1184
1185 dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
1186 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
1187 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
1188
1189 dw5 = GEN6_VS_STATISTICS_ENABLE |
1190 GEN6_VS_ENABLE;
1191
1192 if (dev->gen >= ILO_GEN(7.5))
1193 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
1194 else
1195 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
1196
1197 STATIC_ASSERT(Elements(cso->payload) >= 3);
1198 cso->payload[0] = dw2;
1199 cso->payload[1] = dw4;
1200 cso->payload[2] = dw5;
1201 }
1202
1203 static void
1204 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1205 const struct ilo_shader_state *vs,
1206 int num_samplers,
1207 struct ilo_cp *cp)
1208 {
1209 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1210 const uint8_t cmd_len = 6;
1211 const struct ilo_shader_cso *cso;
1212 uint32_t dw2, dw4, dw5;
1213
1214 ILO_GPE_VALID_GEN(dev, 6, 7);
1215
1216 if (!vs) {
1217 ilo_cp_begin(cp, cmd_len);
1218 ilo_cp_write(cp, cmd | (cmd_len - 2));
1219 ilo_cp_write(cp, 0);
1220 ilo_cp_write(cp, 0);
1221 ilo_cp_write(cp, 0);
1222 ilo_cp_write(cp, 0);
1223 ilo_cp_write(cp, 0);
1224 ilo_cp_end(cp);
1225 return;
1226 }
1227
1228 cso = ilo_shader_get_kernel_cso(vs);
1229 dw2 = cso->payload[0];
1230 dw4 = cso->payload[1];
1231 dw5 = cso->payload[2];
1232
1233 dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1234
1235 ilo_cp_begin(cp, cmd_len);
1236 ilo_cp_write(cp, cmd | (cmd_len - 2));
1237 ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
1238 ilo_cp_write(cp, dw2);
1239 ilo_cp_write(cp, 0); /* scratch */
1240 ilo_cp_write(cp, dw4);
1241 ilo_cp_write(cp, dw5);
1242 ilo_cp_end(cp);
1243 }
1244
1245 void
1246 ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
1247 const struct ilo_shader_state *gs,
1248 struct ilo_shader_cso *cso)
1249 {
1250 int start_grf, vue_read_len, max_threads;
1251 uint32_t dw2, dw4, dw5, dw6;
1252
1253 ILO_GPE_VALID_GEN(dev, 6, 6);
1254
1255 if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
1256 start_grf = ilo_shader_get_kernel_param(gs,
1257 ILO_KERNEL_URB_DATA_START_REG);
1258
1259 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
1260 }
1261 else {
1262 start_grf = ilo_shader_get_kernel_param(gs,
1263 ILO_KERNEL_VS_GEN6_SO_START_REG);
1264
1265 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
1266 }
1267
1268 /*
1269 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1270 *
1271 * "Specifies the amount of URB data read and passed in the thread
1272 * payload for each Vertex URB entry, in 256-bit register increments.
1273 *
1274 * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
1275 * 0 indicating no Vertex URB data to be read and passed to the
1276 * thread."
1277 */
1278 vue_read_len = (vue_read_len + 1) / 2;
1279 if (!vue_read_len)
1280 vue_read_len = 1;
1281
1282 /*
1283 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
1284 *
1285 * "Maximum Number of Threads valid range is [0,27] when Rendering
1286 * Enabled bit is set."
1287 *
1288 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
1289 *
1290 * "Programming Note: If the GS stage is enabled, software must always
1291 * allocate at least one GS URB Entry. This is true even if the GS
1292 * thread never needs to output vertices to the pipeline, e.g., when
1293 * only performing stream output. This is an artifact of the need to
1294 * pass the GS thread an initial destination URB handle."
1295 *
1296 * As such, we always enable rendering, and limit the number of threads.
1297 */
1298 if (dev->gt == 2) {
1299 /* maximum is 60, but limited to 28 */
1300 max_threads = 28;
1301 }
1302 else {
1303 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
1304 max_threads = 21;
1305 }
1306
1307 dw2 = GEN6_GS_SPF_MODE;
1308
1309 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1310 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1311 start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1312
1313 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
1314 GEN6_GS_STATISTICS_ENABLE |
1315 GEN6_GS_SO_STATISTICS_ENABLE |
1316 GEN6_GS_RENDERING_ENABLE;
1317
1318 /*
1319 * we cannot make use of GEN6_GS_REORDER because it will reorder
1320 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
1321 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
1322 * (2N+2, 2N+1, 2N+3)).
1323 */
1324 dw6 = GEN6_GS_ENABLE;
1325
1326 if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
1327 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1328
1329 if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
1330 const uint32_t svbi_post_inc =
1331 ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
1332
1333 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1334 if (svbi_post_inc) {
1335 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1336 svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1337 }
1338 }
1339
1340 STATIC_ASSERT(Elements(cso->payload) >= 4);
1341 cso->payload[0] = dw2;
1342 cso->payload[1] = dw4;
1343 cso->payload[2] = dw5;
1344 cso->payload[3] = dw6;
1345 }
1346
1347 static void
1348 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1349 const struct ilo_shader_state *gs,
1350 const struct ilo_shader_state *vs,
1351 int verts_per_prim,
1352 struct ilo_cp *cp)
1353 {
1354 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1355 const uint8_t cmd_len = 7;
1356 uint32_t dw1, dw2, dw4, dw5, dw6;
1357
1358 ILO_GPE_VALID_GEN(dev, 6, 6);
1359
1360 if (gs) {
1361 const struct ilo_shader_cso *cso;
1362
1363 dw1 = ilo_shader_get_kernel_offset(gs);
1364
1365 cso = ilo_shader_get_kernel_cso(gs);
1366 dw2 = cso->payload[0];
1367 dw4 = cso->payload[1];
1368 dw5 = cso->payload[2];
1369 dw6 = cso->payload[3];
1370 }
1371 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
1372 struct ilo_shader_cso cso;
1373 enum ilo_kernel_param param;
1374
1375 switch (verts_per_prim) {
1376 case 1:
1377 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
1378 break;
1379 case 2:
1380 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
1381 break;
1382 default:
1383 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
1384 break;
1385 }
1386
1387 dw1 = ilo_shader_get_kernel_offset(vs) +
1388 ilo_shader_get_kernel_param(vs, param);
1389
1390 /* cannot use VS's CSO */
1391 ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
1392 dw2 = cso.payload[0];
1393 dw4 = cso.payload[1];
1394 dw5 = cso.payload[2];
1395 dw6 = cso.payload[3];
1396 }
1397 else {
1398 dw1 = 0;
1399 dw2 = 0;
1400 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1401 dw5 = GEN6_GS_STATISTICS_ENABLE;
1402 dw6 = 0;
1403 }
1404
1405 ilo_cp_begin(cp, cmd_len);
1406 ilo_cp_write(cp, cmd | (cmd_len - 2));
1407 ilo_cp_write(cp, dw1);
1408 ilo_cp_write(cp, dw2);
1409 ilo_cp_write(cp, 0);
1410 ilo_cp_write(cp, dw4);
1411 ilo_cp_write(cp, dw5);
1412 ilo_cp_write(cp, dw6);
1413 ilo_cp_end(cp);
1414 }
1415
1416 void
1417 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
1418 const struct pipe_rasterizer_state *state,
1419 struct ilo_rasterizer_clip *clip)
1420 {
1421 uint32_t dw1, dw2, dw3;
1422
1423 ILO_GPE_VALID_GEN(dev, 6, 7);
1424
1425 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
1426
1427 if (dev->gen >= ILO_GEN(7)) {
1428 /*
1429 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
1430 *
1431 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
1432 * enabled only for the cases where the incoming primitive topology
1433 * into the clipper guaranteed to be Trilist."
1434 *
1435 * What does this mean?
1436 */
1437 dw1 |= 0 << 19 |
1438 GEN7_CLIP_EARLY_CULL;
1439
1440 if (state->front_ccw)
1441 dw1 |= GEN7_CLIP_WINDING_CCW;
1442
1443 switch (state->cull_face) {
1444 case PIPE_FACE_NONE:
1445 dw1 |= GEN7_CLIP_CULLMODE_NONE;
1446 break;
1447 case PIPE_FACE_FRONT:
1448 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
1449 break;
1450 case PIPE_FACE_BACK:
1451 dw1 |= GEN7_CLIP_CULLMODE_BACK;
1452 break;
1453 case PIPE_FACE_FRONT_AND_BACK:
1454 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
1455 break;
1456 }
1457 }
1458
1459 dw2 = GEN6_CLIP_ENABLE |
1460 GEN6_CLIP_XY_TEST |
1461 state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
1462 GEN6_CLIP_MODE_NORMAL;
1463
1464 if (state->clip_halfz)
1465 dw2 |= GEN6_CLIP_API_D3D;
1466 else
1467 dw2 |= GEN6_CLIP_API_OGL;
1468
1469 if (state->depth_clip)
1470 dw2 |= GEN6_CLIP_Z_TEST;
1471
1472 if (state->flatshade_first) {
1473 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1474 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1475 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1476 }
1477 else {
1478 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1479 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1480 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1481 }
1482
1483 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
1484 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
1485
1486 clip->payload[0] = dw1;
1487 clip->payload[1] = dw2;
1488 clip->payload[2] = dw3;
1489
1490 clip->can_enable_guardband = true;
1491
1492 /*
1493 * There are several reasons that guard band test should be disabled
1494 *
1495 * - GL wide points (to avoid partially visibie object)
1496 * - GL wide or AA lines (to avoid partially visibie object)
1497 */
1498 if (state->point_size_per_vertex || state->point_size > 1.0f)
1499 clip->can_enable_guardband = false;
1500 if (state->line_smooth || state->line_width > 1.0f)
1501 clip->can_enable_guardband = false;
1502 }
1503
1504 static void
1505 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1506 const struct ilo_rasterizer_state *rasterizer,
1507 const struct ilo_shader_state *fs,
1508 bool enable_guardband,
1509 int num_viewports,
1510 struct ilo_cp *cp)
1511 {
1512 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1513 const uint8_t cmd_len = 4;
1514 uint32_t dw1, dw2, dw3;
1515
1516 if (rasterizer) {
1517 int interps;
1518
1519 dw1 = rasterizer->clip.payload[0];
1520 dw2 = rasterizer->clip.payload[1];
1521 dw3 = rasterizer->clip.payload[2];
1522
1523 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1524 dw2 |= GEN6_CLIP_GB_TEST;
1525
1526 interps = (fs) ? ilo_shader_get_kernel_param(fs,
1527 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
1528
1529 if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
1530 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
1531 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
1532 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1533
1534 dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1535 (num_viewports - 1);
1536 }
1537 else {
1538 dw1 = 0;
1539 dw2 = 0;
1540 dw3 = 0;
1541 }
1542
1543 ilo_cp_begin(cp, cmd_len);
1544 ilo_cp_write(cp, cmd | (cmd_len - 2));
1545 ilo_cp_write(cp, dw1);
1546 ilo_cp_write(cp, dw2);
1547 ilo_cp_write(cp, dw3);
1548 ilo_cp_end(cp);
1549 }
1550
1551 void
1552 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
1553 const struct pipe_rasterizer_state *state,
1554 struct ilo_rasterizer_sf *sf)
1555 {
1556 float offset_const, offset_scale, offset_clamp;
1557 int line_width, point_width;
1558 uint32_t dw1, dw2, dw3;
1559
1560 ILO_GPE_VALID_GEN(dev, 6, 7);
1561
1562 /*
1563 * Scale the constant term. The minimum representable value used by the HW
1564 * is not large enouch to be the minimum resolvable difference.
1565 */
1566 offset_const = state->offset_units * 2.0f;
1567
1568 offset_scale = state->offset_scale;
1569 offset_clamp = state->offset_clamp;
1570
1571 /*
1572 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1573 *
1574 * "This bit (Statistics Enable) should be set whenever clipping is
1575 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
1576 * should be cleared if clipping is disabled or Statistics Enable in
1577 * CLIP_STATE is clear."
1578 */
1579 dw1 = GEN6_SF_STATISTICS_ENABLE |
1580 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
1581
1582 /* XXX GEN6 path seems to work fine for GEN7 */
1583 if (false && dev->gen >= ILO_GEN(7)) {
1584 /*
1585 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
1586 *
1587 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
1588 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
1589 * Depth Offset Enable Point) should be set whenever non zero depth
1590 * bias (Slope, Bias) values are used. Setting this bit may have
1591 * some degradation of performance for some workloads."
1592 */
1593 if (state->offset_tri || state->offset_line || state->offset_point) {
1594 /* XXX need to scale offset_const according to the depth format */
1595 dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
1596
1597 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
1598 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
1599 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1600 }
1601 else {
1602 offset_const = 0.0f;
1603 offset_scale = 0.0f;
1604 offset_clamp = 0.0f;
1605 }
1606 }
1607 else {
1608 if (state->offset_tri)
1609 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
1610 if (state->offset_line)
1611 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
1612 if (state->offset_point)
1613 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1614 }
1615
1616 switch (state->fill_front) {
1617 case PIPE_POLYGON_MODE_FILL:
1618 dw1 |= GEN6_SF_FRONT_SOLID;
1619 break;
1620 case PIPE_POLYGON_MODE_LINE:
1621 dw1 |= GEN6_SF_FRONT_WIREFRAME;
1622 break;
1623 case PIPE_POLYGON_MODE_POINT:
1624 dw1 |= GEN6_SF_FRONT_POINT;
1625 break;
1626 }
1627
1628 switch (state->fill_back) {
1629 case PIPE_POLYGON_MODE_FILL:
1630 dw1 |= GEN6_SF_BACK_SOLID;
1631 break;
1632 case PIPE_POLYGON_MODE_LINE:
1633 dw1 |= GEN6_SF_BACK_WIREFRAME;
1634 break;
1635 case PIPE_POLYGON_MODE_POINT:
1636 dw1 |= GEN6_SF_BACK_POINT;
1637 break;
1638 }
1639
1640 if (state->front_ccw)
1641 dw1 |= GEN6_SF_WINDING_CCW;
1642
1643 dw2 = 0;
1644
1645 if (state->line_smooth) {
1646 /*
1647 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1648 *
1649 * "This field (Anti-aliasing Enable) must be disabled if any of the
1650 * render targets have integer (UINT or SINT) surface format."
1651 *
1652 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1653 *
1654 * "This field (Hierarchical Depth Buffer Enable) must be disabled
1655 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
1656 *
1657 * TODO We do not check those yet.
1658 */
1659 dw2 |= GEN6_SF_LINE_AA_ENABLE |
1660 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
1661 }
1662
1663 switch (state->cull_face) {
1664 case PIPE_FACE_NONE:
1665 dw2 |= GEN6_SF_CULL_NONE;
1666 break;
1667 case PIPE_FACE_FRONT:
1668 dw2 |= GEN6_SF_CULL_FRONT;
1669 break;
1670 case PIPE_FACE_BACK:
1671 dw2 |= GEN6_SF_CULL_BACK;
1672 break;
1673 case PIPE_FACE_FRONT_AND_BACK:
1674 dw2 |= GEN6_SF_CULL_BOTH;
1675 break;
1676 }
1677
1678 /*
1679 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
1680 * pixels in the minor direction. We have to make the lines slightly
1681 * thicker, 0.5 pixel on both sides, so that they intersect that many
1682 * pixels are considered into the lines.
1683 *
1684 * Line width is in U3.7.
1685 */
1686 line_width = (int) ((state->line_width +
1687 (float) state->line_smooth) * 128.0f + 0.5f);
1688 line_width = CLAMP(line_width, 0, 1023);
1689
1690 if (line_width == 128 && !state->line_smooth) {
1691 /* use GIQ rules */
1692 line_width = 0;
1693 }
1694
1695 dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1696
1697 if (state->scissor)
1698 dw2 |= GEN6_SF_SCISSOR_ENABLE;
1699
1700 dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
1701 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
1702
1703 if (state->line_last_pixel)
1704 dw3 |= 1 << 31;
1705
1706 if (state->flatshade_first) {
1707 dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
1708 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
1709 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1710 }
1711 else {
1712 dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
1713 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
1714 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1715 }
1716
1717 if (!state->point_size_per_vertex)
1718 dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
1719
1720 /* in U8.3 */
1721 point_width = (int) (state->point_size * 8.0f + 0.5f);
1722 point_width = CLAMP(point_width, 1, 2047);
1723
1724 dw3 |= point_width;
1725
1726 STATIC_ASSERT(Elements(sf->payload) >= 6);
1727 sf->payload[0] = dw1;
1728 sf->payload[1] = dw2;
1729 sf->payload[2] = dw3;
1730 sf->payload[3] = fui(offset_const);
1731 sf->payload[4] = fui(offset_scale);
1732 sf->payload[5] = fui(offset_clamp);
1733
1734 if (state->multisample) {
1735 sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
1736
1737 /*
1738 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1739 *
1740 * "Software must not program a value of 0.0 when running in
1741 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
1742 * when multisampling rasterization is enabled."
1743 */
1744 if (!line_width) {
1745 line_width = 128; /* 1.0f */
1746
1747 sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1748 }
1749 }
1750 else {
1751 sf->dw_msaa = 0;
1752 }
1753 }
1754
1755 /**
1756 * Fill in DW2 to DW7 of 3DSTATE_SF.
1757 */
1758 void
1759 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
1760 const struct ilo_rasterizer_state *rasterizer,
1761 int num_samples,
1762 enum pipe_format depth_format,
1763 uint32_t *payload, unsigned payload_len)
1764 {
1765 const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
1766
1767 assert(payload_len == Elements(sf->payload));
1768
1769 if (sf) {
1770 memcpy(payload, sf->payload, sizeof(sf->payload));
1771
1772 if (num_samples > 1)
1773 payload[1] |= sf->dw_msaa;
1774
1775 if (dev->gen >= ILO_GEN(7)) {
1776 int format;
1777
1778 /* separate stencil */
1779 switch (depth_format) {
1780 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1781 depth_format = PIPE_FORMAT_Z24X8_UNORM;
1782 break;
1783 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1784 depth_format = PIPE_FORMAT_Z32_FLOAT;;
1785 break;
1786 case PIPE_FORMAT_S8_UINT:
1787 depth_format = PIPE_FORMAT_NONE;
1788 break;
1789 default:
1790 break;
1791 }
1792
1793 format = gen6_translate_depth_format(depth_format);
1794 /* FLOAT surface is assumed when there is no depth buffer */
1795 if (format < 0)
1796 format = BRW_DEPTHFORMAT_D32_FLOAT;
1797
1798 payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1799 }
1800 }
1801 else {
1802 payload[0] = 0;
1803 payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
1804 payload[2] = 0;
1805 payload[3] = 0;
1806 payload[4] = 0;
1807 payload[5] = 0;
1808 }
1809 }
1810
1811 /**
1812 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
1813 */
1814 void
1815 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
1816 const struct ilo_rasterizer_state *rasterizer,
1817 const struct ilo_shader_state *fs,
1818 const struct ilo_shader_state *last_sh,
1819 uint32_t *dw, int num_dwords)
1820 {
1821 int output_count, vue_offset, vue_len;
1822 const struct ilo_kernel_routing *routing;
1823
1824 ILO_GPE_VALID_GEN(dev, 6, 7);
1825 assert(num_dwords == 13);
1826
1827 if (!fs) {
1828 memset(dw, 0, sizeof(dw[0]) * num_dwords);
1829
1830 if (dev->gen >= ILO_GEN(7))
1831 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
1832 else
1833 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
1834
1835 return;
1836 }
1837
1838 output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
1839 assert(output_count <= 32);
1840
1841 routing = ilo_shader_get_kernel_routing(fs);
1842
1843 vue_offset = routing->source_skip;
1844 assert(vue_offset % 2 == 0);
1845 vue_offset /= 2;
1846
1847 vue_len = (routing->source_len + 1) / 2;
1848 if (!vue_len)
1849 vue_len = 1;
1850
1851 if (dev->gen >= ILO_GEN(7)) {
1852 dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
1853 vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1854 vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
1855 if (routing->swizzle_enable)
1856 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
1857 }
1858 else {
1859 dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
1860 vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
1861 vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
1862 if (routing->swizzle_enable)
1863 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
1864 }
1865
1866 switch (rasterizer->state.sprite_coord_mode) {
1867 case PIPE_SPRITE_COORD_UPPER_LEFT:
1868 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
1869 break;
1870 case PIPE_SPRITE_COORD_LOWER_LEFT:
1871 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
1872 break;
1873 }
1874
1875 STATIC_ASSERT(Elements(routing->swizzles) >= 16);
1876 memcpy(&dw[1], routing->swizzles, 2 * 16);
1877
1878 /*
1879 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
1880 *
1881 * "This field (Point Sprite Texture Coordinate Enable) must be
1882 * programmed to 0 when non-point primitives are rendered."
1883 *
1884 * TODO We do not check that yet.
1885 */
1886 dw[9] = routing->point_sprite_enable;
1887
1888 dw[10] = routing->const_interp_enable;
1889
1890 /* WrapShortest enables */
1891 dw[11] = 0;
1892 dw[12] = 0;
1893 }
1894
1895 static void
1896 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1897 const struct ilo_rasterizer_state *rasterizer,
1898 const struct ilo_shader_state *fs,
1899 const struct ilo_shader_state *last_sh,
1900 struct ilo_cp *cp)
1901 {
1902 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1903 const uint8_t cmd_len = 20;
1904 uint32_t payload_raster[6], payload_sbe[13];
1905
1906 ILO_GPE_VALID_GEN(dev, 6, 6);
1907
1908 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1909 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1910 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1911 fs, last_sh, payload_sbe, Elements(payload_sbe));
1912
1913 ilo_cp_begin(cp, cmd_len);
1914 ilo_cp_write(cp, cmd | (cmd_len - 2));
1915 ilo_cp_write(cp, payload_sbe[0]);
1916 ilo_cp_write_multi(cp, payload_raster, 6);
1917 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1918 ilo_cp_end(cp);
1919 }
1920
1921 void
1922 ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
1923 const struct pipe_rasterizer_state *state,
1924 struct ilo_rasterizer_wm *wm)
1925 {
1926 uint32_t dw5, dw6;
1927
1928 ILO_GPE_VALID_GEN(dev, 6, 6);
1929
1930 /* only the FF unit states are set, as in GEN7 */
1931
1932 dw5 = GEN6_WM_LINE_AA_WIDTH_2_0;
1933
1934 /* same value as in 3DSTATE_SF */
1935 if (state->line_smooth)
1936 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
1937
1938 if (state->poly_stipple_enable)
1939 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
1940 if (state->line_stipple_enable)
1941 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
1942
1943 dw6 = GEN6_WM_POSITION_ZW_PIXEL |
1944 GEN6_WM_MSRAST_OFF_PIXEL |
1945 GEN6_WM_MSDISPMODE_PERSAMPLE;
1946
1947 if (state->bottom_edge_rule)
1948 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
1949
1950 /*
1951 * assertion that makes sure
1952 *
1953 * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
1954 *
1955 * is valid
1956 */
1957 STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 &&
1958 GEN6_WM_MSDISPMODE_PERSAMPLE == 0);
1959
1960 wm->dw_msaa_rast =
1961 (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0;
1962 wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL;
1963
1964 STATIC_ASSERT(Elements(wm->payload) >= 2);
1965 wm->payload[0] = dw5;
1966 wm->payload[1] = dw6;
1967 }
1968
1969 void
1970 ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
1971 const struct ilo_shader_state *fs,
1972 struct ilo_shader_cso *cso)
1973 {
1974 int start_grf, input_count, interps, max_threads;
1975 uint32_t dw2, dw4, dw5, dw6;
1976
1977 ILO_GPE_VALID_GEN(dev, 6, 6);
1978
1979 start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
1980 input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
1981 interps = ilo_shader_get_kernel_param(fs,
1982 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
1983
1984 /* see brwCreateContext() */
1985 max_threads = (dev->gt == 2) ? 80 : 40;
1986
1987 dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
1988
1989 dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
1990 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
1991 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
1992
1993 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
1994
1995 /*
1996 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1997 *
1998 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
1999 * PS kernel or color calculator has the ability to kill (discard)
2000 * pixels or samples, other than due to depth or stencil testing.
2001 * This bit is required to be ENABLED in the following situations:
2002 *
2003 * The API pixel shader program contains "killpix" or "discard"
2004 * instructions, or other code in the pixel shader kernel that can
2005 * cause the final pixel mask to differ from the pixel mask received
2006 * on dispatch.
2007 *
2008 * A sampler with chroma key enabled with kill pixel mode is used by
2009 * the pixel shader.
2010 *
2011 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
2012 * enabled.
2013 *
2014 * The pixel shader kernel generates and outputs oMask.
2015 *
2016 * Note: As ClipDistance clipping is fully supported in hardware and
2017 * therefore not via PS instructions, there should be no need to
2018 * ENABLE this bit due to ClipDistance clipping."
2019 */
2020 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
2021 dw5 |= GEN6_WM_KILL_ENABLE;
2022
2023 /*
2024 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
2025 *
2026 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
2027 * field must be set to disabled."
2028 *
2029 * TODO This is not checked yet.
2030 */
2031 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
2032 dw5 |= GEN6_WM_COMPUTED_DEPTH;
2033
2034 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
2035 dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
2036
2037 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
2038 dw5 |= GEN6_WM_USES_SOURCE_W;
2039
2040 /*
2041 * TODO set this bit only when
2042 *
2043 * a) fs writes colors and color is not masked, or
2044 * b) fs writes depth, or
2045 * c) fs or cc kills
2046 */
2047 if (true)
2048 dw5 |= GEN6_WM_DISPATCH_ENABLE;
2049
2050 assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
2051 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
2052
2053 dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
2054 GEN6_WM_POSOFFSET_NONE |
2055 interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
2056
2057 STATIC_ASSERT(Elements(cso->payload) >= 4);
2058 cso->payload[0] = dw2;
2059 cso->payload[1] = dw4;
2060 cso->payload[2] = dw5;
2061 cso->payload[3] = dw6;
2062 }
2063
2064 static void
2065 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
2066 const struct ilo_shader_state *fs,
2067 int num_samplers,
2068 const struct ilo_rasterizer_state *rasterizer,
2069 bool dual_blend, bool cc_may_kill,
2070 struct ilo_cp *cp)
2071 {
2072 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
2073 const uint8_t cmd_len = 9;
2074 const int num_samples = 1;
2075 const struct ilo_shader_cso *fs_cso;
2076 uint32_t dw2, dw4, dw5, dw6;
2077
2078 ILO_GPE_VALID_GEN(dev, 6, 6);
2079
2080 if (!fs) {
2081 /* see brwCreateContext() */
2082 const int max_threads = (dev->gt == 2) ? 80 : 40;
2083
2084 ilo_cp_begin(cp, cmd_len);
2085 ilo_cp_write(cp, cmd | (cmd_len - 2));
2086 ilo_cp_write(cp, 0);
2087 ilo_cp_write(cp, 0);
2088 ilo_cp_write(cp, 0);
2089 ilo_cp_write(cp, 0);
2090 /* honor the valid range even if dispatching is disabled */
2091 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
2092 ilo_cp_write(cp, 0);
2093 ilo_cp_write(cp, 0);
2094 ilo_cp_write(cp, 0);
2095 ilo_cp_end(cp);
2096
2097 return;
2098 }
2099
2100 fs_cso = ilo_shader_get_kernel_cso(fs);
2101 dw2 = fs_cso->payload[0];
2102 dw4 = fs_cso->payload[1];
2103 dw5 = fs_cso->payload[2];
2104 dw6 = fs_cso->payload[3];
2105
2106 dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
2107
2108 if (true) {
2109 dw4 |= GEN6_WM_STATISTICS_ENABLE;
2110 }
2111 else {
2112 /*
2113 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
2114 *
2115 * "This bit (Statistics Enable) must be disabled if either of these
2116 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
2117 * Resolve Enable or Depth Buffer Resolve Enable."
2118 */
2119 dw4 |= GEN6_WM_DEPTH_CLEAR;
2120 dw4 |= GEN6_WM_DEPTH_RESOLVE;
2121 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
2122 }
2123
2124 if (cc_may_kill) {
2125 dw5 |= GEN6_WM_KILL_ENABLE |
2126 GEN6_WM_DISPATCH_ENABLE;
2127 }
2128
2129 if (dual_blend)
2130 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
2131
2132 dw5 |= rasterizer->wm.payload[0];
2133
2134 dw6 |= rasterizer->wm.payload[1];
2135
2136 if (num_samples > 1) {
2137 dw6 |= rasterizer->wm.dw_msaa_rast |
2138 rasterizer->wm.dw_msaa_disp;
2139 }
2140
2141 ilo_cp_begin(cp, cmd_len);
2142 ilo_cp_write(cp, cmd | (cmd_len - 2));
2143 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
2144 ilo_cp_write(cp, dw2);
2145 ilo_cp_write(cp, 0); /* scratch */
2146 ilo_cp_write(cp, dw4);
2147 ilo_cp_write(cp, dw5);
2148 ilo_cp_write(cp, dw6);
2149 ilo_cp_write(cp, 0); /* kernel 1 */
2150 ilo_cp_write(cp, 0); /* kernel 2 */
2151 ilo_cp_end(cp);
2152 }
2153
/**
 * Pack the four buffer dwords of a 3DSTATE_CONSTANT_* command.
 *
 * For each of the four slots, a slot that has a buffer (index below
 * \p num_bufs) with a non-zero size gets its address ORed with the read
 * length; every other slot is written as zero.
 *
 * \param bufs             buffer addresses; each must be 32-byte aligned
 * \param sizes            buffer sizes in bytes
 * \param num_bufs         number of valid entries in \p bufs and \p sizes
 * \param max_read_length  upper bound, in 256-bit units, on the sum of the
 *                         read lengths (checked by assertion)
 * \param dw               receives the four packed dwords
 * \param num_dwords       length of \p dw; must be 4
 * \return a bitmask with bit i set when slot i is enabled
 */
static unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enable_mask = 0x0;
   int read_length_sum = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int read_len;

      /* unused or empty slots are cleared and stay disabled */
      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      read_len = (sizes[slot] + 31) / 32 - 1;

      assert(bufs[slot] % 32 == 0);
      assert(read_len < 32);

      /* the read length lives in the low bits freed by the alignment */
      dw[slot] = bufs[slot] | read_len;
      enable_mask |= 1 << slot;

      read_length_sum += read_len + 1;
   }

   assert(read_length_sum <= max_read_length);

   return enable_mask;
}
2188
/**
 * Emit 3DSTATE_CONSTANT_VS.  Valid on GEN6 only.
 *
 * \param bufs      addresses of up to four constant buffers
 * \param sizes     sizes of the buffers in bytes
 * \param num_bufs  number of buffers; at most 4
 * \param cp        command parser the command is written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t cmd_len = 5;
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   const int max_read_length = 32;
   uint32_t payload[4], enable_mask;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   enable_mask = gen6_fill_3dstate_constant(dev, bufs, sizes, num_bufs,
         max_read_length, payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   /* the per-buffer enable bits start at bit 12 of the header */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2219
/**
 * Emit 3DSTATE_CONSTANT_GS.  Valid on GEN6 only.
 *
 * \param bufs      addresses of up to four constant buffers
 * \param sizes     sizes of the buffers in bytes
 * \param num_bufs  number of buffers; at most 4
 * \param cp        command parser the command is written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t cmd_len = 5;
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   const int max_read_length = 64;
   uint32_t payload[4], enable_mask;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   enable_mask = gen6_fill_3dstate_constant(dev, bufs, sizes, num_bufs,
         max_read_length, payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   /* the per-buffer enable bits start at bit 12 of the header */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2250
/**
 * Emit 3DSTATE_CONSTANT_PS.  Valid on GEN6 only.
 *
 * \param bufs      addresses of up to four constant buffers
 * \param sizes     sizes of the buffers in bytes
 * \param num_bufs  number of buffers; at most 4
 * \param cp        command parser the command is written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t cmd_len = 5;
   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   const int max_read_length = 64;
   uint32_t payload[4], enable_mask;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   enable_mask = gen6_fill_3dstate_constant(dev, bufs, sizes, num_bufs,
         max_read_length, payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   /* the per-buffer enable bits start at bit 12 of the header */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2281
/**
 * Emit 3DSTATE_SAMPLE_MASK.  Valid on GEN6 only.
 *
 * \param sample_mask  sample mask; only the low four bits are emitted
 * \param cp           command parser the command is written to
 */
static void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = 0xf;
   const uint32_t header = cmd | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* drop any bits outside of the valid mask */
   ilo_cp_write(cp, sample_mask & valid_mask);
   ilo_cp_end(cp);
}
2300
2301 static void
2302 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
2303 unsigned x, unsigned y,
2304 unsigned width, unsigned height,
2305 struct ilo_cp *cp)
2306 {
2307 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
2308 const uint8_t cmd_len = 4;
2309 unsigned xmax = x + width - 1;
2310 unsigned ymax = y + height - 1;
2311 int rect_limit;
2312
2313 ILO_GPE_VALID_GEN(dev, 6, 7);
2314
2315 if (dev->gen >= ILO_GEN(7)) {
2316 rect_limit = 16383;
2317 }
2318 else {
2319 /*
2320 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
2321 *
2322 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
2323 * must be an even number"
2324 */
2325 assert(y % 2 == 0);
2326
2327 rect_limit = 8191;
2328 }
2329
2330 if (x > rect_limit) x = rect_limit;
2331 if (y > rect_limit) y = rect_limit;
2332 if (xmax > rect_limit) xmax = rect_limit;
2333 if (ymax > rect_limit) ymax = rect_limit;
2334
2335 ilo_cp_begin(cp, cmd_len);
2336 ilo_cp_write(cp, cmd | (cmd_len - 2));
2337 ilo_cp_write(cp, y << 16 | x);
2338 ilo_cp_write(cp, ymax << 16 | xmax);
2339
2340 /*
2341 * There is no need to set the origin. It is intended to support front
2342 * buffer rendering.
2343 */
2344 ilo_cp_write(cp, 0);
2345
2346 ilo_cp_end(cp);
2347 }
2348
2349 struct ilo_zs_surface_info {
2350 int surface_type;
2351 int format;
2352
2353 struct {
2354 struct intel_bo *bo;
2355 unsigned stride;
2356 enum intel_tiling_mode tiling;
2357 uint32_t offset;
2358 } zs, stencil, hiz;
2359
2360 unsigned width, height, depth;
2361 unsigned lod, first_layer, num_layers;
2362 uint32_t x_offset, y_offset;
2363 };
2364
2365 static void
2366 zs_init_info_null(const struct ilo_dev_info *dev,
2367 struct ilo_zs_surface_info *info)
2368 {
2369 ILO_GPE_VALID_GEN(dev, 6, 7);
2370
2371 memset(info, 0, sizeof(*info));
2372
2373 info->surface_type = BRW_SURFACE_NULL;
2374 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
2375 info->width = 1;
2376 info->height = 1;
2377 info->depth = 1;
2378 info->num_layers = 1;
2379 }
2380
2381 static void
2382 zs_init_info(const struct ilo_dev_info *dev,
2383 const struct ilo_texture *tex,
2384 enum pipe_format format,
2385 unsigned level,
2386 unsigned first_layer, unsigned num_layers,
2387 struct ilo_zs_surface_info *info)
2388 {
2389 const bool rebase_layer = true;
2390 struct intel_bo * const hiz_bo = NULL;
2391 bool separate_stencil;
2392 uint32_t x_offset[3], y_offset[3];
2393
2394 ILO_GPE_VALID_GEN(dev, 6, 7);
2395
2396 memset(info, 0, sizeof(*info));
2397
2398 info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
2399
2400 if (info->surface_type == BRW_SURFACE_CUBE) {
2401 /*
2402 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
2403 *
2404 * "For Other Surfaces (Cube Surfaces):
2405 * This field (Minimum Array Element) is ignored."
2406 *
2407 * "For Other Surfaces (Cube Surfaces):
2408 * This field (Render Target View Extent) is ignored."
2409 *
2410 * As such, we cannot set first_layer and num_layers on cube surfaces.
2411 * To work around that, treat it as a 2D surface.
2412 */
2413 info->surface_type = BRW_SURFACE_2D;
2414 }
2415
2416 if (dev->gen >= ILO_GEN(7)) {
2417 separate_stencil = true;
2418 }
2419 else {
2420 /*
2421 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2422 *
2423 * "This field (Separate Stencil Buffer Enable) must be set to the
2424 * same value (enabled or disabled) as Hierarchical Depth Buffer
2425 * Enable."
2426 */
2427 separate_stencil = (hiz_bo != NULL);
2428 }
2429
2430 /*
2431 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2432 *
2433 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
2434 * Surface Format of the depth buffer cannot be
2435 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2436 * requires the separate stencil buffer."
2437 *
2438 * From the Ironlake PRM, volume 2 part 1, page 330:
2439 *
2440 * "If this field (Separate Stencil Buffer Enable) is disabled, the
2441 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
2442 *
2443 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
2444 * is indeed used, the depth values output by the fragment shaders will
2445 * be different when read back.
2446 *
2447 * As for GEN7+, separate_stencil is always true.
2448 */
2449 switch (format) {
2450 case PIPE_FORMAT_Z16_UNORM:
2451 info->format = BRW_DEPTHFORMAT_D16_UNORM;
2452 break;
2453 case PIPE_FORMAT_Z32_FLOAT:
2454 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
2455 break;
2456 case PIPE_FORMAT_Z24X8_UNORM:
2457 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2458 info->format = (separate_stencil) ?
2459 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2460 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2461 break;
2462 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2463 info->format = (separate_stencil) ?
2464 BRW_DEPTHFORMAT_D32_FLOAT :
2465 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
2466 break;
2467 case PIPE_FORMAT_S8_UINT:
2468 if (separate_stencil) {
2469 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
2470 break;
2471 }
2472 /* fall through */
2473 default:
2474 assert(!"unsupported depth/stencil format");
2475 zs_init_info_null(dev, info);
2476 return;
2477 break;
2478 }
2479
2480 if (format != PIPE_FORMAT_S8_UINT) {
2481 info->zs.bo = tex->bo;
2482 info->zs.stride = tex->bo_stride;
2483 info->zs.tiling = tex->tiling;
2484
2485 if (rebase_layer) {
2486 info->zs.offset = ilo_texture_get_slice_offset(tex,
2487 level, first_layer, &x_offset[0], &y_offset[0]);
2488 }
2489 }
2490
2491 if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
2492 const struct ilo_texture *s8_tex =
2493 (tex->separate_s8) ? tex->separate_s8 : tex;
2494
2495 info->stencil.bo = s8_tex->bo;
2496
2497 /*
2498 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
2499 *
2500 * "The pitch must be set to 2x the value computed based on width,
2501 * as the stencil buffer is stored with two rows interleaved."
2502 *
2503 * According to the classic driver, we need to do the same for GEN7+
2504 * even though the Ivy Bridge PRM does not say anything about it.
2505 */
2506 info->stencil.stride = s8_tex->bo_stride * 2;
2507
2508 info->stencil.tiling = s8_tex->tiling;
2509
2510 if (rebase_layer) {
2511 info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
2512 level, first_layer, &x_offset[1], &y_offset[1]);
2513 }
2514 }
2515
2516 if (hiz_bo) {
2517 info->hiz.bo = hiz_bo;
2518 info->hiz.stride = 0;
2519 info->hiz.tiling = 0;
2520 info->hiz.offset = 0;
2521 x_offset[2] = 0;
2522 y_offset[2] = 0;
2523 }
2524
2525 info->width = tex->base.width0;
2526 info->height = tex->base.height0;
2527 info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
2528 tex->base.depth0 : num_layers;
2529
2530 info->lod = level;
2531 info->first_layer = first_layer;
2532 info->num_layers = num_layers;
2533
2534 if (rebase_layer) {
2535 /* the size of the layer */
2536 info->width = u_minify(info->width, level);
2537 info->height = u_minify(info->height, level);
2538 if (info->surface_type == BRW_SURFACE_3D)
2539 info->depth = u_minify(info->depth, level);
2540 else
2541 info->depth = 1;
2542
2543 /* no layered rendering */
2544 assert(num_layers == 1);
2545
2546 info->lod = 0;
2547 info->first_layer = 0;
2548 info->num_layers = 1;
2549
2550 /* all three share the same X/Y offsets */
2551 if (info->zs.bo) {
2552 if (info->stencil.bo) {
2553 assert(x_offset[0] == x_offset[1]);
2554 assert(y_offset[0] == y_offset[1]);
2555 }
2556
2557 info->x_offset = x_offset[0];
2558 info->y_offset = y_offset[0];
2559 }
2560 else {
2561 assert(info->stencil.bo);
2562
2563 info->x_offset = x_offset[1];
2564 info->y_offset = y_offset[1];
2565 }
2566
2567 if (info->hiz.bo) {
2568 assert(info->x_offset == x_offset[2]);
2569 assert(info->y_offset == y_offset[2]);
2570 }
2571
2572 /*
2573 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
2574 *
2575 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
2576 * Coordinate Offset X) must be zero to ensure correct alignment"
2577 *
2578 * XXX Skip the check for gen6, which seems to be fine. We need to make
2579 * sure that does not happen eventually.
2580 */
2581 if (dev->gen >= ILO_GEN(7)) {
2582 assert((info->x_offset & 7) == 0 && (info->y_offset & 7) == 0);
2583 info->x_offset &= ~7;
2584 info->y_offset &= ~7;
2585 }
2586
2587 info->width += info->x_offset;
2588 info->height += info->y_offset;
2589
2590 /* we have to treat them as 2D surfaces */
2591 if (info->surface_type == BRW_SURFACE_CUBE) {
2592 assert(tex->base.width0 == tex->base.height0);
2593 /* we will set slice_offset to point to the single face */
2594 info->surface_type = BRW_SURFACE_2D;
2595 }
2596 else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
2597 assert(tex->base.height0 == 1);
2598 info->surface_type = BRW_SURFACE_2D;
2599 }
2600 }
2601 }
2602
2603 void
2604 ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
2605 const struct ilo_texture *tex,
2606 enum pipe_format format,
2607 unsigned level,
2608 unsigned first_layer, unsigned num_layers,
2609 struct ilo_zs_surface *zs)
2610 {
2611 const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
2612 const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
2613 struct ilo_zs_surface_info info;
2614 uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
2615
2616 ILO_GPE_VALID_GEN(dev, 6, 7);
2617
2618 if (tex)
2619 zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
2620 else
2621 zs_init_info_null(dev, &info);
2622
2623 switch (info.surface_type) {
2624 case BRW_SURFACE_NULL:
2625 break;
2626 case BRW_SURFACE_1D:
2627 assert(info.width <= max_2d_size && info.height == 1 &&
2628 info.depth <= max_array_size);
2629 assert(info.first_layer < max_array_size - 1 &&
2630 info.num_layers <= max_array_size);
2631 break;
2632 case BRW_SURFACE_2D:
2633 assert(info.width <= max_2d_size && info.height <= max_2d_size &&
2634 info.depth <= max_array_size);
2635 assert(info.first_layer < max_array_size - 1 &&
2636 info.num_layers <= max_array_size);
2637 break;
2638 case BRW_SURFACE_3D:
2639 assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
2640 assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
2641 assert(info.x_offset == 0 && info.y_offset == 0);
2642 break;
2643 case BRW_SURFACE_CUBE:
2644 assert(info.width <= max_2d_size && info.height <= max_2d_size &&
2645 info.depth == 1);
2646 assert(info.first_layer == 0 && info.num_layers == 1);
2647 assert(info.width == info.height);
2648 assert(info.x_offset == 0 && info.y_offset == 0);
2649 break;
2650 default:
2651 assert(!"unexpected depth surface type");
2652 break;
2653 }
2654
2655 dw1 = info.surface_type << 29 |
2656 info.format << 18;
2657
2658 if (info.zs.bo) {
2659 /* required for GEN6+ */
2660 assert(info.zs.tiling == INTEL_TILING_Y);
2661 assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
2662 info.zs.stride % 128 == 0);
2663 assert(info.width <= info.zs.stride);
2664
2665 dw1 |= (info.zs.stride - 1);
2666 dw2 = info.zs.offset;
2667 }
2668 else {
2669 dw2 = 0;
2670 }
2671
2672 if (dev->gen >= ILO_GEN(7)) {
2673 if (info.zs.bo)
2674 dw1 |= 1 << 28;
2675
2676 if (info.stencil.bo)
2677 dw1 |= 1 << 27;
2678
2679 if (info.hiz.bo)
2680 dw1 |= 1 << 22;
2681
2682 dw3 = (info.height - 1) << 18 |
2683 (info.width - 1) << 4 |
2684 info.lod;
2685
2686 dw4 = (info.depth - 1) << 21 |
2687 info.first_layer << 10;
2688
2689 dw5 = info.y_offset << 16 | info.x_offset;
2690
2691 dw6 = (info.num_layers - 1) << 21;
2692 }
2693 else {
2694 /* always Y-tiled */
2695 dw1 |= 1 << 27 |
2696 1 << 26;
2697
2698 if (info.hiz.bo) {
2699 dw1 |= 1 << 22 |
2700 1 << 21;
2701 }
2702
2703 dw3 = (info.height - 1) << 19 |
2704 (info.width - 1) << 6 |
2705 info.lod << 2 |
2706 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
2707
2708 dw4 = (info.depth - 1) << 21 |
2709 info.first_layer << 10 |
2710 (info.num_layers - 1) << 1;
2711
2712 dw5 = info.y_offset << 16 | info.x_offset;
2713
2714 dw6 = 0;
2715 }
2716
2717 STATIC_ASSERT(Elements(zs->payload) >= 10);
2718
2719 zs->payload[0] = dw1;
2720 zs->payload[1] = dw2;
2721 zs->payload[2] = dw3;
2722 zs->payload[3] = dw4;
2723 zs->payload[4] = dw5;
2724 zs->payload[5] = dw6;
2725
2726 /* do not increment reference count */
2727 zs->bo = info.zs.bo;
2728
2729 /* separate stencil */
2730 if (info.stencil.bo) {
2731 assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
2732 info.stencil.stride % 128 == 0);
2733
2734 zs->payload[6] = info.stencil.stride - 1;
2735 zs->payload[7] = info.stencil.offset;
2736
2737 /* do not increment reference count */
2738 zs->separate_s8_bo = info.stencil.bo;
2739 }
2740 else {
2741 zs->payload[6] = 0;
2742 zs->payload[7] = 0;
2743 zs->separate_s8_bo = NULL;
2744 }
2745
2746 /* hiz */
2747 if (info.hiz.bo) {
2748 zs->payload[8] = info.hiz.stride - 1;
2749 zs->payload[9] = info.hiz.offset;
2750
2751 /* do not increment reference count */
2752 zs->hiz_bo = info.hiz.bo;
2753 }
2754 else {
2755 zs->payload[8] = 0;
2756 zs->payload[9] = 0;
2757 zs->hiz_bo = NULL;
2758 }
2759 }
2760
2761 static void
2762 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2763 const struct ilo_zs_surface *zs,
2764 struct ilo_cp *cp)
2765 {
2766 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2767 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
2768 const uint8_t cmd_len = 7;
2769
2770 ILO_GPE_VALID_GEN(dev, 6, 7);
2771
2772 ilo_cp_begin(cp, cmd_len);
2773 ilo_cp_write(cp, cmd | (cmd_len - 2));
2774 ilo_cp_write(cp, zs->payload[0]);
2775 ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
2776 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2777 ilo_cp_write(cp, zs->payload[2]);
2778 ilo_cp_write(cp, zs->payload[3]);
2779 ilo_cp_write(cp, zs->payload[4]);
2780 ilo_cp_write(cp, zs->payload[5]);
2781 ilo_cp_end(cp);
2782 }
2783
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET.  Valid on GEN6 and GEN7.
 *
 * \param x_offset  X offset of the stipple pattern, in [0, 31]
 * \param y_offset  Y offset of the stipple pattern, in [0, 31]
 * \param cp        command parser the command is written to
 */
static void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
   const uint8_t cmd_len = 2;
   const uint32_t header = cmd | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* X goes in bits 8 and up, Y in the low bits */
   ilo_cp_write(cp, x_offset << 8 | y_offset);
   ilo_cp_end(cp);
}
2801
2802 static void
2803 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
2804 const struct pipe_poly_stipple *pattern,
2805 struct ilo_cp *cp)
2806 {
2807 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
2808 const uint8_t cmd_len = 33;
2809 int i;
2810
2811 ILO_GPE_VALID_GEN(dev, 6, 7);
2812 assert(Elements(pattern->stipple) == 32);
2813
2814 ilo_cp_begin(cp, cmd_len);
2815 ilo_cp_write(cp, cmd | (cmd_len - 2));
2816 for (i = 0; i < 32; i++)
2817 ilo_cp_write(cp, pattern->stipple[i]);
2818 ilo_cp_end(cp);
2819 }
2820
2821 static void
2822 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
2823 unsigned pattern, unsigned factor,
2824 struct ilo_cp *cp)
2825 {
2826 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
2827 const uint8_t cmd_len = 3;
2828 unsigned inverse;
2829
2830 ILO_GPE_VALID_GEN(dev, 6, 7);
2831 assert((pattern & 0xffff) == pattern);
2832 assert(factor >= 1 && factor <= 256);
2833
2834 ilo_cp_begin(cp, cmd_len);
2835 ilo_cp_write(cp, cmd | (cmd_len - 2));
2836 ilo_cp_write(cp, pattern);
2837
2838 if (dev->gen >= ILO_GEN(7)) {
2839 /* in U1.16 */
2840 inverse = (unsigned) (65536.0f / factor);
2841 ilo_cp_write(cp, inverse << 15 | factor);
2842 }
2843 else {
2844 /* in U1.13 */
2845 inverse = (unsigned) (8192.0f / factor);
2846 ilo_cp_write(cp, inverse << 16 | factor);
2847 }
2848
2849 ilo_cp_end(cp);
2850 }
2851
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS with both parameter dwords set to zero.
 * Valid on GEN6 and GEN7.
 */
static void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
   const uint8_t cmd_len = 3;
   /* each dword packs two fields, at bits 16 and 0; all of them are zero */
   const uint32_t dw1 = 0 << 16 | 0;
   const uint32_t dw2 = 0 << 16 | 0;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_end(cp);
}
2867
2868 static void
2869 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
2870 int index, unsigned svbi,
2871 unsigned max_svbi,
2872 bool load_vertex_count,
2873 struct ilo_cp *cp)
2874 {
2875 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
2876 const uint8_t cmd_len = 4;
2877 uint32_t dw1;
2878
2879 ILO_GPE_VALID_GEN(dev, 6, 6);
2880 assert(index >= 0 && index < 4);
2881
2882 dw1 = index << SVB_INDEX_SHIFT;
2883 if (load_vertex_count)
2884 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
2885
2886 ilo_cp_begin(cp, cmd_len);
2887 ilo_cp_write(cp, cmd | (cmd_len - 2));
2888 ilo_cp_write(cp, dw1);
2889 ilo_cp_write(cp, svbi);
2890 ilo_cp_write(cp, max_svbi);
2891 ilo_cp_end(cp);
2892 }
2893
2894 static void
2895 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
2896 int num_samples,
2897 const uint32_t *packed_sample_pos,
2898 bool pixel_location_center,
2899 struct ilo_cp *cp)
2900 {
2901 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
2902 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
2903 uint32_t dw1, dw2, dw3;
2904
2905 ILO_GPE_VALID_GEN(dev, 6, 7);
2906
2907 dw1 = (pixel_location_center) ?
2908 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
2909
2910 switch (num_samples) {
2911 case 0:
2912 case 1:
2913 dw1 |= MS_NUMSAMPLES_1;
2914 dw2 = 0;
2915 dw3 = 0;
2916 break;
2917 case 4:
2918 dw1 |= MS_NUMSAMPLES_4;
2919 dw2 = packed_sample_pos[0];
2920 dw3 = 0;
2921 break;
2922 case 8:
2923 assert(dev->gen >= ILO_GEN(7));
2924 dw1 |= MS_NUMSAMPLES_8;
2925 dw2 = packed_sample_pos[0];
2926 dw3 = packed_sample_pos[1];
2927 break;
2928 default:
2929 assert(!"unsupported sample count");
2930 dw1 |= MS_NUMSAMPLES_1;
2931 dw2 = 0;
2932 dw3 = 0;
2933 break;
2934 }
2935
2936 ilo_cp_begin(cp, cmd_len);
2937 ilo_cp_write(cp, cmd | (cmd_len - 2));
2938 ilo_cp_write(cp, dw1);
2939 ilo_cp_write(cp, dw2);
2940 if (dev->gen >= ILO_GEN(7))
2941 ilo_cp_write(cp, dw3);
2942 ilo_cp_end(cp);
2943 }
2944
2945 static void
2946 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
2947 const struct ilo_zs_surface *zs,
2948 struct ilo_cp *cp)
2949 {
2950 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2951 ILO_GPE_CMD(0x3, 0x0, 0x06) :
2952 ILO_GPE_CMD(0x3, 0x1, 0x0e);
2953 const uint8_t cmd_len = 3;
2954
2955 ILO_GPE_VALID_GEN(dev, 6, 7);
2956
2957 ilo_cp_begin(cp, cmd_len);
2958 ilo_cp_write(cp, cmd | (cmd_len - 2));
2959 /* see ilo_gpe_init_zs_surface() */
2960 ilo_cp_write(cp, zs->payload[6]);
2961 ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
2962 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2963 ilo_cp_end(cp);
2964 }
2965
2966 static void
2967 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2968 const struct ilo_zs_surface *zs,
2969 struct ilo_cp *cp)
2970 {
2971 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2972 ILO_GPE_CMD(0x3, 0x0, 0x07) :
2973 ILO_GPE_CMD(0x3, 0x1, 0x0f);
2974 const uint8_t cmd_len = 3;
2975
2976 ILO_GPE_VALID_GEN(dev, 6, 7);
2977
2978 ilo_cp_begin(cp, cmd_len);
2979 ilo_cp_write(cp, cmd | (cmd_len - 2));
2980 /* see ilo_gpe_init_zs_surface() */
2981 ilo_cp_write(cp, zs->payload[8]);
2982 ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
2983 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2984 ilo_cp_end(cp);
2985 }
2986
2987 static void
2988 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
2989 uint32_t clear_val,
2990 struct ilo_cp *cp)
2991 {
2992 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
2993 const uint8_t cmd_len = 2;
2994
2995 ILO_GPE_VALID_GEN(dev, 6, 6);
2996
2997 ilo_cp_begin(cp, cmd_len);
2998 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2999 GEN5_DEPTH_CLEAR_VALID);
3000 ilo_cp_write(cp, clear_val);
3001 ilo_cp_end(cp);
3002 }
3003
3004 static void
3005 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
3006 uint32_t dw1,
3007 struct intel_bo *bo, uint32_t bo_offset,
3008 bool write_qword,
3009 struct ilo_cp *cp)
3010 {
3011 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
3012 const uint8_t cmd_len = (write_qword) ? 5 : 4;
3013 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
3014 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
3015
3016 ILO_GPE_VALID_GEN(dev, 6, 7);
3017
3018 if (dw1 & PIPE_CONTROL_CS_STALL) {
3019 /*
3020 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
3021 *
3022 * "1 of the following must also be set (when CS stall is set):
3023 *
3024 * * Depth Cache Flush Enable ([0] of DW1)
3025 * * Stall at Pixel Scoreboard ([1] of DW1)
3026 * * Depth Stall ([13] of DW1)
3027 * * Post-Sync Operation ([13] of DW1)
3028 * * Render Target Cache Flush Enable ([12] of DW1)
3029 * * Notify Enable ([8] of DW1)"
3030 *
3031 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
3032 *
3033 * "One of the following must also be set (when CS stall is set):
3034 *
3035 * * Render Target Cache Flush Enable ([12] of DW1)
3036 * * Depth Cache Flush Enable ([0] of DW1)
3037 * * Stall at Pixel Scoreboard ([1] of DW1)
3038 * * Depth Stall ([13] of DW1)
3039 * * Post-Sync Operation ([13] of DW1)"
3040 */
3041 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
3042 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
3043 PIPE_CONTROL_STALL_AT_SCOREBOARD |
3044 PIPE_CONTROL_DEPTH_STALL;
3045
3046 /* post-sync op */
3047 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
3048 PIPE_CONTROL_WRITE_DEPTH_COUNT |
3049 PIPE_CONTROL_WRITE_TIMESTAMP;
3050
3051 if (dev->gen == ILO_GEN(6))
3052 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
3053
3054 assert(dw1 & bit_test);
3055 }
3056
3057 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
3058 /*
3059 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
3060 *
3061 * "Following bits must be clear (when Depth Stall is set):
3062 *
3063 * * Render Target Cache Flush Enable ([12] of DW1)
3064 * * Depth Cache Flush Enable ([0] of DW1)"
3065 */
3066 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
3067 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
3068 }
3069
3070 ilo_cp_begin(cp, cmd_len);
3071 ilo_cp_write(cp, cmd | (cmd_len - 2));
3072 ilo_cp_write(cp, dw1);
3073 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
3074 ilo_cp_write(cp, 0);
3075 if (write_qword)
3076 ilo_cp_write(cp, 0);
3077 ilo_cp_end(cp);
3078 }
3079
3080 static void
3081 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
3082 const struct pipe_draw_info *info,
3083 const struct ilo_ib_state *ib,
3084 bool rectlist,
3085 struct ilo_cp *cp)
3086 {
3087 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
3088 const uint8_t cmd_len = 6;
3089 const int prim = (rectlist) ?
3090 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
3091 const int vb_access = (info->indexed) ?
3092 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
3093 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
3094 const uint32_t vb_start = info->start +
3095 ((info->indexed) ? ib->draw_start_offset : 0);
3096
3097 ILO_GPE_VALID_GEN(dev, 6, 6);
3098
3099 ilo_cp_begin(cp, cmd_len);
3100 ilo_cp_write(cp, cmd | (cmd_len - 2) |
3101 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
3102 vb_access);
3103 ilo_cp_write(cp, info->count);
3104 ilo_cp_write(cp, vb_start);
3105 ilo_cp_write(cp, info->instance_count);
3106 ilo_cp_write(cp, info->start_instance);
3107 ilo_cp_write(cp, info->index_bias);
3108 ilo_cp_end(cp);
3109 }
3110
/**
 * Write \p num_ids interface descriptors, eight dwords each, into space
 * obtained from ilo_cp_steal_ptr().  Valid on GEN6 only.
 *
 * \param cs                   compute shaders, one per descriptor
 * \param sampler_state        per-descriptor value ORed into DW2
 * \param num_samplers         per-descriptor sampler count, programmed in
 *                             groups of four (rounded up)
 * \param binding_table_state  per-descriptor value ORed into DW3
 * \param num_surfaces         per-descriptor surface count
 * \param num_ids              number of descriptors
 * \param cp                   command parser the state is written to
 * \return the state offset reported by ilo_cp_steal_ptr()
 */
static uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32 / 4;
   const int state_len = (32 / 4) * num_ids;
   uint32_t state_offset, *base;
   int id;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   base = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
         state_len, state_align, &state_offset);

   for (id = 0; id < num_ids; id++) {
      /* each descriptor occupies eight consecutive dwords */
      uint32_t *desc = base + id * 8;

      desc[0] = ilo_shader_get_kernel_offset(cs[id]);
      desc[1] = 1 << 18; /* SPF */
      desc[2] = sampler_state[id] |
                (num_samplers[id] + 3) / 4 << 2;
      desc[3] = binding_table_state[id] |
                num_surfaces[id];
      desc[4] = 0 << 16 |  /* CURBE Read Length */
                0;         /* CURBE Read Offset */
      desc[5] = 0; /* Barrier ID */
      desc[6] = 0;
      desc[7] = 0;
   }

   return state_offset;
}
3162
3163 static void
3164 viewport_get_guardband(const struct ilo_dev_info *dev,
3165 int center_x, int center_y,
3166 int *min_gbx, int *max_gbx,
3167 int *min_gby, int *max_gby)
3168 {
3169 /*
3170 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
3171 *
3172 * "Per-Device Guardband Extents
3173 *
3174 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
3175 * - Maximum Post-Clamp Delta (X or Y): 16K"
3176 *
3177 * "In addition, in order to be correctly rendered, objects must have a
3178 * screenspace bounding box not exceeding 8K in the X or Y direction.
3179 * This additional restriction must also be comprehended by software,
3180 * i.e., enforced by use of clipping."
3181 *
3182 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
3183 *
3184 * "Per-Device Guardband Extents
3185 *
3186 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
3187 * - Maximum Post-Clamp Delta (X or Y): N/A"
3188 *
3189 * "In addition, in order to be correctly rendered, objects must have a
3190 * screenspace bounding box not exceeding 8K in the X or Y direction.
3191 * This additional restriction must also be comprehended by software,
3192 * i.e., enforced by use of clipping."
3193 *
3194 * Combined, the bounding box of any object can not exceed 8K in both
3195 * width and height.
3196 *
3197 * Below we set the guardband as a squre of length 8K, centered at where
3198 * the viewport is. This makes sure all objects passing the GB test are
3199 * valid to the renderer, and those failing the XY clipping have a
3200 * better chance of passing the GB test.
3201 */
3202 const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
3203 const int half_len = 8192 / 2;
3204
3205 /* make sure the guardband is within the valid range */
3206 if (center_x - half_len < -max_extent)
3207 center_x = -max_extent + half_len;
3208 else if (center_x + half_len > max_extent - 1)
3209 center_x = max_extent - half_len;
3210
3211 if (center_y - half_len < -max_extent)
3212 center_y = -max_extent + half_len;
3213 else if (center_y + half_len > max_extent - 1)
3214 center_y = max_extent - half_len;
3215
3216 *min_gbx = (float) (center_x - half_len);
3217 *max_gbx = (float) (center_x + half_len);
3218 *min_gby = (float) (center_y - half_len);
3219 *max_gby = (float) (center_y + half_len);
3220 }
3221
3222 void
3223 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
3224 const struct pipe_viewport_state *state,
3225 struct ilo_viewport_cso *vp)
3226 {
3227 const float scale_x = fabs(state->scale[0]);
3228 const float scale_y = fabs(state->scale[1]);
3229 const float scale_z = fabs(state->scale[2]);
3230 int min_gbx, max_gbx, min_gby, max_gby;
3231
3232 ILO_GPE_VALID_GEN(dev, 6, 7);
3233
3234 viewport_get_guardband(dev,
3235 (int) state->translate[0],
3236 (int) state->translate[1],
3237 &min_gbx, &max_gbx, &min_gby, &max_gby);
3238
3239 /* matrix form */
3240 vp->m00 = state->scale[0];
3241 vp->m11 = state->scale[1];
3242 vp->m22 = state->scale[2];
3243 vp->m30 = state->translate[0];
3244 vp->m31 = state->translate[1];
3245 vp->m32 = state->translate[2];
3246
3247 /* guardband in NDC space */
3248 vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
3249 vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
3250 vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
3251 vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
3252
3253 /* viewport in screen space */
3254 vp->min_x = scale_x * -1.0f + state->translate[0];
3255 vp->max_x = scale_x * 1.0f + state->translate[0];
3256 vp->min_y = scale_y * -1.0f + state->translate[1];
3257 vp->max_y = scale_y * 1.0f + state->translate[1];
3258 vp->min_z = scale_z * -1.0f + state->translate[2];
3259 vp->max_z = scale_z * 1.0f + state->translate[2];
3260 }
3261
3262 static uint32_t
3263 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
3264 const struct ilo_viewport_cso *viewports,
3265 unsigned num_viewports,
3266 struct ilo_cp *cp)
3267 {
3268 const int state_align = 32 / 4;
3269 const int state_len = 8 * num_viewports;
3270 uint32_t state_offset, *dw;
3271 unsigned i;
3272
3273 ILO_GPE_VALID_GEN(dev, 6, 6);
3274
3275 /*
3276 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
3277 *
3278 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
3279 * stored as an array of up to 16 elements..."
3280 */
3281 assert(num_viewports && num_viewports <= 16);
3282
3283 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
3284 state_len, state_align, &state_offset);
3285
3286 for (i = 0; i < num_viewports; i++) {
3287 const struct ilo_viewport_cso *vp = &viewports[i];
3288
3289 dw[0] = fui(vp->m00);
3290 dw[1] = fui(vp->m11);
3291 dw[2] = fui(vp->m22);
3292 dw[3] = fui(vp->m30);
3293 dw[4] = fui(vp->m31);
3294 dw[5] = fui(vp->m32);
3295 dw[6] = 0;
3296 dw[7] = 0;
3297
3298 dw += 8;
3299 }
3300
3301 return state_offset;
3302 }
3303
3304 static uint32_t
3305 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
3306 const struct ilo_viewport_cso *viewports,
3307 unsigned num_viewports,
3308 struct ilo_cp *cp)
3309 {
3310 const int state_align = 32 / 4;
3311 const int state_len = 4 * num_viewports;
3312 uint32_t state_offset, *dw;
3313 unsigned i;
3314
3315 ILO_GPE_VALID_GEN(dev, 6, 6);
3316
3317 /*
3318 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
3319 *
3320 * "The viewport-related state is stored as an array of up to 16
3321 * elements..."
3322 */
3323 assert(num_viewports && num_viewports <= 16);
3324
3325 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
3326 state_len, state_align, &state_offset);
3327
3328 for (i = 0; i < num_viewports; i++) {
3329 const struct ilo_viewport_cso *vp = &viewports[i];
3330
3331 dw[0] = fui(vp->min_gbx);
3332 dw[1] = fui(vp->max_gbx);
3333 dw[2] = fui(vp->min_gby);
3334 dw[3] = fui(vp->max_gby);
3335
3336 dw += 4;
3337 }
3338
3339 return state_offset;
3340 }
3341
3342 static uint32_t
3343 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
3344 const struct ilo_viewport_cso *viewports,
3345 unsigned num_viewports,
3346 struct ilo_cp *cp)
3347 {
3348 const int state_align = 32 / 4;
3349 const int state_len = 2 * num_viewports;
3350 uint32_t state_offset, *dw;
3351 unsigned i;
3352
3353 ILO_GPE_VALID_GEN(dev, 6, 7);
3354
3355 /*
3356 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
3357 *
3358 * "The viewport state is stored as an array of up to 16 elements..."
3359 */
3360 assert(num_viewports && num_viewports <= 16);
3361
3362 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
3363 state_len, state_align, &state_offset);
3364
3365 for (i = 0; i < num_viewports; i++) {
3366 const struct ilo_viewport_cso *vp = &viewports[i];
3367
3368 dw[0] = fui(vp->min_z);
3369 dw[1] = fui(vp->max_z);
3370
3371 dw += 2;
3372 }
3373
3374 return state_offset;
3375 }
3376
3377 static uint32_t
3378 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
3379 const struct pipe_stencil_ref *stencil_ref,
3380 float alpha_ref,
3381 const struct pipe_blend_color *blend_color,
3382 struct ilo_cp *cp)
3383 {
3384 const int state_align = 64 / 4;
3385 const int state_len = 6;
3386 uint32_t state_offset, *dw;
3387
3388 ILO_GPE_VALID_GEN(dev, 6, 7);
3389
3390 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
3391 state_len, state_align, &state_offset);
3392
3393 dw[0] = stencil_ref->ref_value[0] << 24 |
3394 stencil_ref->ref_value[1] << 16 |
3395 BRW_ALPHATEST_FORMAT_UNORM8;
3396 dw[1] = float_to_ubyte(alpha_ref);
3397 dw[2] = fui(blend_color->color[0]);
3398 dw[3] = fui(blend_color->color[1]);
3399 dw[4] = fui(blend_color->color[2]);
3400 dw[5] = fui(blend_color->color[3]);
3401
3402 return state_offset;
3403 }
3404
3405 static int
3406 gen6_blend_factor_dst_alpha_forced_one(int factor)
3407 {
3408 switch (factor) {
3409 case BRW_BLENDFACTOR_DST_ALPHA:
3410 return BRW_BLENDFACTOR_ONE;
3411 case BRW_BLENDFACTOR_INV_DST_ALPHA:
3412 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
3413 return BRW_BLENDFACTOR_ZERO;
3414 default:
3415 return factor;
3416 }
3417 }
3418
3419 static uint32_t
3420 blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
3421 const struct pipe_rt_blend_state *rt,
3422 bool dst_alpha_forced_one)
3423 {
3424 int rgb_src, rgb_dst, a_src, a_dst;
3425 uint32_t dw;
3426
3427 if (!rt->blend_enable)
3428 return 0;
3429
3430 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
3431 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
3432 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
3433 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
3434
3435 if (dst_alpha_forced_one) {
3436 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
3437 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
3438 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
3439 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
3440 }
3441
3442 dw = 1 << 31 |
3443 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
3444 a_src << 20 |
3445 a_dst << 15 |
3446 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
3447 rgb_src << 5 |
3448 rgb_dst;
3449
3450 if (rt->rgb_func != rt->alpha_func ||
3451 rgb_src != a_src || rgb_dst != a_dst)
3452 dw |= 1 << 30;
3453
3454 return dw;
3455 }
3456
3457 void
3458 ilo_gpe_init_blend(const struct ilo_dev_info *dev,
3459 const struct pipe_blend_state *state,
3460 struct ilo_blend_state *blend)
3461 {
3462 unsigned num_cso, i;
3463
3464 ILO_GPE_VALID_GEN(dev, 6, 7);
3465
3466 if (state->independent_blend_enable) {
3467 num_cso = Elements(blend->cso);
3468 }
3469 else {
3470 memset(blend->cso, 0, sizeof(blend->cso));
3471 num_cso = 1;
3472 }
3473
3474 blend->independent_blend_enable = state->independent_blend_enable;
3475 blend->alpha_to_coverage = state->alpha_to_coverage;
3476 blend->dual_blend = false;
3477
3478 for (i = 0; i < num_cso; i++) {
3479 const struct pipe_rt_blend_state *rt = &state->rt[i];
3480 struct ilo_blend_cso *cso = &blend->cso[i];
3481 bool dual_blend;
3482
3483 cso->payload[0] = 0;
3484 cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
3485 0x3;
3486
3487 if (!(rt->colormask & PIPE_MASK_A))
3488 cso->payload[1] |= 1 << 27;
3489 if (!(rt->colormask & PIPE_MASK_R))
3490 cso->payload[1] |= 1 << 26;
3491 if (!(rt->colormask & PIPE_MASK_G))
3492 cso->payload[1] |= 1 << 25;
3493 if (!(rt->colormask & PIPE_MASK_B))
3494 cso->payload[1] |= 1 << 24;
3495
3496 if (state->dither)
3497 cso->payload[1] |= 1 << 12;
3498
3499 /*
3500 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3501 *
3502 * "Color Buffer Blending and Logic Ops must not be enabled
3503 * simultaneously, or behavior is UNDEFINED."
3504 *
3505 * Since state->logicop_enable takes precedence over rt->blend_enable,
3506 * no special care is needed.
3507 */
3508 if (state->logicop_enable) {
3509 cso->dw_logicop = 1 << 22 |
3510 gen6_translate_pipe_logicop(state->logicop_func) << 18;
3511
3512 cso->dw_blend = 0;
3513 cso->dw_blend_dst_alpha_forced_one = 0;
3514
3515 dual_blend = false;
3516 }
3517 else {
3518 cso->dw_logicop = 0;
3519
3520 cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
3521 cso->dw_blend_dst_alpha_forced_one =
3522 blend_get_rt_blend_enable(dev, rt, true);
3523
3524 dual_blend = (rt->blend_enable &&
3525 util_blend_state_is_dual(state, i));
3526 }
3527
3528 cso->dw_alpha_mod = 0;
3529
3530 if (state->alpha_to_coverage) {
3531 cso->dw_alpha_mod |= 1 << 31;
3532
3533 if (dev->gen >= ILO_GEN(7))
3534 cso->dw_alpha_mod |= 1 << 29;
3535 }
3536
3537 /*
3538 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
3539 *
3540 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
3541 * must be disabled."
3542 */
3543 if (state->alpha_to_one && !dual_blend)
3544 cso->dw_alpha_mod |= 1 << 30;
3545
3546 if (dual_blend)
3547 blend->dual_blend = true;
3548 }
3549 }
3550
3551 static uint32_t
3552 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
3553 const struct ilo_blend_state *blend,
3554 const struct ilo_fb_state *fb,
3555 const struct pipe_alpha_state *alpha,
3556 struct ilo_cp *cp)
3557 {
3558 const int state_align = 64 / 4;
3559 int state_len;
3560 uint32_t state_offset, *dw;
3561 unsigned num_targets, i;
3562
3563 ILO_GPE_VALID_GEN(dev, 6, 7);
3564
3565 /*
3566 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
3567 *
3568 * "The blend state is stored as an array of up to 8 elements..."
3569 */
3570 num_targets = fb->state.nr_cbufs;
3571 assert(num_targets <= 8);
3572
3573 if (!num_targets) {
3574 if (!alpha->enabled)
3575 return 0;
3576 /* to be able to reference alpha func */
3577 num_targets = 1;
3578 }
3579
3580 state_len = 2 * num_targets;
3581
3582 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
3583 state_len, state_align, &state_offset);
3584
3585 for (i = 0; i < num_targets; i++) {
3586 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
3587 const struct ilo_blend_cso *cso = &blend->cso[idx];
3588 const int num_samples = fb->num_samples;
3589 const struct util_format_description *format_desc =
3590 (idx < fb->state.nr_cbufs) ?
3591 util_format_description(fb->state.cbufs[idx]->format) : NULL;
3592 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
3593
3594 rt_is_unorm = true;
3595 rt_is_pure_integer = false;
3596 rt_dst_alpha_forced_one = false;
3597
3598 if (format_desc) {
3599 int ch;
3600
3601 switch (format_desc->format) {
3602 case PIPE_FORMAT_B8G8R8X8_UNORM:
3603 /* force alpha to one when the HW format has alpha */
3604 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
3605 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
3606 rt_dst_alpha_forced_one = true;
3607 break;
3608 default:
3609 break;
3610 }
3611
3612 for (ch = 0; ch < 4; ch++) {
3613 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
3614 continue;
3615
3616 if (format_desc->channel[ch].pure_integer) {
3617 rt_is_unorm = false;
3618 rt_is_pure_integer = true;
3619 break;
3620 }
3621
3622 if (!format_desc->channel[ch].normalized ||
3623 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
3624 rt_is_unorm = false;
3625 }
3626 }
3627
3628 dw[0] = cso->payload[0];
3629 dw[1] = cso->payload[1];
3630
3631 if (!rt_is_pure_integer) {
3632 if (rt_dst_alpha_forced_one)
3633 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
3634 else
3635 dw[0] |= cso->dw_blend;
3636 }
3637
3638 /*
3639 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3640 *
3641 * "Logic Ops are only supported on *_UNORM surfaces (excluding
3642 * _SRGB variants), otherwise Logic Ops must be DISABLED."
3643 *
3644 * Since logicop is ignored for non-UNORM color buffers, no special care
3645 * is needed.
3646 */
3647 if (rt_is_unorm)
3648 dw[1] |= cso->dw_logicop;
3649
3650 /*
3651 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
3652 *
3653 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
3654 * Dither both must be disabled."
3655 *
3656 * There is no such limitation on GEN7, or for AlphaToOne. But GL
3657 * requires that anyway.
3658 */
3659 if (num_samples > 1)
3660 dw[1] |= cso->dw_alpha_mod;
3661
3662 /*
3663 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
3664 *
3665 * "Alpha Test can only be enabled if Pixel Shader outputs a float
3666 * alpha value."
3667 */
3668 if (alpha->enabled && !rt_is_pure_integer) {
3669 dw[1] |= 1 << 16 |
3670 gen6_translate_dsa_func(alpha->func) << 13;
3671 }
3672
3673 dw += 2;
3674 }
3675
3676 return state_offset;
3677 }
3678
3679 void
3680 ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
3681 const struct pipe_depth_stencil_alpha_state *state,
3682 struct ilo_dsa_state *dsa)
3683 {
3684 const struct pipe_depth_state *depth = &state->depth;
3685 const struct pipe_stencil_state *stencil0 = &state->stencil[0];
3686 const struct pipe_stencil_state *stencil1 = &state->stencil[1];
3687 uint32_t *dw;
3688
3689 ILO_GPE_VALID_GEN(dev, 6, 7);
3690
3691 /* copy alpha state for later use */
3692 dsa->alpha = state->alpha;
3693
3694 STATIC_ASSERT(Elements(dsa->payload) >= 3);
3695 dw = dsa->payload;
3696
3697 /*
3698 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
3699 *
3700 * "If the Depth Buffer is either undefined or does not have a surface
3701 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
3702 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
3703 *
3704 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
3705 *
3706 * "This field (Stencil Test Enable) cannot be enabled if
3707 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
3708 *
3709 * TODO We do not check these yet.
3710 */
3711 if (stencil0->enabled) {
3712 dw[0] = 1 << 31 |
3713 gen6_translate_dsa_func(stencil0->func) << 28 |
3714 gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
3715 gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
3716 gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
3717 if (stencil0->writemask)
3718 dw[0] |= 1 << 18;
3719
3720 dw[1] = stencil0->valuemask << 24 |
3721 stencil0->writemask << 16;
3722
3723 if (stencil1->enabled) {
3724 dw[0] |= 1 << 15 |
3725 gen6_translate_dsa_func(stencil1->func) << 12 |
3726 gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
3727 gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
3728 gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
3729 if (stencil1->writemask)
3730 dw[0] |= 1 << 18;
3731
3732 dw[1] |= stencil1->valuemask << 8 |
3733 stencil1->writemask;
3734 }
3735 }
3736 else {
3737 dw[0] = 0;
3738 dw[1] = 0;
3739 }
3740
3741 /*
3742 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
3743 *
3744 * "Enabling the Depth Test function without defining a Depth Buffer is
3745 * UNDEFINED."
3746 *
3747 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
3748 *
3749 * "A Depth Buffer must be defined before enabling writes to it, or
3750 * operation is UNDEFINED."
3751 *
3752 * TODO We do not check these yet.
3753 */
3754 dw[2] = depth->enabled << 31 |
3755 depth->writemask << 26;
3756 if (depth->enabled)
3757 dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
3758 else
3759 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
3760 }
3761
3762 static uint32_t
3763 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
3764 const struct ilo_dsa_state *dsa,
3765 struct ilo_cp *cp)
3766 {
3767 const int state_align = 64 / 4;
3768 const int state_len = 3;
3769 uint32_t state_offset, *dw;
3770
3771
3772 ILO_GPE_VALID_GEN(dev, 6, 7);
3773
3774 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
3775 state_len, state_align, &state_offset);
3776
3777 dw[0] = dsa->payload[0];
3778 dw[1] = dsa->payload[1];
3779 dw[2] = dsa->payload[2];
3780
3781 return state_offset;
3782 }
3783
3784 void
3785 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
3786 unsigned start_slot,
3787 unsigned num_states,
3788 const struct pipe_scissor_state *states,
3789 struct ilo_scissor_state *scissor)
3790 {
3791 unsigned i;
3792
3793 ILO_GPE_VALID_GEN(dev, 6, 7);
3794
3795 for (i = 0; i < num_states; i++) {
3796 uint16_t min_x, min_y, max_x, max_y;
3797
3798 /* both max and min are inclusive in SCISSOR_RECT */
3799 if (states[i].minx < states[i].maxx &&
3800 states[i].miny < states[i].maxy) {
3801 min_x = states[i].minx;
3802 min_y = states[i].miny;
3803 max_x = states[i].maxx - 1;
3804 max_y = states[i].maxy - 1;
3805 }
3806 else {
3807 /* we have to make min greater than max */
3808 min_x = 1;
3809 min_y = 1;
3810 max_x = 0;
3811 max_y = 0;
3812 }
3813
3814 scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
3815 scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
3816 }
3817
3818 if (!start_slot && num_states)
3819 scissor->scissor0 = states[0];
3820 }
3821
3822 void
3823 ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
3824 struct ilo_scissor_state *scissor)
3825 {
3826 unsigned i;
3827
3828 for (i = 0; i < Elements(scissor->payload); i += 2) {
3829 scissor->payload[i + 0] = 1 << 16 | 1;
3830 scissor->payload[i + 1] = 0;
3831 }
3832 }
3833
3834 static uint32_t
3835 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
3836 const struct ilo_scissor_state *scissor,
3837 unsigned num_viewports,
3838 struct ilo_cp *cp)
3839 {
3840 const int state_align = 32 / 4;
3841 const int state_len = 2 * num_viewports;
3842 uint32_t state_offset, *dw;
3843
3844 ILO_GPE_VALID_GEN(dev, 6, 7);
3845
3846 /*
3847 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
3848 *
3849 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
3850 * stored as an array of up to 16 elements..."
3851 */
3852 assert(num_viewports && num_viewports <= 16);
3853
3854 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
3855 state_len, state_align, &state_offset);
3856
3857 memcpy(dw, scissor->payload, state_len * 4);
3858
3859 return state_offset;
3860 }
3861
/**
 * Emit BINDING_TABLE_STATE by copying \p num_surface_states entries from
 * \p surface_states, and return the offset reported by ilo_cp_steal_ptr().
 * Nothing is emitted, and 0 is returned, when the table is empty.
 */
static uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int alignment = 32 / 4;
   const int length = num_surface_states;
   uint32_t offset;
   uint32_t *table;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * The Sandy Bridge PRM, volume 4 part 1, page 69, says:
    *
    *   "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   if (num_surface_states) {
      table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
            length, alignment, &offset);
      memcpy(table, surface_states,
            num_surface_states * sizeof(surface_states[0]));

      return offset;
   }

   return 0;
}
3891
3892 void
3893 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
3894 unsigned width, unsigned height,
3895 unsigned depth, unsigned level,
3896 struct ilo_view_surface *surf)
3897 {
3898 uint32_t *dw;
3899
3900 ILO_GPE_VALID_GEN(dev, 6, 6);
3901
3902 /*
3903 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
3904 *
3905 * "A null surface will be used in instances where an actual surface is
3906 * not bound. When a write message is generated to a null surface, no
3907 * actual surface is written to. When a read message (including any
3908 * sampling engine message) is generated to a null surface, the result
3909 * is all zeros. Note that a null surface type is allowed to be used
3910 * with all messages, even if it is not specificially indicated as
3911 * supported. All of the remaining fields in surface state are ignored
3912 * for null surfaces, with the following exceptions:
3913 *
3914 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
3915 * depth buffer's corresponding state for all render target
3916 * surfaces, including null.
3917 * * Surface Format must be R8G8B8A8_UNORM."
3918 *
3919 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3920 *
3921 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
3922 * true"
3923 */
3924
3925 STATIC_ASSERT(Elements(surf->payload) >= 6);
3926 dw = surf->payload;
3927
3928 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
3929 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
3930
3931 dw[1] = 0;
3932
3933 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
3934 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
3935 level << BRW_SURFACE_LOD_SHIFT;
3936
3937 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
3938 BRW_SURFACE_TILED;
3939
3940 dw[4] = 0;
3941 dw[5] = 0;
3942
3943 surf->bo = NULL;
3944 }
3945
3946 void
3947 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
3948 const struct ilo_buffer *buf,
3949 unsigned offset, unsigned size,
3950 unsigned struct_size,
3951 enum pipe_format elem_format,
3952 bool is_rt, bool render_cache_rw,
3953 struct ilo_view_surface *surf)
3954 {
3955 const int elem_size = util_format_get_blocksize(elem_format);
3956 int width, height, depth, pitch;
3957 int surface_format, num_entries;
3958 uint32_t *dw;
3959
3960 ILO_GPE_VALID_GEN(dev, 6, 6);
3961
3962 /*
3963 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
3964 * structure in a buffer.
3965 */
3966
3967 surface_format = ilo_translate_color_format(elem_format);
3968
3969 num_entries = size / struct_size;
3970 /* see if there is enough space to fit another element */
3971 if (size % struct_size >= elem_size)
3972 num_entries++;
3973
3974 /*
3975 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3976 *
3977 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
3978 * Address) specifies the base address of first element of the
3979 * surface. The surface is interpreted as a simple array of that
3980 * single element type. The address must be naturally-aligned to the
3981 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
3982 * must be 16-byte aligned).
3983 *
3984 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
3985 * the base address of the first element of the surface, computed in
3986 * software by adding the surface base address to the byte offset of
3987 * the element in the buffer."
3988 */
3989 if (is_rt)
3990 assert(offset % elem_size == 0);
3991
3992 /*
3993 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
3994 *
3995 * "For buffer surfaces, the number of entries in the buffer ranges
3996 * from 1 to 2^27."
3997 */
3998 assert(num_entries >= 1 && num_entries <= 1 << 27);
3999
4000 /*
4001 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4002 *
4003 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
4004 * indicates the size of the structure."
4005 */
4006 pitch = struct_size;
4007
4008 pitch--;
4009 num_entries--;
4010 /* bits [6:0] */
4011 width = (num_entries & 0x0000007f);
4012 /* bits [19:7] */
4013 height = (num_entries & 0x000fff80) >> 7;
4014 /* bits [26:20] */
4015 depth = (num_entries & 0x07f00000) >> 20;
4016
4017 STATIC_ASSERT(Elements(surf->payload) >= 6);
4018 dw = surf->payload;
4019
4020 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
4021 surface_format << BRW_SURFACE_FORMAT_SHIFT;
4022 if (render_cache_rw)
4023 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
4024
4025 dw[1] = offset;
4026
4027 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
4028 width << BRW_SURFACE_WIDTH_SHIFT;
4029
4030 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
4031 pitch << BRW_SURFACE_PITCH_SHIFT;
4032
4033 dw[4] = 0;
4034 dw[5] = 0;
4035
4036 /* do not increment reference count */
4037 surf->bo = buf->bo;
4038 }
4039
4040 void
4041 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
4042 const struct ilo_texture *tex,
4043 enum pipe_format format,
4044 unsigned first_level,
4045 unsigned num_levels,
4046 unsigned first_layer,
4047 unsigned num_layers,
4048 bool is_rt, bool render_cache_rw,
4049 struct ilo_view_surface *surf)
4050 {
4051 int surface_type, surface_format;
4052 int width, height, depth, pitch, lod;
4053 unsigned layer_offset, x_offset, y_offset;
4054 uint32_t *dw;
4055
4056 ILO_GPE_VALID_GEN(dev, 6, 6);
4057
4058 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
4059 assert(surface_type != BRW_SURFACE_BUFFER);
4060
4061 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
4062 format = PIPE_FORMAT_Z32_FLOAT;
4063
4064 if (is_rt)
4065 surface_format = ilo_translate_render_format(format);
4066 else
4067 surface_format = ilo_translate_texture_format(format);
4068 assert(surface_format >= 0);
4069
4070 width = tex->base.width0;
4071 height = tex->base.height0;
4072 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
4073 tex->base.depth0 : num_layers;
4074 pitch = tex->bo_stride;
4075
4076 if (surface_type == BRW_SURFACE_CUBE) {
4077 /*
4078 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4079 *
4080 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
4081 * range of this field (Depth) is [0,84], indicating the number of
4082 * cube array elements (equal to the number of underlying 2D array
4083 * elements divided by 6). For other surfaces, this field must be
4084 * zero."
4085 *
4086 * When is_rt is true, we treat the texture as a 2D one to avoid the
4087 * restriction.
4088 */
4089 if (is_rt) {
4090 surface_type = BRW_SURFACE_2D;
4091 }
4092 else {
4093 assert(num_layers % 6 == 0);
4094 depth = num_layers / 6;
4095 }
4096 }
4097
4098 /* sanity check the size */
4099 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
4100 switch (surface_type) {
4101 case BRW_SURFACE_1D:
4102 assert(width <= 8192 && height == 1 && depth <= 512);
4103 assert(first_layer < 512 && num_layers <= 512);
4104 break;
4105 case BRW_SURFACE_2D:
4106 assert(width <= 8192 && height <= 8192 && depth <= 512);
4107 assert(first_layer < 512 && num_layers <= 512);
4108 break;
4109 case BRW_SURFACE_3D:
4110 assert(width <= 2048 && height <= 2048 && depth <= 2048);
4111 assert(first_layer < 2048 && num_layers <= 512);
4112 if (!is_rt)
4113 assert(first_layer == 0);
4114 break;
4115 case BRW_SURFACE_CUBE:
4116 assert(width <= 8192 && height <= 8192 && depth <= 85);
4117 assert(width == height);
4118 assert(first_layer < 512 && num_layers <= 512);
4119 if (is_rt)
4120 assert(first_layer == 0);
4121 break;
4122 default:
4123 assert(!"unexpected surface type");
4124 break;
4125 }
4126
4127 /* non-full array spacing is supported only on GEN7+ */
4128 assert(tex->array_spacing_full);
4129 /* non-interleaved samples are supported only on GEN7+ */
4130 if (tex->base.nr_samples > 1)
4131 assert(tex->interleaved);
4132
4133 if (is_rt) {
4134 /*
4135 * Compute the offset to the layer manually.
4136 *
4137 * For rendering, the hardware requires LOD to be the same for all
4138 * render targets and the depth buffer. We need to compute the offset
4139 * to the layer manually and always set LOD to 0.
4140 */
4141 if (true) {
4142 /* we lose the capability for layered rendering */
4143 assert(num_layers == 1);
4144
4145 layer_offset = ilo_texture_get_slice_offset(tex,
4146 first_level, first_layer, &x_offset, &y_offset);
4147
4148 assert(x_offset % 4 == 0);
4149 assert(y_offset % 2 == 0);
4150 x_offset /= 4;
4151 y_offset /= 2;
4152
4153 /* derive the size for the LOD */
4154 width = u_minify(width, first_level);
4155 height = u_minify(height, first_level);
4156 if (surface_type == BRW_SURFACE_3D)
4157 depth = u_minify(depth, first_level);
4158 else
4159 depth = 1;
4160
4161 first_level = 0;
4162 first_layer = 0;
4163 lod = 0;
4164 }
4165 else {
4166 layer_offset = 0;
4167 x_offset = 0;
4168 y_offset = 0;
4169 }
4170
4171 assert(num_levels == 1);
4172 lod = first_level;
4173 }
4174 else {
4175 layer_offset = 0;
4176 x_offset = 0;
4177 y_offset = 0;
4178
4179 lod = num_levels - 1;
4180 }
4181
4182 /*
4183 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
4184 *
4185 * "Linear render target surface base addresses must be element-size
4186 * aligned, for non-YUV surface formats, or a multiple of 2
4187 * element-sizes for YUV surface formats. Other linear surfaces have
4188 * no alignment requirements (byte alignment is sufficient.)"
4189 *
4190 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4191 *
4192 * "For linear render target surfaces, the pitch must be a multiple
4193 * of the element size for non-YUV surface formats. Pitch must be a
4194 * multiple of 2 * element size for YUV surface formats."
4195 *
4196 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
4197 *
4198 * "For linear surfaces, this field (X Offset) must be zero"
4199 */
4200 if (tex->tiling == INTEL_TILING_NONE) {
4201 if (is_rt) {
4202 const int elem_size = util_format_get_blocksize(format);
4203 assert(layer_offset % elem_size == 0);
4204 assert(pitch % elem_size == 0);
4205 }
4206
4207 assert(!x_offset);
4208 }
4209
4210 STATIC_ASSERT(Elements(surf->payload) >= 6);
4211 dw = surf->payload;
4212
4213 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
4214 surface_format << BRW_SURFACE_FORMAT_SHIFT |
4215 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
4216
4217 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
4218 dw[0] |= 1 << 9 |
4219 BRW_SURFACE_CUBEFACE_ENABLES;
4220 }
4221
4222 if (render_cache_rw)
4223 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
4224
4225 dw[1] = layer_offset;
4226
4227 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
4228 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
4229 lod << BRW_SURFACE_LOD_SHIFT;
4230
4231 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
4232 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
4233 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
4234
4235 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
4236 first_layer << 17 |
4237 (num_layers - 1) << 8 |
4238 ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
4239 BRW_SURFACE_MULTISAMPLECOUNT_1);
4240
4241 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
4242 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
4243 if (tex->valign_4)
4244 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
4245
4246 /* do not increment reference count */
4247 surf->bo = tex->bo;
4248 }
4249
4250 static uint32_t
4251 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
4252 const struct ilo_view_surface *surf,
4253 bool for_render,
4254 struct ilo_cp *cp)
4255 {
4256 const int state_align = 32 / 4;
4257 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
4258 uint32_t state_offset;
4259 uint32_t read_domains, write_domain;
4260
4261 ILO_GPE_VALID_GEN(dev, 6, 7);
4262
4263 if (for_render) {
4264 read_domains = INTEL_DOMAIN_RENDER;
4265 write_domain = INTEL_DOMAIN_RENDER;
4266 }
4267 else {
4268 read_domains = INTEL_DOMAIN_SAMPLER;
4269 write_domain = 0;
4270 }
4271
4272 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
4273
4274 STATIC_ASSERT(Elements(surf->payload) >= 8);
4275
4276 ilo_cp_write(cp, surf->payload[0]);
4277 ilo_cp_write_bo(cp, surf->payload[1],
4278 surf->bo, read_domains, write_domain);
4279 ilo_cp_write(cp, surf->payload[2]);
4280 ilo_cp_write(cp, surf->payload[3]);
4281 ilo_cp_write(cp, surf->payload[4]);
4282 ilo_cp_write(cp, surf->payload[5]);
4283
4284 if (dev->gen >= ILO_GEN(7)) {
4285 ilo_cp_write(cp, surf->payload[6]);
4286 ilo_cp_write(cp, surf->payload[7]);
4287 }
4288
4289 ilo_cp_end(cp);
4290
4291 return state_offset;
4292 }
4293
4294 static uint32_t
4295 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
4296 const struct pipe_stream_output_target *so,
4297 const struct pipe_stream_output_info *so_info,
4298 int so_index,
4299 struct ilo_cp *cp)
4300 {
4301 struct ilo_buffer *buf = ilo_buffer(so->buffer);
4302 unsigned bo_offset, struct_size;
4303 enum pipe_format elem_format;
4304 struct ilo_view_surface surf;
4305
4306 ILO_GPE_VALID_GEN(dev, 6, 6);
4307
4308 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
4309 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
4310
4311 switch (so_info->output[so_index].num_components) {
4312 case 1:
4313 elem_format = PIPE_FORMAT_R32_FLOAT;
4314 break;
4315 case 2:
4316 elem_format = PIPE_FORMAT_R32G32_FLOAT;
4317 break;
4318 case 3:
4319 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
4320 break;
4321 case 4:
4322 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
4323 break;
4324 default:
4325 assert(!"unexpected SO components length");
4326 elem_format = PIPE_FORMAT_R32_FLOAT;
4327 break;
4328 }
4329
4330 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
4331 struct_size, elem_format, false, true, &surf);
4332
4333 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
4334 }
4335
4336 static void
4337 sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
4338 const union pipe_color_union *color,
4339 uint32_t *dw, int num_dwords)
4340 {
4341 float rgba[4] = {
4342 color->f[0], color->f[1], color->f[2], color->f[3],
4343 };
4344
4345 ILO_GPE_VALID_GEN(dev, 6, 6);
4346
4347 assert(num_dwords >= 12);
4348
4349 /*
4350 * This state is not documented in the Sandy Bridge PRM, but in the
4351 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
4352 */
4353
4354 /* IEEE_FP */
4355 dw[1] = fui(rgba[0]);
4356 dw[2] = fui(rgba[1]);
4357 dw[3] = fui(rgba[2]);
4358 dw[4] = fui(rgba[3]);
4359
4360 /* FLOAT_16 */
4361 dw[5] = util_float_to_half(rgba[0]) |
4362 util_float_to_half(rgba[1]) << 16;
4363 dw[6] = util_float_to_half(rgba[2]) |
4364 util_float_to_half(rgba[3]) << 16;
4365
4366 /* clamp to [-1.0f, 1.0f] */
4367 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
4368 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
4369 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
4370 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
4371
4372 /* SNORM16 */
4373 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
4374 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
4375 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
4376 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
4377
4378 /* SNORM8 */
4379 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
4380 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
4381 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
4382 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
4383
4384 /* clamp to [0.0f, 1.0f] */
4385 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
4386 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
4387 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
4388 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
4389
4390 /* UNORM8 */
4391 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
4392 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
4393 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
4394 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
4395
4396 /* UNORM16 */
4397 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
4398 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
4399 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
4400 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
4401 }
4402
4403 void
4404 ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
4405 const struct pipe_sampler_state *state,
4406 struct ilo_sampler_cso *sampler)
4407 {
4408 int mip_filter, min_filter, mag_filter, max_aniso;
4409 int lod_bias, max_lod, min_lod;
4410 int wrap_s, wrap_t, wrap_r, wrap_cube;
4411 bool clamp_is_to_edge;
4412 uint32_t dw0, dw1, dw3;
4413
4414 ILO_GPE_VALID_GEN(dev, 6, 7);
4415
4416 memset(sampler, 0, sizeof(*sampler));
4417
4418 mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
4419 min_filter = gen6_translate_tex_filter(state->min_img_filter);
4420 mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
4421
4422 sampler->anisotropic = state->max_anisotropy;
4423
4424 if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
4425 max_aniso = state->max_anisotropy / 2 - 1;
4426 else if (state->max_anisotropy > 16)
4427 max_aniso = BRW_ANISORATIO_16;
4428 else
4429 max_aniso = BRW_ANISORATIO_2;
4430
4431 /*
4432 *
4433 * Here is how the hardware calculate per-pixel LOD, from my reading of the
4434 * PRMs:
4435 *
4436 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
4437 * other ways. The number of texels is measured using level
4438 * SurfMinLod.
4439 * 2) Bias is added to LOD.
4440 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
4441 * compared with Base to determine whether magnification or
4442 * minification is needed. (if preclamp is disabled, LOD is compared
4443 * with Base before clamping)
4444 * 4) If magnification is needed, or no mipmapping is requested, LOD is
4445 * set to floor(MinLod).
4446 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
4447 *
4448 * With Gallium interface, Base is always zero and
4449 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
4450 */
4451 if (dev->gen >= ILO_GEN(7)) {
4452 const float scale = 256.0f;
4453
4454 /* [-16.0, 16.0) in S4.8 */
4455 lod_bias = (int)
4456 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
4457 lod_bias &= 0x1fff;
4458
4459 /* [0.0, 14.0] in U4.8 */
4460 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
4461 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
4462 }
4463 else {
4464 const float scale = 64.0f;
4465
4466 /* [-16.0, 16.0) in S4.6 */
4467 lod_bias = (int)
4468 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
4469 lod_bias &= 0x7ff;
4470
4471 /* [0.0, 13.0] in U4.6 */
4472 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
4473 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
4474 }
4475
4476 /*
4477 * We want LOD to be clamped to determine magnification/minification, and
4478 * get set to zero when it is magnification or when mipmapping is disabled.
4479 * The hardware would set LOD to floor(MinLod) and that is a problem when
4480 * MinLod is greater than or equal to 1.0f.
4481 *
4482 * With Base being zero, it is always minification when MinLod is non-zero.
4483 * To achieve our goal, we just need to set MinLod to zero and set
4484 * MagFilter to MinFilter when mipmapping is disabled.
4485 */
4486 if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
4487 min_lod = 0;
4488 mag_filter = min_filter;
4489 }
4490
4491 /*
4492 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
4493 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
4494 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
4495 * texture coordinates to [0.0, 1.0].
4496 *
4497 * The clamping will be taken care of in the shaders. There are two
4498 * filters here, but let the minification one has a say.
4499 */
4500 clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
4501 if (!clamp_is_to_edge) {
4502 sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
4503 sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
4504 sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
4505 }
4506
4507 /* determine wrap s/t/r */
4508 wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
4509 wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
4510 wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);
4511
4512 /*
4513 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
4514 *
4515 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
4516 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
4517 * must have the same Address Control mode."
4518 *
4519 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
4520 *
4521 * "This field (Cube Surface Control Mode) must be set to
4522 * CUBECTRLMODE_PROGRAMMED"
4523 *
4524 * Therefore, we cannot use "Cube Surface Control Mode" for semless cube
4525 * map filtering.
4526 */
4527 if (state->seamless_cube_map &&
4528 (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
4529 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
4530 wrap_cube = BRW_TEXCOORDMODE_CUBE;
4531 }
4532 else {
4533 wrap_cube = BRW_TEXCOORDMODE_CLAMP;
4534 }
4535
4536 if (!state->normalized_coords) {
4537 /*
4538 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
4539 *
4540 * "The following state must be set as indicated if this field
4541 * (Non-normalized Coordinate Enable) is enabled:
4542 *
4543 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
4544 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
4545 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
4546 * - Mag Mode Filter must be MAPFILTER_NEAREST or
4547 * MAPFILTER_LINEAR.
4548 * - Min Mode Filter must be MAPFILTER_NEAREST or
4549 * MAPFILTER_LINEAR.
4550 * - Mip Mode Filter must be MIPFILTER_NONE.
4551 * - Min LOD must be 0.
4552 * - Max LOD must be 0.
4553 * - MIP Count must be 0.
4554 * - Surface Min LOD must be 0.
4555 * - Texture LOD Bias must be 0."
4556 */
4557 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
4558 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
4559 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
4560 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
4561 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
4562 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
4563
4564 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
4565 mag_filter == BRW_MAPFILTER_LINEAR);
4566 assert(min_filter == BRW_MAPFILTER_NEAREST ||
4567 min_filter == BRW_MAPFILTER_LINEAR);
4568
4569 /* work around a bug in util_blitter */
4570 mip_filter = BRW_MIPFILTER_NONE;
4571
4572 assert(mip_filter == BRW_MIPFILTER_NONE);
4573 }
4574
4575 if (dev->gen >= ILO_GEN(7)) {
4576 dw0 = 1 << 28 |
4577 mip_filter << 20 |
4578 lod_bias << 1;
4579
4580 sampler->dw_filter = mag_filter << 17 |
4581 min_filter << 14;
4582
4583 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
4584 BRW_MAPFILTER_ANISOTROPIC << 14 |
4585 1;
4586
4587 dw1 = min_lod << 20 |
4588 max_lod << 8;
4589
4590 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
4591 dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
4592
4593 dw3 = max_aniso << 19;
4594
4595 /* round the coordinates for linear filtering */
4596 if (min_filter != BRW_MAPFILTER_NEAREST) {
4597 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4598 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4599 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4600 }
4601 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4602 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4603 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4604 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4605 }
4606
4607 if (!state->normalized_coords)
4608 dw3 |= 1 << 10;
4609
4610 sampler->dw_wrap = wrap_s << 6 |
4611 wrap_t << 3 |
4612 wrap_r;
4613
4614 /*
4615 * As noted in the classic i965 driver, the HW may still reference
4616 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
4617 * mode
4618 */
4619 sampler->dw_wrap_1d = wrap_s << 6 |
4620 BRW_TEXCOORDMODE_WRAP << 3 |
4621 BRW_TEXCOORDMODE_WRAP;
4622
4623 sampler->dw_wrap_cube = wrap_cube << 6 |
4624 wrap_cube << 3 |
4625 wrap_cube;
4626
4627 STATIC_ASSERT(Elements(sampler->payload) >= 7);
4628
4629 sampler->payload[0] = dw0;
4630 sampler->payload[1] = dw1;
4631 sampler->payload[2] = dw3;
4632
4633 memcpy(&sampler->payload[3],
4634 state->border_color.ui, sizeof(state->border_color.ui));
4635 }
4636 else {
4637 dw0 = 1 << 28 |
4638 mip_filter << 20 |
4639 lod_bias << 3;
4640
4641 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
4642 dw0 |= gen6_translate_shadow_func(state->compare_func);
4643
4644 sampler->dw_filter = (min_filter != mag_filter) << 27 |
4645 mag_filter << 17 |
4646 min_filter << 14;
4647
4648 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
4649 BRW_MAPFILTER_ANISOTROPIC << 14;
4650
4651 dw1 = min_lod << 22 |
4652 max_lod << 12;
4653
4654 sampler->dw_wrap = wrap_s << 6 |
4655 wrap_t << 3 |
4656 wrap_r;
4657
4658 sampler->dw_wrap_1d = wrap_s << 6 |
4659 BRW_TEXCOORDMODE_WRAP << 3 |
4660 BRW_TEXCOORDMODE_WRAP;
4661
4662 sampler->dw_wrap_cube = wrap_cube << 6 |
4663 wrap_cube << 3 |
4664 wrap_cube;
4665
4666 dw3 = max_aniso << 19;
4667
4668 /* round the coordinates for linear filtering */
4669 if (min_filter != BRW_MAPFILTER_NEAREST) {
4670 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4671 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4672 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4673 }
4674 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4675 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4676 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4677 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4678 }
4679
4680 if (!state->normalized_coords)
4681 dw3 |= 1;
4682
4683 STATIC_ASSERT(Elements(sampler->payload) >= 15);
4684
4685 sampler->payload[0] = dw0;
4686 sampler->payload[1] = dw1;
4687 sampler->payload[2] = dw3;
4688
4689 sampler_init_border_color_gen6(dev,
4690 &state->border_color, &sampler->payload[3], 12);
4691 }
4692 }
4693
4694 static uint32_t
4695 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
4696 const struct ilo_sampler_cso * const *samplers,
4697 const struct pipe_sampler_view * const *views,
4698 const uint32_t *sampler_border_colors,
4699 int num_samplers,
4700 struct ilo_cp *cp)
4701 {
4702 const int state_align = 32 / 4;
4703 const int state_len = 4 * num_samplers;
4704 uint32_t state_offset, *dw;
4705 int i;
4706
4707 ILO_GPE_VALID_GEN(dev, 6, 7);
4708
4709 /*
4710 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
4711 *
4712 * "The sampler state is stored as an array of up to 16 elements..."
4713 */
4714 assert(num_samplers <= 16);
4715
4716 if (!num_samplers)
4717 return 0;
4718
4719 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
4720 state_len, state_align, &state_offset);
4721
4722 for (i = 0; i < num_samplers; i++) {
4723 const struct ilo_sampler_cso *sampler = samplers[i];
4724 const struct pipe_sampler_view *view = views[i];
4725 const uint32_t border_color = sampler_border_colors[i];
4726 uint32_t dw_filter, dw_wrap;
4727
4728 /* there may be holes */
4729 if (!sampler || !view) {
4730 /* disabled sampler */
4731 dw[0] = 1 << 31;
4732 dw[1] = 0;
4733 dw[2] = 0;
4734 dw[3] = 0;
4735 dw += 4;
4736
4737 continue;
4738 }
4739
4740 /* determine filter and wrap modes */
4741 switch (view->texture->target) {
4742 case PIPE_TEXTURE_1D:
4743 dw_filter = (sampler->anisotropic) ?
4744 sampler->dw_filter_aniso : sampler->dw_filter;
4745 dw_wrap = sampler->dw_wrap_1d;
4746 break;
4747 case PIPE_TEXTURE_3D:
4748 /*
4749 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
4750 *
4751 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
4752 * surfaces of type SURFTYPE_3D."
4753 */
4754 dw_filter = sampler->dw_filter;
4755 dw_wrap = sampler->dw_wrap;
4756 break;
4757 case PIPE_TEXTURE_CUBE:
4758 dw_filter = (sampler->anisotropic) ?
4759 sampler->dw_filter_aniso : sampler->dw_filter;
4760 dw_wrap = sampler->dw_wrap_cube;
4761 break;
4762 default:
4763 dw_filter = (sampler->anisotropic) ?
4764 sampler->dw_filter_aniso : sampler->dw_filter;
4765 dw_wrap = sampler->dw_wrap;
4766 break;
4767 }
4768
4769 dw[0] = sampler->payload[0];
4770 dw[1] = sampler->payload[1];
4771 assert(!(border_color & 0x1f));
4772 dw[2] = border_color;
4773 dw[3] = sampler->payload[2];
4774
4775 dw[0] |= dw_filter;
4776
4777 if (dev->gen >= ILO_GEN(7)) {
4778 dw[3] |= dw_wrap;
4779 }
4780 else {
4781 /*
4782 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
4783 *
4784 * "[DevSNB] Errata: Incorrect behavior is observed in cases
4785 * where the min and mag mode filters are different and
4786 * SurfMinLOD is nonzero. The determination of MagMode uses the
4787 * following equation instead of the one in the above
4788 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
4789 *
4790 * As a way to work around that, we set Base to
4791 * view->u.tex.first_level.
4792 */
4793 dw[0] |= view->u.tex.first_level << 22;
4794
4795 dw[1] |= dw_wrap;
4796 }
4797
4798 dw += 4;
4799 }
4800
4801 return state_offset;
4802 }
4803
4804 static uint32_t
4805 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
4806 const struct ilo_sampler_cso *sampler,
4807 struct ilo_cp *cp)
4808 {
4809 const int state_align = 32 / 4;
4810 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
4811 uint32_t state_offset, *dw;
4812
4813 ILO_GPE_VALID_GEN(dev, 6, 7);
4814
4815 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
4816 state_len, state_align, &state_offset);
4817
4818 /* see ilo_gpe_init_sampler_cso() */
4819 memcpy(dw, &sampler->payload[3], state_len * 4);
4820
4821 return state_offset;
4822 }
4823
/**
 * Reserve space for a push constant buffer of \p size bytes.
 *
 * The reserved size is \p size rounded up to a multiple of 32 bytes, and the
 * bytes past \p size are zeroed.  The first \p size bytes are left for the
 * caller to fill through the pointer stored in \p pcb, which may be NULL.
 *
 * \return the state offset from ilo_cp_steal_ptr()
 */
static uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int state_align = 32 / 4;
   const int padded_size = align(size, 32);
   const int state_len = padded_size / 4;
   uint32_t state_offset;
   char *buf;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         state_len, state_align, &state_offset);

   /* clear the padding between the requested and the reserved size */
   if (padded_size > size)
      memset(buf + size, 0, padded_size - size);

   if (pcb)
      *pcb = buf;

   return state_offset;
}
4852
4853 static int
4854 gen6_estimate_command_size(const struct ilo_dev_info *dev,
4855 enum ilo_gpe_gen6_command cmd,
4856 int arg)
4857 {
4858 static const struct {
4859 int header;
4860 int body;
4861 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
4862 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
4863 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
4864 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
4865 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
4866 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
4867 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
4868 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
4869 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
4870 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
4871 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
4872 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
4873 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
4874 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
4875 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
4876 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
4877 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
4878 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
4879 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
4880 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
4881 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
4882 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
4883 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
4884 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
4885 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
4886 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
4887 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
4888 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
4889 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
4890 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
4891 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
4892 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
4893 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
4894 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
4895 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
4896 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
4897 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
4898 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
4899 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
4900 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
4901 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
4902 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
4903 };
4904 const int header = gen6_command_size_table[cmd].header;
4905 const int body = gen6_command_size_table[arg].body;
4906 const int count = arg;
4907
4908 ILO_GPE_VALID_GEN(dev, 6, 6);
4909 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
4910
4911 return (likely(count)) ? header + body * count : 0;
4912 }
4913
4914 static int
4915 gen6_estimate_state_size(const struct ilo_dev_info *dev,
4916 enum ilo_gpe_gen6_state state,
4917 int arg)
4918 {
4919 static const struct {
4920 int alignment;
4921 int body;
4922 bool is_array;
4923 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
4924 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
4925 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
4926 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
4927 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
4928 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
4929 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
4930 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
4931 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
4932 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
4933 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
4934 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
4935 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
4936 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
4937 };
4938 const int alignment = gen6_state_size_table[state].alignment;
4939 const int body = gen6_state_size_table[state].body;
4940 const bool is_array = gen6_state_size_table[state].is_array;
4941 const int count = arg;
4942 int estimate;
4943
4944 ILO_GPE_VALID_GEN(dev, 6, 6);
4945 assert(state < ILO_GPE_GEN6_STATE_COUNT);
4946
4947 if (likely(count)) {
4948 if (is_array) {
4949 estimate = (alignment - 1) + body * count;
4950 }
4951 else {
4952 estimate = (alignment - 1) + body;
4953 /* all states are aligned */
4954 if (count > 1)
4955 estimate += util_align_npot(body, alignment) * (count - 1);
4956 }
4957 }
4958 else {
4959 estimate = 0;
4960 }
4961
4962 return estimate;
4963 }
4964
4965 static const struct ilo_gpe_gen6 gen6_gpe = {
4966 .estimate_command_size = gen6_estimate_command_size,
4967 .estimate_state_size = gen6_estimate_state_size,
4968
4969 #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
4970 GEN6_SET(STATE_BASE_ADDRESS),
4971 GEN6_SET(STATE_SIP),
4972 GEN6_SET(3DSTATE_VF_STATISTICS),
4973 GEN6_SET(PIPELINE_SELECT),
4974 GEN6_SET(MEDIA_VFE_STATE),
4975 GEN6_SET(MEDIA_CURBE_LOAD),
4976 GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
4977 GEN6_SET(MEDIA_GATEWAY_STATE),
4978 GEN6_SET(MEDIA_STATE_FLUSH),
4979 GEN6_SET(MEDIA_OBJECT_WALKER),
4980 GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
4981 GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
4982 GEN6_SET(3DSTATE_URB),
4983 GEN6_SET(3DSTATE_VERTEX_BUFFERS),
4984 GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
4985 GEN6_SET(3DSTATE_INDEX_BUFFER),
4986 GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
4987 GEN6_SET(3DSTATE_CC_STATE_POINTERS),
4988 GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
4989 GEN6_SET(3DSTATE_VS),
4990 GEN6_SET(3DSTATE_GS),
4991 GEN6_SET(3DSTATE_CLIP),
4992 GEN6_SET(3DSTATE_SF),
4993 GEN6_SET(3DSTATE_WM),
4994 GEN6_SET(3DSTATE_CONSTANT_VS),
4995 GEN6_SET(3DSTATE_CONSTANT_GS),
4996 GEN6_SET(3DSTATE_CONSTANT_PS),
4997 GEN6_SET(3DSTATE_SAMPLE_MASK),
4998 GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
4999 GEN6_SET(3DSTATE_DEPTH_BUFFER),
5000 GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
5001 GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
5002 GEN6_SET(3DSTATE_LINE_STIPPLE),
5003 GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
5004 GEN6_SET(3DSTATE_GS_SVB_INDEX),
5005 GEN6_SET(3DSTATE_MULTISAMPLE),
5006 GEN6_SET(3DSTATE_STENCIL_BUFFER),
5007 GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
5008 GEN6_SET(3DSTATE_CLEAR_PARAMS),
5009 GEN6_SET(PIPE_CONTROL),
5010 GEN6_SET(3DPRIMITIVE),
5011 GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
5012 GEN6_SET(SF_VIEWPORT),
5013 GEN6_SET(CLIP_VIEWPORT),
5014 GEN6_SET(CC_VIEWPORT),
5015 GEN6_SET(COLOR_CALC_STATE),
5016 GEN6_SET(BLEND_STATE),
5017 GEN6_SET(DEPTH_STENCIL_STATE),
5018 GEN6_SET(SCISSOR_RECT),
5019 GEN6_SET(BINDING_TABLE_STATE),
5020 GEN6_SET(SURFACE_STATE),
5021 GEN6_SET(so_SURFACE_STATE),
5022 GEN6_SET(SAMPLER_STATE),
5023 GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
5024 GEN6_SET(push_constant_buffer),
5025 #undef GEN6_SET
5026 };
5027
5028 const struct ilo_gpe_gen6 *
5029 ilo_gpe_gen6_get(void)
5030 {
5031 return &gen6_gpe;
5032 }