ilo: hook up pipe context 3D functions
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
32
33 #include "ilo_context.h"
34 #include "ilo_cp.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
40
41 /**
42 * Translate winsys tiling to hardware tiling.
43 */
44 int
45 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
46 {
47 switch (tiling) {
48 case INTEL_TILING_NONE:
49 return 0;
50 case INTEL_TILING_X:
51 return BRW_SURFACE_TILED;
52 case INTEL_TILING_Y:
53 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
54 default:
55 assert(!"unknown tiling");
56 return 0;
57 }
58 }
59
60 /**
61 * Translate a pipe primitive type to the matching hardware primitive type.
62 */
63 int
64 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
65 {
66 static const int prim_mapping[PIPE_PRIM_MAX] = {
67 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
68 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
69 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
70 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
71 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
72 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
73 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
74 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
75 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
76 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
77 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
78 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
79 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
80 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
81 };
82
83 assert(prim_mapping[prim]);
84
85 return prim_mapping[prim];
86 }
87
88 /**
89 * Translate a pipe texture target to the matching hardware surface type.
90 */
91 int
92 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
93 {
94 switch (target) {
95 case PIPE_BUFFER:
96 return BRW_SURFACE_BUFFER;
97 case PIPE_TEXTURE_1D:
98 case PIPE_TEXTURE_1D_ARRAY:
99 return BRW_SURFACE_1D;
100 case PIPE_TEXTURE_2D:
101 case PIPE_TEXTURE_RECT:
102 case PIPE_TEXTURE_2D_ARRAY:
103 return BRW_SURFACE_2D;
104 case PIPE_TEXTURE_3D:
105 return BRW_SURFACE_3D;
106 case PIPE_TEXTURE_CUBE:
107 case PIPE_TEXTURE_CUBE_ARRAY:
108 return BRW_SURFACE_CUBE;
109 default:
110 assert(!"unknown texture target");
111 return BRW_SURFACE_BUFFER;
112 }
113 }
114
115 /**
116 * Translate a depth/stencil pipe format to the matching hardware
117 * format. Return -1 on errors.
118 */
119 static int
120 gen6_translate_depth_format(enum pipe_format format)
121 {
122 switch (format) {
123 case PIPE_FORMAT_Z16_UNORM:
124 return BRW_DEPTHFORMAT_D16_UNORM;
125 case PIPE_FORMAT_Z32_FLOAT:
126 return BRW_DEPTHFORMAT_D32_FLOAT;
127 case PIPE_FORMAT_Z24X8_UNORM:
128 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
130 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
132 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
133 default:
134 return -1;
135 }
136 }
137
138 /**
139 * Translate a pipe logicop to the matching hardware logicop.
140 */
141 static int
142 gen6_translate_pipe_logicop(unsigned logicop)
143 {
144 switch (logicop) {
145 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
146 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
147 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
148 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
149 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
150 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
151 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
152 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
153 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
154 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
155 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
156 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
157 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
158 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
159 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
160 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
161 default:
162 assert(!"unknown logicop function");
163 return BRW_LOGICOPFUNCTION_CLEAR;
164 }
165 }
166
167 /**
168 * Translate a pipe blend function to the matching hardware blend function.
169 */
170 static int
171 gen6_translate_pipe_blend(unsigned blend)
172 {
173 switch (blend) {
174 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
175 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
176 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
177 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
178 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
179 default:
180 assert(!"unknown blend function");
181 return BRW_BLENDFUNCTION_ADD;
182 };
183 }
184
185 /**
186 * Translate a pipe blend factor to the matching hardware blend factor.
187 */
188 static int
189 gen6_translate_pipe_blendfactor(unsigned blendfactor)
190 {
191 switch (blendfactor) {
192 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
193 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
194 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
195 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
196 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
198 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
199 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
200 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
201 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
202 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
203 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
204 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
205 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
206 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
207 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
208 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
209 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
210 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
211 default:
212 assert(!"unknown blend factor");
213 return BRW_BLENDFACTOR_ONE;
214 };
215 }
216
217 /**
218 * Translate a pipe stencil op to the matching hardware stencil op.
219 */
220 static int
221 gen6_translate_pipe_stencil_op(unsigned stencil_op)
222 {
223 switch (stencil_op) {
224 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
225 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
226 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
227 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
228 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
229 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
230 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
231 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
232 default:
233 assert(!"unknown stencil op");
234 return BRW_STENCILOP_KEEP;
235 }
236 }
237
238 /**
239 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
240 */
241 static int
242 gen6_translate_tex_mipfilter(unsigned filter)
243 {
244 switch (filter) {
245 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
246 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
247 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
248 default:
249 assert(!"unknown mipfilter");
250 return BRW_MIPFILTER_NONE;
251 }
252 }
253
254 /**
255 * Translate a pipe texture filter to the matching hardware mapfilter.
256 */
257 static int
258 gen6_translate_tex_filter(unsigned filter)
259 {
260 switch (filter) {
261 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
262 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
263 default:
264 assert(!"unknown sampler filter");
265 return BRW_MAPFILTER_NEAREST;
266 }
267 }
268
269 /**
270 * Translate a pipe texture coordinate wrapping mode to the matching hardware
271 * wrapping mode.
272 */
273 static int
274 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
275 {
276 /* clamp to edge or border? */
277 if (wrap == PIPE_TEX_WRAP_CLAMP) {
278 wrap = (clamp_to_edge) ?
279 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
280 }
281
282 switch (wrap) {
283 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
284 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
285 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
286 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
287 case PIPE_TEX_WRAP_CLAMP:
288 case PIPE_TEX_WRAP_MIRROR_CLAMP:
289 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
290 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
291 default:
292 assert(!"unknown sampler wrap mode");
293 return BRW_TEXCOORDMODE_WRAP;
294 }
295 }
296
297 /**
298 * Translate a pipe DSA test function to the matching hardware compare
299 * function.
300 */
301 static int
302 gen6_translate_dsa_func(unsigned func)
303 {
304 switch (func) {
305 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
306 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
307 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
308 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
309 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
310 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
311 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
312 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
313 default:
314 assert(!"unknown depth/stencil/alpha test function");
315 return BRW_COMPAREFUNCTION_NEVER;
316 }
317 }
318
319 /**
320 * Translate a pipe shadow compare function to the matching hardware shadow
321 * function.
322 */
323 static int
324 gen6_translate_shadow_func(unsigned func)
325 {
326 /*
327 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
328 * comparison, and 1.0 is returned when the comparison is true.
329 *
330 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
331 * the comparison, and 0.0 is returned when the comparison is true.
332 */
333 switch (func) {
334 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
335 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
336 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
337 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
338 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
339 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
340 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
341 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
342 default:
343 assert(!"unknown shadow compare function");
344 return BRW_PREFILTER_NEVER;
345 }
346 }
347
348 /**
349 * Translate an index size to the matching hardware index format.
350 */
351 static int
352 gen6_translate_index_size(int size)
353 {
354 switch (size) {
355 case 4: return BRW_INDEX_DWORD;
356 case 2: return BRW_INDEX_WORD;
357 case 1: return BRW_INDEX_BYTE;
358 default:
359 assert(!"unknown index size");
360 return BRW_INDEX_BYTE;
361 }
362 }
363
364 static void
365 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_gpe *gpe,
366 struct intel_bo *general_state_bo,
367 struct intel_bo *surface_state_bo,
368 struct intel_bo *dynamic_state_bo,
369 struct intel_bo *indirect_object_bo,
370 struct intel_bo *instruction_bo,
371 uint32_t general_state_size,
372 uint32_t dynamic_state_size,
373 uint32_t indirect_object_size,
374 uint32_t instruction_size,
375 struct ilo_cp *cp)
376 {
377 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
378 const uint8_t cmd_len = 10;
379
380 ILO_GPE_VALID_GEN(gpe, 6, 7);
381
382 /* 4K-page aligned */
383 assert(((general_state_size | dynamic_state_size |
384 indirect_object_size | instruction_size) & 0xfff) == 0);
385
386 ilo_cp_begin(cp, cmd_len);
387 ilo_cp_write(cp, cmd | (cmd_len - 2));
388
389 ilo_cp_write_bo(cp, 1, general_state_bo,
390 INTEL_DOMAIN_RENDER,
391 0);
392 ilo_cp_write_bo(cp, 1, surface_state_bo,
393 INTEL_DOMAIN_SAMPLER,
394 0);
395 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
396 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
397 0);
398 ilo_cp_write_bo(cp, 1, indirect_object_bo,
399 0,
400 0);
401 ilo_cp_write_bo(cp, 1, instruction_bo,
402 INTEL_DOMAIN_INSTRUCTION,
403 0);
404
405 if (general_state_size) {
406 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
407 INTEL_DOMAIN_RENDER,
408 0);
409 }
410 else {
411 /* skip range check */
412 ilo_cp_write(cp, 1);
413 }
414
415 if (dynamic_state_size) {
416 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
417 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
418 0);
419 }
420 else {
421 /* skip range check */
422 ilo_cp_write(cp, 0xfffff000 + 1);
423 }
424
425 if (indirect_object_size) {
426 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
427 0,
428 0);
429 }
430 else {
431 /* skip range check */
432 ilo_cp_write(cp, 0xfffff000 + 1);
433 }
434
435 if (instruction_size) {
436 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
437 INTEL_DOMAIN_INSTRUCTION,
438 0);
439 }
440 else {
441 /* skip range check */
442 ilo_cp_write(cp, 1);
443 }
444
445 ilo_cp_end(cp);
446 }
447
/**
 * Emit STATE_SIP (2 dwords): the command header followed by \p sip.
 *
 * \param gpe  GPE the command is emitted for (checked with
 *             ILO_GPE_VALID_GEN for gens 6 to 7)
 * \param sip  value written as the second dword
 * \param cp   command parser the dwords are written to
 */
static void
gen6_emit_STATE_SIP(const struct ilo_gpe *gpe,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(gpe, 6, 7);

   /*
    * The length field (cmd_len - 2) belongs in the command header, and
    * ilo_cp_begin() takes the plain dword count, as in every other emitter
    * in this file.
    */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
463
/**
 * Emit 3DSTATE_VF_STATISTICS, a single-dword command whose bit 0 carries
 * \p enable.
 */
static void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_gpe *gpe,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x1, 0x0, 0x0b);
   const uint8_t num_dwords = 1;

   ILO_GPE_VALID_GEN(gpe, 6, 7);

   /* the whole command is the opcode with the enable flag OR'ed in */
   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | enable);
   ilo_cp_end(cp);
}
478
/**
 * Emit PIPELINE_SELECT, a single-dword command with \p pipeline OR'ed into
 * the opcode.
 *
 * \param pipeline  0x0 or 0x1 (3D or media); anything else asserts
 */
static void
gen6_emit_PIPELINE_SELECT(const struct ilo_gpe *gpe,
                          int pipeline,
                          struct ilo_cp *cp)
{
   const int opcode = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t num_dwords = 1;

   ILO_GPE_VALID_GEN(gpe, 6, 7);

   /* only the 3D and media pipelines can be selected */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | pipeline);
   ilo_cp_end(cp);
}
496
/**
 * Emit MEDIA_VFE_STATE (8 dwords).
 *
 * Only the second and fourth payload dwords carry data: the thread/URB
 * entry configuration and the URB entry / CURBE allocation sizes.  The
 * scratch, MBZ, and scoreboard dwords are written as zero.
 */
static void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_gpe *gpe,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
   const uint8_t cmd_len = 8;
   uint32_t thread_dw, alloc_dw;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   thread_dw = ((max_threads - 1) << 16) |
               (num_urb_entries << 8) |
               (1 << 7) |                  /* Reset Gateway Timer */
               (1 << 6);                   /* Bypass Gateway Control */

   alloc_dw = (urb_entry_size << 16) |     /* URB Entry Allocation Size */
              480;                         /* CURBE Allocation Size */

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);                    /* scratch */
   ilo_cp_write(cp, thread_dw);
   ilo_cp_write(cp, 0);                    /* MBZ */
   ilo_cp_write(cp, alloc_dw);
   /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
528
/**
 * Emit MEDIA_CURBE_LOAD (4 dwords).
 *
 * \param buf   offset of the constant data; must be a multiple of 32
 * \param size  size of the constant data; rounded up with align(size, 32)
 *              before being written
 */
static void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_gpe *gpe,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x2, 0x0, 0x01);
   const uint8_t num_dwords = 4;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   assert(buf % 32 == 0);

   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   size = align(size, 32);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2));
   ilo_cp_write(cp, 0);    /* MBZ */
   ilo_cp_write(cp, size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
550
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (4 dwords).
 *
 * \param offset   offset of the interface descriptors; must be a multiple
 *                 of 32
 * \param num_ids  number of interface descriptors; the total length is
 *                 written in bytes
 */
static void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_gpe *gpe,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x2, 0x0, 0x02);
   const uint8_t num_dwords = 4;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2));
   ilo_cp_write(cp, 0);    /* MBZ */
   /* every ID has 8 DWords, i.e. 8 * 4 bytes */
   ilo_cp_write(cp, num_ids * 8 * 4);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
571
/**
 * Emit MEDIA_GATEWAY_STATE (2 dwords).
 *
 * The payload dword packs \p id at bit 16, \p byte at bit 8, and
 * \p thread_count in the low bits.
 */
static void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_gpe *gpe,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x2, 0x0, 0x03);
   const uint8_t num_dwords = 2;
   uint32_t payload;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   payload = (id << 16) | (byte << 8) | thread_count;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2));
   ilo_cp_write(cp, payload);
   ilo_cp_end(cp);
}
592
/**
 * Emit MEDIA_STATE_FLUSH (2 dwords).
 *
 * The payload dword packs \p thread_count_water_mark at bit 16 and
 * \p barrier_mask in the low bits.
 */
static void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_gpe *gpe,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x2, 0x0, 0x04);
   const uint8_t num_dwords = 2;
   uint32_t payload;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   payload = (thread_count_water_mark << 16) | barrier_mask;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2));
   ilo_cp_write(cp, payload);
   ilo_cp_end(cp);
}
613
/**
 * Placeholder for MEDIA_OBJECT_WALKER.  The command is not implemented:
 * calling this function asserts and emits nothing.
 */
static void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_gpe *gpe,
                              struct ilo_cp *cp)
{
   /* neither argument is used until the command is supported */
   (void) gpe;
   (void) cp;

   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
620
621 static void
622 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_gpe *gpe,
623 uint32_t vs_binding_table,
624 uint32_t gs_binding_table,
625 uint32_t ps_binding_table,
626 struct ilo_cp *cp)
627 {
628 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
629 const uint8_t cmd_len = 4;
630
631 ILO_GPE_VALID_GEN(gpe, 6, 6);
632
633 ilo_cp_begin(cp, cmd_len);
634 ilo_cp_write(cp, cmd | (cmd_len - 2) |
635 GEN6_BINDING_TABLE_MODIFY_VS |
636 GEN6_BINDING_TABLE_MODIFY_GS |
637 GEN6_BINDING_TABLE_MODIFY_PS);
638 ilo_cp_write(cp, vs_binding_table);
639 ilo_cp_write(cp, gs_binding_table);
640 ilo_cp_write(cp, ps_binding_table);
641 ilo_cp_end(cp);
642 }
643
644 static void
645 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_gpe *gpe,
646 uint32_t vs_sampler_state,
647 uint32_t gs_sampler_state,
648 uint32_t ps_sampler_state,
649 struct ilo_cp *cp)
650 {
651 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
652 const uint8_t cmd_len = 4;
653
654 ILO_GPE_VALID_GEN(gpe, 6, 6);
655
656 ilo_cp_begin(cp, cmd_len);
657 ilo_cp_write(cp, cmd | (cmd_len - 2) |
658 VS_SAMPLER_STATE_CHANGE |
659 GS_SAMPLER_STATE_CHANGE |
660 PS_SAMPLER_STATE_CHANGE);
661 ilo_cp_write(cp, vs_sampler_state);
662 ilo_cp_write(cp, gs_sampler_state);
663 ilo_cp_write(cp, ps_sampler_state);
664 ilo_cp_end(cp);
665 }
666
667 static void
668 gen6_emit_3DSTATE_URB(const struct ilo_gpe *gpe,
669 int vs_total_size, int gs_total_size,
670 int vs_entry_size, int gs_entry_size,
671 struct ilo_cp *cp)
672 {
673 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
674 const uint8_t cmd_len = 3;
675 const int row_size = 128; /* 1024 bits */
676 int vs_alloc_size, gs_alloc_size;
677 int vs_num_entries, gs_num_entries;
678
679 ILO_GPE_VALID_GEN(gpe, 6, 6);
680
681 /* in 1024-bit URB rows */
682 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
683 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
684
685 /* the valid range is [1, 5] */
686 if (!vs_alloc_size)
687 vs_alloc_size = 1;
688 if (!gs_alloc_size)
689 gs_alloc_size = 1;
690 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
691
692 /* the valid range is [24, 256] in multiples of 4 */
693 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
694 if (vs_num_entries > 256)
695 vs_num_entries = 256;
696 assert(vs_num_entries >= 24);
697
698 /* the valid range is [0, 256] in multiples of 4 */
699 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
700 if (gs_num_entries > 256)
701 gs_num_entries = 256;
702
703 ilo_cp_begin(cp, cmd_len);
704 ilo_cp_write(cp, cmd | (cmd_len - 2));
705 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
706 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
707 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
708 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
709 ilo_cp_end(cp);
710 }
711
712 static void
713 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_gpe *gpe,
714 const struct pipe_vertex_buffer *vbuffers,
715 const int *instance_divisors,
716 uint32_t vbuffer_mask,
717 struct ilo_cp *cp)
718 {
719 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
720 uint8_t cmd_len;
721
722 ILO_GPE_VALID_GEN(gpe, 6, 7);
723
724 /*
725 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
726 *
727 * "From 1 to 33 VBs can be specified..."
728 *
729 * Because of the type of vbuffer_mask, this is always the case.
730 */
731 assert(vbuffer_mask <= (1UL << 33));
732
733 if (!vbuffer_mask)
734 return;
735
736 cmd_len = 4 * util_bitcount(vbuffer_mask) + 1;
737
738 ilo_cp_begin(cp, cmd_len);
739 ilo_cp_write(cp, cmd | (cmd_len - 2));
740
741 while (vbuffer_mask) {
742 const int index = u_bit_scan(&vbuffer_mask);
743 const struct pipe_vertex_buffer *vb = &vbuffers[index];
744 const int instance_divisor =
745 (instance_divisors) ? instance_divisors[index] : 0;
746 uint32_t dw;
747
748 dw = index << GEN6_VB0_INDEX_SHIFT;
749
750 if (instance_divisor)
751 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
752 else
753 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
754
755 if (gpe->gen >= ILO_GEN(7))
756 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
757
758 /* use null vb if there is no buffer or the stride is out of range */
759 if (vb->buffer && vb->stride <= 2048) {
760 const struct ilo_resource *res = ilo_resource(vb->buffer);
761 const uint32_t start_offset = vb->buffer_offset;
762 const uint32_t end_offset = res->bo->get_size(res->bo) - 1;
763
764 dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
765
766 ilo_cp_write(cp, dw);
767 ilo_cp_write_bo(cp, start_offset, res->bo, INTEL_DOMAIN_VERTEX, 0);
768 ilo_cp_write_bo(cp, end_offset, res->bo, INTEL_DOMAIN_VERTEX, 0);
769 ilo_cp_write(cp, instance_divisor);
770 }
771 else {
772 dw |= 1 << 13;
773
774 ilo_cp_write(cp, dw);
775 ilo_cp_write(cp, 0);
776 ilo_cp_write(cp, 0);
777 ilo_cp_write(cp, instance_divisor);
778 }
779 }
780
781 ilo_cp_end(cp);
782 }
783
784 static void
785 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_gpe *gpe,
786 const struct pipe_vertex_element *velements,
787 int num_velements,
788 bool last_velement_edgeflag,
789 bool prepend_generated_ids,
790 struct ilo_cp *cp)
791 {
792 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
793 uint8_t cmd_len;
794 int format, i;
795
796 ILO_GPE_VALID_GEN(gpe, 6, 7);
797
798 /*
799 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
800 *
801 * "Up to 34 (DevSNB+) vertex elements are supported."
802 */
803 assert(num_velements + prepend_generated_ids <= 34);
804
805 if (!num_velements && !prepend_generated_ids) {
806 cmd_len = 3;
807 format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
808
809 ilo_cp_begin(cp, cmd_len);
810 ilo_cp_write(cp, cmd | (cmd_len - 2));
811 ilo_cp_write(cp,
812 0 << GEN6_VE0_INDEX_SHIFT |
813 GEN6_VE0_VALID |
814 format << BRW_VE0_FORMAT_SHIFT |
815 0 << BRW_VE0_SRC_OFFSET_SHIFT);
816 ilo_cp_write(cp,
817 BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
818 BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
819 BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
820 BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
821 ilo_cp_end(cp);
822
823 return;
824 }
825
826 cmd_len = 2 * (num_velements + prepend_generated_ids) + 1;
827
828 ilo_cp_begin(cp, cmd_len);
829 ilo_cp_write(cp, cmd | (cmd_len - 2));
830
831 if (prepend_generated_ids) {
832 ilo_cp_write(cp, GEN6_VE0_VALID);
833 ilo_cp_write(cp,
834 BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT |
835 BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT |
836 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
837 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT);
838 }
839
840 for (i = 0; i < num_velements; i++) {
841 const struct pipe_vertex_element *ve = &velements[i];
842 int comp[4] = {
843 BRW_VE1_COMPONENT_STORE_SRC,
844 BRW_VE1_COMPONENT_STORE_SRC,
845 BRW_VE1_COMPONENT_STORE_SRC,
846 BRW_VE1_COMPONENT_STORE_SRC,
847 };
848 int edgeflag_enable;
849
850 if (last_velement_edgeflag && i == num_velements - 1) {
851 /*
852 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
853 *
854 * "* This bit (Edge Flag Enable) must only be ENABLED on the
855 * last valid VERTEX_ELEMENT structure.
856 *
857 * * When set, Component 0 Control must be set to
858 * VFCOMP_STORE_SRC, and Component 1-3 Control must be set to
859 * VFCOMP_NOSTORE.
860 *
861 * * The Source Element Format must be set to the UINT format.
862 *
863 * * [DevSNB]: Edge Flags are not supported for QUADLIST
864 * primitives. Software may elect to convert QUADLIST
865 * primitives to some set of corresponding edge-flag-supported
866 * primitive types (e.g., POLYGONs) prior to submission to the
867 * 3D pipeline."
868 *
869 * Only a limitied set of primitive types could have Edge Flag Enable
870 * set. The caller should not set last_velement_edgeflag for such
871 * primitive types.
872 */
873 comp[1] = BRW_VE1_COMPONENT_NOSTORE;
874 comp[2] = BRW_VE1_COMPONENT_NOSTORE;
875 comp[3] = BRW_VE1_COMPONENT_NOSTORE;
876
877 switch (ve->src_format) {
878 case PIPE_FORMAT_R32_FLOAT:
879 format = ilo_translate_vertex_format(PIPE_FORMAT_R32_UINT);
880 break;
881 default:
882 assert(ve->src_format == PIPE_FORMAT_R8_UINT);
883 format = ilo_translate_vertex_format(ve->src_format);
884 break;
885 }
886
887 edgeflag_enable = GEN6_VE0_EDGE_FLAG_ENABLE;
888 }
889 else {
890 switch (util_format_get_nr_components(ve->src_format)) {
891 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
892 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
893 case 3: comp[3] = (util_format_is_pure_integer(ve->src_format)) ?
894 BRW_VE1_COMPONENT_STORE_1_INT :
895 BRW_VE1_COMPONENT_STORE_1_FLT;
896 }
897
898 format = ilo_translate_vertex_format(ve->src_format);
899
900 edgeflag_enable = 0;
901 }
902
903 ilo_cp_write(cp,
904 ve->vertex_buffer_index << GEN6_VE0_INDEX_SHIFT |
905 GEN6_VE0_VALID |
906 format << BRW_VE0_FORMAT_SHIFT |
907 edgeflag_enable |
908 ve->src_offset << BRW_VE0_SRC_OFFSET_SHIFT);
909
910 ilo_cp_write(cp,
911 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
912 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
913 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
914 comp[3] << BRW_VE1_COMPONENT_3_SHIFT);
915 }
916
917 ilo_cp_end(cp);
918 }
919
920 static void
921 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_gpe *gpe,
922 const struct pipe_index_buffer *ib,
923 bool enable_cut_index,
924 struct ilo_cp *cp)
925 {
926 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
927 const uint8_t cmd_len = 3;
928 const struct ilo_resource *res = ilo_resource(ib->buffer);
929 uint32_t start_offset, end_offset;
930 int format;
931
932 ILO_GPE_VALID_GEN(gpe, 6, 7);
933
934 if (!res)
935 return;
936
937 format = gen6_translate_index_size(ib->index_size);
938
939 start_offset = ib->offset;
940 /* start_offset must be aligned to index size */
941 if (start_offset % ib->index_size) {
942 /* TODO need a u_upload_mgr to upload the IB to an aligned address */
943 assert(!"unaligned index buffer offset");
944 start_offset -= start_offset % ib->index_size;
945 }
946
947 /* end_offset must also be aligned */
948 end_offset = res->bo->get_size(res->bo);
949 end_offset -= (end_offset % ib->index_size);
950 /* it is inclusive */
951 end_offset -= 1;
952
953 ilo_cp_begin(cp, cmd_len);
954 ilo_cp_write(cp, cmd | (cmd_len - 2) |
955 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
956 format << 8);
957 ilo_cp_write_bo(cp, start_offset, res->bo, INTEL_DOMAIN_VERTEX, 0);
958 ilo_cp_write_bo(cp, end_offset, res->bo, INTEL_DOMAIN_VERTEX, 0);
959 ilo_cp_end(cp);
960 }
961
962 static void
963 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_gpe *gpe,
964 uint32_t clip_viewport,
965 uint32_t sf_viewport,
966 uint32_t cc_viewport,
967 struct ilo_cp *cp)
968 {
969 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
970 const uint8_t cmd_len = 4;
971
972 ILO_GPE_VALID_GEN(gpe, 6, 6);
973
974 ilo_cp_begin(cp, cmd_len);
975 ilo_cp_write(cp, cmd | (cmd_len - 2) |
976 GEN6_CLIP_VIEWPORT_MODIFY |
977 GEN6_SF_VIEWPORT_MODIFY |
978 GEN6_CC_VIEWPORT_MODIFY);
979 ilo_cp_write(cp, clip_viewport);
980 ilo_cp_write(cp, sf_viewport);
981 ilo_cp_write(cp, cc_viewport);
982 ilo_cp_end(cp);
983 }
984
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (4 dwords).
 *
 * The blend, depth/stencil, and color calc state pointers are each written
 * with bit 0 set.
 */
static void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_gpe *gpe,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x0e);
   const uint8_t num_dwords = 4;
   /* OR'ed into every pointer dword */
   const uint32_t low_bit = 1;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2));
   ilo_cp_write(cp, blend_state | low_bit);
   ilo_cp_write(cp, depth_stencil_state | low_bit);
   ilo_cp_write(cp, color_calc_state | low_bit);
   ilo_cp_end(cp);
}
1004
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS (2 dwords): the header followed by
 * \p scissor_rect.
 */
static void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_gpe *gpe,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x0f);
   const uint8_t num_dwords = 2;

   ILO_GPE_VALID_GEN(gpe, 6, 7);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2));
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1020
1021 static void
1022 gen6_emit_3DSTATE_VS(const struct ilo_gpe *gpe,
1023 const struct ilo_shader *vs,
1024 int max_threads, int num_samplers,
1025 struct ilo_cp *cp)
1026 {
1027 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1028 const uint8_t cmd_len = 6;
1029 uint32_t dw2, dw4, dw5;
1030 int vue_read_len;
1031
1032 ILO_GPE_VALID_GEN(gpe, 6, 7);
1033
1034 if (!vs) {
1035 ilo_cp_begin(cp, cmd_len);
1036 ilo_cp_write(cp, cmd | (cmd_len - 2));
1037 ilo_cp_write(cp, 0);
1038 ilo_cp_write(cp, 0);
1039 ilo_cp_write(cp, 0);
1040 ilo_cp_write(cp, 0);
1041 ilo_cp_write(cp, 0);
1042 ilo_cp_end(cp);
1043 return;
1044 }
1045
1046 /*
1047 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
1048 *
1049 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
1050 * 128-bit vertex elements to be passed into the payload for each
1051 * vertex."
1052 *
1053 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
1054 * data to be read and passed to the thread."
1055 */
1056 vue_read_len = (vs->in.count + 1) / 2;
1057 if (!vue_read_len)
1058 vue_read_len = 1;
1059
1060 dw2 = ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1061 if (false)
1062 dw2 |= GEN6_VS_FLOATING_POINT_MODE_ALT;
1063
1064 dw4 = vs->in.start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
1065 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
1066 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
1067
1068 dw5 = GEN6_VS_STATISTICS_ENABLE |
1069 GEN6_VS_ENABLE;
1070
1071 if (gpe->gen >= ILO_GEN(7.5))
1072 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
1073 else
1074 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
1075
1076 ilo_cp_begin(cp, cmd_len);
1077 ilo_cp_write(cp, cmd | (cmd_len - 2));
1078 ilo_cp_write(cp, vs->cache_offset);
1079 ilo_cp_write(cp, dw2);
1080 ilo_cp_write(cp, 0); /* scratch */
1081 ilo_cp_write(cp, dw4);
1082 ilo_cp_write(cp, dw5);
1083 ilo_cp_end(cp);
1084 }
1085
1086 static void
1087 gen6_emit_3DSTATE_GS(const struct ilo_gpe *gpe,
1088 const struct ilo_shader *gs,
1089 int max_threads, const struct ilo_shader *vs,
1090 uint32_t vs_offset,
1091 struct ilo_cp *cp)
1092 {
1093 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1094 const uint8_t cmd_len = 7;
1095 uint32_t dw1, dw2, dw4, dw5, dw6;
1096 int i;
1097
1098 ILO_GPE_VALID_GEN(gpe, 6, 6);
1099
1100 if (!gs && (!vs || !vs->stream_output)) {
1101 dw1 = 0;
1102 dw2 = 0;
1103 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1104 dw5 = GEN6_GS_STATISTICS_ENABLE;
1105 dw6 = 0;
1106 }
1107 else {
1108 int vue_read_len;
1109
1110 /*
1111 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
1112 *
1113 * "Maximum Number of Threads valid range is [0,27] when Rendering
1114 * Enabled bit is set."
1115 *
1116 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
1117 *
1118 * "Programming Note: If the GS stage is enabled, software must
1119 * always allocate at least one GS URB Entry. This is true even if
1120 * the GS thread never needs to output vertices to the pipeline,
1121 * e.g., when only performing stream output. This is an artifact of
1122 * the need to pass the GS thread an initial destination URB
1123 * handle."
1124 *
1125 * As such, we always enable rendering, and limit the number of threads.
1126 */
1127 if (max_threads > 28)
1128 max_threads = 28;
1129
1130 dw2 = GEN6_GS_SPF_MODE;
1131
1132 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
1133 GEN6_GS_STATISTICS_ENABLE |
1134 GEN6_GS_SO_STATISTICS_ENABLE |
1135 GEN6_GS_RENDERING_ENABLE;
1136
1137 /*
1138 * we cannot make use of GEN6_GS_REORDER because it will reorder
1139 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
1140 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
1141 * (2N+2, 2N+1, 2N+3)).
1142 */
1143 dw6 = GEN6_GS_ENABLE;
1144
1145 if (gs) {
1146 /* VS ouputs must match GS inputs */
1147 assert(gs->in.count == vs->out.count);
1148 for (i = 0; i < gs->in.count; i++) {
1149 assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
1150 assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
1151 }
1152
1153 /*
1154 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1155 *
1156 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1157 * Length) to 0 indicating no Vertex URB data to be read and
1158 * passed to the thread."
1159 */
1160 vue_read_len = (gs->in.count + 1) / 2;
1161 if (!vue_read_len)
1162 vue_read_len = 1;
1163
1164 dw1 = gs->cache_offset;
1165 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1166 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1167 gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1168
1169 if (gs->in.discard_adj)
1170 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1171
1172 if (gs->stream_output) {
1173 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1174 if (gs->svbi_post_inc) {
1175 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1176 gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1177 }
1178 }
1179 }
1180 else {
1181 /*
1182 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1183 *
1184 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1185 * Length) to 0 indicating no Vertex URB data to be read and
1186 * passed to the thread."
1187 */
1188 vue_read_len = (vs->out.count + 1) / 2;
1189 if (!vue_read_len)
1190 vue_read_len = 1;
1191
1192 dw1 = vs_offset;
1193 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1194 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1195 vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1196
1197 if (vs->in.discard_adj)
1198 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1199
1200 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1201 if (vs->svbi_post_inc) {
1202 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1203 vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1204 }
1205 }
1206 }
1207
1208 ilo_cp_begin(cp, cmd_len);
1209 ilo_cp_write(cp, cmd | (cmd_len - 2));
1210 ilo_cp_write(cp, dw1);
1211 ilo_cp_write(cp, dw2);
1212 ilo_cp_write(cp, 0);
1213 ilo_cp_write(cp, dw4);
1214 ilo_cp_write(cp, dw5);
1215 ilo_cp_write(cp, dw6);
1216 ilo_cp_end(cp);
1217 }
1218
1219 static void
1220 gen6_emit_3DSTATE_CLIP(const struct ilo_gpe *gpe,
1221 const struct pipe_rasterizer_state *rasterizer,
1222 bool has_linear_interp,
1223 bool enable_guardband,
1224 int num_viewports,
1225 struct ilo_cp *cp)
1226 {
1227 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1228 const uint8_t cmd_len = 4;
1229 uint32_t dw1, dw2, dw3;
1230
1231 ILO_GPE_VALID_GEN(gpe, 6, 7);
1232
1233 if (!rasterizer) {
1234 ilo_cp_begin(cp, cmd_len);
1235 ilo_cp_write(cp, cmd | (cmd_len - 2));
1236 ilo_cp_write(cp, 0);
1237 ilo_cp_write(cp, 0);
1238 ilo_cp_write(cp, 0);
1239 ilo_cp_end(cp);
1240
1241 return;
1242 }
1243
1244 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
1245
1246 if (gpe->gen >= ILO_GEN(7)) {
1247 /*
1248 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
1249 *
1250 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
1251 * enabled only for the cases where the incoming primitive topology
1252 * into the clipper guaranteed to be Trilist."
1253 *
1254 * What does this mean?
1255 */
1256 dw1 |= 0 << 19 |
1257 GEN7_CLIP_EARLY_CULL;
1258
1259 if (rasterizer->front_ccw)
1260 dw1 |= GEN7_CLIP_WINDING_CCW;
1261
1262 switch (rasterizer->cull_face) {
1263 case PIPE_FACE_NONE:
1264 dw1 |= GEN7_CLIP_CULLMODE_NONE;
1265 break;
1266 case PIPE_FACE_FRONT:
1267 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
1268 break;
1269 case PIPE_FACE_BACK:
1270 dw1 |= GEN7_CLIP_CULLMODE_BACK;
1271 break;
1272 case PIPE_FACE_FRONT_AND_BACK:
1273 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
1274 break;
1275 }
1276 }
1277
1278 dw2 = GEN6_CLIP_ENABLE |
1279 GEN6_CLIP_XY_TEST |
1280 rasterizer->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
1281 GEN6_CLIP_MODE_NORMAL;
1282
1283 if (rasterizer->clip_halfz)
1284 dw2 |= GEN6_CLIP_API_D3D;
1285 else
1286 dw2 |= GEN6_CLIP_API_OGL;
1287
1288 if (rasterizer->depth_clip)
1289 dw2 |= GEN6_CLIP_Z_TEST;
1290
1291 /*
1292 * There are several reasons that guard band test should be disabled
1293 *
1294 * - when the renderer does not perform 2D clipping
1295 * - GL wide points (to avoid partially visibie object)
1296 * - GL wide or AA lines (to avoid partially visibie object)
1297 */
1298 if (enable_guardband && true /* API_GL */) {
1299 if (rasterizer->point_size_per_vertex || rasterizer->point_size > 1.0f)
1300 enable_guardband = false;
1301 if (rasterizer->line_smooth || rasterizer->line_width > 1.0f)
1302 enable_guardband = false;
1303 }
1304
1305 if (enable_guardband)
1306 dw2 |= GEN6_CLIP_GB_TEST;
1307
1308 if (has_linear_interp)
1309 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1310
1311 if (rasterizer->flatshade_first) {
1312 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1313 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1314 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1315 }
1316 else {
1317 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1318 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1319 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1320 }
1321
1322 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
1323 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
1324 GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1325 (num_viewports - 1);
1326
1327 ilo_cp_begin(cp, cmd_len);
1328 ilo_cp_write(cp, cmd | (cmd_len - 2));
1329 ilo_cp_write(cp, dw1);
1330 ilo_cp_write(cp, dw2);
1331 ilo_cp_write(cp, dw3);
1332 ilo_cp_end(cp);
1333 }
1334
1335 /**
1336 * Fill in DW2 to DW7 of 3DSTATE_SF.
1337 */
1338 void
1339 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_gpe *gpe,
1340 const struct pipe_rasterizer_state *rasterizer,
1341 int num_samples,
1342 enum pipe_format depth_format,
1343 bool separate_stencil,
1344 uint32_t *dw, int num_dwords)
1345 {
1346 float offset_const, offset_scale, offset_clamp;
1347 int format, line_width, point_width;
1348
1349 ILO_GPE_VALID_GEN(gpe, 6, 7);
1350 assert(num_dwords == 6);
1351
1352 if (!rasterizer) {
1353 dw[0] = 0;
1354 dw[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
1355 dw[2] = 0;
1356 dw[3] = 0;
1357 dw[4] = 0;
1358 dw[5] = 0;
1359
1360 return;
1361 }
1362
1363 /*
1364 * Scale the constant term. The minimum representable value used by the HW
1365 * is not large enouch to be the minimum resolvable difference.
1366 */
1367 offset_const = rasterizer->offset_units * 2.0f;
1368
1369 offset_scale = rasterizer->offset_scale;
1370 offset_clamp = rasterizer->offset_clamp;
1371
1372 if (separate_stencil) {
1373 switch (depth_format) {
1374 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1375 depth_format = PIPE_FORMAT_Z24X8_UNORM;
1376 break;
1377 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1378 depth_format = PIPE_FORMAT_Z32_FLOAT;;
1379 break;
1380 case PIPE_FORMAT_S8_UINT:
1381 depth_format = PIPE_FORMAT_NONE;
1382 break;
1383 default:
1384 break;
1385 }
1386 }
1387
1388 format = gen6_translate_depth_format(depth_format);
1389 /* FLOAT surface is assumed when there is no depth buffer */
1390 if (format < 0)
1391 format = BRW_DEPTHFORMAT_D32_FLOAT;
1392
1393 /*
1394 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
1395 * pixels in the minor direction. We have to make the lines slightly
1396 * thicker, 0.5 pixel on both sides, so that they intersect that many
1397 * pixels are considered into the lines.
1398 *
1399 * Line width is in U3.7.
1400 */
1401 line_width = (int) ((rasterizer->line_width +
1402 (float) rasterizer->line_smooth) * 128.0f + 0.5f);
1403 line_width = CLAMP(line_width, 0, 1023);
1404
1405 /*
1406 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1407 *
1408 * "Software must not program a value of 0.0 when running in
1409 * MSRASTMODE_ON_xxx modes - zero-width lines are not available when
1410 * multisampling rasterization is enabled."
1411 */
1412 if (rasterizer->multisample) {
1413 if (!line_width)
1414 line_width = 128; /* 1.0f */
1415 }
1416 else if (line_width == 128 && !rasterizer->line_smooth) {
1417 /* use GIQ rules */
1418 line_width = 0;
1419 }
1420
1421 /* in U8.3 */
1422 point_width = (int) (rasterizer->point_size * 8.0f + 0.5f);
1423 point_width = CLAMP(point_width, 1, 2047);
1424
1425 /*
1426 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1427 *
1428 * "This bit (Statistics Enable) should be set whenever clipping is
1429 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
1430 * should be cleared if clipping is disabled or Statistics Enable in
1431 * CLIP_STATE is clear."
1432 */
1433 dw[0] = GEN6_SF_STATISTICS_ENABLE |
1434 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
1435
1436 /* XXX GEN6 path seems to work fine for GEN7 */
1437 if (false && gpe->gen >= ILO_GEN(7)) {
1438 dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1439
1440 /*
1441 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
1442 *
1443 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
1444 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
1445 * Depth Offset Enable Point) should be set whenever non zero depth
1446 * bias (Slope, Bias) values are used. Setting this bit may have
1447 * some degradation of performance for some workloads."
1448 */
1449 if (rasterizer->offset_tri ||
1450 rasterizer->offset_line ||
1451 rasterizer->offset_point) {
1452 /* XXX need to scale offset_const according to the depth format */
1453 dw[0] |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
1454
1455 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
1456 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
1457 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1458 }
1459 else {
1460 offset_const = 0.0f;
1461 offset_scale = 0.0f;
1462 offset_clamp = 0.0f;
1463 }
1464 }
1465 else {
1466 if (gpe->gen >= ILO_GEN(7))
1467 dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1468
1469 if (rasterizer->offset_tri)
1470 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
1471 if (rasterizer->offset_line)
1472 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
1473 if (rasterizer->offset_point)
1474 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1475 }
1476
1477 switch (rasterizer->fill_front) {
1478 case PIPE_POLYGON_MODE_FILL:
1479 dw[0] |= GEN6_SF_FRONT_SOLID;
1480 break;
1481 case PIPE_POLYGON_MODE_LINE:
1482 dw[0] |= GEN6_SF_FRONT_WIREFRAME;
1483 break;
1484 case PIPE_POLYGON_MODE_POINT:
1485 dw[0] |= GEN6_SF_FRONT_POINT;
1486 break;
1487 }
1488
1489 switch (rasterizer->fill_back) {
1490 case PIPE_POLYGON_MODE_FILL:
1491 dw[0] |= GEN6_SF_BACK_SOLID;
1492 break;
1493 case PIPE_POLYGON_MODE_LINE:
1494 dw[0] |= GEN6_SF_BACK_WIREFRAME;
1495 break;
1496 case PIPE_POLYGON_MODE_POINT:
1497 dw[0] |= GEN6_SF_BACK_POINT;
1498 break;
1499 }
1500
1501 if (rasterizer->front_ccw)
1502 dw[0] |= GEN6_SF_WINDING_CCW;
1503
1504 dw[1] = 0;
1505
1506 if (rasterizer->line_smooth) {
1507 /*
1508 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1509 *
1510 * "This field (Anti-aliasing Enable) must be disabled if any of the
1511 * render targets have integer (UINT or SINT) surface format."
1512 *
1513 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1514 *
1515 * "This field (Hierarchical Depth Buffer Enable) must be disabled
1516 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
1517 *
1518 * TODO We do not check those yet.
1519 */
1520 dw[1] |= GEN6_SF_LINE_AA_ENABLE |
1521 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
1522 }
1523
1524 switch (rasterizer->cull_face) {
1525 case PIPE_FACE_NONE:
1526 dw[1] |= GEN6_SF_CULL_NONE;
1527 break;
1528 case PIPE_FACE_FRONT:
1529 dw[1] |= GEN6_SF_CULL_FRONT;
1530 break;
1531 case PIPE_FACE_BACK:
1532 dw[1] |= GEN6_SF_CULL_BACK;
1533 break;
1534 case PIPE_FACE_FRONT_AND_BACK:
1535 dw[1] |= GEN6_SF_CULL_BOTH;
1536 break;
1537 }
1538
1539 dw[1] |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1540
1541 if (rasterizer->scissor)
1542 dw[1] |= GEN6_SF_SCISSOR_ENABLE;
1543
1544 if (num_samples > 1 && rasterizer->multisample)
1545 dw[1] |= GEN6_SF_MSRAST_ON_PATTERN;
1546
1547 dw[2] = GEN6_SF_LINE_AA_MODE_TRUE |
1548 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
1549
1550 if (rasterizer->line_last_pixel)
1551 dw[2] |= 1 << 31;
1552
1553 if (rasterizer->flatshade_first) {
1554 dw[2] |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
1555 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
1556 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1557 }
1558 else {
1559 dw[2] |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
1560 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
1561 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1562 }
1563
1564 if (!rasterizer->point_size_per_vertex)
1565 dw[2] |= GEN6_SF_USE_STATE_POINT_WIDTH;
1566
1567 dw[2] |= point_width;
1568
1569 dw[3] = fui(offset_const);
1570 dw[4] = fui(offset_scale);
1571 dw[5] = fui(offset_clamp);
1572 }
1573
1574 /**
1575 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
1576 */
1577 void
1578 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_gpe *gpe,
1579 const struct pipe_rasterizer_state *rasterizer,
1580 const struct ilo_shader *fs,
1581 const struct ilo_shader *last_sh,
1582 uint32_t *dw, int num_dwords)
1583 {
1584 uint32_t point_sprite_enable, const_interp_enable;
1585 uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS];
1586 int vue_offset, vue_len;
1587 int dst, max_src, i;
1588
1589 ILO_GPE_VALID_GEN(gpe, 6, 7);
1590 assert(num_dwords == 13);
1591
1592 if (!fs) {
1593 if (gpe->gen >= ILO_GEN(7))
1594 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
1595 else
1596 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
1597
1598 for (i = 1; i < num_dwords; i++)
1599 dw[i] = 0;
1600
1601 return;
1602 }
1603
1604 if (last_sh) {
1605 /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
1606 assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE);
1607 assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION);
1608 vue_offset = 2;
1609 vue_len = last_sh->out.count - vue_offset;
1610 }
1611 else {
1612 vue_offset = 0;
1613 vue_len = fs->in.count;
1614 }
1615
1616 point_sprite_enable = 0;
1617 const_interp_enable = 0;
1618 max_src = (last_sh) ? 0 : fs->in.count - 1;
1619
1620 for (dst = 0; dst < fs->in.count; dst++) {
1621 const int semantic = fs->in.semantic_names[dst];
1622 const int index = fs->in.semantic_indices[dst];
1623 const int interp = fs->in.interp[dst];
1624 int src;
1625 uint16_t ctrl;
1626
1627 /*
1628 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
1629 *
1630 * "This field (Point Sprite Texture Coordinate Enable) must be
1631 * programmed to 0 when non-point primitives are rendered."
1632 *
1633 * TODO We do not check that yet.
1634 */
1635 if (semantic == TGSI_SEMANTIC_GENERIC &&
1636 (rasterizer->sprite_coord_enable & (1 << index)))
1637 point_sprite_enable |= 1 << dst;
1638
1639 if (interp == TGSI_INTERPOLATE_CONSTANT ||
1640 (interp == TGSI_INTERPOLATE_COLOR && rasterizer->flatshade))
1641 const_interp_enable |= 1 << dst;
1642
1643 if (!last_sh) {
1644 attr_ctrl[dst] = 0;
1645 continue;
1646 }
1647
1648 /* find the matching VS/GS OUT for FS IN[i] */
1649 ctrl = 0;
1650 for (src = 0; src < vue_len; src++) {
1651 if (last_sh->out.semantic_names[vue_offset + src] != semantic ||
1652 last_sh->out.semantic_indices[vue_offset + src] != index)
1653 continue;
1654
1655 ctrl = src;
1656
1657 if (semantic == TGSI_SEMANTIC_COLOR && rasterizer->light_twoside &&
1658 src < vue_len - 1) {
1659 const int next = src + 1;
1660
1661 if (last_sh->out.semantic_names[vue_offset + next] ==
1662 TGSI_SEMANTIC_BCOLOR &&
1663 last_sh->out.semantic_indices[vue_offset + next] == index) {
1664 ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
1665 ATTRIBUTE_SWIZZLE_SHIFT;
1666 src++;
1667 }
1668 }
1669
1670 break;
1671 }
1672
1673 /* if there is no COLOR, try BCOLOR */
1674 if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) {
1675 for (src = 0; src < vue_len; src++) {
1676 if (last_sh->out.semantic_names[vue_offset + src] !=
1677 TGSI_SEMANTIC_BCOLOR ||
1678 last_sh->out.semantic_indices[vue_offset + src] != index)
1679 continue;
1680
1681 ctrl = src;
1682 break;
1683 }
1684 }
1685
1686 if (src < vue_len) {
1687 attr_ctrl[dst] = ctrl;
1688 if (max_src < src)
1689 max_src = src;
1690 }
1691 else {
1692 /*
1693 * The previous shader stage does not output this attribute. The
1694 * value is supposed to be undefined for fs, unless the attribute
1695 * goes through point sprite replacement or the attribute is
1696 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
1697 * attribute is picked.
1698 *
1699 * We should update the fs code and omit the output of
1700 * TGSI_SEMANTIC_POSITION here.
1701 */
1702 attr_ctrl[dst] = 0;
1703 }
1704 }
1705
1706 for (; dst < Elements(attr_ctrl); dst++)
1707 attr_ctrl[dst] = 0;
1708
1709 /* only the first 16 attributes can be remapped */
1710 for (dst = 16; dst < Elements(attr_ctrl); dst++)
1711 assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst);
1712
1713 /*
1714 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1715 *
1716 * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
1717 * 0 indicating no Vertex URB data to be read.
1718 *
1719 * This field should be set to the minimum length required to read the
1720 * maximum source attribute. The maximum source attribute is indicated
1721 * by the maximum value of the enabled Attribute # Source Attribute if
1722 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
1723 * enable is not set.
1724 *
1725 * read_length = ceiling((max_source_attr+1)/2)
1726 *
1727 * [errata] Corruption/Hang possible if length programmed larger than
1728 * recommended"
1729 */
1730 vue_len = max_src + 1;
1731
1732 assert(fs->in.count <= 32);
1733 assert(vue_offset % 2 == 0);
1734
1735 if (gpe->gen >= ILO_GEN(7)) {
1736 dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
1737 (vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1738 vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
1739
1740 if (last_sh)
1741 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
1742 }
1743 else {
1744 dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT |
1745 (vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
1746 vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
1747
1748 if (last_sh)
1749 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
1750 }
1751
1752 switch (rasterizer->sprite_coord_mode) {
1753 case PIPE_SPRITE_COORD_UPPER_LEFT:
1754 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
1755 break;
1756 case PIPE_SPRITE_COORD_LOWER_LEFT:
1757 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
1758 break;
1759 }
1760
1761 for (i = 0; i < 8; i++)
1762 dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i];
1763
1764 dw[9] = point_sprite_enable;
1765 dw[10] = const_interp_enable;
1766
1767 /* WrapShortest enables */
1768 dw[11] = 0;
1769 dw[12] = 0;
1770 }
1771
1772 static void
1773 gen6_emit_3DSTATE_SF(const struct ilo_gpe *gpe,
1774 const struct pipe_rasterizer_state *rasterizer,
1775 const struct ilo_shader *fs,
1776 const struct ilo_shader *last_sh,
1777 struct ilo_cp *cp)
1778 {
1779 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1780 const uint8_t cmd_len = 20;
1781 uint32_t dw_raster[6], dw_sbe[13];
1782
1783 ILO_GPE_VALID_GEN(gpe, 6, 6);
1784
1785 ilo_gpe_gen6_fill_3dstate_sf_raster(gpe, rasterizer,
1786 1, PIPE_FORMAT_NONE, false, dw_raster, Elements(dw_raster));
1787 ilo_gpe_gen6_fill_3dstate_sf_sbe(gpe, rasterizer,
1788 fs, last_sh, dw_sbe, Elements(dw_sbe));
1789
1790 ilo_cp_begin(cp, cmd_len);
1791 ilo_cp_write(cp, cmd | (cmd_len - 2));
1792 ilo_cp_write(cp, dw_sbe[0]);
1793 ilo_cp_write_multi(cp, dw_raster, 6);
1794 ilo_cp_write_multi(cp, &dw_sbe[1], 12);
1795 ilo_cp_end(cp);
1796 }
1797
1798 static void
1799 gen6_emit_3DSTATE_WM(const struct ilo_gpe *gpe,
1800 const struct ilo_shader *fs,
1801 int max_threads, int num_samplers,
1802 const struct pipe_rasterizer_state *rasterizer,
1803 bool dual_blend, bool cc_may_kill,
1804 struct ilo_cp *cp)
1805 {
1806 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1807 const uint8_t cmd_len = 9;
1808 const int num_samples = 1;
1809 uint32_t dw2, dw4, dw5, dw6;
1810
1811 ILO_GPE_VALID_GEN(gpe, 6, 6);
1812
1813 if (!fs) {
1814 ilo_cp_begin(cp, cmd_len);
1815 ilo_cp_write(cp, cmd | (cmd_len - 2));
1816 ilo_cp_write(cp, 0);
1817 ilo_cp_write(cp, 0);
1818 ilo_cp_write(cp, 0);
1819 ilo_cp_write(cp, 0);
1820 /* honor the valid range even if dispatching is disabled */
1821 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
1822 ilo_cp_write(cp, 0);
1823 ilo_cp_write(cp, 0);
1824 ilo_cp_write(cp, 0);
1825 ilo_cp_end(cp);
1826
1827 return;
1828 }
1829
1830 dw2 = (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
1831 if (false)
1832 dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
1833
1834 dw4 = fs->in.start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
1835 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
1836 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
1837
1838 if (true) {
1839 dw4 |= GEN6_WM_STATISTICS_ENABLE;
1840 }
1841 else {
1842 /*
1843 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1844 *
1845 * "This bit (Statistics Enable) must be disabled if either of these
1846 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1847 * Resolve Enable or Depth Buffer Resolve Enable."
1848 */
1849 dw4 |= GEN6_WM_DEPTH_CLEAR;
1850 dw4 |= GEN6_WM_DEPTH_RESOLVE;
1851 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
1852 }
1853
1854 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT |
1855 GEN6_WM_LINE_AA_WIDTH_2_0;
1856
1857 /*
1858 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1859 *
1860 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
1861 * PS kernel or color calculator has the ability to kill (discard)
1862 * pixels or samples, other than due to depth or stencil testing.
1863 * This bit is required to be ENABLED in the following situations:
1864 *
1865 * The API pixel shader program contains "killpix" or "discard"
1866 * instructions, or other code in the pixel shader kernel that can
1867 * cause the final pixel mask to differ from the pixel mask received
1868 * on dispatch.
1869 *
1870 * A sampler with chroma key enabled with kill pixel mode is used by
1871 * the pixel shader.
1872 *
1873 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
1874 * enabled.
1875 *
1876 * The pixel shader kernel generates and outputs oMask.
1877 *
1878 * Note: As ClipDistance clipping is fully supported in hardware and
1879 * therefore not via PS instructions, there should be no need to
1880 * ENABLE this bit due to ClipDistance clipping."
1881 */
1882 if (fs->has_kill || cc_may_kill)
1883 dw5 |= GEN6_WM_KILL_ENABLE;
1884
1885 /*
1886 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1887 *
1888 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
1889 * field must be set to disabled."
1890 *
1891 * TODO This is not checked yet.
1892 */
1893 if (fs->out.has_pos)
1894 dw5 |= GEN6_WM_COMPUTED_DEPTH;
1895
1896 if (fs->in.has_pos)
1897 dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
1898
1899 /*
1900 * Set this bit if
1901 *
1902 * a) fs writes colors and color is not masked, or
1903 * b) fs writes depth, or
1904 * c) fs or cc kills
1905 */
1906 if (true)
1907 dw5 |= GEN6_WM_DISPATCH_ENABLE;
1908
1909 /* same value as in 3DSTATE_SF */
1910 if (rasterizer->line_smooth)
1911 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
1912
1913 if (rasterizer->poly_stipple_enable)
1914 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
1915 if (rasterizer->line_stipple_enable)
1916 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
1917
1918 if (dual_blend)
1919 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
1920
1921 if (fs->dispatch_16)
1922 dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
1923 else
1924 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
1925
1926 dw6 = fs->in.count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
1927 GEN6_WM_POSOFFSET_NONE |
1928 GEN6_WM_POSITION_ZW_PIXEL |
1929 fs->in.barycentric_interpolation_mode <<
1930 GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
1931
1932 if (rasterizer->bottom_edge_rule)
1933 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
1934
1935 if (num_samples > 1) {
1936 if (rasterizer->multisample)
1937 dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
1938 else
1939 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
1940 dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
1941 }
1942 else {
1943 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL |
1944 GEN6_WM_MSDISPMODE_PERSAMPLE;
1945 }
1946
1947 ilo_cp_begin(cp, cmd_len);
1948 ilo_cp_write(cp, cmd | (cmd_len - 2));
1949 ilo_cp_write(cp, fs->cache_offset);
1950 ilo_cp_write(cp, dw2);
1951 ilo_cp_write(cp, 0); /* scratch */
1952 ilo_cp_write(cp, dw4);
1953 ilo_cp_write(cp, dw5);
1954 ilo_cp_write(cp, dw6);
1955 ilo_cp_write(cp, 0); /* kernel 1 */
1956 ilo_cp_write(cp, 0); /* kernel 2 */
1957 ilo_cp_end(cp);
1958 }
1959
/**
 * Fill in the four buffer dwords shared by the 3DSTATE_CONSTANT_x commands.
 *
 * A slot is used when it is below \p num_bufs and its size is non-zero.  A
 * used slot gets the buffer address OR'ed with its read length, which is the
 * size rounded up to 256-bit (32-byte) units, minus one.  Unused slots are
 * zeroed.
 *
 * \param gpe             unused
 * \param bufs            buffer addresses; each used one must be a multiple
 *                        of 32
 * \param sizes           buffer sizes in bytes
 * \param num_bufs        number of entries in \p bufs and \p sizes
 * \param max_read_length upper bound, in 256-bit units, for the sum of all
 *                        read lengths (asserted)
 * \param dw              output array
 * \param num_dwords      size of \p dw; must be 4
 * \return bitmask with bit N set when slot N is used
 */
static unsigned
gen6_fill_3dstate_constant(const struct ilo_gpe *gpe,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned used_slots = 0x0;
   int read_sum = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int units;

      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* number of 256-bit units needed to cover the buffer */
      units = (sizes[slot] + 31) / 32;

      assert(bufs[slot] % 32 == 0);
      assert(units - 1 < 32);

      /* the hardware field holds the unit count minus one */
      dw[slot] = bufs[slot] | (units - 1);
      used_slots |= 1 << slot;

      read_sum += units;
   }

   assert(read_sum <= max_read_length);

   return used_slots;
}
1994
/**
 * Emit 3DSTATE_CONSTANT_VS.
 *
 * The four buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * used buffers it returns is placed at bit 12 of the header.
 *
 * \param gpe      GPE context; checked with ILO_GPE_VALID_GEN(6, 6)
 * \param bufs     buffer addresses
 * \param sizes    buffer sizes in bytes
 * \param num_bufs number of buffers; at most 4
 * \param cp       command parser the dwords are written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_gpe *gpe,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 5;
   uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x15) | (num_dwords - 2);
   uint32_t payload[4];
   int i;

   ILO_GPE_VALID_GEN(gpe, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   header |= gen6_fill_3dstate_constant(gpe,
         bufs, sizes, num_bufs, 32, payload, Elements(payload)) << 12;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2025
/**
 * Emit 3DSTATE_CONSTANT_GS.
 *
 * The four buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * used buffers it returns is placed at bit 12 of the header.
 *
 * \param gpe      GPE context; checked with ILO_GPE_VALID_GEN(6, 6)
 * \param bufs     buffer addresses
 * \param sizes    buffer sizes in bytes
 * \param num_bufs number of buffers; at most 4
 * \param cp       command parser the dwords are written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_gpe *gpe,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 5;
   uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x16) | (num_dwords - 2);
   uint32_t payload[4];
   int i;

   ILO_GPE_VALID_GEN(gpe, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   header |= gen6_fill_3dstate_constant(gpe,
         bufs, sizes, num_bufs, 64, payload, Elements(payload)) << 12;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2056
/**
 * Emit 3DSTATE_CONSTANT_PS.
 *
 * The four buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * used buffers it returns is placed at bit 12 of the header.
 *
 * \param gpe      GPE context; checked with ILO_GPE_VALID_GEN(6, 6)
 * \param bufs     buffer addresses
 * \param sizes    buffer sizes in bytes
 * \param num_bufs number of buffers; at most 4
 * \param cp       command parser the dwords are written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_gpe *gpe,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 5;
   uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x17) | (num_dwords - 2);
   uint32_t payload[4];
   int i;

   ILO_GPE_VALID_GEN(gpe, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   header |= gen6_fill_3dstate_constant(gpe,
         bufs, sizes, num_bufs, 64, payload, Elements(payload)) << 12;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2087
/**
 * Emit 3DSTATE_SAMPLE_MASK.
 *
 * Only the low four bits of \p sample_mask are written; the rest are
 * dropped.
 *
 * \param gpe         GPE context; checked with ILO_GPE_VALID_GEN(6, 6)
 * \param sample_mask sample mask
 * \param cp          command parser the dwords are written to
 */
static void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_gpe *gpe,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (num_dwords - 2);
   const unsigned writable_bits = 0xf;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & writable_bits);
   ilo_cp_end(cp);
}
2106
2107 static void
2108 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_gpe *gpe,
2109 unsigned x, unsigned y,
2110 unsigned width, unsigned height,
2111 struct ilo_cp *cp)
2112 {
2113 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
2114 const uint8_t cmd_len = 4;
2115 unsigned xmax = x + width - 1;
2116 unsigned ymax = y + height - 1;
2117 int rect_limit;
2118
2119 ILO_GPE_VALID_GEN(gpe, 6, 7);
2120
2121 if (gpe->gen >= ILO_GEN(7)) {
2122 rect_limit = 16383;
2123 }
2124 else {
2125 /*
2126 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
2127 *
2128 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
2129 * must be an even number"
2130 */
2131 assert(y % 2 == 0);
2132
2133 rect_limit = 8191;
2134 }
2135
2136 if (x > rect_limit) x = rect_limit;
2137 if (y > rect_limit) y = rect_limit;
2138 if (xmax > rect_limit) xmax = rect_limit;
2139 if (ymax > rect_limit) ymax = rect_limit;
2140
2141 ilo_cp_begin(cp, cmd_len);
2142 ilo_cp_write(cp, cmd | (cmd_len - 2));
2143 ilo_cp_write(cp, y << 16 | x);
2144 ilo_cp_write(cp, ymax << 16 | xmax);
2145
2146 /*
2147 * There is no need to set the origin. It is intended to support front
2148 * buffer rendering.
2149 */
2150 ilo_cp_write(cp, 0);
2151
2152 ilo_cp_end(cp);
2153 }
2154
2155 static int
2156 gen6_get_depth_buffer_format(const struct ilo_gpe *gpe,
2157 enum pipe_format format,
2158 bool hiz,
2159 bool separate_stencil,
2160 bool *has_depth,
2161 bool *has_stencil)
2162 {
2163 int depth_format;
2164
2165 ILO_GPE_VALID_GEN(gpe, 6, 7);
2166
2167 *has_depth = true;
2168 *has_stencil = false;
2169
2170 /*
2171 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2172 *
2173 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
2174 * Surface Format of the depth buffer cannot be
2175 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2176 * requires the separate stencil buffer."
2177 *
2178 * From the Ironlake PRM, volume 2 part 1, page 330:
2179 *
2180 * "If this field (Separate Stencil Buffer Enable) is disabled, the
2181 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
2182 *
2183 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
2184 * is indeed used, the depth values output by the fragment shaders will
2185 * be different when read back.
2186 *
2187 * As for GEN7+, separate_stencil_buffer is always true.
2188 */
2189 switch (format) {
2190 case PIPE_FORMAT_Z16_UNORM:
2191 depth_format = BRW_DEPTHFORMAT_D16_UNORM;
2192 break;
2193 case PIPE_FORMAT_Z32_FLOAT:
2194 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2195 break;
2196 case PIPE_FORMAT_Z24X8_UNORM:
2197 depth_format = (separate_stencil) ?
2198 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2199 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2200 break;
2201 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2202 depth_format = (separate_stencil) ?
2203 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2204 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2205 *has_stencil = true;
2206 break;
2207 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2208 depth_format = (separate_stencil) ?
2209 BRW_DEPTHFORMAT_D32_FLOAT :
2210 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
2211 *has_stencil = true;
2212 break;
2213 case PIPE_FORMAT_S8_UINT:
2214 if (separate_stencil) {
2215 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2216 *has_depth = false;
2217 *has_stencil = true;
2218 break;
2219 }
2220 /* fall through */
2221 default:
2222 assert(!"unsupported depth/stencil format");
2223 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2224 *has_depth = false;
2225 *has_stencil = false;
2226 break;
2227 }
2228
2229 return depth_format;
2230 }
2231
2232 void
2233 ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_gpe *gpe,
2234 const struct pipe_surface *surface,
2235 const struct pipe_depth_stencil_alpha_state *dsa,
2236 bool hiz,
2237 struct ilo_cp *cp)
2238 {
2239 const uint32_t cmd = (gpe->gen >= ILO_GEN(7)) ?
2240 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
2241 const uint8_t cmd_len = 7;
2242 const int max_2d_size = (gpe->gen >= ILO_GEN(7)) ? 16384 : 8192;
2243 struct ilo_resource *res;
2244 uint32_t dw1, dw3;
2245 uint32_t slice_offset, x_offset, y_offset;
2246 int surface_type, depth_format, width, height;
2247 bool separate_stencil, has_depth, has_stencil;
2248
2249 ILO_GPE_VALID_GEN(gpe, 6, 7);
2250
2251 if (gpe->gen >= ILO_GEN(7)) {
2252 separate_stencil = true;
2253 }
2254 else {
2255 /*
2256 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2257 *
2258 * "This field (Separate Stencil Buffer Enable) must be set to the
2259 * same value (enabled or disabled) as Hierarchical Depth Buffer
2260 * Enable."
2261 */
2262 separate_stencil = hiz;
2263 }
2264
2265 if (surface) {
2266 depth_format = gen6_get_depth_buffer_format(gpe,
2267 surface->format, hiz, separate_stencil, &has_depth, &has_stencil);
2268 }
2269 else {
2270 has_depth = false;
2271 has_stencil = false;
2272 }
2273
2274 if (!has_depth && !has_stencil) {
2275 dw1 = BRW_SURFACE_NULL << 29 |
2276 BRW_DEPTHFORMAT_D32_FLOAT << 18;
2277
2278 /* Y-tiled */
2279 if (gpe->gen == ILO_GEN(6)) {
2280 dw1 |= 1 << 27 |
2281 1 << 26;
2282 }
2283
2284 ilo_cp_begin(cp, cmd_len);
2285 ilo_cp_write(cp, cmd | (cmd_len - 2));
2286 ilo_cp_write(cp, dw1);
2287 ilo_cp_write(cp, 0);
2288 ilo_cp_write(cp, 0);
2289 ilo_cp_write(cp, 0);
2290 ilo_cp_write(cp, 0);
2291 ilo_cp_write(cp, 0);
2292 ilo_cp_end(cp);
2293
2294 return;
2295 }
2296
2297 res = ilo_resource(surface->texture);
2298 surface_type = ilo_gpe_gen6_translate_texture(res->base.target);
2299 width = surface->width;
2300 height = surface->height;
2301
2302 /*
2303 * we always treat the resource as non-mipmapped and set the slice/x/y
2304 * offsets manually
2305 */
2306 slice_offset = ilo_resource_get_slice_offset(res,
2307 surface->u.tex.level, surface->u.tex.first_layer,
2308 true, &x_offset, &y_offset);
2309
2310 /*
2311 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
2312 *
2313 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
2314 * Coordinate Offset X) must be zero to ensure correct alignment"
2315 *
2316 * XXX Skip the check for gen6, which seems to be fine. We need to make
2317 * sure that does not happen eventually.
2318 */
2319 if (gpe->gen >= ILO_GEN(7)) {
2320 assert((x_offset & 7) == 0 && (y_offset & 7) == 0);
2321 x_offset &= ~7;
2322 y_offset &= ~7;
2323 }
2324
2325 width += x_offset;
2326 height += y_offset;
2327
2328 /* required for GEN6+ */
2329 assert(res->tiling == INTEL_TILING_Y);
2330
2331 assert(res->bo_stride > 0 && res->bo_stride < 128 * 1024 &&
2332 res->bo_stride % 128 == 0);
2333 assert(surface->u.tex.first_layer == surface->u.tex.last_layer);
2334 assert(width <= res->bo_stride);
2335
2336 /* we have to treat them as 2D surfaces */
2337 if (surface_type == BRW_SURFACE_CUBE) {
2338 assert(surface->width == surface->height);
2339 /* we will set slice_offset to point to the single face */
2340 surface_type = BRW_SURFACE_2D;
2341 }
2342 else if (surface_type == BRW_SURFACE_1D && height > 1) {
2343 assert(surface->height == 1);
2344 surface_type = BRW_SURFACE_2D;
2345 }
2346
2347 switch (surface_type) {
2348 case BRW_SURFACE_1D:
2349 assert(width <= max_2d_size && height == 1);
2350 break;
2351 case BRW_SURFACE_2D:
2352 assert(width <= max_2d_size && height <= max_2d_size);
2353 break;
2354 case BRW_SURFACE_3D:
2355 assert(width <= 2048 && height <= 2048);
2356 assert(x_offset == 0 && y_offset == 0);
2357 break;
2358 case BRW_SURFACE_CUBE:
2359 assert(width <= max_2d_size && height <= max_2d_size &&
2360 width == height);
2361 assert(x_offset == 0 && y_offset == 0);
2362 break;
2363 default:
2364 assert(!"unexpected depth surface type");
2365 break;
2366 }
2367
2368 dw1 = surface_type << 29 |
2369 depth_format << 18 |
2370 (res->bo_stride - 1);
2371
2372 if (gpe->gen >= ILO_GEN(7)) {
2373 if (has_depth) {
2374 if (dsa->depth.writemask)
2375 dw1 |= 1 << 28;
2376 if (hiz)
2377 dw1 |= 1 << 22;
2378 }
2379
2380 if (has_stencil &&
2381 (dsa->stencil[0].writemask || dsa->stencil[1].writemask))
2382 dw1 |= 1 << 27;
2383
2384 dw3 = (height - 1) << 18 |
2385 (width - 1) << 4;
2386 }
2387 else {
2388 dw1 |= (res->tiling != INTEL_TILING_NONE) << 27 |
2389 (res->tiling == INTEL_TILING_Y) << 26;
2390
2391 if (hiz) {
2392 dw1 |= 1 << 22 |
2393 1 << 21;
2394 }
2395
2396 dw3 = (height - 1) << 19 |
2397 (width - 1) << 6 |
2398 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
2399 }
2400
2401 ilo_cp_begin(cp, cmd_len);
2402 ilo_cp_write(cp, cmd | (cmd_len - 2));
2403 ilo_cp_write(cp, dw1);
2404
2405 if (has_depth) {
2406 ilo_cp_write_bo(cp, slice_offset, res->bo,
2407 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2408 }
2409 else {
2410 ilo_cp_write(cp, 0);
2411 }
2412
2413 ilo_cp_write(cp, dw3);
2414 ilo_cp_write(cp, 0);
2415 ilo_cp_write(cp, y_offset << 16 | x_offset);
2416 ilo_cp_write(cp, 0);
2417 ilo_cp_end(cp);
2418 }
2419
/**
 * Emit 3DSTATE_DEPTH_BUFFER without any depth/stencil/alpha state.
 *
 * Thin wrapper that forwards to ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER()
 * with a NULL dsa.
 */
static void
gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_gpe *gpe,
                               const struct pipe_surface *surface,
                               bool hiz,
                               struct ilo_cp *cp)
{
   const struct pipe_depth_stencil_alpha_state *no_dsa = NULL;

   ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(gpe, surface, no_dsa, hiz, cp);
}
2428
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (GEN6 and GEN7).
 *
 * Both offsets must lie in [0, 31].  The x offset is placed at bit 8 and the
 * y offset at bit 0 of the payload dword.
 */
static void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_gpe *gpe,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
   const uint8_t cmd_len = 2;
   const uint32_t header = cmd | (cmd_len - 2);

   ILO_GPE_VALID_GEN(gpe, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, x_offset << 8 | y_offset);
   ilo_cp_end(cp);
}
2446
2447 static void
2448 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_gpe *gpe,
2449 const struct pipe_poly_stipple *pattern,
2450 struct ilo_cp *cp)
2451 {
2452 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
2453 const uint8_t cmd_len = 33;
2454 int i;
2455
2456 ILO_GPE_VALID_GEN(gpe, 6, 7);
2457 assert(Elements(pattern->stipple) == 32);
2458
2459 ilo_cp_begin(cp, cmd_len);
2460 ilo_cp_write(cp, cmd | (cmd_len - 2));
2461 for (i = 0; i < 32; i++)
2462 ilo_cp_write(cp, pattern->stipple[i]);
2463 ilo_cp_end(cp);
2464 }
2465
2466 static void
2467 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_gpe *gpe,
2468 unsigned pattern, unsigned factor,
2469 struct ilo_cp *cp)
2470 {
2471 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
2472 const uint8_t cmd_len = 3;
2473 unsigned inverse;
2474
2475 ILO_GPE_VALID_GEN(gpe, 6, 7);
2476 assert((pattern & 0xffff) == pattern);
2477 assert(factor >= 1 && factor <= 256);
2478
2479 ilo_cp_begin(cp, cmd_len);
2480 ilo_cp_write(cp, cmd | (cmd_len - 2));
2481 ilo_cp_write(cp, pattern);
2482
2483 if (gpe->gen >= ILO_GEN(7)) {
2484 /* in U1.16 */
2485 inverse = (unsigned) (65536.0f / factor);
2486 ilo_cp_write(cp, inverse << 15 | factor);
2487 }
2488 else {
2489 /* in U1.13 */
2490 inverse = (unsigned) (8192.0f / factor);
2491 ilo_cp_write(cp, inverse << 16 | factor);
2492 }
2493
2494 ilo_cp_end(cp);
2495 }
2496
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS (GEN6 and GEN7).
 *
 * Both payload dwords are written as zero.
 */
static void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_gpe *gpe,
                                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
   const uint8_t cmd_len = 3;
   const uint32_t zero_params = 0 << 16 | 0;

   ILO_GPE_VALID_GEN(gpe, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, zero_params);
   ilo_cp_write(cp, zero_params);
   ilo_cp_end(cp);
}
2512
2513 static void
2514 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_gpe *gpe,
2515 int index, unsigned svbi,
2516 unsigned max_svbi,
2517 bool load_vertex_count,
2518 struct ilo_cp *cp)
2519 {
2520 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
2521 const uint8_t cmd_len = 4;
2522 uint32_t dw1;
2523
2524 ILO_GPE_VALID_GEN(gpe, 6, 6);
2525 assert(index >= 0 && index < 4);
2526
2527 dw1 = index << SVB_INDEX_SHIFT;
2528 if (load_vertex_count)
2529 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
2530
2531 ilo_cp_begin(cp, cmd_len);
2532 ilo_cp_write(cp, cmd | (cmd_len - 2));
2533 ilo_cp_write(cp, dw1);
2534 ilo_cp_write(cp, svbi);
2535 ilo_cp_write(cp, max_svbi);
2536 ilo_cp_end(cp);
2537 }
2538
2539 static void
2540 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_gpe *gpe,
2541 int num_samples,
2542 const uint32_t *packed_sample_pos,
2543 bool pixel_location_center,
2544 struct ilo_cp *cp)
2545 {
2546 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
2547 const uint8_t cmd_len = (gpe->gen >= ILO_GEN(7)) ? 4 : 3;
2548 uint32_t dw1, dw2, dw3;
2549
2550 ILO_GPE_VALID_GEN(gpe, 6, 7);
2551
2552 dw1 = (pixel_location_center) ?
2553 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
2554
2555 switch (num_samples) {
2556 case 0:
2557 case 1:
2558 dw1 |= MS_NUMSAMPLES_1;
2559 dw2 = 0;
2560 dw3 = 0;
2561 break;
2562 case 4:
2563 dw1 |= MS_NUMSAMPLES_4;
2564 dw2 = packed_sample_pos[0];
2565 dw3 = 0;
2566 break;
2567 case 8:
2568 assert(gpe->gen >= ILO_GEN(7));
2569 dw1 |= MS_NUMSAMPLES_8;
2570 dw2 = packed_sample_pos[0];
2571 dw3 = packed_sample_pos[1];
2572 break;
2573 default:
2574 assert(!"unsupported sample count");
2575 dw1 |= MS_NUMSAMPLES_1;
2576 dw2 = 0;
2577 dw3 = 0;
2578 break;
2579 }
2580
2581 ilo_cp_begin(cp, cmd_len);
2582 ilo_cp_write(cp, cmd | (cmd_len - 2));
2583 ilo_cp_write(cp, dw1);
2584 ilo_cp_write(cp, dw2);
2585 if (gpe->gen >= ILO_GEN(7))
2586 ilo_cp_write(cp, dw3);
2587 ilo_cp_end(cp);
2588 }
2589
2590 static void
2591 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_gpe *gpe,
2592 const struct pipe_surface *surface,
2593 struct ilo_cp *cp)
2594 {
2595 const uint32_t cmd = (gpe->gen >= ILO_GEN(7)) ?
2596 ILO_GPE_CMD(0x3, 0x0, 0x06) :
2597 ILO_GPE_CMD(0x3, 0x1, 0x0e);
2598 const uint8_t cmd_len = 3;
2599 struct ilo_resource *res;
2600 uint32_t slice_offset;
2601 int pitch;
2602
2603 ILO_GPE_VALID_GEN(gpe, 6, 7);
2604
2605 if (!surface) {
2606 ilo_cp_begin(cp, cmd_len);
2607 ilo_cp_write(cp, cmd | (cmd_len - 2));
2608 ilo_cp_write(cp, 0);
2609 ilo_cp_write(cp, 0);
2610 ilo_cp_end(cp);
2611
2612 return;
2613 }
2614
2615 res = ilo_resource(surface->texture);
2616
2617 /* TODO */
2618 slice_offset = 0;
2619
2620 /*
2621 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
2622 *
2623 * "The pitch must be set to 2x the value computed based on width, as
2624 * the stencil buffer is stored with two rows interleaved."
2625 */
2626 pitch = 2 * res->bo_stride;
2627 assert(pitch > 0 && pitch < 128 * 1024 && pitch % 128 == 0);
2628
2629 ilo_cp_begin(cp, cmd_len);
2630 ilo_cp_write(cp, cmd | (cmd_len - 2));
2631 ilo_cp_write(cp, pitch - 1);
2632 ilo_cp_write_bo(cp, slice_offset, res->bo,
2633 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2634 ilo_cp_end(cp);
2635 }
2636
2637 static void
2638 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_gpe *gpe,
2639 const struct pipe_surface *surface,
2640 struct ilo_cp *cp)
2641 {
2642 const uint32_t cmd = (gpe->gen >= ILO_GEN(7)) ?
2643 ILO_GPE_CMD(0x3, 0x0, 0x07) :
2644 ILO_GPE_CMD(0x3, 0x1, 0x0f);
2645 const uint8_t cmd_len = 3;
2646 struct ilo_resource *res;
2647 uint32_t slice_offset;
2648
2649 ILO_GPE_VALID_GEN(gpe, 6, 7);
2650
2651 if (!surface) {
2652 ilo_cp_begin(cp, cmd_len);
2653 ilo_cp_write(cp, cmd | (cmd_len - 2));
2654 ilo_cp_write(cp, 0);
2655 ilo_cp_write(cp, 0);
2656 ilo_cp_end(cp);
2657
2658 return;
2659 }
2660
2661 res = ilo_resource(surface->texture);
2662
2663 /* TODO */
2664 slice_offset = 0;
2665
2666 assert(res->bo_stride > 0 && res->bo_stride < 128 * 1024 &&
2667 res->bo_stride % 128 == 0);
2668
2669 ilo_cp_begin(cp, cmd_len);
2670 ilo_cp_write(cp, cmd | (cmd_len - 2));
2671 ilo_cp_write(cp, res->bo_stride - 1);
2672 ilo_cp_write_bo(cp, slice_offset, res->bo,
2673 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2674 ilo_cp_end(cp);
2675 }
2676
2677 static void
2678 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_gpe *gpe,
2679 uint32_t clear_val,
2680 struct ilo_cp *cp)
2681 {
2682 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
2683 const uint8_t cmd_len = 2;
2684
2685 ILO_GPE_VALID_GEN(gpe, 6, 6);
2686
2687 ilo_cp_begin(cp, cmd_len);
2688 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2689 GEN5_DEPTH_CLEAR_VALID);
2690 ilo_cp_write(cp, clear_val);
2691 ilo_cp_end(cp);
2692 }
2693
2694 static void
2695 gen6_emit_PIPE_CONTROL(const struct ilo_gpe *gpe,
2696 uint32_t dw1,
2697 struct intel_bo *bo, uint32_t bo_offset,
2698 bool write_qword,
2699 struct ilo_cp *cp)
2700 {
2701 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
2702 const uint8_t cmd_len = (write_qword) ? 5 : 4;
2703 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
2704 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
2705
2706 ILO_GPE_VALID_GEN(gpe, 6, 7);
2707
2708 if (dw1 & PIPE_CONTROL_CS_STALL) {
2709 /*
2710 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2711 *
2712 * "1 of the following must also be set (when CS stall is set):
2713 *
2714 * * Depth Cache Flush Enable ([0] of DW1)
2715 * * Stall at Pixel Scoreboard ([1] of DW1)
2716 * * Depth Stall ([13] of DW1)
2717 * * Post-Sync Operation ([13] of DW1)
2718 * * Render Target Cache Flush Enable ([12] of DW1)
2719 * * Notify Enable ([8] of DW1)"
2720 *
2721 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
2722 *
2723 * "One of the following must also be set (when CS stall is set):
2724 *
2725 * * Render Target Cache Flush Enable ([12] of DW1)
2726 * * Depth Cache Flush Enable ([0] of DW1)
2727 * * Stall at Pixel Scoreboard ([1] of DW1)
2728 * * Depth Stall ([13] of DW1)
2729 * * Post-Sync Operation ([13] of DW1)"
2730 */
2731 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
2732 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
2733 PIPE_CONTROL_STALL_AT_SCOREBOARD |
2734 PIPE_CONTROL_DEPTH_STALL;
2735
2736 /* post-sync op */
2737 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
2738 PIPE_CONTROL_WRITE_DEPTH_COUNT |
2739 PIPE_CONTROL_WRITE_TIMESTAMP;
2740
2741 if (gpe->gen == ILO_GEN(6))
2742 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
2743
2744 assert(dw1 & bit_test);
2745 }
2746
2747 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
2748 /*
2749 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2750 *
2751 * "Following bits must be clear (when Depth Stall is set):
2752 *
2753 * * Render Target Cache Flush Enable ([12] of DW1)
2754 * * Depth Cache Flush Enable ([0] of DW1)"
2755 */
2756 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
2757 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
2758 }
2759
2760 ilo_cp_begin(cp, cmd_len);
2761 ilo_cp_write(cp, cmd | (cmd_len - 2));
2762 ilo_cp_write(cp, dw1);
2763 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
2764 ilo_cp_write(cp, 0);
2765 if (write_qword)
2766 ilo_cp_write(cp, 0);
2767 ilo_cp_end(cp);
2768 }
2769
2770 static void
2771 gen6_emit_3DPRIMITIVE(const struct ilo_gpe *gpe,
2772 const struct pipe_draw_info *info,
2773 bool rectlist,
2774 struct ilo_cp *cp)
2775 {
2776 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
2777 const uint8_t cmd_len = 6;
2778 const int prim = (rectlist) ?
2779 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
2780 const int vb_access = (info->indexed) ?
2781 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
2782 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
2783
2784 ILO_GPE_VALID_GEN(gpe, 6, 6);
2785
2786 ilo_cp_begin(cp, cmd_len);
2787 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2788 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
2789 vb_access);
2790 ilo_cp_write(cp, info->count);
2791 ilo_cp_write(cp, info->start);
2792 ilo_cp_write(cp, info->instance_count);
2793 ilo_cp_write(cp, info->start_instance);
2794 ilo_cp_write(cp, info->index_bias);
2795 ilo_cp_end(cp);
2796 }
2797
2798 static uint32_t
2799 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_gpe *gpe,
2800 const struct ilo_shader **cs,
2801 uint32_t *sampler_state,
2802 int *num_samplers,
2803 uint32_t *binding_table_state,
2804 int *num_surfaces,
2805 int num_ids,
2806 struct ilo_cp *cp)
2807 {
2808 /*
2809 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
2810 *
2811 * "(Interface Descriptor Total Length) This field must have the same
2812 * alignment as the Interface Descriptor Data Start Address.
2813 *
2814 * It must be DQWord (32-byte) aligned..."
2815 *
2816 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
2817 *
2818 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
2819 * aligned address of the Interface Descriptor data."
2820 */
2821 const int state_align = 32 / 4;
2822 const int state_len = (32 / 4) * num_ids;
2823 uint32_t state_offset, *dw;
2824 int i;
2825
2826 ILO_GPE_VALID_GEN(gpe, 6, 6);
2827
2828 dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
2829 state_len, state_align, &state_offset);
2830
2831 for (i = 0; i < num_ids; i++) {
2832 int curbe_read_len;
2833
2834 curbe_read_len = (cs[i]->pcb.clip_state_size + 31) / 32;
2835
2836 dw[0] = cs[i]->cache_offset;
2837 dw[1] = 1 << 18; /* SPF */
2838 dw[2] = sampler_state[i] |
2839 (num_samplers[i] + 3) / 4 << 2;
2840 dw[3] = binding_table_state[i] |
2841 num_surfaces[i];
2842 dw[4] = curbe_read_len << 16 | /* CURBE Read Length */
2843 0; /* CURBE Read Offset */
2844 dw[5] = 0; /* Barrier ID */
2845 dw[6] = 0;
2846 dw[7] = 0;
2847
2848 dw += 8;
2849 }
2850
2851 return state_offset;
2852 }
2853
2854 void
2855 ilo_gpe_gen6_fill_SF_VIEWPORT(const struct ilo_gpe *gpe,
2856 const struct pipe_viewport_state *viewports,
2857 int num_viewports,
2858 uint32_t *dw, int num_dwords)
2859 {
2860 int i;
2861
2862 ILO_GPE_VALID_GEN(gpe, 6, 7);
2863 assert(num_dwords == 8 * num_viewports);
2864
2865 for (i = 0; i < num_viewports; i++) {
2866 const struct pipe_viewport_state *vp = &viewports[i];
2867
2868 dw[0] = fui(vp->scale[0]);
2869 dw[1] = fui(vp->scale[1]);
2870 dw[2] = fui(vp->scale[2]);
2871 dw[3] = fui(vp->translate[0]);
2872 dw[4] = fui(vp->translate[1]);
2873 dw[5] = fui(vp->translate[2]);
2874
2875 /* padding */
2876 dw[6] = 0;
2877 dw[7] = 0;
2878
2879 dw += 8;
2880 }
2881 }
2882
2883 void
2884 ilo_gpe_gen6_fill_CLIP_VIEWPORT(const struct ilo_gpe *gpe,
2885 const struct pipe_viewport_state *viewports,
2886 int num_viewports,
2887 uint32_t *dw, int num_dwords)
2888 {
2889 int i;
2890
2891 ILO_GPE_VALID_GEN(gpe, 6, 7);
2892 assert(num_dwords == 4 * num_viewports);
2893
2894 /*
2895 * CLIP_VIEWPORT specifies the guard band.
2896 *
2897 * Clipping an object that is not entirely inside or outside the viewport
2898 * (that is, trivially accepted or rejected) is expensive. Guard band test
2899 * allows clipping to be skipped in this stage and let the renderer dicards
2900 * pixels that are outside the viewport.
2901 *
2902 * The reason that we need CLIP_VIEWPORT is that the renderer has a limit
2903 * on the object size. We have to clip normally when the object exceeds
2904 * the limit.
2905 */
2906
2907 for (i = 0; i < num_viewports; i++) {
2908 const struct pipe_viewport_state *vp = &viewports[i];
2909 /*
2910 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
2911 *
2912 * "Per-Device Guardband Extents
2913 *
2914 * * Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
2915 * * Maximum Post-Clamp Delta (X or Y): 16K"
2916 *
2917 * "In addition, in order to be correctly rendered, objects must
2918 * have a screenspace bounding box not exceeding 8K in the X or Y
2919 * direction. This additional restriction must also be
2920 * comprehended by software, i.e., enforced by use of clipping."
2921 *
2922 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
2923 *
2924 * "Per-Device Guardband Extents
2925 *
2926 * * Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
2927 * * Maximum Post-Clamp Delta (X or Y): N/A"
2928 *
2929 * "In addition, in order to be correctly rendered, objects must
2930 * have a screenspace bounding box not exceeding 8K in the X or Y
2931 * direction. This additional restriction must also be comprehended
2932 * by software, i.e., enforced by use of clipping."
2933 *
2934 * Combined, the bounding box of any object can not exceed 8K in both
2935 * width and height.
2936 *
2937 * Below we set the guardband as a squre of length 8K, centered at where
2938 * the viewport is. This makes sure all objects passing the GB test are
2939 * valid to the renderer, and those failing the XY clipping have a
2940 * better chance of passing the GB test.
2941 */
2942 const float xscale = fabs(vp->scale[0]);
2943 const float yscale = fabs(vp->scale[1]);
2944 const int max_extent = (gpe->gen >= ILO_GEN(7)) ? 32768 : 16384;
2945 const int half_len = 8192 / 2;
2946 int center_x = (int) vp->translate[0];
2947 int center_y = (int) vp->translate[1];
2948 float xmin, xmax, ymin, ymax;
2949
2950 /* make sure the guardband is within the valid range */
2951 if (center_x - half_len < -max_extent)
2952 center_x = -max_extent + half_len;
2953 else if (center_x + half_len > max_extent)
2954 center_x = max_extent - half_len;
2955
2956 if (center_y - half_len < -max_extent)
2957 center_y = -max_extent + half_len;
2958 else if (center_y + half_len > max_extent)
2959 center_y = max_extent - half_len;
2960
2961 xmin = (float) (center_x - half_len);
2962 xmax = (float) (center_x + half_len);
2963 ymin = (float) (center_y - half_len);
2964 ymax = (float) (center_y + half_len);
2965
2966 /* screen space to NDC space */
2967 xmin = (xmin - vp->translate[0]) / xscale;
2968 xmax = (xmax - vp->translate[0]) / xscale;
2969 ymin = (ymin - vp->translate[1]) / yscale;
2970 ymax = (ymax - vp->translate[1]) / yscale;
2971
2972 dw[0] = fui(xmin);
2973 dw[1] = fui(xmax);
2974 dw[2] = fui(ymin);
2975 dw[3] = fui(ymax);
2976
2977 dw += 4;
2978 }
2979 }
2980
2981 static void
2982 gen6_fill_CC_VIEWPORT(const struct ilo_gpe *gpe,
2983 const struct pipe_viewport_state *viewports,
2984 int num_viewports,
2985 uint32_t *dw, int num_dwords)
2986 {
2987 int i;
2988
2989 ILO_GPE_VALID_GEN(gpe, 6, 7);
2990 assert(num_dwords == 2 * num_viewports);
2991
2992 for (i = 0; i < num_viewports; i++) {
2993 const struct pipe_viewport_state *vp = &viewports[i];
2994 const float scale = fabs(vp->scale[2]);
2995 const float min = vp->translate[2] - scale;
2996 const float max = vp->translate[2] + scale;
2997
2998 dw[0] = fui(min);
2999 dw[1] = fui(max);
3000
3001 dw += 2;
3002 }
3003 }
3004
/**
 * Reserve space for SF_VIEWPORT state (GEN6 only), fill it from
 * \p viewports, and return the offset reported by ilo_cp_steal_ptr().
 *
 * \p num_viewports must be in [1, 16].
 */
static uint32_t
gen6_emit_SF_VIEWPORT(const struct ilo_gpe *gpe,
                      const struct pipe_viewport_state *viewports,
                      int num_viewports,
                      struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 8 * num_viewports;
   uint32_t offset;
   uint32_t *state;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
    *
    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
    *      stored as an array of up to 16 elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   state = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
         state_len, state_align, &offset);

   ilo_gpe_gen6_fill_SF_VIEWPORT(gpe,
         viewports, num_viewports, state, state_len);

   return offset;
}
3033
/**
 * Reserve space for CLIP_VIEWPORT state (GEN6 only), fill it from
 * \p viewports, and return the offset reported by ilo_cp_steal_ptr().
 *
 * \p num_viewports must be in [1, 16].
 */
static uint32_t
gen6_emit_CLIP_VIEWPORT(const struct ilo_gpe *gpe,
                        const struct pipe_viewport_state *viewports,
                        int num_viewports,
                        struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 4 * num_viewports;
   uint32_t offset;
   uint32_t *state;

   ILO_GPE_VALID_GEN(gpe, 6, 6);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 193:
    *
    *     "The viewport-related state is stored as an array of up to 16
    *      elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   state = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
         state_len, state_align, &offset);

   ilo_gpe_gen6_fill_CLIP_VIEWPORT(gpe,
         viewports, num_viewports, state, state_len);

   return offset;
}
3062
/**
 * Reserve space for CC_VIEWPORT state (GEN6 and GEN7), fill it from
 * \p viewports, and return the offset reported by ilo_cp_steal_ptr().
 *
 * \p num_viewports must be in [1, 16].
 */
static uint32_t
gen6_emit_CC_VIEWPORT(const struct ilo_gpe *gpe,
                      const struct pipe_viewport_state *viewports,
                      int num_viewports,
                      struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 2 * num_viewports;
   uint32_t offset;
   uint32_t *state;

   ILO_GPE_VALID_GEN(gpe, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
    *
    *     "The viewport state is stored as an array of up to 16 elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   state = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
         state_len, state_align, &offset);

   gen6_fill_CC_VIEWPORT(gpe, viewports, num_viewports, state, state_len);

   return offset;
}
3089
3090 static uint32_t
3091 gen6_emit_COLOR_CALC_STATE(const struct ilo_gpe *gpe,
3092 const struct pipe_stencil_ref *stencil_ref,
3093 float alpha_ref,
3094 const struct pipe_blend_color *blend_color,
3095 struct ilo_cp *cp)
3096 {
3097 const int state_align = 64 / 4;
3098 const int state_len = 6;
3099 uint32_t state_offset, *dw;
3100
3101 ILO_GPE_VALID_GEN(gpe, 6, 7);
3102
3103 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
3104 state_len, state_align, &state_offset);
3105
3106 dw[0] = stencil_ref->ref_value[0] << 24 |
3107 stencil_ref->ref_value[1] << 16 |
3108 BRW_ALPHATEST_FORMAT_UNORM8;
3109 dw[1] = float_to_ubyte(alpha_ref);
3110 dw[2] = fui(blend_color->color[0]);
3111 dw[3] = fui(blend_color->color[1]);
3112 dw[4] = fui(blend_color->color[2]);
3113 dw[5] = fui(blend_color->color[3]);
3114
3115 return state_offset;
3116 }
3117
3118 static int
3119 gen6_blend_factor_dst_alpha_forced_one(int factor)
3120 {
3121 switch (factor) {
3122 case BRW_BLENDFACTOR_DST_ALPHA:
3123 return BRW_BLENDFACTOR_ONE;
3124 case BRW_BLENDFACTOR_INV_DST_ALPHA:
3125 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
3126 return BRW_BLENDFACTOR_ZERO;
3127 default:
3128 return factor;
3129 }
3130 }
3131
3132 static uint32_t
3133 gen6_emit_BLEND_STATE(const struct ilo_gpe *gpe,
3134 const struct pipe_blend_state *blend,
3135 const struct pipe_framebuffer_state *framebuffer,
3136 const struct pipe_alpha_state *alpha,
3137 struct ilo_cp *cp)
3138 {
3139 const int state_align = 64 / 4;
3140 int state_len;
3141 uint32_t state_offset, *dw;
3142 int num_targets, i;
3143
3144 ILO_GPE_VALID_GEN(gpe, 6, 7);
3145
3146 /*
3147 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
3148 *
3149 * "The blend state is stored as an array of up to 8 elements..."
3150 */
3151 num_targets = framebuffer->nr_cbufs;
3152 assert(num_targets <= 8);
3153
3154 if (!num_targets) {
3155 if (!alpha->enabled)
3156 return 0;
3157 /* to be able to reference alpha func */
3158 num_targets = 1;
3159 }
3160
3161 state_len = 2 * num_targets;
3162
3163 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
3164 state_len, state_align, &state_offset);
3165
3166 for (i = 0; i < num_targets; i++) {
3167 const int target = (blend->independent_blend_enable) ? i : 0;
3168 const struct pipe_rt_blend_state *rt = &blend->rt[target];
3169 const int num_samples = (target < framebuffer->nr_cbufs) ?
3170 framebuffer->cbufs[target]->texture->nr_samples : 1;
3171 const struct util_format_description *format_desc =
3172 (target < framebuffer->nr_cbufs) ?
3173 util_format_description(framebuffer->cbufs[target]->format) : NULL;
3174 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
3175
3176 rt_is_unorm = true;
3177 rt_is_pure_integer = false;
3178 rt_dst_alpha_forced_one = false;
3179
3180 if (format_desc) {
3181 int ch;
3182
3183 switch (format_desc->format) {
3184 case PIPE_FORMAT_B8G8R8X8_UNORM:
3185 /* force alpha to one when the HW format has alpha */
3186 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
3187 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
3188 rt_dst_alpha_forced_one = true;
3189 break;
3190 default:
3191 break;
3192 }
3193
3194 for (ch = 0; ch < 4; ch++) {
3195 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
3196 continue;
3197
3198 if (format_desc->channel[ch].pure_integer) {
3199 rt_is_unorm = false;
3200 rt_is_pure_integer = true;
3201 break;
3202 }
3203
3204 if (!format_desc->channel[ch].normalized ||
3205 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
3206 rt_is_unorm = false;
3207 }
3208 }
3209
3210 dw[0] = 0;
3211 dw[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 | 0x3;
3212
3213 /*
3214 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3215 *
3216 * "* Color Buffer Blending and Logic Ops must not be enabled
3217 * simultaneously, or behavior is UNDEFINED.
3218 *
3219 * * Logic Ops are only supported on *_UNORM surfaces (excluding
3220 * _SRGB variants), otherwise Logic Ops must be DISABLED."
3221 *
3222 * Since blend->logicop_enable takes precedence over rt->blend_enable,
3223 * and logicop is ignored for non-UNORM color buffers, no special care
3224 * is needed.
3225 */
3226 if (blend->logicop_enable) {
3227 if (rt_is_unorm) {
3228 dw[1] |= 1 << 22 |
3229 gen6_translate_pipe_logicop(blend->logicop_func) << 18;
3230 }
3231 }
3232 else if (rt->blend_enable && !rt_is_pure_integer) {
3233 int rgb_src, rgb_dst, a_src, a_dst;
3234
3235 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
3236 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
3237 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
3238 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
3239
3240 if (rt_dst_alpha_forced_one) {
3241 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
3242 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
3243 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
3244 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
3245 }
3246
3247 dw[0] |= 1 << 31 |
3248 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
3249 a_src << 20 |
3250 a_dst << 15 |
3251 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
3252 rgb_src << 5 |
3253 rgb_dst;
3254
3255 if (rt->rgb_func != rt->alpha_func ||
3256 rgb_src != a_src ||
3257 rgb_dst != a_dst)
3258 dw[0] |= 1 << 30;
3259 }
3260
3261 /*
3262 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
3263 *
3264 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
3265 * Dither both must be disabled."
3266 *
3267 * There is no such limitation on GEN7, or for AlphaToOne. But GL
3268 * requires that anyway.
3269 */
3270 if (num_samples > 1) {
3271 if (blend->alpha_to_coverage)
3272 dw[1] |= 1 << 31;
3273
3274 if (blend->alpha_to_one) {
3275 const bool dual_blend =
3276 (!blend->logicop_enable && rt->blend_enable &&
3277 util_blend_state_is_dual(blend, target));
3278
3279 /*
3280 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
3281 *
3282 * "If Dual Source Blending is enabled, this bit (AlphaToOne
3283 * Enable) must be disabled."
3284 */
3285 if (!dual_blend)
3286 dw[1] |= 1 << 30;
3287 }
3288
3289 if (gpe->gen >= ILO_GEN(7))
3290 dw[1] |= 1 << 29;
3291 }
3292
3293 if (!(rt->colormask & PIPE_MASK_A))
3294 dw[1] |= 1 << 27;
3295 if (!(rt->colormask & PIPE_MASK_R))
3296 dw[1] |= 1 << 26;
3297 if (!(rt->colormask & PIPE_MASK_G))
3298 dw[1] |= 1 << 25;
3299 if (!(rt->colormask & PIPE_MASK_B))
3300 dw[1] |= 1 << 24;
3301
3302 /*
3303 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
3304 *
3305 * "Alpha Test can only be enabled if Pixel Shader outputs a float
3306 * alpha value."
3307 */
3308 if (alpha->enabled && !rt_is_pure_integer) {
3309 dw[1] |= 1 << 16 |
3310 gen6_translate_dsa_func(alpha->func) << 13;
3311 }
3312
3313 if (blend->dither)
3314 dw[1] |= 1 << 12;
3315
3316 dw += 2;
3317 }
3318
3319 return state_offset;
3320 }
3321
3322 static uint32_t
3323 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_gpe *gpe,
3324 const struct pipe_depth_stencil_alpha_state *dsa,
3325 struct ilo_cp *cp)
3326 {
3327 const int state_align = 64 / 4;
3328 const int state_len = 3;
3329 uint32_t state_offset, *dw;
3330
3331 ILO_GPE_VALID_GEN(gpe, 6, 7);
3332
3333 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
3334 state_len, state_align, &state_offset);
3335
3336 /*
3337 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
3338 *
3339 * "If the Depth Buffer is either undefined or does not have a surface
3340 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
3341 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
3342 *
3343 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
3344 *
3345 * "This field (Stencil Test Enable) cannot be enabled if
3346 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
3347 *
3348 * TODO We do not check these yet.
3349 */
3350 if (dsa->stencil[0].enabled) {
3351 const struct pipe_stencil_state *stencil = &dsa->stencil[0];
3352
3353 dw[0] = 1 << 31 |
3354 gen6_translate_dsa_func(stencil->func) << 28 |
3355 gen6_translate_pipe_stencil_op(stencil->fail_op) << 25 |
3356 gen6_translate_pipe_stencil_op(stencil->zfail_op) << 22 |
3357 gen6_translate_pipe_stencil_op(stencil->zpass_op) << 19;
3358 if (stencil->writemask)
3359 dw[0] |= 1 << 18;
3360
3361 dw[1] = stencil->valuemask << 24 |
3362 stencil->writemask << 16;
3363
3364 if (dsa->stencil[1].enabled) {
3365 stencil = &dsa->stencil[1];
3366
3367 dw[0] |= 1 << 15 |
3368 gen6_translate_dsa_func(stencil->func) << 12 |
3369 gen6_translate_pipe_stencil_op(stencil->fail_op) << 9 |
3370 gen6_translate_pipe_stencil_op(stencil->zfail_op) << 6 |
3371 gen6_translate_pipe_stencil_op(stencil->zpass_op) << 3;
3372 if (stencil->writemask)
3373 dw[0] |= 1 << 18;
3374
3375 dw[1] |= stencil->valuemask << 8 |
3376 stencil->writemask;
3377 }
3378 }
3379 else {
3380 dw[0] = 0;
3381 dw[1] = 0;
3382 }
3383
3384 /*
3385 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
3386 *
3387 * "Enabling the Depth Test function without defining a Depth Buffer is
3388 * UNDEFINED."
3389 *
3390 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
3391 *
3392 * "A Depth Buffer must be defined before enabling writes to it, or
3393 * operation is UNDEFINED."
3394 *
3395 * TODO We do not check these yet.
3396 */
3397 dw[2] = dsa->depth.enabled << 31 |
3398 dsa->depth.writemask << 26;
3399 if (dsa->depth.enabled)
3400 dw[2] |= gen6_translate_dsa_func(dsa->depth.func) << 27;
3401 else
3402 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
3403
3404 return state_offset;
3405 }
3406
3407 static uint32_t
3408 gen6_emit_SCISSOR_RECT(const struct ilo_gpe *gpe,
3409 const struct pipe_scissor_state *scissors,
3410 int num_scissors,
3411 struct ilo_cp *cp)
3412 {
3413 const int state_align = 32 / 4;
3414 const int state_len = 2 * num_scissors;
3415 uint32_t state_offset, *dw;
3416 int i;
3417
3418 ILO_GPE_VALID_GEN(gpe, 6, 7);
3419
3420 /*
3421 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
3422 *
3423 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
3424 * stored as an array of up to 16 elements..."
3425 */
3426 assert(num_scissors && num_scissors <= 16);
3427
3428 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
3429 state_len, state_align, &state_offset);
3430
3431 for (i = 0; i < num_scissors; i++) {
3432 if (scissors[i].minx < scissors[i].maxx &&
3433 scissors[i].miny < scissors[i].maxy) {
3434 dw[0] = scissors[i].miny << 16 | scissors[i].minx;
3435 dw[1] = (scissors[i].maxy - 1) << 16 | (scissors[i].maxx - 1);
3436 }
3437 else {
3438 /* we have to make min greater than max as they are both inclusive */
3439 dw[0] = 1 << 16 | 1;
3440 dw[1] = 0;
3441 }
3442
3443 dw += 2;
3444 }
3445
3446 return state_offset;
3447 }
3448
/**
 * Copy \p num_surface_states dwords from \p surface_states into a
 * BINDING_TABLE_STATE stolen from \p cp.
 *
 * \return 0 when the table is empty; otherwise the state offset reported by
 *         ilo_cp_steal_ptr().
 */
static uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_gpe *gpe,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   uint32_t state_offset;
   uint32_t *table;

   ILO_GPE_VALID_GEN(gpe, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* nothing to upload */
   if (num_surface_states == 0)
      return 0;

   /* one dword per entry */
   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         num_surface_states, state_align, &state_offset);

   memcpy(table, surface_states,
         num_surface_states * sizeof(*surface_states));

   return state_offset;
}
3478
3479 static void
3480 gen6_fill_null_SURFACE_STATE(const struct ilo_gpe *gpe,
3481 unsigned width, unsigned height,
3482 unsigned depth, unsigned lod,
3483 uint32_t *dw, int num_dwords)
3484 {
3485 ILO_GPE_VALID_GEN(gpe, 6, 6);
3486 assert(num_dwords == 6);
3487
3488 /*
3489 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
3490 *
3491 * "A null surface will be used in instances where an actual surface is
3492 * not bound. When a write message is generated to a null surface, no
3493 * actual surface is written to. When a read message (including any
3494 * sampling engine message) is generated to a null surface, the result
3495 * is all zeros. Note that a null surface type is allowed to be used
3496 * with all messages, even if it is not specificially indicated as
3497 * supported. All of the remaining fields in surface state are ignored
3498 * for null surfaces, with the following exceptions:
3499 *
3500 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
3501 * depth buffer's corresponding state for all render target
3502 * surfaces, including null.
3503 * * Surface Format must be R8G8B8A8_UNORM."
3504 *
3505 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3506 *
3507 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
3508 * true"
3509 */
3510
3511 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
3512 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
3513
3514 dw[1] = 0;
3515
3516 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
3517 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
3518 lod << BRW_SURFACE_LOD_SHIFT;
3519
3520 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
3521 BRW_SURFACE_TILED;
3522
3523 dw[4] = 0;
3524 dw[5] = 0;
3525 }
3526
3527 static void
3528 gen6_fill_buffer_SURFACE_STATE(const struct ilo_gpe *gpe,
3529 const struct ilo_resource *res,
3530 unsigned offset, unsigned size,
3531 unsigned struct_size,
3532 enum pipe_format elem_format,
3533 bool is_rt, bool render_cache_rw,
3534 uint32_t *dw, int num_dwords)
3535 {
3536 const int elem_size = util_format_get_blocksize(elem_format);
3537 int width, height, depth, pitch;
3538 int surface_format, num_entries;
3539
3540 ILO_GPE_VALID_GEN(gpe, 6, 6);
3541 assert(num_dwords == 6);
3542
3543 /*
3544 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
3545 * structure in a buffer.
3546 */
3547
3548 surface_format = ilo_translate_color_format(elem_format);
3549
3550 num_entries = size / struct_size;
3551 /* see if there is enough space to fit another element */
3552 if (size % struct_size >= elem_size)
3553 num_entries++;
3554
3555 /*
3556 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3557 *
3558 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
3559 * Address) specifies the base address of first element of the
3560 * surface. The surface is interpreted as a simple array of that
3561 * single element type. The address must be naturally-aligned to the
3562 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
3563 * must be 16-byte aligned).
3564 *
3565 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
3566 * the base address of the first element of the surface, computed in
3567 * software by adding the surface base address to the byte offset of
3568 * the element in the buffer."
3569 */
3570 if (is_rt)
3571 assert(offset % elem_size == 0);
3572
3573 /*
3574 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
3575 *
3576 * "For buffer surfaces, the number of entries in the buffer ranges
3577 * from 1 to 2^27."
3578 */
3579 assert(num_entries >= 1 && num_entries <= 1 << 27);
3580
3581 /*
3582 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3583 *
3584 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
3585 * indicates the size of the structure."
3586 */
3587 pitch = struct_size;
3588
3589 /*
3590 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3591 *
3592 * "If Surface Type is SURFTYPE_BUFFER, this field (Tiled Surface) must
3593 * be false (buffers are supported only in linear memory)"
3594 */
3595 assert(res->tiling == INTEL_TILING_NONE);
3596
3597 pitch--;
3598 num_entries--;
3599 /* bits [6:0] */
3600 width = (num_entries & 0x0000007f);
3601 /* bits [19:7] */
3602 height = (num_entries & 0x000fff80) >> 7;
3603 /* bits [26:20] */
3604 depth = (num_entries & 0x07f00000) >> 20;
3605
3606 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
3607 surface_format << BRW_SURFACE_FORMAT_SHIFT;
3608 if (render_cache_rw)
3609 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
3610
3611 dw[1] = offset;
3612
3613 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
3614 width << BRW_SURFACE_WIDTH_SHIFT;
3615
3616 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
3617 pitch << BRW_SURFACE_PITCH_SHIFT;
3618
3619 dw[4] = 0;
3620 dw[5] = 0;
3621 }
3622
3623 static void
3624 gen6_fill_normal_SURFACE_STATE(const struct ilo_gpe *gpe,
3625 struct ilo_resource *res,
3626 enum pipe_format format,
3627 unsigned first_level, unsigned num_levels,
3628 unsigned first_layer, unsigned num_layers,
3629 bool is_rt, bool render_cache_rw,
3630 uint32_t *dw, int num_dwords)
3631 {
3632 int surface_type, surface_format;
3633 int width, height, depth, pitch, lod;
3634 unsigned layer_offset, x_offset, y_offset;
3635
3636 ILO_GPE_VALID_GEN(gpe, 6, 6);
3637 assert(num_dwords == 6);
3638
3639 surface_type = ilo_gpe_gen6_translate_texture(res->base.target);
3640 assert(surface_type != BRW_SURFACE_BUFFER);
3641
3642 if (is_rt)
3643 surface_format = ilo_translate_render_format(format);
3644 else
3645 surface_format = ilo_translate_texture_format(format);
3646 assert(surface_format >= 0);
3647
3648 width = res->base.width0;
3649 height = res->base.height0;
3650 pitch = res->bo_stride;
3651
3652 switch (res->base.target) {
3653 case PIPE_TEXTURE_3D:
3654 depth = res->base.depth0;
3655 break;
3656 case PIPE_TEXTURE_CUBE:
3657 case PIPE_TEXTURE_CUBE_ARRAY:
3658 /*
3659 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3660 *
3661 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
3662 * range of this field (Depth) is [0,84], indicating the number of
3663 * cube array elements (equal to the number of underlying 2D array
3664 * elements divided by 6). For other surfaces, this field must be
3665 * zero."
3666 */
3667 if (!is_rt) {
3668 assert(num_layers % 6 == 0);
3669 depth = num_layers / 6;
3670 break;
3671 }
3672 assert(num_layers == 1);
3673 /* fall through */
3674 default:
3675 depth = num_layers;
3676 break;
3677 }
3678
3679 /* sanity check the size */
3680 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
3681 switch (surface_type) {
3682 case BRW_SURFACE_1D:
3683 assert(width <= 8192 && height == 1 && depth <= 512);
3684 break;
3685 case BRW_SURFACE_2D:
3686 assert(width <= 8192 && height <= 8192 && depth <= 512);
3687 break;
3688 case BRW_SURFACE_3D:
3689 assert(width <= 2048 && height <= 2048 && depth <= 2048);
3690 break;
3691 case BRW_SURFACE_CUBE:
3692 assert(width <= 8192 && height <= 8192 && depth <= 85);
3693 assert(width == height);
3694 break;
3695 default:
3696 assert(!"unexpected surface type");
3697 break;
3698 }
3699
3700 /*
3701 * Compute the offset to the layer manually.
3702 *
3703 * For rendering, the hardware requires LOD to be the same for all render
3704 * targets and the depth buffer. We need to compute the offset to the
3705 * layer manually and always set LOD to 0.
3706 */
3707 if (is_rt) {
3708 /* we lose the capability for layered rendering */
3709 assert(num_levels == 1 && num_layers == 1);
3710
3711 layer_offset = ilo_resource_get_slice_offset(res,
3712 first_level, first_layer, true, &x_offset, &y_offset);
3713
3714 assert(x_offset % 4 == 0);
3715 assert(y_offset % 2 == 0);
3716 x_offset /= 4;
3717 y_offset /= 2;
3718
3719 /* derive the size for the LOD */
3720 width = u_minify(res->base.width0, first_level);
3721 height = u_minify(res->base.height0, first_level);
3722 if (surface_type == BRW_SURFACE_3D)
3723 depth = u_minify(res->base.depth0, first_level);
3724
3725 first_level = 0;
3726 first_layer = 0;
3727 lod = 0;
3728 }
3729 else {
3730 layer_offset = 0;
3731 x_offset = 0;
3732 y_offset = 0;
3733 lod = num_levels - 1;
3734 }
3735
3736 /*
3737 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3738 *
3739 * "Linear render target surface base addresses must be element-size
3740 * aligned, for non-YUV surface formats, or a multiple of 2
3741 * element-sizes for YUV surface formats. Other linear surfaces have
3742 * no alignment requirements (byte alignment is sufficient.)"
3743 *
3744 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3745 *
3746 * "For linear render target surfaces, the pitch must be a multiple
3747 * of the element size for non-YUV surface formats. Pitch must be a
3748 * multiple of 2 * element size for YUV surface formats."
3749 *
3750 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
3751 *
3752 * "For linear surfaces, this field (X Offset) must be zero"
3753 */
3754 if (res->tiling == INTEL_TILING_NONE) {
3755 if (is_rt) {
3756 const int elem_size = util_format_get_blocksize(format);
3757 assert(layer_offset % elem_size == 0);
3758 assert(pitch % elem_size == 0);
3759 }
3760
3761 assert(!x_offset);
3762 }
3763
3764 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
3765 surface_format << BRW_SURFACE_FORMAT_SHIFT |
3766 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
3767
3768 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
3769 dw[0] |= 1 << 9 |
3770 BRW_SURFACE_CUBEFACE_ENABLES;
3771 }
3772
3773 if (render_cache_rw)
3774 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
3775
3776 dw[1] = layer_offset;
3777
3778 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
3779 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
3780 lod << BRW_SURFACE_LOD_SHIFT;
3781
3782 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
3783 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
3784 ilo_gpe_gen6_translate_winsys_tiling(res->tiling);
3785
3786 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
3787 first_layer << 17 |
3788 (depth - 1) << 8 |
3789 ((res->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
3790 BRW_SURFACE_MULTISAMPLECOUNT_1);
3791
3792 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
3793 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
3794 if (res->valign_4)
3795 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
3796 }
3797
3798 static uint32_t
3799 gen6_emit_SURFACE_STATE(const struct ilo_gpe *gpe,
3800 struct intel_bo *bo, bool for_render,
3801 const uint32_t *dw, int num_dwords,
3802 struct ilo_cp *cp)
3803 {
3804 const int state_align = 32 / 4;
3805 const int state_len = 6;
3806 uint32_t state_offset;
3807 uint32_t read_domains, write_domain;
3808
3809 ILO_GPE_VALID_GEN(gpe, 6, 6);
3810 assert(num_dwords == state_len);
3811
3812 if (for_render) {
3813 read_domains = INTEL_DOMAIN_RENDER;
3814 write_domain = INTEL_DOMAIN_RENDER;
3815 }
3816 else {
3817 read_domains = INTEL_DOMAIN_SAMPLER;
3818 write_domain = 0;
3819 }
3820
3821 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
3822 ilo_cp_write(cp, dw[0]);
3823 ilo_cp_write_bo(cp, dw[1], bo, read_domains, write_domain);
3824 ilo_cp_write(cp, dw[2]);
3825 ilo_cp_write(cp, dw[3]);
3826 ilo_cp_write(cp, dw[4]);
3827 ilo_cp_write(cp, dw[5]);
3828 ilo_cp_end(cp);
3829
3830 return state_offset;
3831 }
3832
3833 static uint32_t
3834 gen6_emit_surf_SURFACE_STATE(const struct ilo_gpe *gpe,
3835 const struct pipe_surface *surface,
3836 struct ilo_cp *cp)
3837 {
3838 struct intel_bo *bo;
3839 uint32_t dw[6];
3840
3841 ILO_GPE_VALID_GEN(gpe, 6, 6);
3842
3843 if (surface && surface->texture) {
3844 struct ilo_resource *res = ilo_resource(surface->texture);
3845
3846 bo = res->bo;
3847
3848 /*
3849 * classic i965 sets render_cache_rw for constant buffers and sol
3850 * surfaces but not render buffers. Why?
3851 */
3852 gen6_fill_normal_SURFACE_STATE(gpe, res, surface->format,
3853 surface->u.tex.level, 1,
3854 surface->u.tex.first_layer,
3855 surface->u.tex.last_layer - surface->u.tex.first_layer + 1,
3856 true, true, dw, Elements(dw));
3857 }
3858 else {
3859 bo = NULL;
3860 gen6_fill_null_SURFACE_STATE(gpe,
3861 surface->width, surface->height, 1, 0, dw, Elements(dw));
3862 }
3863
3864 return gen6_emit_SURFACE_STATE(gpe, bo, true, dw, Elements(dw), cp);
3865 }
3866
3867 static uint32_t
3868 gen6_emit_view_SURFACE_STATE(const struct ilo_gpe *gpe,
3869 const struct pipe_sampler_view *view,
3870 struct ilo_cp *cp)
3871 {
3872 struct ilo_resource *res = ilo_resource(view->texture);
3873 uint32_t dw[6];
3874
3875 ILO_GPE_VALID_GEN(gpe, 6, 6);
3876
3877 gen6_fill_normal_SURFACE_STATE(gpe, res, view->format,
3878 view->u.tex.first_level,
3879 view->u.tex.last_level - view->u.tex.first_level + 1,
3880 view->u.tex.first_layer,
3881 view->u.tex.last_layer - view->u.tex.first_layer + 1,
3882 false, false, dw, Elements(dw));
3883
3884 return gen6_emit_SURFACE_STATE(gpe, res->bo, false, dw, Elements(dw), cp);
3885 }
3886
3887 static uint32_t
3888 gen6_emit_cbuf_SURFACE_STATE(const struct ilo_gpe *gpe,
3889 const struct pipe_constant_buffer *cbuf,
3890 struct ilo_cp *cp)
3891 {
3892 const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
3893 struct ilo_resource *res = ilo_resource(cbuf->buffer);
3894 uint32_t dw[6];
3895
3896 ILO_GPE_VALID_GEN(gpe, 6, 6);
3897
3898 gen6_fill_buffer_SURFACE_STATE(gpe, res,
3899 cbuf->buffer_offset, cbuf->buffer_size,
3900 util_format_get_blocksize(elem_format), elem_format,
3901 false, false, dw, Elements(dw));
3902
3903 return gen6_emit_SURFACE_STATE(gpe, res->bo, false, dw, Elements(dw), cp);
3904 }
3905
3906 static uint32_t
3907 gen6_emit_so_SURFACE_STATE(const struct ilo_gpe *gpe,
3908 const struct pipe_stream_output_target *so,
3909 const struct pipe_stream_output_info *so_info,
3910 int so_index,
3911 struct ilo_cp *cp)
3912 {
3913 struct ilo_resource *res = ilo_resource(so->buffer);
3914 unsigned bo_offset, struct_size;
3915 enum pipe_format elem_format;
3916 uint32_t dw[6];
3917
3918 ILO_GPE_VALID_GEN(gpe, 6, 6);
3919
3920 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
3921 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
3922
3923 switch (so_info->output[so_index].num_components) {
3924 case 1:
3925 elem_format = PIPE_FORMAT_R32_FLOAT;
3926 break;
3927 case 2:
3928 elem_format = PIPE_FORMAT_R32G32_FLOAT;
3929 break;
3930 case 3:
3931 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
3932 break;
3933 case 4:
3934 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
3935 break;
3936 default:
3937 assert(!"unexpected SO components length");
3938 elem_format = PIPE_FORMAT_R32_FLOAT;
3939 break;
3940 }
3941
3942 gen6_fill_buffer_SURFACE_STATE(gpe, res, bo_offset, so->buffer_size,
3943 struct_size, elem_format, false, true, dw, Elements(dw));
3944
3945 return gen6_emit_SURFACE_STATE(gpe, res->bo, false, dw, Elements(dw), cp);
3946 }
3947
3948 static uint32_t
3949 gen6_emit_SAMPLER_STATE(const struct ilo_gpe *gpe,
3950 const struct pipe_sampler_state **samplers,
3951 const struct pipe_sampler_view **sampler_views,
3952 const uint32_t *sampler_border_colors,
3953 int num_samplers,
3954 struct ilo_cp *cp)
3955 {
3956 const int state_align = 32 / 4;
3957 const int state_len = 4 * num_samplers;
3958 uint32_t state_offset, *dw;
3959 int i;
3960
3961 ILO_GPE_VALID_GEN(gpe, 6, 7);
3962
3963 /*
3964 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
3965 *
3966 * "The sampler state is stored as an array of up to 16 elements..."
3967 */
3968 assert(num_samplers <= 16);
3969
3970 if (!num_samplers)
3971 return 0;
3972
3973 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
3974 state_len, state_align, &state_offset);
3975
3976 for (i = 0; i < num_samplers; i++) {
3977 const struct pipe_sampler_state *sampler = samplers[i];
3978 const struct pipe_sampler_view *view = sampler_views[i];
3979 const uint32_t border_color = sampler_border_colors[i];
3980 enum pipe_texture_target target;
3981 int mip_filter, min_filter, mag_filter, max_aniso;
3982 int lod_bias, max_lod, min_lod, base_level;
3983 int wrap_s, wrap_t, wrap_r;
3984 bool clamp_to_edge;
3985
3986 /* there may be holes */
3987 if (!sampler || !view) {
3988 /* disabled sampler */
3989 dw[0] = 1 << 31;
3990 dw[1] = 0;
3991 dw[2] = 0;
3992 dw[3] = 0;
3993 dw += 4;
3994
3995 continue;
3996 }
3997
3998 target = view->texture->target;
3999
4000 /* determine mip/min/mag filters */
4001 mip_filter = gen6_translate_tex_mipfilter(sampler->min_mip_filter);
4002
4003 /*
4004 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
4005 *
4006 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
4007 * surfaces of type SURFTYPE_3D."
4008 */
4009 if (sampler->max_anisotropy && target != PIPE_TEXTURE_3D) {
4010 min_filter = BRW_MAPFILTER_ANISOTROPIC;
4011 mag_filter = BRW_MAPFILTER_ANISOTROPIC;
4012
4013 if (sampler->max_anisotropy >= 2 && sampler->max_anisotropy <= 16)
4014 max_aniso = sampler->max_anisotropy / 2 - 1;
4015 else if (sampler->max_anisotropy > 16)
4016 max_aniso = BRW_ANISORATIO_16;
4017 else
4018 max_aniso = BRW_ANISORATIO_2;
4019 }
4020 else {
4021 min_filter = gen6_translate_tex_filter(sampler->min_img_filter);
4022 mag_filter = gen6_translate_tex_filter(sampler->mag_img_filter);
4023
4024 /* ignored */
4025 max_aniso = 0;
4026 }
4027
4028 /*
4029 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
4030 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
4031 * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
4032 * additionally clamping the texture coordinates to [0.0, 1.0].
4033 *
4034 * The clamping is taken care of in the shaders. There are two filters
4035 * here, but let the minification one has a say.
4036 */
4037 clamp_to_edge = (sampler->min_img_filter == PIPE_TEX_FILTER_NEAREST);
4038
4039 switch (target) {
4040 case PIPE_TEXTURE_CUBE:
4041 /*
4042 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
4043 *
4044 * "When using cube map texture coordinates, only
4045 * TEXCOORDMODE_CLAMP and TEXCOORDMODE_CUBE settings are valid,
4046 * and each TC component must have the same Address Control
4047 * mode."
4048 *
4049 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
4050 *
4051 * "This field (Cube Surface Control Mode) must be set to
4052 * CUBECTRLMODE_PROGRAMMED"
4053 *
4054 * Therefore, we cannot use "Cube Surface Control Mode" for semless
4055 * cube map filtering.
4056 */
4057 if (sampler->seamless_cube_map &&
4058 (sampler->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
4059 sampler->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
4060 wrap_s = BRW_TEXCOORDMODE_CUBE;
4061 wrap_t = BRW_TEXCOORDMODE_CUBE;
4062 wrap_r = BRW_TEXCOORDMODE_CUBE;
4063 }
4064 else {
4065 wrap_s = BRW_TEXCOORDMODE_CLAMP;
4066 wrap_t = BRW_TEXCOORDMODE_CLAMP;
4067 wrap_r = BRW_TEXCOORDMODE_CLAMP;
4068 }
4069 break;
4070 case PIPE_TEXTURE_1D:
4071 wrap_s = gen6_translate_tex_wrap(sampler->wrap_s, clamp_to_edge);
4072 /*
4073 * as noted in the classic i965 driver, the HW may look at these
4074 * values so we need to set them to a safe mode
4075 */
4076 wrap_t = BRW_TEXCOORDMODE_WRAP;
4077 wrap_r = BRW_TEXCOORDMODE_WRAP;
4078 break;
4079 default:
4080 wrap_s = gen6_translate_tex_wrap(sampler->wrap_s, clamp_to_edge);
4081 wrap_t = gen6_translate_tex_wrap(sampler->wrap_t, clamp_to_edge);
4082 wrap_r = gen6_translate_tex_wrap(sampler->wrap_r, clamp_to_edge);
4083 break;
4084 }
4085
4086 /*
4087 * Here is how the hardware calculate per-pixel LOD, from my reading of
4088 * the PRMs:
4089 *
4090 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
4091 * other ways. The number of texels is measured using level
4092 * SurfMinLod.
4093 * 2) Bias is added to LOD.
4094 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
4095 * compared with Base to determine whether magnification or
4096 * minification is needed.
4097 * (if preclamp is disabled, LOD is compared with Base before
4098 * clamping)
4099 * 4) If magnification is needed, or no mipmapping is requested, LOD is
4100 * set to floor(MinLod).
4101 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
4102 *
4103 * With Gallium interface, Base is always zero and view->u.tex.first_level
4104 * specifies SurfMinLod.
4105 *
4106 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
4107 *
4108 * "[DevSNB] Errata: Incorrect behavior is observed in cases where
4109 * the min and mag mode filters are different and SurfMinLOD is
4110 * nonzero. The determination of MagMode uses the following equation
4111 * instead of the one in the above pseudocode: MagMode = (LOD +
4112 * SurfMinLOD - Base <= 0)"
4113 *
4114 * As a way to work around that, we set Base to view->u.tex.first_level
4115 * on GEN6.
4116 */
4117 if (gpe->gen >= ILO_GEN(7)) {
4118 const float scale = 256.0f;
4119
4120 /* [-16.0, 16.0) in S4.8 */
4121 lod_bias = (int)
4122 (CLAMP(sampler->lod_bias, -16.0f, 15.9f) * scale);
4123 lod_bias &= 0x1fff;
4124
4125 base_level = 0;
4126
4127 /* [0.0, 14.0] in U4.8 */
4128 max_lod = (int) (CLAMP(sampler->max_lod, 0.0f, 14.0f) * scale);
4129 min_lod = (int) (CLAMP(sampler->min_lod, 0.0f, 14.0f) * scale);
4130 }
4131 else {
4132 const float scale = 64.0f;
4133
4134 /* [-16.0, 16.0) in S4.6 */
4135 lod_bias = (int)
4136 (CLAMP(sampler->lod_bias, -16.0f, 15.9f) * scale);
4137 lod_bias &= 0x7ff;
4138
4139 base_level = view->u.tex.first_level;
4140
4141 /* [0.0, 13.0] in U4.6 */
4142 max_lod = (int) (CLAMP(sampler->max_lod, 0.0f, 13.0f) * scale);
4143 min_lod = (int) (CLAMP(sampler->min_lod, 0.0f, 13.0f) * scale);
4144 }
4145
4146 /*
4147 * We want LOD to be clamped to determine magnification/minification,
4148 * and get set to zero when it is magnification or when mipmapping is
4149 * disabled. The hardware would set LOD to floor(MinLod) and that is a
4150 * problem when MinLod is greater than or equal to 1.0f.
4151 *
4152 * We know that with Base being zero, it is always minification when
4153 * MinLod is non-zero. To meet our need, we just need to set MinLod to
4154 * zero and set MagFilter to MinFilter when mipmapping is disabled.
4155 */
4156 if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
4157 min_lod = 0;
4158 mag_filter = min_filter;
4159 }
4160
4161 if (!sampler->normalized_coords) {
4162 /* work around a bug in util_blitter */
4163 mip_filter = BRW_MIPFILTER_NONE;
4164
4165 /*
4166 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
4167 *
4168 * "The following state must be set as indicated if this field
4169 * (Non-normalized Coordinate Enable) is enabled:
4170 *
4171 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
4172 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
4173 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
4174 * - Mag Mode Filter must be MAPFILTER_NEAREST or
4175 * MAPFILTER_LINEAR.
4176 * - Min Mode Filter must be MAPFILTER_NEAREST or
4177 * MAPFILTER_LINEAR.
4178 * - Mip Mode Filter must be MIPFILTER_NONE.
4179 * - Min LOD must be 0.
4180 * - Max LOD must be 0.
4181 * - MIP Count must be 0.
4182 * - Surface Min LOD must be 0.
4183 * - Texture LOD Bias must be 0."
4184 */
4185 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
4186 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
4187 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
4188 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
4189 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
4190 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
4191
4192 assert(target == PIPE_TEXTURE_RECT);
4193
4194 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
4195 mag_filter == BRW_MAPFILTER_LINEAR);
4196 assert(min_filter == BRW_MAPFILTER_NEAREST ||
4197 min_filter == BRW_MAPFILTER_LINEAR);
4198 assert(mip_filter == BRW_MIPFILTER_NONE);
4199 }
4200
4201 if (gpe->gen >= ILO_GEN(7)) {
4202 dw[0] = 1 << 28 |
4203 base_level << 22 |
4204 mip_filter << 20 |
4205 mag_filter << 17 |
4206 min_filter << 14 |
4207 lod_bias << 1;
4208
4209 /* enable EWA filtering unconditionally breaks some piglit tests */
4210 if (sampler->max_anisotropy)
4211 dw[0] |= 1;
4212
4213 dw[1] = min_lod << 20 |
4214 max_lod << 8;
4215
4216 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
4217 dw[1] |= gen6_translate_shadow_func(sampler->compare_func) << 1;
4218
4219 assert(!(border_color & 0x1f));
4220 dw[2] = border_color;
4221
4222 dw[3] = max_aniso << 19 |
4223 wrap_s << 6 |
4224 wrap_t << 3 |
4225 wrap_r;
4226
4227 /* round the coordinates for linear filtering */
4228 if (min_filter != BRW_MAPFILTER_NEAREST) {
4229 dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4230 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4231 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4232 }
4233 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4234 dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4235 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4236 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4237 }
4238
4239 if (!sampler->normalized_coords)
4240 dw[3] |= 1 << 10;
4241 }
4242 else {
4243 dw[0] = 1 << 28 |
4244 (min_filter != mag_filter) << 27 |
4245 base_level << 22 |
4246 mip_filter << 20 |
4247 mag_filter << 17 |
4248 min_filter << 14 |
4249 lod_bias << 3;
4250
4251 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
4252 dw[0] |= gen6_translate_shadow_func(sampler->compare_func);
4253
4254 dw[1] = min_lod << 22 |
4255 max_lod << 12 |
4256 wrap_s << 6 |
4257 wrap_t << 3 |
4258 wrap_r;
4259
4260 assert(!(border_color & 0x1f));
4261 dw[2] = border_color;
4262
4263 dw[3] = max_aniso << 19;
4264
4265 /* round the coordinates for linear filtering */
4266 if (min_filter != BRW_MAPFILTER_NEAREST) {
4267 dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4268 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4269 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4270 }
4271 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4272 dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4273 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4274 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4275 }
4276
4277 if (!sampler->normalized_coords)
4278 dw[3] |= 1;
4279 }
4280
4281 dw += 4;
4282 }
4283
4284 return state_offset;
4285 }
4286
4287 static uint32_t
4288 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_gpe *gpe,
4289 const union pipe_color_union *color,
4290 struct ilo_cp *cp)
4291 {
4292 const int state_align = 32 / 4;
4293 const int state_len = 12;
4294 uint32_t state_offset, *dw;
4295 float rgba[4] = {
4296 color->f[0], color->f[1], color->f[2], color->f[3],
4297 };
4298
4299 ILO_GPE_VALID_GEN(gpe, 6, 6);
4300
4301 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
4302 state_len, state_align, &state_offset);
4303
4304 /*
4305 * This state is not documented in the Sandy Bridge PRM, but in the
4306 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
4307 */
4308
4309 /* IEEE_FP */
4310 dw[1] = fui(rgba[0]);
4311 dw[2] = fui(rgba[1]);
4312 dw[3] = fui(rgba[2]);
4313 dw[4] = fui(rgba[3]);
4314
4315 /* FLOAT_16 */
4316 dw[5] = util_float_to_half(rgba[0]) |
4317 util_float_to_half(rgba[1]) << 16;
4318 dw[6] = util_float_to_half(rgba[2]) |
4319 util_float_to_half(rgba[3]) << 16;
4320
4321 /* clamp to [-1.0f, 1.0f] */
4322 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
4323 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
4324 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
4325 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
4326
4327 /* SNORM16 */
4328 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
4329 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
4330 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
4331 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
4332
4333 /* SNORM8 */
4334 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
4335 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
4336 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
4337 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
4338
4339 /* clamp to [0.0f, 1.0f] */
4340 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
4341 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
4342 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
4343 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
4344
4345 /* UNORM8 */
4346 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
4347 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
4348 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
4349 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
4350
4351 /* UNORM16 */
4352 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
4353 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
4354 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
4355 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
4356
4357 return state_offset;
4358 }
4359
/**
 * Reserve space for a push constant buffer of \p size bytes.
 *
 * The reservation is rounded up to a multiple of 32 bytes and the bytes past
 * \p size are cleared; the first \p size bytes are left for the caller to
 * fill through \p pcb.
 *
 * \param gpe   GPE; must be GEN6 or GEN7 (checked by ILO_GPE_VALID_GEN)
 * \param size  requested size in bytes
 * \param pcb   if non-NULL, receives the pointer to the reserved space
 * \param cp    command parser the space is taken from
 * \return the state offset reported by ilo_cp_steal_ptr()
 */
static uint32_t
gen6_emit_push_constant_buffer(const struct ilo_gpe *gpe,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * VS, GS, FS, and CS push constant buffers all have to start on a 32-byte
    * boundary, and the hardware takes their sizes in 256-bit units.  Both
    * values below are expressed in 4-byte units.
    */
   const int alignment = 32 / 4;
   const int length = align(size, 32) / 4;
   const int padding = length * 4 - size;
   uint32_t offset;
   char *base;

   ILO_GPE_VALID_GEN(gpe, 6, 7);

   base = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         length, alignment, &offset);

   /* clear whatever the rounding added beyond the requested size */
   if (padding > 0)
      memset(base + size, 0, padding);

   if (pcb != NULL)
      *pcb = base;

   return offset;
}
4388
4389 static int
4390 gen6_estimate_command_size(const struct ilo_gpe *gpe,
4391 enum ilo_gpe_gen6_command cmd,
4392 int arg)
4393 {
4394 static const struct {
4395 int header;
4396 int body;
4397 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
4398 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
4399 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
4400 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
4401 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
4402 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
4403 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
4404 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
4405 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
4406 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
4407 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
4408 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
4409 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
4410 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
4411 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
4412 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
4413 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
4414 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
4415 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
4416 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
4417 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
4418 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
4419 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
4420 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
4421 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
4422 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
4423 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
4424 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
4425 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
4426 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
4427 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
4428 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
4429 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
4430 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
4431 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
4432 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
4433 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
4434 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
4435 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
4436 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
4437 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
4438 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
4439 };
4440 const int header = gen6_command_size_table[cmd].header;
4441 const int body = gen6_command_size_table[arg].body;
4442 const int count = arg;
4443
4444 ILO_GPE_VALID_GEN(gpe, 6, 6);
4445 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
4446
4447 return (likely(count)) ? header + body * count : 0;
4448 }
4449
4450 static int
4451 gen6_estimate_state_size(const struct ilo_gpe *gpe,
4452 enum ilo_gpe_gen6_state state,
4453 int arg)
4454 {
4455 static const struct {
4456 int alignment;
4457 int body;
4458 bool is_array;
4459 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
4460 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
4461 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
4462 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
4463 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
4464 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
4465 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
4466 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
4467 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
4468 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
4469 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
4470 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
4471 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
4472 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
4473 };
4474 const int alignment = gen6_state_size_table[state].alignment;
4475 const int body = gen6_state_size_table[state].body;
4476 const bool is_array = gen6_state_size_table[state].is_array;
4477 const int count = arg;
4478 int estimate;
4479
4480 ILO_GPE_VALID_GEN(gpe, 6, 6);
4481 assert(state < ILO_GPE_GEN6_STATE_COUNT);
4482
4483 if (likely(count)) {
4484 if (is_array) {
4485 estimate = (alignment - 1) + body * count;
4486 }
4487 else {
4488 estimate = (alignment - 1) + body;
4489 /* all states are aligned */
4490 if (count > 1)
4491 estimate += util_align_npot(body, alignment) * (count - 1);
4492 }
4493 }
4494 else {
4495 estimate = 0;
4496 }
4497
4498 return estimate;
4499 }
4500
4501 static const struct ilo_gpe_gen6 gen6_gpe = {
4502 .estimate_command_size = gen6_estimate_command_size,
4503 .estimate_state_size = gen6_estimate_state_size,
4504
4505 #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
4506 GEN6_SET(STATE_BASE_ADDRESS),
4507 GEN6_SET(STATE_SIP),
4508 GEN6_SET(3DSTATE_VF_STATISTICS),
4509 GEN6_SET(PIPELINE_SELECT),
4510 GEN6_SET(MEDIA_VFE_STATE),
4511 GEN6_SET(MEDIA_CURBE_LOAD),
4512 GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
4513 GEN6_SET(MEDIA_GATEWAY_STATE),
4514 GEN6_SET(MEDIA_STATE_FLUSH),
4515 GEN6_SET(MEDIA_OBJECT_WALKER),
4516 GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
4517 GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
4518 GEN6_SET(3DSTATE_URB),
4519 GEN6_SET(3DSTATE_VERTEX_BUFFERS),
4520 GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
4521 GEN6_SET(3DSTATE_INDEX_BUFFER),
4522 GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
4523 GEN6_SET(3DSTATE_CC_STATE_POINTERS),
4524 GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
4525 GEN6_SET(3DSTATE_VS),
4526 GEN6_SET(3DSTATE_GS),
4527 GEN6_SET(3DSTATE_CLIP),
4528 GEN6_SET(3DSTATE_SF),
4529 GEN6_SET(3DSTATE_WM),
4530 GEN6_SET(3DSTATE_CONSTANT_VS),
4531 GEN6_SET(3DSTATE_CONSTANT_GS),
4532 GEN6_SET(3DSTATE_CONSTANT_PS),
4533 GEN6_SET(3DSTATE_SAMPLE_MASK),
4534 GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
4535 GEN6_SET(3DSTATE_DEPTH_BUFFER),
4536 GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
4537 GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
4538 GEN6_SET(3DSTATE_LINE_STIPPLE),
4539 GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
4540 GEN6_SET(3DSTATE_GS_SVB_INDEX),
4541 GEN6_SET(3DSTATE_MULTISAMPLE),
4542 GEN6_SET(3DSTATE_STENCIL_BUFFER),
4543 GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
4544 GEN6_SET(3DSTATE_CLEAR_PARAMS),
4545 GEN6_SET(PIPE_CONTROL),
4546 GEN6_SET(3DPRIMITIVE),
4547 GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
4548 GEN6_SET(SF_VIEWPORT),
4549 GEN6_SET(CLIP_VIEWPORT),
4550 GEN6_SET(CC_VIEWPORT),
4551 GEN6_SET(COLOR_CALC_STATE),
4552 GEN6_SET(BLEND_STATE),
4553 GEN6_SET(DEPTH_STENCIL_STATE),
4554 GEN6_SET(SCISSOR_RECT),
4555 GEN6_SET(BINDING_TABLE_STATE),
4556 GEN6_SET(surf_SURFACE_STATE),
4557 GEN6_SET(view_SURFACE_STATE),
4558 GEN6_SET(cbuf_SURFACE_STATE),
4559 GEN6_SET(so_SURFACE_STATE),
4560 GEN6_SET(SAMPLER_STATE),
4561 GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
4562 GEN6_SET(push_constant_buffer),
4563 #undef GEN6_SET
4564 };
4565
4566 const struct ilo_gpe_gen6 *
4567 ilo_gpe_gen6_get(void)
4568 {
4569 return &gen6_gpe;
4570 }