ilo: construct SCISSOR_RECT in set_scissor_states()
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
32
33 #include "ilo_context.h"
34 #include "ilo_cp.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
40
41 /**
42 * Translate winsys tiling to hardware tiling.
43 */
44 int
45 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
46 {
47 switch (tiling) {
48 case INTEL_TILING_NONE:
49 return 0;
50 case INTEL_TILING_X:
51 return BRW_SURFACE_TILED;
52 case INTEL_TILING_Y:
53 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
54 default:
55 assert(!"unknown tiling");
56 return 0;
57 }
58 }
59
60 /**
61 * Translate a pipe primitive type to the matching hardware primitive type.
62 */
63 int
64 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
65 {
66 static const int prim_mapping[PIPE_PRIM_MAX] = {
67 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
68 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
69 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
70 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
71 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
72 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
73 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
74 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
75 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
76 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
77 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
78 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
79 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
80 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
81 };
82
83 assert(prim_mapping[prim]);
84
85 return prim_mapping[prim];
86 }
87
88 /**
89 * Translate a pipe texture target to the matching hardware surface type.
90 */
91 int
92 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
93 {
94 switch (target) {
95 case PIPE_BUFFER:
96 return BRW_SURFACE_BUFFER;
97 case PIPE_TEXTURE_1D:
98 case PIPE_TEXTURE_1D_ARRAY:
99 return BRW_SURFACE_1D;
100 case PIPE_TEXTURE_2D:
101 case PIPE_TEXTURE_RECT:
102 case PIPE_TEXTURE_2D_ARRAY:
103 return BRW_SURFACE_2D;
104 case PIPE_TEXTURE_3D:
105 return BRW_SURFACE_3D;
106 case PIPE_TEXTURE_CUBE:
107 case PIPE_TEXTURE_CUBE_ARRAY:
108 return BRW_SURFACE_CUBE;
109 default:
110 assert(!"unknown texture target");
111 return BRW_SURFACE_BUFFER;
112 }
113 }
114
115 /**
116 * Translate a depth/stencil pipe format to the matching hardware
117 * format. Return -1 on errors.
118 */
119 static int
120 gen6_translate_depth_format(enum pipe_format format)
121 {
122 switch (format) {
123 case PIPE_FORMAT_Z16_UNORM:
124 return BRW_DEPTHFORMAT_D16_UNORM;
125 case PIPE_FORMAT_Z32_FLOAT:
126 return BRW_DEPTHFORMAT_D32_FLOAT;
127 case PIPE_FORMAT_Z24X8_UNORM:
128 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
130 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
132 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
133 default:
134 return -1;
135 }
136 }
137
138 /**
139 * Translate a pipe logicop to the matching hardware logicop.
140 */
141 static int
142 gen6_translate_pipe_logicop(unsigned logicop)
143 {
144 switch (logicop) {
145 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
146 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
147 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
148 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
149 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
150 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
151 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
152 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
153 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
154 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
155 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
156 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
157 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
158 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
159 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
160 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
161 default:
162 assert(!"unknown logicop function");
163 return BRW_LOGICOPFUNCTION_CLEAR;
164 }
165 }
166
167 /**
168 * Translate a pipe blend function to the matching hardware blend function.
169 */
170 static int
171 gen6_translate_pipe_blend(unsigned blend)
172 {
173 switch (blend) {
174 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
175 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
176 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
177 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
178 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
179 default:
180 assert(!"unknown blend function");
181 return BRW_BLENDFUNCTION_ADD;
182 };
183 }
184
185 /**
186 * Translate a pipe blend factor to the matching hardware blend factor.
187 */
188 static int
189 gen6_translate_pipe_blendfactor(unsigned blendfactor)
190 {
191 switch (blendfactor) {
192 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
193 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
194 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
195 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
196 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
198 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
199 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
200 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
201 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
202 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
203 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
204 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
205 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
206 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
207 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
208 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
209 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
210 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
211 default:
212 assert(!"unknown blend factor");
213 return BRW_BLENDFACTOR_ONE;
214 };
215 }
216
217 /**
218 * Translate a pipe stencil op to the matching hardware stencil op.
219 */
220 static int
221 gen6_translate_pipe_stencil_op(unsigned stencil_op)
222 {
223 switch (stencil_op) {
224 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
225 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
226 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
227 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
228 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
229 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
230 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
231 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
232 default:
233 assert(!"unknown stencil op");
234 return BRW_STENCILOP_KEEP;
235 }
236 }
237
238 /**
239 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
240 */
241 static int
242 gen6_translate_tex_mipfilter(unsigned filter)
243 {
244 switch (filter) {
245 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
246 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
247 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
248 default:
249 assert(!"unknown mipfilter");
250 return BRW_MIPFILTER_NONE;
251 }
252 }
253
254 /**
255 * Translate a pipe texture filter to the matching hardware mapfilter.
256 */
257 static int
258 gen6_translate_tex_filter(unsigned filter)
259 {
260 switch (filter) {
261 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
262 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
263 default:
264 assert(!"unknown sampler filter");
265 return BRW_MAPFILTER_NEAREST;
266 }
267 }
268
269 /**
270 * Translate a pipe texture coordinate wrapping mode to the matching hardware
271 * wrapping mode.
272 */
273 static int
274 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
275 {
276 /* clamp to edge or border? */
277 if (wrap == PIPE_TEX_WRAP_CLAMP) {
278 wrap = (clamp_to_edge) ?
279 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
280 }
281
282 switch (wrap) {
283 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
284 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
285 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
286 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
287 case PIPE_TEX_WRAP_CLAMP:
288 case PIPE_TEX_WRAP_MIRROR_CLAMP:
289 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
290 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
291 default:
292 assert(!"unknown sampler wrap mode");
293 return BRW_TEXCOORDMODE_WRAP;
294 }
295 }
296
297 /**
298 * Translate a pipe DSA test function to the matching hardware compare
299 * function.
300 */
301 static int
302 gen6_translate_dsa_func(unsigned func)
303 {
304 switch (func) {
305 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
306 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
307 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
308 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
309 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
310 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
311 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
312 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
313 default:
314 assert(!"unknown depth/stencil/alpha test function");
315 return BRW_COMPAREFUNCTION_NEVER;
316 }
317 }
318
319 /**
320 * Translate a pipe shadow compare function to the matching hardware shadow
321 * function.
322 */
323 static int
324 gen6_translate_shadow_func(unsigned func)
325 {
326 /*
327 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
328 * comparison, and 1.0 is returned when the comparison is true.
329 *
330 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
331 * the comparison, and 0.0 is returned when the comparison is true.
332 */
333 switch (func) {
334 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
335 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
336 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
337 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
338 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
339 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
340 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
341 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
342 default:
343 assert(!"unknown shadow compare function");
344 return BRW_PREFILTER_NEVER;
345 }
346 }
347
348 /**
349 * Translate an index size to the matching hardware index format.
350 */
351 static int
352 gen6_translate_index_size(int size)
353 {
354 switch (size) {
355 case 4: return BRW_INDEX_DWORD;
356 case 2: return BRW_INDEX_WORD;
357 case 1: return BRW_INDEX_BYTE;
358 default:
359 assert(!"unknown index size");
360 return BRW_INDEX_BYTE;
361 }
362 }
363
364 static void
365 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
366 struct intel_bo *general_state_bo,
367 struct intel_bo *surface_state_bo,
368 struct intel_bo *dynamic_state_bo,
369 struct intel_bo *indirect_object_bo,
370 struct intel_bo *instruction_bo,
371 uint32_t general_state_size,
372 uint32_t dynamic_state_size,
373 uint32_t indirect_object_size,
374 uint32_t instruction_size,
375 struct ilo_cp *cp)
376 {
377 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
378 const uint8_t cmd_len = 10;
379
380 ILO_GPE_VALID_GEN(dev, 6, 7);
381
382 /* 4K-page aligned */
383 assert(((general_state_size | dynamic_state_size |
384 indirect_object_size | instruction_size) & 0xfff) == 0);
385
386 ilo_cp_begin(cp, cmd_len);
387 ilo_cp_write(cp, cmd | (cmd_len - 2));
388
389 ilo_cp_write_bo(cp, 1, general_state_bo,
390 INTEL_DOMAIN_RENDER,
391 0);
392 ilo_cp_write_bo(cp, 1, surface_state_bo,
393 INTEL_DOMAIN_SAMPLER,
394 0);
395 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
396 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
397 0);
398 ilo_cp_write_bo(cp, 1, indirect_object_bo,
399 0,
400 0);
401 ilo_cp_write_bo(cp, 1, instruction_bo,
402 INTEL_DOMAIN_INSTRUCTION,
403 0);
404
405 if (general_state_size) {
406 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
407 INTEL_DOMAIN_RENDER,
408 0);
409 }
410 else {
411 /* skip range check */
412 ilo_cp_write(cp, 1);
413 }
414
415 if (dynamic_state_size) {
416 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
417 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
418 0);
419 }
420 else {
421 /* skip range check */
422 ilo_cp_write(cp, 0xfffff000 + 1);
423 }
424
425 if (indirect_object_size) {
426 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
427 0,
428 0);
429 }
430 else {
431 /* skip range check */
432 ilo_cp_write(cp, 0xfffff000 + 1);
433 }
434
435 if (instruction_size) {
436 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
437 INTEL_DOMAIN_INSTRUCTION,
438 0);
439 }
440 else {
441 /* skip range check */
442 ilo_cp_write(cp, 1);
443 }
444
445 ilo_cp_end(cp);
446 }
447
/**
 * Emit STATE_SIP.
 *
 * The command is two dwords long: the header, followed by \p sip written
 * unmodified.
 */
static void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * Reserve exactly cmd_len dwords.  The length field (cmd_len - 2) belongs
    * in the header dword, not in the reservation size.
    */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
463
/**
 * Emit 3DSTATE_VF_STATISTICS.
 *
 * This is a single-dword command; \p enable is OR-ed into the low bit of the
 * header.
 */
static void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint8_t cmd_len = 1;
   const uint32_t dw0 = ILO_GPE_CMD(0x1, 0x0, 0x0b) | enable;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_end(cp);
}
478
/**
 * Emit PIPELINE_SELECT.
 *
 * \param pipeline  0x0 selects the 3D pipeline and 0x1 the media pipeline;
 *                  any other value trips an assertion.
 */
static void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 1;
   const int header = ILO_GPE_CMD(0x1, 0x1, 0x04);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   /* single-dword command: the selection lives in the header itself */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header | pipeline);
   ilo_cp_end(cp);
}
496
/**
 * Emit MEDIA_VFE_STATE (GEN6 only).
 *
 * \param max_threads      thread count; programmed as (max_threads - 1)
 * \param num_urb_entries  number of URB entries
 * \param urb_entry_size   URB entry allocation size
 *
 * Scratch space and the scoreboard dwords are written as zero, and the CURBE
 * allocation size is fixed at 480.
 */
static void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 8;
   const uint32_t dw0 = ILO_GPE_CMD(0x2, 0x0, 0x00) | (cmd_len - 2);
   const uint32_t dw2 = ((max_threads - 1) << 16) |
                        (num_urb_entries << 8) |
                        (1 << 7) |   /* Reset Gateway Timer */
                        (1 << 6);    /* Bypass Gateway Control */
   const uint32_t dw4 = (urb_entry_size << 16) | /* URB Entry Allocation Size */
                        480;                     /* CURBE Allocation Size */

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, dw4);
   /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
528
/**
 * Emit MEDIA_CURBE_LOAD (GEN6 only).
 *
 * \param buf   offset of the CURBE data; it must be a multiple of 32
 * \param size  size of the CURBE data; it is rounded up to a multiple of 32
 *              before being written
 */
static void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t dw0 = ILO_GPE_CMD(0x2, 0x0, 0x01) | (cmd_len - 2);
   int aligned_size;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);
   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   aligned_size = align(size, 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, aligned_size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
550
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (GEN6 only).
 *
 * \param offset   offset of the interface descriptors; it must be a multiple
 *                 of 32
 * \param num_ids  number of interface descriptors; the total length written
 *                 is num_ids * 8 dwords, expressed in bytes
 */
static void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t dw0 = ILO_GPE_CMD(0x2, 0x0, 0x02) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, 0); /* MBZ */
   /* every ID has 8 DWords */
   ilo_cp_write(cp, num_ids * 8 * 4);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
571
/**
 * Emit MEDIA_GATEWAY_STATE (GEN6 only).
 *
 * The second dword packs \p id at bit 16, \p byte at bit 8 and
 * \p thread_count at bit 0.
 */
static void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t dw0 = ILO_GPE_CMD(0x2, 0x0, 0x03) | (cmd_len - 2);
   const uint32_t dw1 = (id << 16) | (byte << 8) | thread_count;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
592
/**
 * Emit MEDIA_STATE_FLUSH (GEN6 only).
 *
 * The second dword packs \p thread_count_water_mark at bit 16 and
 * \p barrier_mask at bit 0.
 */
static void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t dw0 = ILO_GPE_CMD(0x2, 0x0, 0x04) | (cmd_len - 2);
   const uint32_t dw1 = (thread_count_water_mark << 16) | barrier_mask;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
613
/**
 * Placeholder for MEDIA_OBJECT_WALKER.
 *
 * The command is not implemented: calling this trips an assertion and emits
 * nothing.
 */
static void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   /* neither argument is used until the command is implemented */
   (void) dev;
   (void) cp;

   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
620
621 static void
622 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
623 uint32_t vs_binding_table,
624 uint32_t gs_binding_table,
625 uint32_t ps_binding_table,
626 struct ilo_cp *cp)
627 {
628 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
629 const uint8_t cmd_len = 4;
630
631 ILO_GPE_VALID_GEN(dev, 6, 6);
632
633 ilo_cp_begin(cp, cmd_len);
634 ilo_cp_write(cp, cmd | (cmd_len - 2) |
635 GEN6_BINDING_TABLE_MODIFY_VS |
636 GEN6_BINDING_TABLE_MODIFY_GS |
637 GEN6_BINDING_TABLE_MODIFY_PS);
638 ilo_cp_write(cp, vs_binding_table);
639 ilo_cp_write(cp, gs_binding_table);
640 ilo_cp_write(cp, ps_binding_table);
641 ilo_cp_end(cp);
642 }
643
644 static void
645 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
646 uint32_t vs_sampler_state,
647 uint32_t gs_sampler_state,
648 uint32_t ps_sampler_state,
649 struct ilo_cp *cp)
650 {
651 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
652 const uint8_t cmd_len = 4;
653
654 ILO_GPE_VALID_GEN(dev, 6, 6);
655
656 ilo_cp_begin(cp, cmd_len);
657 ilo_cp_write(cp, cmd | (cmd_len - 2) |
658 VS_SAMPLER_STATE_CHANGE |
659 GS_SAMPLER_STATE_CHANGE |
660 PS_SAMPLER_STATE_CHANGE);
661 ilo_cp_write(cp, vs_sampler_state);
662 ilo_cp_write(cp, gs_sampler_state);
663 ilo_cp_write(cp, ps_sampler_state);
664 ilo_cp_end(cp);
665 }
666
667 static void
668 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
669 int vs_total_size, int gs_total_size,
670 int vs_entry_size, int gs_entry_size,
671 struct ilo_cp *cp)
672 {
673 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
674 const uint8_t cmd_len = 3;
675 const int row_size = 128; /* 1024 bits */
676 int vs_alloc_size, gs_alloc_size;
677 int vs_num_entries, gs_num_entries;
678
679 ILO_GPE_VALID_GEN(dev, 6, 6);
680
681 /* in 1024-bit URB rows */
682 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
683 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
684
685 /* the valid range is [1, 5] */
686 if (!vs_alloc_size)
687 vs_alloc_size = 1;
688 if (!gs_alloc_size)
689 gs_alloc_size = 1;
690 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
691
692 /* the valid range is [24, 256] in multiples of 4 */
693 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
694 if (vs_num_entries > 256)
695 vs_num_entries = 256;
696 assert(vs_num_entries >= 24);
697
698 /* the valid range is [0, 256] in multiples of 4 */
699 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
700 if (gs_num_entries > 256)
701 gs_num_entries = 256;
702
703 ilo_cp_begin(cp, cmd_len);
704 ilo_cp_write(cp, cmd | (cmd_len - 2));
705 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
706 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
707 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
708 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
709 ilo_cp_end(cp);
710 }
711
712 static void
713 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
714 const struct pipe_vertex_buffer *vbuffers,
715 const int *instance_divisors,
716 uint32_t vbuffer_mask,
717 struct ilo_cp *cp)
718 {
719 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
720 uint8_t cmd_len;
721
722 ILO_GPE_VALID_GEN(dev, 6, 7);
723
724 /*
725 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
726 *
727 * "From 1 to 33 VBs can be specified..."
728 *
729 * Because of the type of vbuffer_mask, this is always the case.
730 */
731 assert(vbuffer_mask <= (1UL << 33));
732
733 if (!vbuffer_mask)
734 return;
735
736 cmd_len = 4 * util_bitcount(vbuffer_mask) + 1;
737
738 ilo_cp_begin(cp, cmd_len);
739 ilo_cp_write(cp, cmd | (cmd_len - 2));
740
741 while (vbuffer_mask) {
742 const int index = u_bit_scan(&vbuffer_mask);
743 const struct pipe_vertex_buffer *vb = &vbuffers[index];
744 const int instance_divisor =
745 (instance_divisors) ? instance_divisors[index] : 0;
746 uint32_t dw;
747
748 dw = index << GEN6_VB0_INDEX_SHIFT;
749
750 if (instance_divisor)
751 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
752 else
753 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
754
755 if (dev->gen >= ILO_GEN(7))
756 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
757
758 /* use null vb if there is no buffer or the stride is out of range */
759 if (vb->buffer && vb->stride <= 2048) {
760 const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
761 const uint32_t start_offset = vb->buffer_offset;
762 const uint32_t end_offset = buf->bo->get_size(buf->bo) - 1;
763
764 dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
765
766 ilo_cp_write(cp, dw);
767 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
768 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
769 ilo_cp_write(cp, instance_divisor);
770 }
771 else {
772 dw |= 1 << 13;
773
774 ilo_cp_write(cp, dw);
775 ilo_cp_write(cp, 0);
776 ilo_cp_write(cp, 0);
777 ilo_cp_write(cp, instance_divisor);
778 }
779 }
780
781 ilo_cp_end(cp);
782 }
783
784 static void
785 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
786 const struct pipe_vertex_element *velements,
787 int num_velements,
788 bool last_velement_edgeflag,
789 bool prepend_generated_ids,
790 struct ilo_cp *cp)
791 {
792 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
793 uint8_t cmd_len;
794 int format, i;
795
796 ILO_GPE_VALID_GEN(dev, 6, 7);
797
798 /*
799 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
800 *
801 * "Up to 34 (DevSNB+) vertex elements are supported."
802 */
803 assert(num_velements + prepend_generated_ids <= 34);
804
805 if (!num_velements && !prepend_generated_ids) {
806 cmd_len = 3;
807 format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
808
809 ilo_cp_begin(cp, cmd_len);
810 ilo_cp_write(cp, cmd | (cmd_len - 2));
811 ilo_cp_write(cp,
812 0 << GEN6_VE0_INDEX_SHIFT |
813 GEN6_VE0_VALID |
814 format << BRW_VE0_FORMAT_SHIFT |
815 0 << BRW_VE0_SRC_OFFSET_SHIFT);
816 ilo_cp_write(cp,
817 BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
818 BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
819 BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
820 BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
821 ilo_cp_end(cp);
822
823 return;
824 }
825
826 cmd_len = 2 * (num_velements + prepend_generated_ids) + 1;
827
828 ilo_cp_begin(cp, cmd_len);
829 ilo_cp_write(cp, cmd | (cmd_len - 2));
830
831 if (prepend_generated_ids) {
832 ilo_cp_write(cp, GEN6_VE0_VALID);
833 ilo_cp_write(cp,
834 BRW_VE1_COMPONENT_STORE_VID << BRW_VE1_COMPONENT_0_SHIFT |
835 BRW_VE1_COMPONENT_STORE_IID << BRW_VE1_COMPONENT_1_SHIFT |
836 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
837 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT);
838 }
839
840 for (i = 0; i < num_velements; i++) {
841 const struct pipe_vertex_element *ve = &velements[i];
842 int comp[4] = {
843 BRW_VE1_COMPONENT_STORE_SRC,
844 BRW_VE1_COMPONENT_STORE_SRC,
845 BRW_VE1_COMPONENT_STORE_SRC,
846 BRW_VE1_COMPONENT_STORE_SRC,
847 };
848 int edgeflag_enable;
849
850 if (last_velement_edgeflag && i == num_velements - 1) {
851 /*
852 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
853 *
854 * "* This bit (Edge Flag Enable) must only be ENABLED on the
855 * last valid VERTEX_ELEMENT structure.
856 *
857 * * When set, Component 0 Control must be set to
858 * VFCOMP_STORE_SRC, and Component 1-3 Control must be set to
859 * VFCOMP_NOSTORE.
860 *
861 * * The Source Element Format must be set to the UINT format.
862 *
863 * * [DevSNB]: Edge Flags are not supported for QUADLIST
864 * primitives. Software may elect to convert QUADLIST
865 * primitives to some set of corresponding edge-flag-supported
866 * primitive types (e.g., POLYGONs) prior to submission to the
867 * 3D pipeline."
868 *
869 * Only a limitied set of primitive types could have Edge Flag Enable
870 * set. The caller should not set last_velement_edgeflag for such
871 * primitive types.
872 */
873 comp[1] = BRW_VE1_COMPONENT_NOSTORE;
874 comp[2] = BRW_VE1_COMPONENT_NOSTORE;
875 comp[3] = BRW_VE1_COMPONENT_NOSTORE;
876
877 switch (ve->src_format) {
878 case PIPE_FORMAT_R32_FLOAT:
879 format = ilo_translate_vertex_format(PIPE_FORMAT_R32_UINT);
880 break;
881 default:
882 assert(ve->src_format == PIPE_FORMAT_R8_UINT);
883 format = ilo_translate_vertex_format(ve->src_format);
884 break;
885 }
886
887 edgeflag_enable = GEN6_VE0_EDGE_FLAG_ENABLE;
888 }
889 else {
890 switch (util_format_get_nr_components(ve->src_format)) {
891 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
892 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
893 case 3: comp[3] = (util_format_is_pure_integer(ve->src_format)) ?
894 BRW_VE1_COMPONENT_STORE_1_INT :
895 BRW_VE1_COMPONENT_STORE_1_FLT;
896 }
897
898 format = ilo_translate_vertex_format(ve->src_format);
899
900 edgeflag_enable = 0;
901 }
902
903 ilo_cp_write(cp,
904 ve->vertex_buffer_index << GEN6_VE0_INDEX_SHIFT |
905 GEN6_VE0_VALID |
906 format << BRW_VE0_FORMAT_SHIFT |
907 edgeflag_enable |
908 ve->src_offset << BRW_VE0_SRC_OFFSET_SHIFT);
909
910 ilo_cp_write(cp,
911 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
912 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
913 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
914 comp[3] << BRW_VE1_COMPONENT_3_SHIFT);
915 }
916
917 ilo_cp_end(cp);
918 }
919
920 static void
921 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
922 const struct pipe_index_buffer *ib,
923 bool enable_cut_index,
924 struct ilo_cp *cp)
925 {
926 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
927 const uint8_t cmd_len = 3;
928 const struct ilo_buffer *buf = ilo_buffer(ib->buffer);
929 uint32_t start_offset, end_offset;
930 int format;
931
932 ILO_GPE_VALID_GEN(dev, 6, 7);
933
934 if (!buf)
935 return;
936
937 format = gen6_translate_index_size(ib->index_size);
938
939 start_offset = ib->offset;
940 /* start_offset must be aligned to index size */
941 if (start_offset % ib->index_size) {
942 /* TODO need a u_upload_mgr to upload the IB to an aligned address */
943 assert(!"unaligned index buffer offset");
944 start_offset -= start_offset % ib->index_size;
945 }
946
947 /* end_offset must also be aligned */
948 end_offset = buf->bo->get_size(buf->bo);
949 end_offset -= (end_offset % ib->index_size);
950 /* it is inclusive */
951 end_offset -= 1;
952
953 ilo_cp_begin(cp, cmd_len);
954 ilo_cp_write(cp, cmd | (cmd_len - 2) |
955 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
956 format << 8);
957 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
958 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
959 ilo_cp_end(cp);
960 }
961
962 static void
963 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
964 uint32_t clip_viewport,
965 uint32_t sf_viewport,
966 uint32_t cc_viewport,
967 struct ilo_cp *cp)
968 {
969 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
970 const uint8_t cmd_len = 4;
971
972 ILO_GPE_VALID_GEN(dev, 6, 6);
973
974 ilo_cp_begin(cp, cmd_len);
975 ilo_cp_write(cp, cmd | (cmd_len - 2) |
976 GEN6_CLIP_VIEWPORT_MODIFY |
977 GEN6_SF_VIEWPORT_MODIFY |
978 GEN6_CC_VIEWPORT_MODIFY);
979 ilo_cp_write(cp, clip_viewport);
980 ilo_cp_write(cp, sf_viewport);
981 ilo_cp_write(cp, cc_viewport);
982 ilo_cp_end(cp);
983 }
984
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (GEN6 only).
 *
 * Each of the three values is written with its low bit set.
 *
 * NOTE(review): the low bit is presumably the per-pointer "modify enable"
 * flag -- confirm against the PRM.
 */
static void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t dw0 = ILO_GPE_CMD(0x3, 0x0, 0x0e) | (cmd_len - 2);
   const uint32_t dw1 = blend_state | 1;
   const uint32_t dw2 = depth_stencil_state | 1;
   const uint32_t dw3 = color_calc_state | 1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, dw3);
   ilo_cp_end(cp);
}
1004
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS.
 *
 * The command is two dwords long: the header, followed by \p scissor_rect
 * written unmodified.
 */
static void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t dw0 = ILO_GPE_CMD(0x3, 0x0, 0x0f) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1020
1021 static void
1022 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1023 const struct ilo_shader *vs,
1024 int num_samplers,
1025 struct ilo_cp *cp)
1026 {
1027 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1028 const uint8_t cmd_len = 6;
1029 uint32_t dw2, dw4, dw5;
1030 int vue_read_len, max_threads;
1031
1032 ILO_GPE_VALID_GEN(dev, 6, 7);
1033
1034 if (!vs) {
1035 ilo_cp_begin(cp, cmd_len);
1036 ilo_cp_write(cp, cmd | (cmd_len - 2));
1037 ilo_cp_write(cp, 0);
1038 ilo_cp_write(cp, 0);
1039 ilo_cp_write(cp, 0);
1040 ilo_cp_write(cp, 0);
1041 ilo_cp_write(cp, 0);
1042 ilo_cp_end(cp);
1043 return;
1044 }
1045
1046 /*
1047 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
1048 *
1049 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
1050 * 128-bit vertex elements to be passed into the payload for each
1051 * vertex."
1052 *
1053 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
1054 * data to be read and passed to the thread."
1055 */
1056 vue_read_len = (vs->in.count + 1) / 2;
1057 if (!vue_read_len)
1058 vue_read_len = 1;
1059
1060 switch (dev->gen) {
1061 case ILO_GEN(6):
1062 /*
1063 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
1064 *
1065 * "Device # of EUs #Threads/EU
1066 * SNB GT2 12 5
1067 * SNB GT1 6 4"
1068 */
1069 max_threads = (dev->gt == 2) ? 60 : 24;
1070 break;
1071 case ILO_GEN(7):
1072 /*
1073 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
1074 *
1075 * "Device # of EUs #Threads/EU
1076 * Ivy Bridge (GT2) 16 8
1077 * Ivy Bridge (GT1) 6 6"
1078 */
1079 max_threads = (dev->gt == 2) ? 128 : 36;
1080 break;
1081 case ILO_GEN(7.5):
1082 /* see brwCreateContext() */
1083 max_threads = (dev->gt == 2) ? 280 : 70;
1084 break;
1085 default:
1086 max_threads = 1;
1087 break;
1088 }
1089
1090 dw2 = ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1091 if (false)
1092 dw2 |= GEN6_VS_FLOATING_POINT_MODE_ALT;
1093
1094 dw4 = vs->in.start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
1095 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
1096 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
1097
1098 dw5 = GEN6_VS_STATISTICS_ENABLE |
1099 GEN6_VS_ENABLE;
1100
1101 if (dev->gen >= ILO_GEN(7.5))
1102 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
1103 else
1104 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
1105
1106 ilo_cp_begin(cp, cmd_len);
1107 ilo_cp_write(cp, cmd | (cmd_len - 2));
1108 ilo_cp_write(cp, vs->cache_offset);
1109 ilo_cp_write(cp, dw2);
1110 ilo_cp_write(cp, 0); /* scratch */
1111 ilo_cp_write(cp, dw4);
1112 ilo_cp_write(cp, dw5);
1113 ilo_cp_end(cp);
1114 }
1115
1116 static void
1117 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1118 const struct ilo_shader *gs,
1119 const struct ilo_shader *vs,
1120 uint32_t vs_offset,
1121 struct ilo_cp *cp)
1122 {
1123 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1124 const uint8_t cmd_len = 7;
1125 uint32_t dw1, dw2, dw4, dw5, dw6;
1126 int i;
1127
1128 ILO_GPE_VALID_GEN(dev, 6, 6);
1129
1130 if (!gs && (!vs || !vs->stream_output)) {
1131 dw1 = 0;
1132 dw2 = 0;
1133 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1134 dw5 = GEN6_GS_STATISTICS_ENABLE;
1135 dw6 = 0;
1136 }
1137 else {
1138 int max_threads, vue_read_len;
1139
1140 /*
1141 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
1142 *
1143 * "Maximum Number of Threads valid range is [0,27] when Rendering
1144 * Enabled bit is set."
1145 *
1146 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
1147 *
1148 * "Programming Note: If the GS stage is enabled, software must
1149 * always allocate at least one GS URB Entry. This is true even if
1150 * the GS thread never needs to output vertices to the pipeline,
1151 * e.g., when only performing stream output. This is an artifact of
1152 * the need to pass the GS thread an initial destination URB
1153 * handle."
1154 *
1155 * As such, we always enable rendering, and limit the number of threads.
1156 */
1157 if (dev->gt == 2) {
1158 /* maximum is 60, but limited to 28 */
1159 max_threads = 28;
1160 }
1161 else {
1162 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
1163 max_threads = 21;
1164 }
1165
1166 if (max_threads > 28)
1167 max_threads = 28;
1168
1169 dw2 = GEN6_GS_SPF_MODE;
1170
1171 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
1172 GEN6_GS_STATISTICS_ENABLE |
1173 GEN6_GS_SO_STATISTICS_ENABLE |
1174 GEN6_GS_RENDERING_ENABLE;
1175
1176 /*
1177 * we cannot make use of GEN6_GS_REORDER because it will reorder
1178 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
1179 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
1180 * (2N+2, 2N+1, 2N+3)).
1181 */
1182 dw6 = GEN6_GS_ENABLE;
1183
1184 if (gs) {
1185 /* VS ouputs must match GS inputs */
1186 assert(gs->in.count == vs->out.count);
1187 for (i = 0; i < gs->in.count; i++) {
1188 assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
1189 assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
1190 }
1191
1192 /*
1193 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1194 *
1195 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1196 * Length) to 0 indicating no Vertex URB data to be read and
1197 * passed to the thread."
1198 */
1199 vue_read_len = (gs->in.count + 1) / 2;
1200 if (!vue_read_len)
1201 vue_read_len = 1;
1202
1203 dw1 = gs->cache_offset;
1204 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1205 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1206 gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1207
1208 if (gs->in.discard_adj)
1209 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1210
1211 if (gs->stream_output) {
1212 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1213 if (gs->svbi_post_inc) {
1214 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1215 gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1216 }
1217 }
1218 }
1219 else {
1220 /*
1221 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1222 *
1223 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1224 * Length) to 0 indicating no Vertex URB data to be read and
1225 * passed to the thread."
1226 */
1227 vue_read_len = (vs->out.count + 1) / 2;
1228 if (!vue_read_len)
1229 vue_read_len = 1;
1230
1231 dw1 = vs_offset;
1232 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1233 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1234 vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1235
1236 if (vs->in.discard_adj)
1237 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1238
1239 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1240 if (vs->svbi_post_inc) {
1241 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1242 vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1243 }
1244 }
1245 }
1246
1247 ilo_cp_begin(cp, cmd_len);
1248 ilo_cp_write(cp, cmd | (cmd_len - 2));
1249 ilo_cp_write(cp, dw1);
1250 ilo_cp_write(cp, dw2);
1251 ilo_cp_write(cp, 0);
1252 ilo_cp_write(cp, dw4);
1253 ilo_cp_write(cp, dw5);
1254 ilo_cp_write(cp, dw6);
1255 ilo_cp_end(cp);
1256 }
1257
1258 static void
1259 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1260 const struct pipe_rasterizer_state *rasterizer,
1261 bool has_linear_interp,
1262 bool enable_guardband,
1263 int num_viewports,
1264 struct ilo_cp *cp)
1265 {
1266 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1267 const uint8_t cmd_len = 4;
1268 uint32_t dw1, dw2, dw3;
1269
1270 ILO_GPE_VALID_GEN(dev, 6, 7);
1271
1272 if (!rasterizer) {
1273 ilo_cp_begin(cp, cmd_len);
1274 ilo_cp_write(cp, cmd | (cmd_len - 2));
1275 ilo_cp_write(cp, 0);
1276 ilo_cp_write(cp, 0);
1277 ilo_cp_write(cp, 0);
1278 ilo_cp_end(cp);
1279
1280 return;
1281 }
1282
1283 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
1284
1285 if (dev->gen >= ILO_GEN(7)) {
1286 /*
1287 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
1288 *
1289 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
1290 * enabled only for the cases where the incoming primitive topology
1291 * into the clipper guaranteed to be Trilist."
1292 *
1293 * What does this mean?
1294 */
1295 dw1 |= 0 << 19 |
1296 GEN7_CLIP_EARLY_CULL;
1297
1298 if (rasterizer->front_ccw)
1299 dw1 |= GEN7_CLIP_WINDING_CCW;
1300
1301 switch (rasterizer->cull_face) {
1302 case PIPE_FACE_NONE:
1303 dw1 |= GEN7_CLIP_CULLMODE_NONE;
1304 break;
1305 case PIPE_FACE_FRONT:
1306 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
1307 break;
1308 case PIPE_FACE_BACK:
1309 dw1 |= GEN7_CLIP_CULLMODE_BACK;
1310 break;
1311 case PIPE_FACE_FRONT_AND_BACK:
1312 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
1313 break;
1314 }
1315 }
1316
1317 dw2 = GEN6_CLIP_ENABLE |
1318 GEN6_CLIP_XY_TEST |
1319 rasterizer->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
1320 GEN6_CLIP_MODE_NORMAL;
1321
1322 if (rasterizer->clip_halfz)
1323 dw2 |= GEN6_CLIP_API_D3D;
1324 else
1325 dw2 |= GEN6_CLIP_API_OGL;
1326
1327 if (rasterizer->depth_clip)
1328 dw2 |= GEN6_CLIP_Z_TEST;
1329
1330 /*
1331 * There are several reasons that guard band test should be disabled
1332 *
1333 * - when the renderer does not perform 2D clipping
1334 * - GL wide points (to avoid partially visibie object)
1335 * - GL wide or AA lines (to avoid partially visibie object)
1336 */
1337 if (enable_guardband && true /* API_GL */) {
1338 if (rasterizer->point_size_per_vertex || rasterizer->point_size > 1.0f)
1339 enable_guardband = false;
1340 if (rasterizer->line_smooth || rasterizer->line_width > 1.0f)
1341 enable_guardband = false;
1342 }
1343
1344 if (enable_guardband)
1345 dw2 |= GEN6_CLIP_GB_TEST;
1346
1347 if (has_linear_interp)
1348 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1349
1350 if (rasterizer->flatshade_first) {
1351 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1352 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1353 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1354 }
1355 else {
1356 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1357 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1358 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1359 }
1360
1361 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
1362 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
1363 GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1364 (num_viewports - 1);
1365
1366 ilo_cp_begin(cp, cmd_len);
1367 ilo_cp_write(cp, cmd | (cmd_len - 2));
1368 ilo_cp_write(cp, dw1);
1369 ilo_cp_write(cp, dw2);
1370 ilo_cp_write(cp, dw3);
1371 ilo_cp_end(cp);
1372 }
1373
1374 /**
1375 * Fill in DW2 to DW7 of 3DSTATE_SF.
1376 */
1377 void
1378 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
1379 const struct pipe_rasterizer_state *rasterizer,
1380 int num_samples,
1381 enum pipe_format depth_format,
1382 bool separate_stencil,
1383 uint32_t *dw, int num_dwords)
1384 {
1385 float offset_const, offset_scale, offset_clamp;
1386 int format, line_width, point_width;
1387
1388 ILO_GPE_VALID_GEN(dev, 6, 7);
1389 assert(num_dwords == 6);
1390
1391 if (!rasterizer) {
1392 dw[0] = 0;
1393 dw[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
1394 dw[2] = 0;
1395 dw[3] = 0;
1396 dw[4] = 0;
1397 dw[5] = 0;
1398
1399 return;
1400 }
1401
1402 /*
1403 * Scale the constant term. The minimum representable value used by the HW
1404 * is not large enouch to be the minimum resolvable difference.
1405 */
1406 offset_const = rasterizer->offset_units * 2.0f;
1407
1408 offset_scale = rasterizer->offset_scale;
1409 offset_clamp = rasterizer->offset_clamp;
1410
1411 if (separate_stencil) {
1412 switch (depth_format) {
1413 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1414 depth_format = PIPE_FORMAT_Z24X8_UNORM;
1415 break;
1416 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1417 depth_format = PIPE_FORMAT_Z32_FLOAT;;
1418 break;
1419 case PIPE_FORMAT_S8_UINT:
1420 depth_format = PIPE_FORMAT_NONE;
1421 break;
1422 default:
1423 break;
1424 }
1425 }
1426
1427 format = gen6_translate_depth_format(depth_format);
1428 /* FLOAT surface is assumed when there is no depth buffer */
1429 if (format < 0)
1430 format = BRW_DEPTHFORMAT_D32_FLOAT;
1431
1432 /*
1433 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
1434 * pixels in the minor direction. We have to make the lines slightly
1435 * thicker, 0.5 pixel on both sides, so that they intersect that many
1436 * pixels are considered into the lines.
1437 *
1438 * Line width is in U3.7.
1439 */
1440 line_width = (int) ((rasterizer->line_width +
1441 (float) rasterizer->line_smooth) * 128.0f + 0.5f);
1442 line_width = CLAMP(line_width, 0, 1023);
1443
1444 /*
1445 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1446 *
1447 * "Software must not program a value of 0.0 when running in
1448 * MSRASTMODE_ON_xxx modes - zero-width lines are not available when
1449 * multisampling rasterization is enabled."
1450 */
1451 if (rasterizer->multisample) {
1452 if (!line_width)
1453 line_width = 128; /* 1.0f */
1454 }
1455 else if (line_width == 128 && !rasterizer->line_smooth) {
1456 /* use GIQ rules */
1457 line_width = 0;
1458 }
1459
1460 /* in U8.3 */
1461 point_width = (int) (rasterizer->point_size * 8.0f + 0.5f);
1462 point_width = CLAMP(point_width, 1, 2047);
1463
1464 /*
1465 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1466 *
1467 * "This bit (Statistics Enable) should be set whenever clipping is
1468 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
1469 * should be cleared if clipping is disabled or Statistics Enable in
1470 * CLIP_STATE is clear."
1471 */
1472 dw[0] = GEN6_SF_STATISTICS_ENABLE |
1473 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
1474
1475 /* XXX GEN6 path seems to work fine for GEN7 */
1476 if (false && dev->gen >= ILO_GEN(7)) {
1477 dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1478
1479 /*
1480 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
1481 *
1482 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
1483 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
1484 * Depth Offset Enable Point) should be set whenever non zero depth
1485 * bias (Slope, Bias) values are used. Setting this bit may have
1486 * some degradation of performance for some workloads."
1487 */
1488 if (rasterizer->offset_tri ||
1489 rasterizer->offset_line ||
1490 rasterizer->offset_point) {
1491 /* XXX need to scale offset_const according to the depth format */
1492 dw[0] |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
1493
1494 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
1495 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
1496 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1497 }
1498 else {
1499 offset_const = 0.0f;
1500 offset_scale = 0.0f;
1501 offset_clamp = 0.0f;
1502 }
1503 }
1504 else {
1505 if (dev->gen >= ILO_GEN(7))
1506 dw[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1507
1508 if (rasterizer->offset_tri)
1509 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
1510 if (rasterizer->offset_line)
1511 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
1512 if (rasterizer->offset_point)
1513 dw[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1514 }
1515
1516 switch (rasterizer->fill_front) {
1517 case PIPE_POLYGON_MODE_FILL:
1518 dw[0] |= GEN6_SF_FRONT_SOLID;
1519 break;
1520 case PIPE_POLYGON_MODE_LINE:
1521 dw[0] |= GEN6_SF_FRONT_WIREFRAME;
1522 break;
1523 case PIPE_POLYGON_MODE_POINT:
1524 dw[0] |= GEN6_SF_FRONT_POINT;
1525 break;
1526 }
1527
1528 switch (rasterizer->fill_back) {
1529 case PIPE_POLYGON_MODE_FILL:
1530 dw[0] |= GEN6_SF_BACK_SOLID;
1531 break;
1532 case PIPE_POLYGON_MODE_LINE:
1533 dw[0] |= GEN6_SF_BACK_WIREFRAME;
1534 break;
1535 case PIPE_POLYGON_MODE_POINT:
1536 dw[0] |= GEN6_SF_BACK_POINT;
1537 break;
1538 }
1539
1540 if (rasterizer->front_ccw)
1541 dw[0] |= GEN6_SF_WINDING_CCW;
1542
1543 dw[1] = 0;
1544
1545 if (rasterizer->line_smooth) {
1546 /*
1547 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1548 *
1549 * "This field (Anti-aliasing Enable) must be disabled if any of the
1550 * render targets have integer (UINT or SINT) surface format."
1551 *
1552 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1553 *
1554 * "This field (Hierarchical Depth Buffer Enable) must be disabled
1555 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
1556 *
1557 * TODO We do not check those yet.
1558 */
1559 dw[1] |= GEN6_SF_LINE_AA_ENABLE |
1560 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
1561 }
1562
1563 switch (rasterizer->cull_face) {
1564 case PIPE_FACE_NONE:
1565 dw[1] |= GEN6_SF_CULL_NONE;
1566 break;
1567 case PIPE_FACE_FRONT:
1568 dw[1] |= GEN6_SF_CULL_FRONT;
1569 break;
1570 case PIPE_FACE_BACK:
1571 dw[1] |= GEN6_SF_CULL_BACK;
1572 break;
1573 case PIPE_FACE_FRONT_AND_BACK:
1574 dw[1] |= GEN6_SF_CULL_BOTH;
1575 break;
1576 }
1577
1578 dw[1] |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1579
1580 if (rasterizer->scissor)
1581 dw[1] |= GEN6_SF_SCISSOR_ENABLE;
1582
1583 if (num_samples > 1 && rasterizer->multisample)
1584 dw[1] |= GEN6_SF_MSRAST_ON_PATTERN;
1585
1586 dw[2] = GEN6_SF_LINE_AA_MODE_TRUE |
1587 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
1588
1589 if (rasterizer->line_last_pixel)
1590 dw[2] |= 1 << 31;
1591
1592 if (rasterizer->flatshade_first) {
1593 dw[2] |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
1594 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
1595 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1596 }
1597 else {
1598 dw[2] |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
1599 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
1600 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1601 }
1602
1603 if (!rasterizer->point_size_per_vertex)
1604 dw[2] |= GEN6_SF_USE_STATE_POINT_WIDTH;
1605
1606 dw[2] |= point_width;
1607
1608 dw[3] = fui(offset_const);
1609 dw[4] = fui(offset_scale);
1610 dw[5] = fui(offset_clamp);
1611 }
1612
1613 /**
1614 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
1615 */
1616 void
1617 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
1618 const struct pipe_rasterizer_state *rasterizer,
1619 const struct ilo_shader *fs,
1620 const struct ilo_shader *last_sh,
1621 uint32_t *dw, int num_dwords)
1622 {
1623 uint32_t point_sprite_enable, const_interp_enable;
1624 uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS];
1625 int vue_offset, vue_len;
1626 int dst, max_src, i;
1627
1628 ILO_GPE_VALID_GEN(dev, 6, 7);
1629 assert(num_dwords == 13);
1630
1631 if (!fs) {
1632 if (dev->gen >= ILO_GEN(7))
1633 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
1634 else
1635 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
1636
1637 for (i = 1; i < num_dwords; i++)
1638 dw[i] = 0;
1639
1640 return;
1641 }
1642
1643 if (last_sh) {
1644 /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
1645 assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE);
1646 assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION);
1647 vue_offset = 2;
1648 vue_len = last_sh->out.count - vue_offset;
1649 }
1650 else {
1651 vue_offset = 0;
1652 vue_len = fs->in.count;
1653 }
1654
1655 point_sprite_enable = 0;
1656 const_interp_enable = 0;
1657 max_src = (last_sh) ? 0 : fs->in.count - 1;
1658
1659 for (dst = 0; dst < fs->in.count; dst++) {
1660 const int semantic = fs->in.semantic_names[dst];
1661 const int index = fs->in.semantic_indices[dst];
1662 const int interp = fs->in.interp[dst];
1663 int src;
1664 uint16_t ctrl;
1665
1666 /*
1667 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
1668 *
1669 * "This field (Point Sprite Texture Coordinate Enable) must be
1670 * programmed to 0 when non-point primitives are rendered."
1671 *
1672 * TODO We do not check that yet.
1673 */
1674 if (semantic == TGSI_SEMANTIC_GENERIC &&
1675 (rasterizer->sprite_coord_enable & (1 << index)))
1676 point_sprite_enable |= 1 << dst;
1677
1678 if (interp == TGSI_INTERPOLATE_CONSTANT ||
1679 (interp == TGSI_INTERPOLATE_COLOR && rasterizer->flatshade))
1680 const_interp_enable |= 1 << dst;
1681
1682 if (!last_sh) {
1683 attr_ctrl[dst] = 0;
1684 continue;
1685 }
1686
1687 /* find the matching VS/GS OUT for FS IN[i] */
1688 ctrl = 0;
1689 for (src = 0; src < vue_len; src++) {
1690 if (last_sh->out.semantic_names[vue_offset + src] != semantic ||
1691 last_sh->out.semantic_indices[vue_offset + src] != index)
1692 continue;
1693
1694 ctrl = src;
1695
1696 if (semantic == TGSI_SEMANTIC_COLOR && rasterizer->light_twoside &&
1697 src < vue_len - 1) {
1698 const int next = src + 1;
1699
1700 if (last_sh->out.semantic_names[vue_offset + next] ==
1701 TGSI_SEMANTIC_BCOLOR &&
1702 last_sh->out.semantic_indices[vue_offset + next] == index) {
1703 ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
1704 ATTRIBUTE_SWIZZLE_SHIFT;
1705 src++;
1706 }
1707 }
1708
1709 break;
1710 }
1711
1712 /* if there is no COLOR, try BCOLOR */
1713 if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) {
1714 for (src = 0; src < vue_len; src++) {
1715 if (last_sh->out.semantic_names[vue_offset + src] !=
1716 TGSI_SEMANTIC_BCOLOR ||
1717 last_sh->out.semantic_indices[vue_offset + src] != index)
1718 continue;
1719
1720 ctrl = src;
1721 break;
1722 }
1723 }
1724
1725 if (src < vue_len) {
1726 attr_ctrl[dst] = ctrl;
1727 if (max_src < src)
1728 max_src = src;
1729 }
1730 else {
1731 /*
1732 * The previous shader stage does not output this attribute. The
1733 * value is supposed to be undefined for fs, unless the attribute
1734 * goes through point sprite replacement or the attribute is
1735 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
1736 * attribute is picked.
1737 *
1738 * We should update the fs code and omit the output of
1739 * TGSI_SEMANTIC_POSITION here.
1740 */
1741 attr_ctrl[dst] = 0;
1742 }
1743 }
1744
1745 for (; dst < Elements(attr_ctrl); dst++)
1746 attr_ctrl[dst] = 0;
1747
1748 /* only the first 16 attributes can be remapped */
1749 for (dst = 16; dst < Elements(attr_ctrl); dst++)
1750 assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst);
1751
1752 /*
1753 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1754 *
1755 * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
1756 * 0 indicating no Vertex URB data to be read.
1757 *
1758 * This field should be set to the minimum length required to read the
1759 * maximum source attribute. The maximum source attribute is indicated
1760 * by the maximum value of the enabled Attribute # Source Attribute if
1761 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
1762 * enable is not set.
1763 *
1764 * read_length = ceiling((max_source_attr+1)/2)
1765 *
1766 * [errata] Corruption/Hang possible if length programmed larger than
1767 * recommended"
1768 */
1769 vue_len = max_src + 1;
1770
1771 assert(fs->in.count <= 32);
1772 assert(vue_offset % 2 == 0);
1773
1774 if (dev->gen >= ILO_GEN(7)) {
1775 dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
1776 (vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1777 vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
1778
1779 if (last_sh)
1780 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
1781 }
1782 else {
1783 dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT |
1784 (vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
1785 vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
1786
1787 if (last_sh)
1788 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
1789 }
1790
1791 switch (rasterizer->sprite_coord_mode) {
1792 case PIPE_SPRITE_COORD_UPPER_LEFT:
1793 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
1794 break;
1795 case PIPE_SPRITE_COORD_LOWER_LEFT:
1796 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
1797 break;
1798 }
1799
1800 for (i = 0; i < 8; i++)
1801 dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i];
1802
1803 dw[9] = point_sprite_enable;
1804 dw[10] = const_interp_enable;
1805
1806 /* WrapShortest enables */
1807 dw[11] = 0;
1808 dw[12] = 0;
1809 }
1810
1811 static void
1812 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1813 const struct pipe_rasterizer_state *rasterizer,
1814 const struct ilo_shader *fs,
1815 const struct ilo_shader *last_sh,
1816 struct ilo_cp *cp)
1817 {
1818 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1819 const uint8_t cmd_len = 20;
1820 uint32_t dw_raster[6], dw_sbe[13];
1821
1822 ILO_GPE_VALID_GEN(dev, 6, 6);
1823
1824 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1825 1, PIPE_FORMAT_NONE, false, dw_raster, Elements(dw_raster));
1826 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1827 fs, last_sh, dw_sbe, Elements(dw_sbe));
1828
1829 ilo_cp_begin(cp, cmd_len);
1830 ilo_cp_write(cp, cmd | (cmd_len - 2));
1831 ilo_cp_write(cp, dw_sbe[0]);
1832 ilo_cp_write_multi(cp, dw_raster, 6);
1833 ilo_cp_write_multi(cp, &dw_sbe[1], 12);
1834 ilo_cp_end(cp);
1835 }
1836
1837 static void
1838 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1839 const struct ilo_shader *fs,
1840 int num_samplers,
1841 const struct pipe_rasterizer_state *rasterizer,
1842 bool dual_blend, bool cc_may_kill,
1843 struct ilo_cp *cp)
1844 {
1845 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1846 const uint8_t cmd_len = 9;
1847 const int num_samples = 1;
1848 uint32_t dw2, dw4, dw5, dw6;
1849 int max_threads;
1850
1851 ILO_GPE_VALID_GEN(dev, 6, 6);
1852
1853 /* see brwCreateContext() */
1854 max_threads = (dev->gt == 2) ? 80 : 40;
1855
1856 if (!fs) {
1857 ilo_cp_begin(cp, cmd_len);
1858 ilo_cp_write(cp, cmd | (cmd_len - 2));
1859 ilo_cp_write(cp, 0);
1860 ilo_cp_write(cp, 0);
1861 ilo_cp_write(cp, 0);
1862 ilo_cp_write(cp, 0);
1863 /* honor the valid range even if dispatching is disabled */
1864 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
1865 ilo_cp_write(cp, 0);
1866 ilo_cp_write(cp, 0);
1867 ilo_cp_write(cp, 0);
1868 ilo_cp_end(cp);
1869
1870 return;
1871 }
1872
1873 dw2 = (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
1874 if (false)
1875 dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
1876
1877 dw4 = fs->in.start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
1878 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
1879 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
1880
1881 if (true) {
1882 dw4 |= GEN6_WM_STATISTICS_ENABLE;
1883 }
1884 else {
1885 /*
1886 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1887 *
1888 * "This bit (Statistics Enable) must be disabled if either of these
1889 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1890 * Resolve Enable or Depth Buffer Resolve Enable."
1891 */
1892 dw4 |= GEN6_WM_DEPTH_CLEAR;
1893 dw4 |= GEN6_WM_DEPTH_RESOLVE;
1894 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
1895 }
1896
1897 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT |
1898 GEN6_WM_LINE_AA_WIDTH_2_0;
1899
1900 /*
1901 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1902 *
1903 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
1904 * PS kernel or color calculator has the ability to kill (discard)
1905 * pixels or samples, other than due to depth or stencil testing.
1906 * This bit is required to be ENABLED in the following situations:
1907 *
1908 * The API pixel shader program contains "killpix" or "discard"
1909 * instructions, or other code in the pixel shader kernel that can
1910 * cause the final pixel mask to differ from the pixel mask received
1911 * on dispatch.
1912 *
1913 * A sampler with chroma key enabled with kill pixel mode is used by
1914 * the pixel shader.
1915 *
1916 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
1917 * enabled.
1918 *
1919 * The pixel shader kernel generates and outputs oMask.
1920 *
1921 * Note: As ClipDistance clipping is fully supported in hardware and
1922 * therefore not via PS instructions, there should be no need to
1923 * ENABLE this bit due to ClipDistance clipping."
1924 */
1925 if (fs->has_kill || cc_may_kill)
1926 dw5 |= GEN6_WM_KILL_ENABLE;
1927
1928 /*
1929 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1930 *
1931 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
1932 * field must be set to disabled."
1933 *
1934 * TODO This is not checked yet.
1935 */
1936 if (fs->out.has_pos)
1937 dw5 |= GEN6_WM_COMPUTED_DEPTH;
1938
1939 if (fs->in.has_pos)
1940 dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
1941
1942 /*
1943 * Set this bit if
1944 *
1945 * a) fs writes colors and color is not masked, or
1946 * b) fs writes depth, or
1947 * c) fs or cc kills
1948 */
1949 if (true)
1950 dw5 |= GEN6_WM_DISPATCH_ENABLE;
1951
1952 /* same value as in 3DSTATE_SF */
1953 if (rasterizer->line_smooth)
1954 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
1955
1956 if (rasterizer->poly_stipple_enable)
1957 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
1958 if (rasterizer->line_stipple_enable)
1959 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
1960
1961 if (dual_blend)
1962 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
1963
1964 if (fs->dispatch_16)
1965 dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
1966 else
1967 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
1968
1969 dw6 = fs->in.count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
1970 GEN6_WM_POSOFFSET_NONE |
1971 GEN6_WM_POSITION_ZW_PIXEL |
1972 fs->in.barycentric_interpolation_mode <<
1973 GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
1974
1975 if (rasterizer->bottom_edge_rule)
1976 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
1977
1978 if (num_samples > 1) {
1979 if (rasterizer->multisample)
1980 dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
1981 else
1982 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
1983 dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
1984 }
1985 else {
1986 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL |
1987 GEN6_WM_MSDISPMODE_PERSAMPLE;
1988 }
1989
1990 ilo_cp_begin(cp, cmd_len);
1991 ilo_cp_write(cp, cmd | (cmd_len - 2));
1992 ilo_cp_write(cp, fs->cache_offset);
1993 ilo_cp_write(cp, dw2);
1994 ilo_cp_write(cp, 0); /* scratch */
1995 ilo_cp_write(cp, dw4);
1996 ilo_cp_write(cp, dw5);
1997 ilo_cp_write(cp, dw6);
1998 ilo_cp_write(cp, 0); /* kernel 1 */
1999 ilo_cp_write(cp, 0); /* kernel 2 */
2000 ilo_cp_end(cp);
2001 }
2002
/**
 * Fill in the four buffer dwords shared by the 3DSTATE_CONSTANT_x commands.
 *
 * Slot i is used when i < num_bufs and sizes[i] is non-zero; its dword is
 * the buffer offset (asserted to be a multiple of 32) OR'ed with the read
 * length, which is the size in 32-byte units minus one.  Unused slots are
 * zeroed.  The sum of the actual read lengths is asserted not to exceed
 * max_read_length.
 *
 * Return a bitmask with bit i set for every slot that is used.
 */
static unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enable_mask = 0x0;
   int used_length = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int read_len;

      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      read_len = (sizes[slot] + 31) / 32 - 1;

      assert(bufs[slot] % 32 == 0);
      assert(read_len < 32);

      /* the low five bits of the aligned offset carry the read length */
      dw[slot] = bufs[slot] | read_len;
      enable_mask |= 1 << slot;

      used_length += read_len + 1;
   }

   assert(used_length <= max_read_length);

   return enable_mask;
}
2037
/**
 * Emit 3DSTATE_CONSTANT_VS (GEN6 only) for up to four constant buffers.
 *
 * The per-buffer enable mask is placed at bit 12 of the header dword.
 */
static void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x15);
   uint32_t payload[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, opcode | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2068
/**
 * Emit 3DSTATE_CONSTANT_GS (GEN6 only) for up to four constant buffers.
 *
 * The per-buffer enable mask is placed at bit 12 of the header dword.
 */
static void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x16);
   uint32_t payload[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, opcode | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2099
/**
 * Emit 3DSTATE_CONSTANT_PS (GEN6 only) for up to four constant buffers.
 *
 * The per-buffer enable mask is placed at bit 12 of the header dword.
 */
static void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x17);
   uint32_t payload[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, opcode | (cmd_len - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
2130
/**
 * Emit 3DSTATE_SAMPLE_MASK (validated for GEN6 only).
 *
 * Only the low four bits of \p sample_mask are emitted; higher bits are
 * discarded.
 */
static void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_bits = 0xf;
   unsigned masked;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   masked = sample_mask & valid_bits;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, masked);
   ilo_cp_end(cp);
}
2149
2150 static void
2151 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
2152 unsigned x, unsigned y,
2153 unsigned width, unsigned height,
2154 struct ilo_cp *cp)
2155 {
2156 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
2157 const uint8_t cmd_len = 4;
2158 unsigned xmax = x + width - 1;
2159 unsigned ymax = y + height - 1;
2160 int rect_limit;
2161
2162 ILO_GPE_VALID_GEN(dev, 6, 7);
2163
2164 if (dev->gen >= ILO_GEN(7)) {
2165 rect_limit = 16383;
2166 }
2167 else {
2168 /*
2169 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
2170 *
2171 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
2172 * must be an even number"
2173 */
2174 assert(y % 2 == 0);
2175
2176 rect_limit = 8191;
2177 }
2178
2179 if (x > rect_limit) x = rect_limit;
2180 if (y > rect_limit) y = rect_limit;
2181 if (xmax > rect_limit) xmax = rect_limit;
2182 if (ymax > rect_limit) ymax = rect_limit;
2183
2184 ilo_cp_begin(cp, cmd_len);
2185 ilo_cp_write(cp, cmd | (cmd_len - 2));
2186 ilo_cp_write(cp, y << 16 | x);
2187 ilo_cp_write(cp, ymax << 16 | xmax);
2188
2189 /*
2190 * There is no need to set the origin. It is intended to support front
2191 * buffer rendering.
2192 */
2193 ilo_cp_write(cp, 0);
2194
2195 ilo_cp_end(cp);
2196 }
2197
2198 static int
2199 gen6_get_depth_buffer_format(const struct ilo_dev_info *dev,
2200 enum pipe_format format,
2201 bool hiz,
2202 bool separate_stencil,
2203 bool *has_depth,
2204 bool *has_stencil)
2205 {
2206 int depth_format;
2207
2208 ILO_GPE_VALID_GEN(dev, 6, 7);
2209
2210 *has_depth = true;
2211 *has_stencil = false;
2212
2213 /*
2214 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2215 *
2216 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
2217 * Surface Format of the depth buffer cannot be
2218 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2219 * requires the separate stencil buffer."
2220 *
2221 * From the Ironlake PRM, volume 2 part 1, page 330:
2222 *
2223 * "If this field (Separate Stencil Buffer Enable) is disabled, the
2224 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
2225 *
2226 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
2227 * is indeed used, the depth values output by the fragment shaders will
2228 * be different when read back.
2229 *
2230 * As for GEN7+, separate_stencil_buffer is always true.
2231 */
2232 switch (format) {
2233 case PIPE_FORMAT_Z16_UNORM:
2234 depth_format = BRW_DEPTHFORMAT_D16_UNORM;
2235 break;
2236 case PIPE_FORMAT_Z32_FLOAT:
2237 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2238 break;
2239 case PIPE_FORMAT_Z24X8_UNORM:
2240 depth_format = (separate_stencil) ?
2241 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2242 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2243 break;
2244 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2245 depth_format = (separate_stencil) ?
2246 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2247 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2248 *has_stencil = true;
2249 break;
2250 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2251 depth_format = (separate_stencil) ?
2252 BRW_DEPTHFORMAT_D32_FLOAT :
2253 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
2254 *has_stencil = true;
2255 break;
2256 case PIPE_FORMAT_S8_UINT:
2257 if (separate_stencil) {
2258 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2259 *has_depth = false;
2260 *has_stencil = true;
2261 break;
2262 }
2263 /* fall through */
2264 default:
2265 assert(!"unsupported depth/stencil format");
2266 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2267 *has_depth = false;
2268 *has_stencil = false;
2269 break;
2270 }
2271
2272 return depth_format;
2273 }
2274
2275 void
2276 ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2277 const struct pipe_surface *surface,
2278 const struct pipe_depth_stencil_alpha_state *dsa,
2279 bool hiz,
2280 struct ilo_cp *cp)
2281 {
2282 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2283 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
2284 const uint8_t cmd_len = 7;
2285 const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
2286 const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
2287 struct ilo_texture *tex;
2288 uint32_t dw1, dw3, dw4, dw6;
2289 uint32_t slice_offset, x_offset, y_offset;
2290 int surface_type, depth_format;
2291 unsigned lod, first_layer, num_layers;
2292 unsigned width, height, depth;
2293 bool separate_stencil, has_depth, has_stencil;
2294
2295 ILO_GPE_VALID_GEN(dev, 6, 7);
2296
2297 if (dev->gen >= ILO_GEN(7)) {
2298 separate_stencil = true;
2299 }
2300 else {
2301 /*
2302 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2303 *
2304 * "This field (Separate Stencil Buffer Enable) must be set to the
2305 * same value (enabled or disabled) as Hierarchical Depth Buffer
2306 * Enable."
2307 */
2308 separate_stencil = hiz;
2309 }
2310
2311 if (surface) {
2312 depth_format = gen6_get_depth_buffer_format(dev,
2313 surface->format, hiz, separate_stencil, &has_depth, &has_stencil);
2314 }
2315 else {
2316 has_depth = false;
2317 has_stencil = false;
2318 }
2319
2320 if (!has_depth && !has_stencil) {
2321 dw1 = BRW_SURFACE_NULL << 29 |
2322 BRW_DEPTHFORMAT_D32_FLOAT << 18;
2323
2324 /* Y-tiled */
2325 if (dev->gen == ILO_GEN(6)) {
2326 dw1 |= 1 << 27 |
2327 1 << 26;
2328 }
2329
2330 ilo_cp_begin(cp, cmd_len);
2331 ilo_cp_write(cp, cmd | (cmd_len - 2));
2332 ilo_cp_write(cp, dw1);
2333 ilo_cp_write(cp, 0);
2334 ilo_cp_write(cp, 0);
2335 ilo_cp_write(cp, 0);
2336 ilo_cp_write(cp, 0);
2337 ilo_cp_write(cp, 0);
2338 ilo_cp_end(cp);
2339
2340 return;
2341 }
2342
2343 tex = ilo_texture(surface->texture);
2344
2345 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
2346 lod = surface->u.tex.level;
2347 first_layer = surface->u.tex.first_layer;
2348 num_layers = surface->u.tex.last_layer - first_layer + 1;
2349
2350 width = tex->base.width0;
2351 height = tex->base.height0;
2352 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
2353 tex->base.depth0 : num_layers;
2354
2355 if (surface_type == BRW_SURFACE_CUBE) {
2356 /*
2357 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
2358 *
2359 * "For Other Surfaces (Cube Surfaces):
2360 * This field (Minimum Array Element) is ignored."
2361 *
2362 * "For Other Surfaces (Cube Surfaces):
2363 * This field (Render Target View Extent) is ignored."
2364 *
2365 * As such, we cannot set first_layer and num_layers on cube surfaces.
2366 * To work around that, treat it as a 2D surface.
2367 */
2368 surface_type = BRW_SURFACE_2D;
2369 }
2370
2371 /*
2372 * we always treat the resource as non-mipmapped and set the slice/x/y
2373 * offsets manually
2374 */
2375 if (true) {
2376 /* no layered rendering */
2377 assert(num_layers == 1);
2378
2379 slice_offset = ilo_texture_get_slice_offset(tex,
2380 lod, first_layer, &x_offset, &y_offset);
2381
2382 /*
2383 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
2384 *
2385 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
2386 * Coordinate Offset X) must be zero to ensure correct alignment"
2387 *
2388 * XXX Skip the check for gen6, which seems to be fine. We need to make
2389 * sure that does not happen eventually.
2390 */
2391 if (dev->gen >= ILO_GEN(7)) {
2392 assert((x_offset & 7) == 0 && (y_offset & 7) == 0);
2393 x_offset &= ~7;
2394 y_offset &= ~7;
2395 }
2396
2397 /* the size of the layer */
2398 width = u_minify(width, lod);
2399 height = u_minify(height, lod);
2400 if (surface_type == BRW_SURFACE_3D)
2401 depth = u_minify(depth, lod);
2402 else
2403 depth = 1;
2404
2405 lod = 0;
2406 first_layer = 0;
2407
2408 width += x_offset;
2409 height += y_offset;
2410
2411 /* we have to treat them as 2D surfaces */
2412 if (surface_type == BRW_SURFACE_CUBE) {
2413 assert(tex->base.width0 == tex->base.height0);
2414 /* we will set slice_offset to point to the single face */
2415 surface_type = BRW_SURFACE_2D;
2416 }
2417 else if (surface_type == BRW_SURFACE_1D && height > 1) {
2418 assert(tex->base.height0 == 1);
2419 surface_type = BRW_SURFACE_2D;
2420 }
2421 }
2422 else {
2423 slice_offset = 0;
2424 x_offset = 0;
2425 y_offset = 0;
2426 }
2427
2428 /* required for GEN6+ */
2429 assert(tex->tiling == INTEL_TILING_Y);
2430 assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
2431 tex->bo_stride % 128 == 0);
2432 assert(width <= tex->bo_stride);
2433
2434 switch (surface_type) {
2435 case BRW_SURFACE_1D:
2436 assert(width <= max_2d_size && height == 1 &&
2437 depth <= max_array_size);
2438 assert(first_layer < max_array_size - 1 &&
2439 num_layers <= max_array_size);
2440 break;
2441 case BRW_SURFACE_2D:
2442 assert(width <= max_2d_size && height <= max_2d_size &&
2443 depth <= max_array_size);
2444 assert(first_layer < max_array_size - 1 &&
2445 num_layers <= max_array_size);
2446 break;
2447 case BRW_SURFACE_3D:
2448 assert(width <= 2048 && height <= 2048 && depth <= 2048);
2449 assert(first_layer < 2048 && num_layers <= max_array_size);
2450 assert(x_offset == 0 && y_offset == 0);
2451 break;
2452 case BRW_SURFACE_CUBE:
2453 assert(width <= max_2d_size && height <= max_2d_size && depth == 1);
2454 assert(first_layer == 0 && num_layers == 1);
2455 assert(width == height);
2456 assert(x_offset == 0 && y_offset == 0);
2457 break;
2458 default:
2459 assert(!"unexpected depth surface type");
2460 break;
2461 }
2462
2463 dw1 = surface_type << 29 |
2464 depth_format << 18 |
2465 (tex->bo_stride - 1);
2466
2467 if (dev->gen >= ILO_GEN(7)) {
2468 if (has_depth) {
2469 if (dsa->depth.writemask)
2470 dw1 |= 1 << 28;
2471 if (hiz)
2472 dw1 |= 1 << 22;
2473 }
2474
2475 if (has_stencil &&
2476 (dsa->stencil[0].writemask || dsa->stencil[1].writemask))
2477 dw1 |= 1 << 27;
2478
2479 dw3 = (height - 1) << 18 |
2480 (width - 1) << 4 |
2481 lod;
2482
2483 dw4 = (depth - 1) << 21 |
2484 first_layer << 10;
2485
2486 dw6 = (num_layers - 1) << 21;
2487 }
2488 else {
2489 dw1 |= (tex->tiling != INTEL_TILING_NONE) << 27 |
2490 (tex->tiling == INTEL_TILING_Y) << 26;
2491
2492 if (hiz) {
2493 dw1 |= 1 << 22 |
2494 1 << 21;
2495 }
2496
2497 dw3 = (height - 1) << 19 |
2498 (width - 1) << 6 |
2499 lod << 2 |
2500 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
2501
2502 dw4 = (depth - 1) << 21 |
2503 first_layer << 10 |
2504 (num_layers - 1) << 1;
2505
2506 dw6 = 0;
2507 }
2508
2509 ilo_cp_begin(cp, cmd_len);
2510 ilo_cp_write(cp, cmd | (cmd_len - 2));
2511 ilo_cp_write(cp, dw1);
2512
2513 if (has_depth) {
2514 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2515 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2516 }
2517 else {
2518 ilo_cp_write(cp, 0);
2519 }
2520
2521 ilo_cp_write(cp, dw3);
2522 ilo_cp_write(cp, dw4);
2523 ilo_cp_write(cp, y_offset << 16 | x_offset);
2524 ilo_cp_write(cp, dw6);
2525 ilo_cp_end(cp);
2526 }
2527
/**
 * Convenience wrapper around ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER() that
 * passes no depth/stencil/alpha state (dsa is NULL).
 */
static void
gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
                               const struct pipe_surface *surface,
                               bool hiz,
                               struct ilo_cp *cp)
{
   const struct pipe_depth_stencil_alpha_state *no_dsa = NULL;

   ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(dev, surface, no_dsa, hiz, cp);
}
2536
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (validated for GEN6 and GEN7).
 *
 * Both offsets must be in [0, 31].  The X offset is placed at bit 8 and the
 * Y offset at bit 0 of the payload dword.
 */
static void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, (x_offset << 8) | y_offset);
   ilo_cp_end(cp);
}
2554
2555 static void
2556 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
2557 const struct pipe_poly_stipple *pattern,
2558 struct ilo_cp *cp)
2559 {
2560 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
2561 const uint8_t cmd_len = 33;
2562 int i;
2563
2564 ILO_GPE_VALID_GEN(dev, 6, 7);
2565 assert(Elements(pattern->stipple) == 32);
2566
2567 ilo_cp_begin(cp, cmd_len);
2568 ilo_cp_write(cp, cmd | (cmd_len - 2));
2569 for (i = 0; i < 32; i++)
2570 ilo_cp_write(cp, pattern->stipple[i]);
2571 ilo_cp_end(cp);
2572 }
2573
2574 static void
2575 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
2576 unsigned pattern, unsigned factor,
2577 struct ilo_cp *cp)
2578 {
2579 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
2580 const uint8_t cmd_len = 3;
2581 unsigned inverse;
2582
2583 ILO_GPE_VALID_GEN(dev, 6, 7);
2584 assert((pattern & 0xffff) == pattern);
2585 assert(factor >= 1 && factor <= 256);
2586
2587 ilo_cp_begin(cp, cmd_len);
2588 ilo_cp_write(cp, cmd | (cmd_len - 2));
2589 ilo_cp_write(cp, pattern);
2590
2591 if (dev->gen >= ILO_GEN(7)) {
2592 /* in U1.16 */
2593 inverse = (unsigned) (65536.0f / factor);
2594 ilo_cp_write(cp, inverse << 15 | factor);
2595 }
2596 else {
2597 /* in U1.13 */
2598 inverse = (unsigned) (8192.0f / factor);
2599 ilo_cp_write(cp, inverse << 16 | factor);
2600 }
2601
2602 ilo_cp_end(cp);
2603 }
2604
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS (validated for GEN6 and GEN7) with both
 * payload dwords set to zero.
 */
static void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
   const uint8_t cmd_len = 3;
   /* every field in both dwords is left at zero */
   const uint32_t dw1 = 0;
   const uint32_t dw2 = 0;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_end(cp);
}
2620
2621 static void
2622 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
2623 int index, unsigned svbi,
2624 unsigned max_svbi,
2625 bool load_vertex_count,
2626 struct ilo_cp *cp)
2627 {
2628 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
2629 const uint8_t cmd_len = 4;
2630 uint32_t dw1;
2631
2632 ILO_GPE_VALID_GEN(dev, 6, 6);
2633 assert(index >= 0 && index < 4);
2634
2635 dw1 = index << SVB_INDEX_SHIFT;
2636 if (load_vertex_count)
2637 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
2638
2639 ilo_cp_begin(cp, cmd_len);
2640 ilo_cp_write(cp, cmd | (cmd_len - 2));
2641 ilo_cp_write(cp, dw1);
2642 ilo_cp_write(cp, svbi);
2643 ilo_cp_write(cp, max_svbi);
2644 ilo_cp_end(cp);
2645 }
2646
2647 static void
2648 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
2649 int num_samples,
2650 const uint32_t *packed_sample_pos,
2651 bool pixel_location_center,
2652 struct ilo_cp *cp)
2653 {
2654 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
2655 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
2656 uint32_t dw1, dw2, dw3;
2657
2658 ILO_GPE_VALID_GEN(dev, 6, 7);
2659
2660 dw1 = (pixel_location_center) ?
2661 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
2662
2663 switch (num_samples) {
2664 case 0:
2665 case 1:
2666 dw1 |= MS_NUMSAMPLES_1;
2667 dw2 = 0;
2668 dw3 = 0;
2669 break;
2670 case 4:
2671 dw1 |= MS_NUMSAMPLES_4;
2672 dw2 = packed_sample_pos[0];
2673 dw3 = 0;
2674 break;
2675 case 8:
2676 assert(dev->gen >= ILO_GEN(7));
2677 dw1 |= MS_NUMSAMPLES_8;
2678 dw2 = packed_sample_pos[0];
2679 dw3 = packed_sample_pos[1];
2680 break;
2681 default:
2682 assert(!"unsupported sample count");
2683 dw1 |= MS_NUMSAMPLES_1;
2684 dw2 = 0;
2685 dw3 = 0;
2686 break;
2687 }
2688
2689 ilo_cp_begin(cp, cmd_len);
2690 ilo_cp_write(cp, cmd | (cmd_len - 2));
2691 ilo_cp_write(cp, dw1);
2692 ilo_cp_write(cp, dw2);
2693 if (dev->gen >= ILO_GEN(7))
2694 ilo_cp_write(cp, dw3);
2695 ilo_cp_end(cp);
2696 }
2697
2698 static void
2699 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
2700 const struct pipe_surface *surface,
2701 struct ilo_cp *cp)
2702 {
2703 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2704 ILO_GPE_CMD(0x3, 0x0, 0x06) :
2705 ILO_GPE_CMD(0x3, 0x1, 0x0e);
2706 const uint8_t cmd_len = 3;
2707 struct ilo_texture *tex;
2708 uint32_t slice_offset, x_offset, y_offset;
2709 int pitch;
2710
2711 ILO_GPE_VALID_GEN(dev, 6, 7);
2712
2713 tex = (surface) ? ilo_texture(surface->texture) : NULL;
2714 if (tex && surface->format != PIPE_FORMAT_S8_UINT)
2715 tex = tex->separate_s8;
2716
2717 if (!tex) {
2718 ilo_cp_begin(cp, cmd_len);
2719 ilo_cp_write(cp, cmd | (cmd_len - 2));
2720 ilo_cp_write(cp, 0);
2721 ilo_cp_write(cp, 0);
2722 ilo_cp_end(cp);
2723
2724 return;
2725 }
2726
2727 if (true) {
2728 slice_offset = ilo_texture_get_slice_offset(tex,
2729 surface->u.tex.level, surface->u.tex.first_layer,
2730 &x_offset, &y_offset);
2731 /* XXX X/Y offsets inherit from 3DSTATE_DEPTH_BUFFER */
2732 }
2733 else {
2734 slice_offset = 0;
2735 x_offset = 0;
2736 y_offset = 0;
2737 }
2738
2739 /*
2740 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
2741 *
2742 * "The pitch must be set to 2x the value computed based on width, as
2743 * the stencil buffer is stored with two rows interleaved."
2744 *
2745 * According to the classic driver, we need to do the same for GEN7+ even
2746 * though the Ivy Bridge PRM does not say anything about it.
2747 */
2748 pitch = 2 * tex->bo_stride;
2749 assert(pitch > 0 && pitch < 128 * 1024 && pitch % 128 == 0);
2750
2751 ilo_cp_begin(cp, cmd_len);
2752 ilo_cp_write(cp, cmd | (cmd_len - 2));
2753 ilo_cp_write(cp, pitch - 1);
2754 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2755 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2756 ilo_cp_end(cp);
2757 }
2758
2759 static void
2760 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2761 const struct pipe_surface *surface,
2762 struct ilo_cp *cp)
2763 {
2764 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2765 ILO_GPE_CMD(0x3, 0x0, 0x07) :
2766 ILO_GPE_CMD(0x3, 0x1, 0x0f);
2767 const uint8_t cmd_len = 3;
2768 struct ilo_texture *tex;
2769 uint32_t slice_offset;
2770
2771 ILO_GPE_VALID_GEN(dev, 6, 7);
2772
2773 if (!surface) {
2774 ilo_cp_begin(cp, cmd_len);
2775 ilo_cp_write(cp, cmd | (cmd_len - 2));
2776 ilo_cp_write(cp, 0);
2777 ilo_cp_write(cp, 0);
2778 ilo_cp_end(cp);
2779
2780 return;
2781 }
2782
2783 tex = ilo_texture(surface->texture);
2784
2785 /* TODO */
2786 slice_offset = 0;
2787
2788 assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
2789 tex->bo_stride % 128 == 0);
2790
2791 ilo_cp_begin(cp, cmd_len);
2792 ilo_cp_write(cp, cmd | (cmd_len - 2));
2793 ilo_cp_write(cp, tex->bo_stride - 1);
2794 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2795 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2796 ilo_cp_end(cp);
2797 }
2798
2799 static void
2800 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
2801 uint32_t clear_val,
2802 struct ilo_cp *cp)
2803 {
2804 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
2805 const uint8_t cmd_len = 2;
2806
2807 ILO_GPE_VALID_GEN(dev, 6, 6);
2808
2809 ilo_cp_begin(cp, cmd_len);
2810 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2811 GEN5_DEPTH_CLEAR_VALID);
2812 ilo_cp_write(cp, clear_val);
2813 ilo_cp_end(cp);
2814 }
2815
2816 static void
2817 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
2818 uint32_t dw1,
2819 struct intel_bo *bo, uint32_t bo_offset,
2820 bool write_qword,
2821 struct ilo_cp *cp)
2822 {
2823 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
2824 const uint8_t cmd_len = (write_qword) ? 5 : 4;
2825 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
2826 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
2827
2828 ILO_GPE_VALID_GEN(dev, 6, 7);
2829
2830 if (dw1 & PIPE_CONTROL_CS_STALL) {
2831 /*
2832 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2833 *
2834 * "1 of the following must also be set (when CS stall is set):
2835 *
2836 * * Depth Cache Flush Enable ([0] of DW1)
2837 * * Stall at Pixel Scoreboard ([1] of DW1)
2838 * * Depth Stall ([13] of DW1)
2839 * * Post-Sync Operation ([13] of DW1)
2840 * * Render Target Cache Flush Enable ([12] of DW1)
2841 * * Notify Enable ([8] of DW1)"
2842 *
2843 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
2844 *
2845 * "One of the following must also be set (when CS stall is set):
2846 *
2847 * * Render Target Cache Flush Enable ([12] of DW1)
2848 * * Depth Cache Flush Enable ([0] of DW1)
2849 * * Stall at Pixel Scoreboard ([1] of DW1)
2850 * * Depth Stall ([13] of DW1)
2851 * * Post-Sync Operation ([13] of DW1)"
2852 */
2853 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
2854 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
2855 PIPE_CONTROL_STALL_AT_SCOREBOARD |
2856 PIPE_CONTROL_DEPTH_STALL;
2857
2858 /* post-sync op */
2859 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
2860 PIPE_CONTROL_WRITE_DEPTH_COUNT |
2861 PIPE_CONTROL_WRITE_TIMESTAMP;
2862
2863 if (dev->gen == ILO_GEN(6))
2864 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
2865
2866 assert(dw1 & bit_test);
2867 }
2868
2869 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
2870 /*
2871 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2872 *
2873 * "Following bits must be clear (when Depth Stall is set):
2874 *
2875 * * Render Target Cache Flush Enable ([12] of DW1)
2876 * * Depth Cache Flush Enable ([0] of DW1)"
2877 */
2878 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
2879 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
2880 }
2881
2882 ilo_cp_begin(cp, cmd_len);
2883 ilo_cp_write(cp, cmd | (cmd_len - 2));
2884 ilo_cp_write(cp, dw1);
2885 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
2886 ilo_cp_write(cp, 0);
2887 if (write_qword)
2888 ilo_cp_write(cp, 0);
2889 ilo_cp_end(cp);
2890 }
2891
2892 static void
2893 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
2894 const struct pipe_draw_info *info,
2895 bool rectlist,
2896 struct ilo_cp *cp)
2897 {
2898 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
2899 const uint8_t cmd_len = 6;
2900 const int prim = (rectlist) ?
2901 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
2902 const int vb_access = (info->indexed) ?
2903 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
2904 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
2905
2906 ILO_GPE_VALID_GEN(dev, 6, 6);
2907
2908 ilo_cp_begin(cp, cmd_len);
2909 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2910 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
2911 vb_access);
2912 ilo_cp_write(cp, info->count);
2913 ilo_cp_write(cp, info->start);
2914 ilo_cp_write(cp, info->instance_count);
2915 ilo_cp_write(cp, info->start_instance);
2916 ilo_cp_write(cp, info->index_bias);
2917 ilo_cp_end(cp);
2918 }
2919
2920 static uint32_t
2921 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
2922 const struct ilo_shader **cs,
2923 uint32_t *sampler_state,
2924 int *num_samplers,
2925 uint32_t *binding_table_state,
2926 int *num_surfaces,
2927 int num_ids,
2928 struct ilo_cp *cp)
2929 {
2930 /*
2931 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
2932 *
2933 * "(Interface Descriptor Total Length) This field must have the same
2934 * alignment as the Interface Descriptor Data Start Address.
2935 *
2936 * It must be DQWord (32-byte) aligned..."
2937 *
2938 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
2939 *
2940 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
2941 * aligned address of the Interface Descriptor data."
2942 */
2943 const int state_align = 32 / 4;
2944 const int state_len = (32 / 4) * num_ids;
2945 uint32_t state_offset, *dw;
2946 int i;
2947
2948 ILO_GPE_VALID_GEN(dev, 6, 6);
2949
2950 dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
2951 state_len, state_align, &state_offset);
2952
2953 for (i = 0; i < num_ids; i++) {
2954 int curbe_read_len;
2955
2956 curbe_read_len = (cs[i]->pcb.clip_state_size + 31) / 32;
2957
2958 dw[0] = cs[i]->cache_offset;
2959 dw[1] = 1 << 18; /* SPF */
2960 dw[2] = sampler_state[i] |
2961 (num_samplers[i] + 3) / 4 << 2;
2962 dw[3] = binding_table_state[i] |
2963 num_surfaces[i];
2964 dw[4] = curbe_read_len << 16 | /* CURBE Read Length */
2965 0; /* CURBE Read Offset */
2966 dw[5] = 0; /* Barrier ID */
2967 dw[6] = 0;
2968 dw[7] = 0;
2969
2970 dw += 8;
2971 }
2972
2973 return state_offset;
2974 }
2975
2976 static void
2977 viewport_get_guardband(const struct ilo_dev_info *dev,
2978 int center_x, int center_y,
2979 int *min_gbx, int *max_gbx,
2980 int *min_gby, int *max_gby)
2981 {
2982 /*
2983 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
2984 *
2985 * "Per-Device Guardband Extents
2986 *
2987 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
2988 * - Maximum Post-Clamp Delta (X or Y): 16K"
2989 *
2990 * "In addition, in order to be correctly rendered, objects must have a
2991 * screenspace bounding box not exceeding 8K in the X or Y direction.
2992 * This additional restriction must also be comprehended by software,
2993 * i.e., enforced by use of clipping."
2994 *
2995 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
2996 *
2997 * "Per-Device Guardband Extents
2998 *
2999 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
3000 * - Maximum Post-Clamp Delta (X or Y): N/A"
3001 *
3002 * "In addition, in order to be correctly rendered, objects must have a
3003 * screenspace bounding box not exceeding 8K in the X or Y direction.
3004 * This additional restriction must also be comprehended by software,
3005 * i.e., enforced by use of clipping."
3006 *
3007 * Combined, the bounding box of any object can not exceed 8K in both
3008 * width and height.
3009 *
3010 * Below we set the guardband as a squre of length 8K, centered at where
3011 * the viewport is. This makes sure all objects passing the GB test are
3012 * valid to the renderer, and those failing the XY clipping have a
3013 * better chance of passing the GB test.
3014 */
3015 const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
3016 const int half_len = 8192 / 2;
3017
3018 /* make sure the guardband is within the valid range */
3019 if (center_x - half_len < -max_extent)
3020 center_x = -max_extent + half_len;
3021 else if (center_x + half_len > max_extent - 1)
3022 center_x = max_extent - half_len;
3023
3024 if (center_y - half_len < -max_extent)
3025 center_y = -max_extent + half_len;
3026 else if (center_y + half_len > max_extent - 1)
3027 center_y = max_extent - half_len;
3028
3029 *min_gbx = (float) (center_x - half_len);
3030 *max_gbx = (float) (center_x + half_len);
3031 *min_gby = (float) (center_y - half_len);
3032 *max_gby = (float) (center_y + half_len);
3033 }
3034
3035 void
3036 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
3037 const struct pipe_viewport_state *state,
3038 struct ilo_viewport_cso *vp)
3039 {
3040 const float scale_x = fabs(state->scale[0]);
3041 const float scale_y = fabs(state->scale[1]);
3042 const float scale_z = fabs(state->scale[2]);
3043 int min_gbx, max_gbx, min_gby, max_gby;
3044
3045 ILO_GPE_VALID_GEN(dev, 6, 7);
3046
3047 viewport_get_guardband(dev,
3048 (int) state->translate[0],
3049 (int) state->translate[1],
3050 &min_gbx, &max_gbx, &min_gby, &max_gby);
3051
3052 /* matrix form */
3053 vp->m00 = state->scale[0];
3054 vp->m11 = state->scale[1];
3055 vp->m22 = state->scale[2];
3056 vp->m30 = state->translate[0];
3057 vp->m31 = state->translate[1];
3058 vp->m32 = state->translate[2];
3059
3060 /* guardband in NDC space */
3061 vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
3062 vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
3063 vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
3064 vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
3065
3066 /* viewport in screen space */
3067 vp->min_x = scale_x * -1.0f + state->translate[0];
3068 vp->max_x = scale_x * 1.0f + state->translate[0];
3069 vp->min_y = scale_y * -1.0f + state->translate[1];
3070 vp->max_y = scale_y * 1.0f + state->translate[1];
3071 vp->min_z = scale_z * -1.0f + state->translate[2];
3072 vp->max_z = scale_z * 1.0f + state->translate[2];
3073 }
3074
3075 static uint32_t
3076 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
3077 const struct ilo_viewport_cso *viewports,
3078 unsigned num_viewports,
3079 struct ilo_cp *cp)
3080 {
3081 const int state_align = 32 / 4;
3082 const int state_len = 8 * num_viewports;
3083 uint32_t state_offset, *dw;
3084 unsigned i;
3085
3086 ILO_GPE_VALID_GEN(dev, 6, 6);
3087
3088 /*
3089 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
3090 *
3091 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
3092 * stored as an array of up to 16 elements..."
3093 */
3094 assert(num_viewports && num_viewports <= 16);
3095
3096 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
3097 state_len, state_align, &state_offset);
3098
3099 for (i = 0; i < num_viewports; i++) {
3100 const struct ilo_viewport_cso *vp = &viewports[i];
3101
3102 dw[0] = fui(vp->m00);
3103 dw[1] = fui(vp->m11);
3104 dw[2] = fui(vp->m22);
3105 dw[3] = fui(vp->m30);
3106 dw[4] = fui(vp->m31);
3107 dw[5] = fui(vp->m32);
3108 dw[6] = 0;
3109 dw[7] = 0;
3110
3111 dw += 8;
3112 }
3113
3114 return state_offset;
3115 }
3116
3117 static uint32_t
3118 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
3119 const struct ilo_viewport_cso *viewports,
3120 unsigned num_viewports,
3121 struct ilo_cp *cp)
3122 {
3123 const int state_align = 32 / 4;
3124 const int state_len = 4 * num_viewports;
3125 uint32_t state_offset, *dw;
3126 unsigned i;
3127
3128 ILO_GPE_VALID_GEN(dev, 6, 6);
3129
3130 /*
3131 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
3132 *
3133 * "The viewport-related state is stored as an array of up to 16
3134 * elements..."
3135 */
3136 assert(num_viewports && num_viewports <= 16);
3137
3138 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
3139 state_len, state_align, &state_offset);
3140
3141 for (i = 0; i < num_viewports; i++) {
3142 const struct ilo_viewport_cso *vp = &viewports[i];
3143
3144 dw[0] = fui(vp->min_gbx);
3145 dw[1] = fui(vp->max_gbx);
3146 dw[2] = fui(vp->min_gby);
3147 dw[3] = fui(vp->max_gby);
3148
3149 dw += 4;
3150 }
3151
3152 return state_offset;
3153 }
3154
3155 static uint32_t
3156 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
3157 const struct ilo_viewport_cso *viewports,
3158 unsigned num_viewports,
3159 struct ilo_cp *cp)
3160 {
3161 const int state_align = 32 / 4;
3162 const int state_len = 2 * num_viewports;
3163 uint32_t state_offset, *dw;
3164 unsigned i;
3165
3166 ILO_GPE_VALID_GEN(dev, 6, 7);
3167
3168 /*
3169 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
3170 *
3171 * "The viewport state is stored as an array of up to 16 elements..."
3172 */
3173 assert(num_viewports && num_viewports <= 16);
3174
3175 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
3176 state_len, state_align, &state_offset);
3177
3178 for (i = 0; i < num_viewports; i++) {
3179 const struct ilo_viewport_cso *vp = &viewports[i];
3180
3181 dw[0] = fui(vp->min_z);
3182 dw[1] = fui(vp->max_z);
3183
3184 dw += 2;
3185 }
3186
3187 return state_offset;
3188 }
3189
3190 static uint32_t
3191 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
3192 const struct pipe_stencil_ref *stencil_ref,
3193 float alpha_ref,
3194 const struct pipe_blend_color *blend_color,
3195 struct ilo_cp *cp)
3196 {
3197 const int state_align = 64 / 4;
3198 const int state_len = 6;
3199 uint32_t state_offset, *dw;
3200
3201 ILO_GPE_VALID_GEN(dev, 6, 7);
3202
3203 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
3204 state_len, state_align, &state_offset);
3205
3206 dw[0] = stencil_ref->ref_value[0] << 24 |
3207 stencil_ref->ref_value[1] << 16 |
3208 BRW_ALPHATEST_FORMAT_UNORM8;
3209 dw[1] = float_to_ubyte(alpha_ref);
3210 dw[2] = fui(blend_color->color[0]);
3211 dw[3] = fui(blend_color->color[1]);
3212 dw[4] = fui(blend_color->color[2]);
3213 dw[5] = fui(blend_color->color[3]);
3214
3215 return state_offset;
3216 }
3217
3218 static int
3219 gen6_blend_factor_dst_alpha_forced_one(int factor)
3220 {
3221 switch (factor) {
3222 case BRW_BLENDFACTOR_DST_ALPHA:
3223 return BRW_BLENDFACTOR_ONE;
3224 case BRW_BLENDFACTOR_INV_DST_ALPHA:
3225 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
3226 return BRW_BLENDFACTOR_ZERO;
3227 default:
3228 return factor;
3229 }
3230 }
3231
3232 static uint32_t
3233 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
3234 const struct pipe_blend_state *blend,
3235 const struct ilo_fb_state *fb,
3236 const struct pipe_alpha_state *alpha,
3237 struct ilo_cp *cp)
3238 {
3239 const int state_align = 64 / 4;
3240 int state_len;
3241 uint32_t state_offset, *dw;
3242 int num_targets, i;
3243
3244 ILO_GPE_VALID_GEN(dev, 6, 7);
3245
3246 /*
3247 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
3248 *
3249 * "The blend state is stored as an array of up to 8 elements..."
3250 */
3251 num_targets = fb->state.nr_cbufs;
3252 assert(num_targets <= 8);
3253
3254 if (!num_targets) {
3255 if (!alpha->enabled)
3256 return 0;
3257 /* to be able to reference alpha func */
3258 num_targets = 1;
3259 }
3260
3261 state_len = 2 * num_targets;
3262
3263 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
3264 state_len, state_align, &state_offset);
3265
3266 for (i = 0; i < num_targets; i++) {
3267 const int target = (blend->independent_blend_enable) ? i : 0;
3268 const struct pipe_rt_blend_state *rt = &blend->rt[target];
3269 const int num_samples = fb->num_samples;
3270 const struct util_format_description *format_desc =
3271 (target < fb->state.nr_cbufs) ?
3272 util_format_description(fb->state.cbufs[target]->format) : NULL;
3273 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
3274
3275 rt_is_unorm = true;
3276 rt_is_pure_integer = false;
3277 rt_dst_alpha_forced_one = false;
3278
3279 if (format_desc) {
3280 int ch;
3281
3282 switch (format_desc->format) {
3283 case PIPE_FORMAT_B8G8R8X8_UNORM:
3284 /* force alpha to one when the HW format has alpha */
3285 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
3286 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
3287 rt_dst_alpha_forced_one = true;
3288 break;
3289 default:
3290 break;
3291 }
3292
3293 for (ch = 0; ch < 4; ch++) {
3294 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
3295 continue;
3296
3297 if (format_desc->channel[ch].pure_integer) {
3298 rt_is_unorm = false;
3299 rt_is_pure_integer = true;
3300 break;
3301 }
3302
3303 if (!format_desc->channel[ch].normalized ||
3304 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
3305 rt_is_unorm = false;
3306 }
3307 }
3308
3309 dw[0] = 0;
3310 dw[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 | 0x3;
3311
3312 /*
3313 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3314 *
3315 * "* Color Buffer Blending and Logic Ops must not be enabled
3316 * simultaneously, or behavior is UNDEFINED.
3317 *
3318 * * Logic Ops are only supported on *_UNORM surfaces (excluding
3319 * _SRGB variants), otherwise Logic Ops must be DISABLED."
3320 *
3321 * Since blend->logicop_enable takes precedence over rt->blend_enable,
3322 * and logicop is ignored for non-UNORM color buffers, no special care
3323 * is needed.
3324 */
3325 if (blend->logicop_enable) {
3326 if (rt_is_unorm) {
3327 dw[1] |= 1 << 22 |
3328 gen6_translate_pipe_logicop(blend->logicop_func) << 18;
3329 }
3330 }
3331 else if (rt->blend_enable && !rt_is_pure_integer) {
3332 int rgb_src, rgb_dst, a_src, a_dst;
3333
3334 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
3335 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
3336 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
3337 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
3338
3339 if (rt_dst_alpha_forced_one) {
3340 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
3341 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
3342 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
3343 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
3344 }
3345
3346 dw[0] |= 1 << 31 |
3347 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
3348 a_src << 20 |
3349 a_dst << 15 |
3350 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
3351 rgb_src << 5 |
3352 rgb_dst;
3353
3354 if (rt->rgb_func != rt->alpha_func ||
3355 rgb_src != a_src ||
3356 rgb_dst != a_dst)
3357 dw[0] |= 1 << 30;
3358 }
3359
3360 /*
3361 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
3362 *
3363 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
3364 * Dither both must be disabled."
3365 *
3366 * There is no such limitation on GEN7, or for AlphaToOne. But GL
3367 * requires that anyway.
3368 */
3369 if (num_samples > 1) {
3370 if (blend->alpha_to_coverage)
3371 dw[1] |= 1 << 31;
3372
3373 if (blend->alpha_to_one) {
3374 const bool dual_blend =
3375 (!blend->logicop_enable && rt->blend_enable &&
3376 util_blend_state_is_dual(blend, target));
3377
3378 /*
3379 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
3380 *
3381 * "If Dual Source Blending is enabled, this bit (AlphaToOne
3382 * Enable) must be disabled."
3383 */
3384 if (!dual_blend)
3385 dw[1] |= 1 << 30;
3386 }
3387
3388 if (dev->gen >= ILO_GEN(7))
3389 dw[1] |= 1 << 29;
3390 }
3391
3392 if (!(rt->colormask & PIPE_MASK_A))
3393 dw[1] |= 1 << 27;
3394 if (!(rt->colormask & PIPE_MASK_R))
3395 dw[1] |= 1 << 26;
3396 if (!(rt->colormask & PIPE_MASK_G))
3397 dw[1] |= 1 << 25;
3398 if (!(rt->colormask & PIPE_MASK_B))
3399 dw[1] |= 1 << 24;
3400
3401 /*
3402 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
3403 *
3404 * "Alpha Test can only be enabled if Pixel Shader outputs a float
3405 * alpha value."
3406 */
3407 if (alpha->enabled && !rt_is_pure_integer) {
3408 dw[1] |= 1 << 16 |
3409 gen6_translate_dsa_func(alpha->func) << 13;
3410 }
3411
3412 if (blend->dither)
3413 dw[1] |= 1 << 12;
3414
3415 dw += 2;
3416 }
3417
3418 return state_offset;
3419 }
3420
3421 static uint32_t
3422 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
3423 const struct pipe_depth_stencil_alpha_state *dsa,
3424 struct ilo_cp *cp)
3425 {
3426 const struct pipe_depth_state *depth = &dsa->depth;
3427 const struct pipe_stencil_state *stencil0 = &dsa->stencil[0];
3428 const struct pipe_stencil_state *stencil1 = &dsa->stencil[1];
3429 const int state_align = 64 / 4;
3430 const int state_len = 3;
3431 uint32_t state_offset, *dw;
3432
3433 ILO_GPE_VALID_GEN(dev, 6, 7);
3434
3435 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
3436 state_len, state_align, &state_offset);
3437
3438 /*
3439 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
3440 *
3441 * "If the Depth Buffer is either undefined or does not have a surface
3442 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
3443 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
3444 *
3445 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
3446 *
3447 * "This field (Stencil Test Enable) cannot be enabled if
3448 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
3449 *
3450 * TODO We do not check these yet.
3451 */
3452 if (stencil0->enabled) {
3453 dw[0] = 1 << 31 |
3454 gen6_translate_dsa_func(stencil0->func) << 28 |
3455 gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
3456 gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
3457 gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
3458 if (stencil0->writemask)
3459 dw[0] |= 1 << 18;
3460
3461 dw[1] = stencil0->valuemask << 24 |
3462 stencil0->writemask << 16;
3463
3464 if (stencil1->enabled) {
3465 dw[0] |= 1 << 15 |
3466 gen6_translate_dsa_func(stencil1->func) << 12 |
3467 gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
3468 gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
3469 gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
3470 if (stencil1->writemask)
3471 dw[0] |= 1 << 18;
3472
3473 dw[1] |= stencil1->valuemask << 8 |
3474 stencil1->writemask;
3475 }
3476 }
3477 else {
3478 dw[0] = 0;
3479 dw[1] = 0;
3480 }
3481
3482 /*
3483 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
3484 *
3485 * "Enabling the Depth Test function without defining a Depth Buffer is
3486 * UNDEFINED."
3487 *
3488 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
3489 *
3490 * "A Depth Buffer must be defined before enabling writes to it, or
3491 * operation is UNDEFINED."
3492 *
3493 * TODO We do not check these yet.
3494 */
3495 dw[2] = depth->enabled << 31 |
3496 depth->writemask << 26;
3497 if (depth->enabled)
3498 dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
3499 else
3500 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
3501
3502 return state_offset;
3503 }
3504
3505 void
3506 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
3507 unsigned start_slot,
3508 unsigned num_states,
3509 const struct pipe_scissor_state *states,
3510 struct ilo_scissor_state *scissor)
3511 {
3512 unsigned i;
3513
3514 ILO_GPE_VALID_GEN(dev, 6, 7);
3515
3516 for (i = 0; i < num_states; i++) {
3517 uint16_t min_x, min_y, max_x, max_y;
3518
3519 /* both max and min are inclusive in SCISSOR_RECT */
3520 if (states[i].minx < states[i].maxx &&
3521 states[i].miny < states[i].maxy) {
3522 min_x = states[i].minx;
3523 min_y = states[i].miny;
3524 max_x = states[i].maxx - 1;
3525 max_y = states[i].maxy - 1;
3526 }
3527 else {
3528 /* we have to make min greater than max */
3529 min_x = 1;
3530 min_y = 1;
3531 max_x = 0;
3532 max_y = 0;
3533 }
3534
3535 scissor->payload[start_slot * 2 + 0] = min_y << 16 | min_x;
3536 scissor->payload[start_slot * 2 + 1] = max_y << 16 | max_x;
3537 start_slot++;
3538 }
3539 }
3540
3541 void
3542 ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
3543 struct ilo_scissor_state *scissor)
3544 {
3545 unsigned i;
3546
3547 for (i = 0; i < Elements(scissor->payload); i += 2) {
3548 scissor->payload[i + 0] = 1 << 16 | 1;
3549 scissor->payload[i + 1] = 0;
3550 }
3551 }
3552
3553 static uint32_t
3554 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
3555 const struct ilo_scissor_state *scissor,
3556 unsigned num_viewports,
3557 struct ilo_cp *cp)
3558 {
3559 const int state_align = 32 / 4;
3560 const int state_len = 2 * num_viewports;
3561 uint32_t state_offset, *dw;
3562
3563 ILO_GPE_VALID_GEN(dev, 6, 7);
3564
3565 /*
3566 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
3567 *
3568 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
3569 * stored as an array of up to 16 elements..."
3570 */
3571 assert(num_viewports && num_viewports <= 16);
3572
3573 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
3574 state_len, state_align, &state_offset);
3575
3576 memcpy(dw, scissor->payload, state_len * 4);
3577
3578 return state_offset;
3579 }
3580
/**
 * Emit BINDING_TABLE_STATE: a verbatim copy of \p num_surface_states dwords
 * from \p surface_states.
 *
 * \return the state offset reported by ilo_cp_steal_ptr(), or 0 when the
 *         table is empty and nothing is emitted
 */
static uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int alignment = 32 / 4;
   uint32_t offset = 0;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   if (num_surface_states) {
      uint32_t *table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
            num_surface_states, alignment, &offset);

      memcpy(table, surface_states,
            num_surface_states * sizeof(surface_states[0]));
   }

   return offset;
}
3610
3611 static void
3612 gen6_fill_null_SURFACE_STATE(const struct ilo_dev_info *dev,
3613 unsigned width, unsigned height,
3614 unsigned depth, unsigned lod,
3615 uint32_t *dw, int num_dwords)
3616 {
3617 ILO_GPE_VALID_GEN(dev, 6, 6);
3618 assert(num_dwords == 6);
3619
3620 /*
3621 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
3622 *
3623 * "A null surface will be used in instances where an actual surface is
3624 * not bound. When a write message is generated to a null surface, no
3625 * actual surface is written to. When a read message (including any
3626 * sampling engine message) is generated to a null surface, the result
3627 * is all zeros. Note that a null surface type is allowed to be used
3628 * with all messages, even if it is not specificially indicated as
3629 * supported. All of the remaining fields in surface state are ignored
3630 * for null surfaces, with the following exceptions:
3631 *
3632 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
3633 * depth buffer's corresponding state for all render target
3634 * surfaces, including null.
3635 * * Surface Format must be R8G8B8A8_UNORM."
3636 *
3637 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3638 *
3639 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
3640 * true"
3641 */
3642
3643 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
3644 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
3645
3646 dw[1] = 0;
3647
3648 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
3649 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
3650 lod << BRW_SURFACE_LOD_SHIFT;
3651
3652 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
3653 BRW_SURFACE_TILED;
3654
3655 dw[4] = 0;
3656 dw[5] = 0;
3657 }
3658
3659 static void
3660 gen6_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
3661 const struct ilo_buffer *buf,
3662 unsigned offset, unsigned size,
3663 unsigned struct_size,
3664 enum pipe_format elem_format,
3665 bool is_rt, bool render_cache_rw,
3666 uint32_t *dw, int num_dwords)
3667 {
3668 const int elem_size = util_format_get_blocksize(elem_format);
3669 int width, height, depth, pitch;
3670 int surface_format, num_entries;
3671
3672 ILO_GPE_VALID_GEN(dev, 6, 6);
3673 assert(num_dwords == 6);
3674
3675 /*
3676 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
3677 * structure in a buffer.
3678 */
3679
3680 surface_format = ilo_translate_color_format(elem_format);
3681
3682 num_entries = size / struct_size;
3683 /* see if there is enough space to fit another element */
3684 if (size % struct_size >= elem_size)
3685 num_entries++;
3686
3687 /*
3688 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3689 *
3690 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
3691 * Address) specifies the base address of first element of the
3692 * surface. The surface is interpreted as a simple array of that
3693 * single element type. The address must be naturally-aligned to the
3694 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
3695 * must be 16-byte aligned).
3696 *
3697 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
3698 * the base address of the first element of the surface, computed in
3699 * software by adding the surface base address to the byte offset of
3700 * the element in the buffer."
3701 */
3702 if (is_rt)
3703 assert(offset % elem_size == 0);
3704
3705 /*
3706 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
3707 *
3708 * "For buffer surfaces, the number of entries in the buffer ranges
3709 * from 1 to 2^27."
3710 */
3711 assert(num_entries >= 1 && num_entries <= 1 << 27);
3712
3713 /*
3714 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3715 *
3716 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
3717 * indicates the size of the structure."
3718 */
3719 pitch = struct_size;
3720
3721 pitch--;
3722 num_entries--;
3723 /* bits [6:0] */
3724 width = (num_entries & 0x0000007f);
3725 /* bits [19:7] */
3726 height = (num_entries & 0x000fff80) >> 7;
3727 /* bits [26:20] */
3728 depth = (num_entries & 0x07f00000) >> 20;
3729
3730 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
3731 surface_format << BRW_SURFACE_FORMAT_SHIFT;
3732 if (render_cache_rw)
3733 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
3734
3735 dw[1] = offset;
3736
3737 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
3738 width << BRW_SURFACE_WIDTH_SHIFT;
3739
3740 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
3741 pitch << BRW_SURFACE_PITCH_SHIFT;
3742
3743 dw[4] = 0;
3744 dw[5] = 0;
3745 }
3746
3747 static void
3748 gen6_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
3749 struct ilo_texture *tex,
3750 enum pipe_format format,
3751 unsigned first_level, unsigned num_levels,
3752 unsigned first_layer, unsigned num_layers,
3753 bool is_rt, bool render_cache_rw,
3754 uint32_t *dw, int num_dwords)
3755 {
3756 int surface_type, surface_format;
3757 int width, height, depth, pitch, lod;
3758 unsigned layer_offset, x_offset, y_offset;
3759
3760 ILO_GPE_VALID_GEN(dev, 6, 6);
3761 assert(num_dwords == 6);
3762
3763 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
3764 assert(surface_type != BRW_SURFACE_BUFFER);
3765
3766 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
3767 format = PIPE_FORMAT_Z32_FLOAT;
3768
3769 if (is_rt)
3770 surface_format = ilo_translate_render_format(format);
3771 else
3772 surface_format = ilo_translate_texture_format(format);
3773 assert(surface_format >= 0);
3774
3775 width = tex->base.width0;
3776 height = tex->base.height0;
3777 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
3778 tex->base.depth0 : num_layers;
3779 pitch = tex->bo_stride;
3780
3781 if (surface_type == BRW_SURFACE_CUBE) {
3782 /*
3783 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3784 *
3785 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
3786 * range of this field (Depth) is [0,84], indicating the number of
3787 * cube array elements (equal to the number of underlying 2D array
3788 * elements divided by 6). For other surfaces, this field must be
3789 * zero."
3790 *
3791 * When is_rt is true, we treat the texture as a 2D one to avoid the
3792 * restriction.
3793 */
3794 if (is_rt) {
3795 surface_type = BRW_SURFACE_2D;
3796 }
3797 else {
3798 assert(num_layers % 6 == 0);
3799 depth = num_layers / 6;
3800 }
3801 }
3802
3803 /* sanity check the size */
3804 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
3805 switch (surface_type) {
3806 case BRW_SURFACE_1D:
3807 assert(width <= 8192 && height == 1 && depth <= 512);
3808 assert(first_layer < 512 && num_layers <= 512);
3809 break;
3810 case BRW_SURFACE_2D:
3811 assert(width <= 8192 && height <= 8192 && depth <= 512);
3812 assert(first_layer < 512 && num_layers <= 512);
3813 break;
3814 case BRW_SURFACE_3D:
3815 assert(width <= 2048 && height <= 2048 && depth <= 2048);
3816 assert(first_layer < 2048 && num_layers <= 512);
3817 if (!is_rt)
3818 assert(first_layer == 0);
3819 break;
3820 case BRW_SURFACE_CUBE:
3821 assert(width <= 8192 && height <= 8192 && depth <= 85);
3822 assert(width == height);
3823 assert(first_layer < 512 && num_layers <= 512);
3824 if (is_rt)
3825 assert(first_layer == 0);
3826 break;
3827 default:
3828 assert(!"unexpected surface type");
3829 break;
3830 }
3831
3832 /* non-full array spacing is supported only on GEN7+ */
3833 assert(tex->array_spacing_full);
3834 /* non-interleaved samples are supported only on GEN7+ */
3835 if (tex->base.nr_samples > 1)
3836 assert(tex->interleaved);
3837
3838 if (is_rt) {
3839 /*
3840 * Compute the offset to the layer manually.
3841 *
3842 * For rendering, the hardware requires LOD to be the same for all
3843 * render targets and the depth buffer. We need to compute the offset
3844 * to the layer manually and always set LOD to 0.
3845 */
3846 if (true) {
3847 /* we lose the capability for layered rendering */
3848 assert(num_layers == 1);
3849
3850 layer_offset = ilo_texture_get_slice_offset(tex,
3851 first_level, first_layer, &x_offset, &y_offset);
3852
3853 assert(x_offset % 4 == 0);
3854 assert(y_offset % 2 == 0);
3855 x_offset /= 4;
3856 y_offset /= 2;
3857
3858 /* derive the size for the LOD */
3859 width = u_minify(width, first_level);
3860 height = u_minify(height, first_level);
3861 if (surface_type == BRW_SURFACE_3D)
3862 depth = u_minify(depth, first_level);
3863 else
3864 depth = 1;
3865
3866 first_level = 0;
3867 first_layer = 0;
3868 lod = 0;
3869 }
3870 else {
3871 layer_offset = 0;
3872 x_offset = 0;
3873 y_offset = 0;
3874 }
3875
3876 assert(num_levels == 1);
3877 lod = first_level;
3878 }
3879 else {
3880 layer_offset = 0;
3881 x_offset = 0;
3882 y_offset = 0;
3883
3884 lod = num_levels - 1;
3885 }
3886
3887 /*
3888 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3889 *
3890 * "Linear render target surface base addresses must be element-size
3891 * aligned, for non-YUV surface formats, or a multiple of 2
3892 * element-sizes for YUV surface formats. Other linear surfaces have
3893 * no alignment requirements (byte alignment is sufficient.)"
3894 *
3895 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3896 *
3897 * "For linear render target surfaces, the pitch must be a multiple
3898 * of the element size for non-YUV surface formats. Pitch must be a
3899 * multiple of 2 * element size for YUV surface formats."
3900 *
3901 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
3902 *
3903 * "For linear surfaces, this field (X Offset) must be zero"
3904 */
3905 if (tex->tiling == INTEL_TILING_NONE) {
3906 if (is_rt) {
3907 const int elem_size = util_format_get_blocksize(format);
3908 assert(layer_offset % elem_size == 0);
3909 assert(pitch % elem_size == 0);
3910 }
3911
3912 assert(!x_offset);
3913 }
3914
3915 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
3916 surface_format << BRW_SURFACE_FORMAT_SHIFT |
3917 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
3918
3919 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
3920 dw[0] |= 1 << 9 |
3921 BRW_SURFACE_CUBEFACE_ENABLES;
3922 }
3923
3924 if (render_cache_rw)
3925 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
3926
3927 dw[1] = layer_offset;
3928
3929 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
3930 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
3931 lod << BRW_SURFACE_LOD_SHIFT;
3932
3933 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
3934 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
3935 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
3936
3937 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
3938 first_layer << 17 |
3939 (num_layers - 1) << 8 |
3940 ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
3941 BRW_SURFACE_MULTISAMPLECOUNT_1);
3942
3943 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
3944 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
3945 if (tex->valign_4)
3946 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
3947 }
3948
3949 static uint32_t
3950 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
3951 struct intel_bo *bo, bool for_render,
3952 const uint32_t *dw, int num_dwords,
3953 struct ilo_cp *cp)
3954 {
3955 const int state_align = 32 / 4;
3956 const int state_len = 6;
3957 uint32_t state_offset;
3958 uint32_t read_domains, write_domain;
3959
3960 ILO_GPE_VALID_GEN(dev, 6, 6);
3961 assert(num_dwords == state_len);
3962
3963 if (for_render) {
3964 read_domains = INTEL_DOMAIN_RENDER;
3965 write_domain = INTEL_DOMAIN_RENDER;
3966 }
3967 else {
3968 read_domains = INTEL_DOMAIN_SAMPLER;
3969 write_domain = 0;
3970 }
3971
3972 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
3973 ilo_cp_write(cp, dw[0]);
3974 ilo_cp_write_bo(cp, dw[1], bo, read_domains, write_domain);
3975 ilo_cp_write(cp, dw[2]);
3976 ilo_cp_write(cp, dw[3]);
3977 ilo_cp_write(cp, dw[4]);
3978 ilo_cp_write(cp, dw[5]);
3979 ilo_cp_end(cp);
3980
3981 return state_offset;
3982 }
3983
3984 static uint32_t
3985 gen6_emit_surf_SURFACE_STATE(const struct ilo_dev_info *dev,
3986 const struct pipe_surface *surface,
3987 struct ilo_cp *cp)
3988 {
3989 struct intel_bo *bo;
3990 uint32_t dw[6];
3991
3992 ILO_GPE_VALID_GEN(dev, 6, 6);
3993
3994 if (surface && surface->texture) {
3995 struct ilo_texture *tex = ilo_texture(surface->texture);
3996
3997 bo = tex->bo;
3998
3999 /*
4000 * classic i965 sets render_cache_rw for constant buffers and sol
4001 * surfaces but not render buffers. Why?
4002 */
4003 gen6_fill_normal_SURFACE_STATE(dev, tex, surface->format,
4004 surface->u.tex.level, 1,
4005 surface->u.tex.first_layer,
4006 surface->u.tex.last_layer - surface->u.tex.first_layer + 1,
4007 true, true, dw, Elements(dw));
4008 }
4009 else {
4010 bo = NULL;
4011 gen6_fill_null_SURFACE_STATE(dev,
4012 surface->width, surface->height, 1, 0, dw, Elements(dw));
4013 }
4014
4015 return gen6_emit_SURFACE_STATE(dev, bo, true, dw, Elements(dw), cp);
4016 }
4017
4018 static uint32_t
4019 gen6_emit_view_SURFACE_STATE(const struct ilo_dev_info *dev,
4020 const struct pipe_sampler_view *view,
4021 struct ilo_cp *cp)
4022 {
4023 struct intel_bo *bo;
4024 uint32_t dw[6];
4025
4026 ILO_GPE_VALID_GEN(dev, 6, 6);
4027
4028 if (view->texture->target == PIPE_BUFFER) {
4029 const unsigned elem_size = util_format_get_blocksize(view->format);
4030 const unsigned first_elem = view->u.buf.first_element;
4031 const unsigned num_elems = view->u.buf.last_element - first_elem + 1;
4032 struct ilo_buffer *buf = ilo_buffer(view->texture);
4033
4034 gen6_fill_buffer_SURFACE_STATE(dev, buf,
4035 first_elem * elem_size, num_elems * elem_size,
4036 elem_size, view->format, false, false, dw, Elements(dw));
4037
4038 bo = buf->bo;
4039 }
4040 else {
4041 struct ilo_texture *tex = ilo_texture(view->texture);
4042
4043 gen6_fill_normal_SURFACE_STATE(dev, tex, view->format,
4044 view->u.tex.first_level,
4045 view->u.tex.last_level - view->u.tex.first_level + 1,
4046 view->u.tex.first_layer,
4047 view->u.tex.last_layer - view->u.tex.first_layer + 1,
4048 false, false, dw, Elements(dw));
4049
4050 bo = tex->bo;
4051 }
4052
4053 return gen6_emit_SURFACE_STATE(dev, bo, false, dw, Elements(dw), cp);
4054 }
4055
4056 static uint32_t
4057 gen6_emit_cbuf_SURFACE_STATE(const struct ilo_dev_info *dev,
4058 const struct pipe_constant_buffer *cbuf,
4059 struct ilo_cp *cp)
4060 {
4061 const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
4062 struct ilo_buffer *buf = ilo_buffer(cbuf->buffer);
4063 uint32_t dw[6];
4064
4065 ILO_GPE_VALID_GEN(dev, 6, 6);
4066
4067 gen6_fill_buffer_SURFACE_STATE(dev, buf,
4068 cbuf->buffer_offset, cbuf->buffer_size,
4069 util_format_get_blocksize(elem_format), elem_format,
4070 false, false, dw, Elements(dw));
4071
4072 return gen6_emit_SURFACE_STATE(dev, buf->bo, false, dw, Elements(dw), cp);
4073 }
4074
4075 static uint32_t
4076 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
4077 const struct pipe_stream_output_target *so,
4078 const struct pipe_stream_output_info *so_info,
4079 int so_index,
4080 struct ilo_cp *cp)
4081 {
4082 struct ilo_buffer *buf = ilo_buffer(so->buffer);
4083 unsigned bo_offset, struct_size;
4084 enum pipe_format elem_format;
4085 uint32_t dw[6];
4086
4087 ILO_GPE_VALID_GEN(dev, 6, 6);
4088
4089 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
4090 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
4091
4092 switch (so_info->output[so_index].num_components) {
4093 case 1:
4094 elem_format = PIPE_FORMAT_R32_FLOAT;
4095 break;
4096 case 2:
4097 elem_format = PIPE_FORMAT_R32G32_FLOAT;
4098 break;
4099 case 3:
4100 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
4101 break;
4102 case 4:
4103 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
4104 break;
4105 default:
4106 assert(!"unexpected SO components length");
4107 elem_format = PIPE_FORMAT_R32_FLOAT;
4108 break;
4109 }
4110
4111 gen6_fill_buffer_SURFACE_STATE(dev, buf, bo_offset, so->buffer_size,
4112 struct_size, elem_format, false, true, dw, Elements(dw));
4113
4114 return gen6_emit_SURFACE_STATE(dev, buf->bo, false, dw, Elements(dw), cp);
4115 }
4116
4117 static uint32_t
4118 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
4119 const struct pipe_sampler_state **samplers,
4120 const struct pipe_sampler_view **sampler_views,
4121 const uint32_t *sampler_border_colors,
4122 int num_samplers,
4123 struct ilo_cp *cp)
4124 {
4125 const int state_align = 32 / 4;
4126 const int state_len = 4 * num_samplers;
4127 uint32_t state_offset, *dw;
4128 int i;
4129
4130 ILO_GPE_VALID_GEN(dev, 6, 7);
4131
4132 /*
4133 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
4134 *
4135 * "The sampler state is stored as an array of up to 16 elements..."
4136 */
4137 assert(num_samplers <= 16);
4138
4139 if (!num_samplers)
4140 return 0;
4141
4142 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
4143 state_len, state_align, &state_offset);
4144
4145 for (i = 0; i < num_samplers; i++) {
4146 const struct pipe_sampler_state *sampler = samplers[i];
4147 const struct pipe_sampler_view *view = sampler_views[i];
4148 const uint32_t border_color = sampler_border_colors[i];
4149 enum pipe_texture_target target;
4150 int mip_filter, min_filter, mag_filter, max_aniso;
4151 int lod_bias, max_lod, min_lod, base_level;
4152 int wrap_s, wrap_t, wrap_r;
4153 bool clamp_to_edge;
4154
4155 /* there may be holes */
4156 if (!sampler || !view) {
4157 /* disabled sampler */
4158 dw[0] = 1 << 31;
4159 dw[1] = 0;
4160 dw[2] = 0;
4161 dw[3] = 0;
4162 dw += 4;
4163
4164 continue;
4165 }
4166
4167 target = view->texture->target;
4168
4169 /* determine mip/min/mag filters */
4170 mip_filter = gen6_translate_tex_mipfilter(sampler->min_mip_filter);
4171
4172 /*
4173 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
4174 *
4175 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
4176 * surfaces of type SURFTYPE_3D."
4177 */
4178 if (sampler->max_anisotropy && target != PIPE_TEXTURE_3D) {
4179 min_filter = BRW_MAPFILTER_ANISOTROPIC;
4180 mag_filter = BRW_MAPFILTER_ANISOTROPIC;
4181
4182 if (sampler->max_anisotropy >= 2 && sampler->max_anisotropy <= 16)
4183 max_aniso = sampler->max_anisotropy / 2 - 1;
4184 else if (sampler->max_anisotropy > 16)
4185 max_aniso = BRW_ANISORATIO_16;
4186 else
4187 max_aniso = BRW_ANISORATIO_2;
4188 }
4189 else {
4190 min_filter = gen6_translate_tex_filter(sampler->min_img_filter);
4191 mag_filter = gen6_translate_tex_filter(sampler->mag_img_filter);
4192
4193 /* ignored */
4194 max_aniso = 0;
4195 }
4196
4197 /*
4198 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
4199 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
4200 * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
4201 * additionally clamping the texture coordinates to [0.0, 1.0].
4202 *
4203 * The clamping is taken care of in the shaders. There are two filters
4204 * here, but let the minification one has a say.
4205 */
4206 clamp_to_edge = (sampler->min_img_filter == PIPE_TEX_FILTER_NEAREST);
4207
4208 switch (target) {
4209 case PIPE_TEXTURE_CUBE:
4210 /*
4211 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
4212 *
4213 * "When using cube map texture coordinates, only
4214 * TEXCOORDMODE_CLAMP and TEXCOORDMODE_CUBE settings are valid,
4215 * and each TC component must have the same Address Control
4216 * mode."
4217 *
4218 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
4219 *
4220 * "This field (Cube Surface Control Mode) must be set to
4221 * CUBECTRLMODE_PROGRAMMED"
4222 *
4223 * Therefore, we cannot use "Cube Surface Control Mode" for semless
4224 * cube map filtering.
4225 */
4226 if (sampler->seamless_cube_map &&
4227 (sampler->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
4228 sampler->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
4229 wrap_s = BRW_TEXCOORDMODE_CUBE;
4230 wrap_t = BRW_TEXCOORDMODE_CUBE;
4231 wrap_r = BRW_TEXCOORDMODE_CUBE;
4232 }
4233 else {
4234 wrap_s = BRW_TEXCOORDMODE_CLAMP;
4235 wrap_t = BRW_TEXCOORDMODE_CLAMP;
4236 wrap_r = BRW_TEXCOORDMODE_CLAMP;
4237 }
4238 break;
4239 case PIPE_TEXTURE_1D:
4240 wrap_s = gen6_translate_tex_wrap(sampler->wrap_s, clamp_to_edge);
4241 /*
4242 * as noted in the classic i965 driver, the HW may look at these
4243 * values so we need to set them to a safe mode
4244 */
4245 wrap_t = BRW_TEXCOORDMODE_WRAP;
4246 wrap_r = BRW_TEXCOORDMODE_WRAP;
4247 break;
4248 default:
4249 wrap_s = gen6_translate_tex_wrap(sampler->wrap_s, clamp_to_edge);
4250 wrap_t = gen6_translate_tex_wrap(sampler->wrap_t, clamp_to_edge);
4251 wrap_r = gen6_translate_tex_wrap(sampler->wrap_r, clamp_to_edge);
4252 break;
4253 }
4254
4255 /*
4256 * Here is how the hardware calculates per-pixel LOD, from my reading of
4257 * the PRMs:
4258 *
4259 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
4260 * other ways. The number of texels is measured using level
4261 * SurfMinLod.
4262 * 2) Bias is added to LOD.
4263 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
4264 * compared with Base to determine whether magnification or
4265 * minification is needed.
4266 * (if preclamp is disabled, LOD is compared with Base before
4267 * clamping)
4268 * 4) If magnification is needed, or no mipmapping is requested, LOD is
4269 * set to floor(MinLod).
4270 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
4271 *
4272 * With Gallium interface, Base is always zero and view->u.tex.first_level
4273 * specifies SurfMinLod.
4274 *
4275 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
4276 *
4277 * "[DevSNB] Errata: Incorrect behavior is observed in cases where
4278 * the min and mag mode filters are different and SurfMinLOD is
4279 * nonzero. The determination of MagMode uses the following equation
4280 * instead of the one in the above pseudocode: MagMode = (LOD +
4281 * SurfMinLOD - Base <= 0)"
4282 *
4283 * As a way to work around that, we set Base to view->u.tex.first_level
4284 * on GEN6.
4285 */
4286 if (dev->gen >= ILO_GEN(7)) {
4287 const float scale = 256.0f;
4288
4289 /* [-16.0, 16.0) in S4.8 */
4290 lod_bias = (int)
4291 (CLAMP(sampler->lod_bias, -16.0f, 15.9f) * scale);
4292 lod_bias &= 0x1fff;
4293
4294 base_level = 0;
4295
4296 /* [0.0, 14.0] in U4.8 */
4297 max_lod = (int) (CLAMP(sampler->max_lod, 0.0f, 14.0f) * scale);
4298 min_lod = (int) (CLAMP(sampler->min_lod, 0.0f, 14.0f) * scale);
4299 }
4300 else {
4301 const float scale = 64.0f;
4302
4303 /* [-16.0, 16.0) in S4.6 */
4304 lod_bias = (int)
4305 (CLAMP(sampler->lod_bias, -16.0f, 15.9f) * scale);
4306 lod_bias &= 0x7ff;
4307
4308 base_level = view->u.tex.first_level;
4309
4310 /* [0.0, 13.0] in U4.6 */
4311 max_lod = (int) (CLAMP(sampler->max_lod, 0.0f, 13.0f) * scale);
4312 min_lod = (int) (CLAMP(sampler->min_lod, 0.0f, 13.0f) * scale);
4313 }
4314
4315 /*
4316 * We want LOD to be clamped to determine magnification/minification,
4317 * and get set to zero when it is magnification or when mipmapping is
4318 * disabled. The hardware would set LOD to floor(MinLod) and that is a
4319 * problem when MinLod is greater than or equal to 1.0f.
4320 *
4321 * We know that with Base being zero, it is always minification when
4322 * MinLod is non-zero. To meet our need, we just need to set MinLod to
4323 * zero and set MagFilter to MinFilter when mipmapping is disabled.
4324 */
4325 if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
4326 min_lod = 0;
4327 mag_filter = min_filter;
4328 }
4329
4330 if (!sampler->normalized_coords) {
4331 /* work around a bug in util_blitter */
4332 mip_filter = BRW_MIPFILTER_NONE;
4333
4334 /*
4335 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
4336 *
4337 * "The following state must be set as indicated if this field
4338 * (Non-normalized Coordinate Enable) is enabled:
4339 *
4340 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
4341 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
4342 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
4343 * - Mag Mode Filter must be MAPFILTER_NEAREST or
4344 * MAPFILTER_LINEAR.
4345 * - Min Mode Filter must be MAPFILTER_NEAREST or
4346 * MAPFILTER_LINEAR.
4347 * - Mip Mode Filter must be MIPFILTER_NONE.
4348 * - Min LOD must be 0.
4349 * - Max LOD must be 0.
4350 * - MIP Count must be 0.
4351 * - Surface Min LOD must be 0.
4352 * - Texture LOD Bias must be 0."
4353 */
4354 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
4355 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
4356 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
4357 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
4358 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
4359 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
4360
4361 assert(target == PIPE_TEXTURE_RECT);
4362
4363 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
4364 mag_filter == BRW_MAPFILTER_LINEAR);
4365 assert(min_filter == BRW_MAPFILTER_NEAREST ||
4366 min_filter == BRW_MAPFILTER_LINEAR);
4367 assert(mip_filter == BRW_MIPFILTER_NONE);
4368 }
4369
4370 if (dev->gen >= ILO_GEN(7)) {
4371 dw[0] = 1 << 28 |
4372 base_level << 22 |
4373 mip_filter << 20 |
4374 mag_filter << 17 |
4375 min_filter << 14 |
4376 lod_bias << 1;
4377
4378 /* enable EWA filtering unconditionally breaks some piglit tests */
4379 if (sampler->max_anisotropy)
4380 dw[0] |= 1;
4381
4382 dw[1] = min_lod << 20 |
4383 max_lod << 8;
4384
4385 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
4386 dw[1] |= gen6_translate_shadow_func(sampler->compare_func) << 1;
4387
4388 assert(!(border_color & 0x1f));
4389 dw[2] = border_color;
4390
4391 dw[3] = max_aniso << 19 |
4392 wrap_s << 6 |
4393 wrap_t << 3 |
4394 wrap_r;
4395
4396 /* round the coordinates for linear filtering */
4397 if (min_filter != BRW_MAPFILTER_NEAREST) {
4398 dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4399 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4400 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4401 }
4402 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4403 dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4404 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4405 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4406 }
4407
4408 if (!sampler->normalized_coords)
4409 dw[3] |= 1 << 10;
4410 }
4411 else {
4412 dw[0] = 1 << 28 |
4413 (min_filter != mag_filter) << 27 |
4414 base_level << 22 |
4415 mip_filter << 20 |
4416 mag_filter << 17 |
4417 min_filter << 14 |
4418 lod_bias << 3;
4419
4420 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE)
4421 dw[0] |= gen6_translate_shadow_func(sampler->compare_func);
4422
4423 dw[1] = min_lod << 22 |
4424 max_lod << 12 |
4425 wrap_s << 6 |
4426 wrap_t << 3 |
4427 wrap_r;
4428
4429 assert(!(border_color & 0x1f));
4430 dw[2] = border_color;
4431
4432 dw[3] = max_aniso << 19;
4433
4434 /* round the coordinates for linear filtering */
4435 if (min_filter != BRW_MAPFILTER_NEAREST) {
4436 dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4437 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4438 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4439 }
4440 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4441 dw[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4442 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4443 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4444 }
4445
4446 if (!sampler->normalized_coords)
4447 dw[3] |= 1;
4448 }
4449
4450 dw += 4;
4451 }
4452
4453 return state_offset;
4454 }
4455
4456 static uint32_t
4457 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
4458 const union pipe_color_union *color,
4459 struct ilo_cp *cp)
4460 {
4461 const int state_align = 32 / 4;
4462 const int state_len = 12;
4463 uint32_t state_offset, *dw;
4464 float rgba[4] = {
4465 color->f[0], color->f[1], color->f[2], color->f[3],
4466 };
4467
4468 ILO_GPE_VALID_GEN(dev, 6, 6);
4469
4470 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
4471 state_len, state_align, &state_offset);
4472
4473 /*
4474 * This state is not documented in the Sandy Bridge PRM, but in the
4475 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
4476 */
4477
4478 /* IEEE_FP */
4479 dw[1] = fui(rgba[0]);
4480 dw[2] = fui(rgba[1]);
4481 dw[3] = fui(rgba[2]);
4482 dw[4] = fui(rgba[3]);
4483
4484 /* FLOAT_16 */
4485 dw[5] = util_float_to_half(rgba[0]) |
4486 util_float_to_half(rgba[1]) << 16;
4487 dw[6] = util_float_to_half(rgba[2]) |
4488 util_float_to_half(rgba[3]) << 16;
4489
4490 /* clamp to [-1.0f, 1.0f] */
4491 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
4492 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
4493 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
4494 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
4495
4496 /* SNORM16 */
4497 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
4498 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
4499 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
4500 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
4501
4502 /* SNORM8 */
4503 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
4504 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
4505 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
4506 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
4507
4508 /* clamp to [0.0f, 1.0f] */
4509 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
4510 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
4511 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
4512 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
4513
4514 /* UNORM8 */
4515 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
4516 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
4517 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
4518 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
4519
4520 /* UNORM16 */
4521 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
4522 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
4523 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
4524 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
4525
4526 return state_offset;
4527 }
4528
/**
 * Reserve space for a push constant buffer of \p size bytes.
 *
 * The reservation is rounded up to a multiple of 32 bytes, and the bytes
 * past \p size are cleared.  When \p pcb is non-NULL, the pointer to the
 * reserved space is stored there so that the caller can fill it in.
 *
 * Returns the state offset reported by ilo_cp_steal_ptr().
 */
static uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * VS, GS, FS, and CS push constant buffers must all be aligned to 32
    * bytes, and their sizes are specified in 256-bit units.
    */
   const int align_in_dwords = 32 / 4;
   const int len_in_dwords = align(size, 32) / 4;
   const int len_in_bytes = len_in_dwords * 4;
   uint32_t offset;
   char *base;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   base = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         len_in_dwords, align_in_dwords, &offset);

   /* clear the padding that follows the caller's data */
   if (len_in_bytes > size)
      memset(base + size, 0, len_in_bytes - size);

   if (pcb)
      *pcb = base;

   return offset;
}
4557
4558 static int
4559 gen6_estimate_command_size(const struct ilo_dev_info *dev,
4560 enum ilo_gpe_gen6_command cmd,
4561 int arg)
4562 {
4563 static const struct {
4564 int header;
4565 int body;
4566 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
4567 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
4568 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
4569 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
4570 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
4571 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
4572 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
4573 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
4574 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
4575 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
4576 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
4577 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
4578 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
4579 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
4580 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
4581 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
4582 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
4583 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
4584 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
4585 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
4586 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
4587 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
4588 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
4589 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
4590 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
4591 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
4592 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
4593 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
4594 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
4595 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
4596 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
4597 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
4598 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
4599 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
4600 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
4601 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
4602 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
4603 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
4604 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
4605 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
4606 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
4607 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
4608 };
4609 const int header = gen6_command_size_table[cmd].header;
4610 const int body = gen6_command_size_table[arg].body;
4611 const int count = arg;
4612
4613 ILO_GPE_VALID_GEN(dev, 6, 6);
4614 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
4615
4616 return (likely(count)) ? header + body * count : 0;
4617 }
4618
4619 static int
4620 gen6_estimate_state_size(const struct ilo_dev_info *dev,
4621 enum ilo_gpe_gen6_state state,
4622 int arg)
4623 {
4624 static const struct {
4625 int alignment;
4626 int body;
4627 bool is_array;
4628 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
4629 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
4630 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
4631 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
4632 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
4633 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
4634 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
4635 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
4636 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
4637 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
4638 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
4639 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
4640 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
4641 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
4642 };
4643 const int alignment = gen6_state_size_table[state].alignment;
4644 const int body = gen6_state_size_table[state].body;
4645 const bool is_array = gen6_state_size_table[state].is_array;
4646 const int count = arg;
4647 int estimate;
4648
4649 ILO_GPE_VALID_GEN(dev, 6, 6);
4650 assert(state < ILO_GPE_GEN6_STATE_COUNT);
4651
4652 if (likely(count)) {
4653 if (is_array) {
4654 estimate = (alignment - 1) + body * count;
4655 }
4656 else {
4657 estimate = (alignment - 1) + body;
4658 /* all states are aligned */
4659 if (count > 1)
4660 estimate += util_align_npot(body, alignment) * (count - 1);
4661 }
4662 }
4663 else {
4664 estimate = 0;
4665 }
4666
4667 return estimate;
4668 }
4669
4670 static const struct ilo_gpe_gen6 gen6_gpe = {
4671 .estimate_command_size = gen6_estimate_command_size,
4672 .estimate_state_size = gen6_estimate_state_size,
4673
4674 #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
4675 GEN6_SET(STATE_BASE_ADDRESS),
4676 GEN6_SET(STATE_SIP),
4677 GEN6_SET(3DSTATE_VF_STATISTICS),
4678 GEN6_SET(PIPELINE_SELECT),
4679 GEN6_SET(MEDIA_VFE_STATE),
4680 GEN6_SET(MEDIA_CURBE_LOAD),
4681 GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
4682 GEN6_SET(MEDIA_GATEWAY_STATE),
4683 GEN6_SET(MEDIA_STATE_FLUSH),
4684 GEN6_SET(MEDIA_OBJECT_WALKER),
4685 GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
4686 GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
4687 GEN6_SET(3DSTATE_URB),
4688 GEN6_SET(3DSTATE_VERTEX_BUFFERS),
4689 GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
4690 GEN6_SET(3DSTATE_INDEX_BUFFER),
4691 GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
4692 GEN6_SET(3DSTATE_CC_STATE_POINTERS),
4693 GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
4694 GEN6_SET(3DSTATE_VS),
4695 GEN6_SET(3DSTATE_GS),
4696 GEN6_SET(3DSTATE_CLIP),
4697 GEN6_SET(3DSTATE_SF),
4698 GEN6_SET(3DSTATE_WM),
4699 GEN6_SET(3DSTATE_CONSTANT_VS),
4700 GEN6_SET(3DSTATE_CONSTANT_GS),
4701 GEN6_SET(3DSTATE_CONSTANT_PS),
4702 GEN6_SET(3DSTATE_SAMPLE_MASK),
4703 GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
4704 GEN6_SET(3DSTATE_DEPTH_BUFFER),
4705 GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
4706 GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
4707 GEN6_SET(3DSTATE_LINE_STIPPLE),
4708 GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
4709 GEN6_SET(3DSTATE_GS_SVB_INDEX),
4710 GEN6_SET(3DSTATE_MULTISAMPLE),
4711 GEN6_SET(3DSTATE_STENCIL_BUFFER),
4712 GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
4713 GEN6_SET(3DSTATE_CLEAR_PARAMS),
4714 GEN6_SET(PIPE_CONTROL),
4715 GEN6_SET(3DPRIMITIVE),
4716 GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
4717 GEN6_SET(SF_VIEWPORT),
4718 GEN6_SET(CLIP_VIEWPORT),
4719 GEN6_SET(CC_VIEWPORT),
4720 GEN6_SET(COLOR_CALC_STATE),
4721 GEN6_SET(BLEND_STATE),
4722 GEN6_SET(DEPTH_STENCIL_STATE),
4723 GEN6_SET(SCISSOR_RECT),
4724 GEN6_SET(BINDING_TABLE_STATE),
4725 GEN6_SET(surf_SURFACE_STATE),
4726 GEN6_SET(view_SURFACE_STATE),
4727 GEN6_SET(cbuf_SURFACE_STATE),
4728 GEN6_SET(so_SURFACE_STATE),
4729 GEN6_SET(SAMPLER_STATE),
4730 GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
4731 GEN6_SET(push_constant_buffer),
4732 #undef GEN6_SET
4733 };
4734
4735 const struct ilo_gpe_gen6 *
4736 ilo_gpe_gen6_get(void)
4737 {
4738 return &gen6_gpe;
4739 }