1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
32
33 #include "ilo_context.h"
34 #include "ilo_cp.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
40
41 /**
42 * Translate winsys tiling to hardware tiling.
43 */
44 int
45 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
46 {
47 switch (tiling) {
48 case INTEL_TILING_NONE:
49 return 0;
50 case INTEL_TILING_X:
51 return BRW_SURFACE_TILED;
52 case INTEL_TILING_Y:
53 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
54 default:
55 assert(!"unknown tiling");
56 return 0;
57 }
58 }
59
60 /**
61 * Translate a pipe primitive type to the matching hardware primitive type.
62 */
63 int
64 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
65 {
66 static const int prim_mapping[PIPE_PRIM_MAX] = {
67 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
68 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
69 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
70 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
71 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
72 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
73 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
74 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
75 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
76 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
77 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
78 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
79 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
80 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
81 };
82
83 assert(prim_mapping[prim]);
84
85 return prim_mapping[prim];
86 }
87
88 /**
89 * Translate a pipe texture target to the matching hardware surface type.
90 */
91 int
92 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
93 {
94 switch (target) {
95 case PIPE_BUFFER:
96 return BRW_SURFACE_BUFFER;
97 case PIPE_TEXTURE_1D:
98 case PIPE_TEXTURE_1D_ARRAY:
99 return BRW_SURFACE_1D;
100 case PIPE_TEXTURE_2D:
101 case PIPE_TEXTURE_RECT:
102 case PIPE_TEXTURE_2D_ARRAY:
103 return BRW_SURFACE_2D;
104 case PIPE_TEXTURE_3D:
105 return BRW_SURFACE_3D;
106 case PIPE_TEXTURE_CUBE:
107 case PIPE_TEXTURE_CUBE_ARRAY:
108 return BRW_SURFACE_CUBE;
109 default:
110 assert(!"unknown texture target");
111 return BRW_SURFACE_BUFFER;
112 }
113 }
114
115 /**
116 * Translate a depth/stencil pipe format to the matching hardware
117 * format. Return -1 on errors.
118 */
119 static int
120 gen6_translate_depth_format(enum pipe_format format)
121 {
122 switch (format) {
123 case PIPE_FORMAT_Z16_UNORM:
124 return BRW_DEPTHFORMAT_D16_UNORM;
125 case PIPE_FORMAT_Z32_FLOAT:
126 return BRW_DEPTHFORMAT_D32_FLOAT;
127 case PIPE_FORMAT_Z24X8_UNORM:
128 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
130 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
132 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
133 default:
134 return -1;
135 }
136 }
137
138 /**
139 * Translate a pipe logicop to the matching hardware logicop.
140 */
141 static int
142 gen6_translate_pipe_logicop(unsigned logicop)
143 {
144 switch (logicop) {
145 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
146 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
147 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
148 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
149 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
150 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
151 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
152 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
153 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
154 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
155 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
156 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
157 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
158 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
159 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
160 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
161 default:
162 assert(!"unknown logicop function");
163 return BRW_LOGICOPFUNCTION_CLEAR;
164 }
165 }
166
167 /**
168 * Translate a pipe blend function to the matching hardware blend function.
169 */
170 static int
171 gen6_translate_pipe_blend(unsigned blend)
172 {
173 switch (blend) {
174 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
175 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
176 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
177 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
178 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
179 default:
180 assert(!"unknown blend function");
181 return BRW_BLENDFUNCTION_ADD;
182    }
183 }
184
185 /**
186 * Translate a pipe blend factor to the matching hardware blend factor.
187 */
188 static int
189 gen6_translate_pipe_blendfactor(unsigned blendfactor)
190 {
191 switch (blendfactor) {
192 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
193 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
194 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
195 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
196 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
198 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
199 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
200 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
201 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
202 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
203 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
204 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
205 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
206 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
207 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
208 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
209 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
210 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
211 default:
212 assert(!"unknown blend factor");
213 return BRW_BLENDFACTOR_ONE;
214    }
215 }
216
217 /**
218 * Translate a pipe stencil op to the matching hardware stencil op.
219 */
220 static int
221 gen6_translate_pipe_stencil_op(unsigned stencil_op)
222 {
223 switch (stencil_op) {
224 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
225 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
226 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
227 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
228 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
229 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
230 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
231 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
232 default:
233 assert(!"unknown stencil op");
234 return BRW_STENCILOP_KEEP;
235 }
236 }
237
238 /**
239 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
240 */
241 static int
242 gen6_translate_tex_mipfilter(unsigned filter)
243 {
244 switch (filter) {
245 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
246 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
247 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
248 default:
249 assert(!"unknown mipfilter");
250 return BRW_MIPFILTER_NONE;
251 }
252 }
253
254 /**
255 * Translate a pipe texture filter to the matching hardware mapfilter.
256 */
257 static int
258 gen6_translate_tex_filter(unsigned filter)
259 {
260 switch (filter) {
261 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
262 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
263 default:
264 assert(!"unknown sampler filter");
265 return BRW_MAPFILTER_NEAREST;
266 }
267 }
268
269 /**
270 * Translate a pipe texture coordinate wrapping mode to the matching hardware
271 * wrapping mode.
272 */
273 static int
274 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
275 {
276 /* clamp to edge or border? */
277 if (wrap == PIPE_TEX_WRAP_CLAMP) {
278 wrap = (clamp_to_edge) ?
279 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
280 }
281
282 switch (wrap) {
283 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
284 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
285 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
286 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
287 case PIPE_TEX_WRAP_CLAMP:
288 case PIPE_TEX_WRAP_MIRROR_CLAMP:
289 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
290 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
291 default:
292 assert(!"unknown sampler wrap mode");
293 return BRW_TEXCOORDMODE_WRAP;
294 }
295 }
296
297 /**
298 * Translate a pipe DSA test function to the matching hardware compare
299 * function.
300 */
301 static int
302 gen6_translate_dsa_func(unsigned func)
303 {
304 switch (func) {
305 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
306 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
307 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
308 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
309 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
310 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
311 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
312 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
313 default:
314 assert(!"unknown depth/stencil/alpha test function");
315 return BRW_COMPAREFUNCTION_NEVER;
316 }
317 }
318
319 /**
320 * Translate a pipe shadow compare function to the matching hardware shadow
321 * function.
322 */
323 static int
324 gen6_translate_shadow_func(unsigned func)
325 {
326 /*
327 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
328 * comparison, and 1.0 is returned when the comparison is true.
329 *
330 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
331 * the comparison, and 0.0 is returned when the comparison is true.
332 */
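   /*
    * Added note: as a sanity check of the mapping below, consider
    * PIPE_FUNC_LESS, which passes (returns 1.0) when "ref < texel".
    * BRW_PREFILTER_LEQUAL returns 0.0 when "texel <= ref", i.e. 1.0 when
    * "ref < texel", so the two agree.
    */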
333 switch (func) {
334 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
335 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
336 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
337 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
338 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
339 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
340 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
341 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
342 default:
343 assert(!"unknown shadow compare function");
344 return BRW_PREFILTER_NEVER;
345 }
346 }
347
348 /**
349 * Translate an index size to the matching hardware index format.
350 */
351 static int
352 gen6_translate_index_size(int size)
353 {
354 switch (size) {
355 case 4: return BRW_INDEX_DWORD;
356 case 2: return BRW_INDEX_WORD;
357 case 1: return BRW_INDEX_BYTE;
358 default:
359 assert(!"unknown index size");
360 return BRW_INDEX_BYTE;
361 }
362 }
363
364 static void
365 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
366 struct intel_bo *general_state_bo,
367 struct intel_bo *surface_state_bo,
368 struct intel_bo *dynamic_state_bo,
369 struct intel_bo *indirect_object_bo,
370 struct intel_bo *instruction_bo,
371 uint32_t general_state_size,
372 uint32_t dynamic_state_size,
373 uint32_t indirect_object_size,
374 uint32_t instruction_size,
375 struct ilo_cp *cp)
376 {
377 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
378 const uint8_t cmd_len = 10;
379
380 ILO_GPE_VALID_GEN(dev, 6, 7);
381
382 /* 4K-page aligned */
383 assert(((general_state_size | dynamic_state_size |
384 indirect_object_size | instruction_size) & 0xfff) == 0);
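   /*
    * Added note: bit 0 of each base address and upper bound DWord below is
    * the "modify enable" bit, which is why the relocations are emitted with
    * an offset of 1 and the sizes with "| 1".  The addresses and sizes are
    * 4KB-aligned, so bit 0 is otherwise unused.
    */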
385
386 ilo_cp_begin(cp, cmd_len);
387 ilo_cp_write(cp, cmd | (cmd_len - 2));
388
389 ilo_cp_write_bo(cp, 1, general_state_bo,
390 INTEL_DOMAIN_RENDER,
391 0);
392 ilo_cp_write_bo(cp, 1, surface_state_bo,
393 INTEL_DOMAIN_SAMPLER,
394 0);
395 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
396 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
397 0);
398 ilo_cp_write_bo(cp, 1, indirect_object_bo,
399 0,
400 0);
401 ilo_cp_write_bo(cp, 1, instruction_bo,
402 INTEL_DOMAIN_INSTRUCTION,
403 0);
404
405 if (general_state_size) {
406 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
407 INTEL_DOMAIN_RENDER,
408 0);
409 }
410 else {
411 /* skip range check */
412 ilo_cp_write(cp, 1);
413 }
414
415 if (dynamic_state_size) {
416 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
417 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
418 0);
419 }
420 else {
421 /* skip range check */
422 ilo_cp_write(cp, 0xfffff000 + 1);
423 }
424
425 if (indirect_object_size) {
426 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
427 0,
428 0);
429 }
430 else {
431 /* skip range check */
432 ilo_cp_write(cp, 0xfffff000 + 1);
433 }
434
435 if (instruction_size) {
436 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
437 INTEL_DOMAIN_INSTRUCTION,
438 0);
439 }
440 else {
441 /* skip range check */
442 ilo_cp_write(cp, 1);
443 }
444
445 ilo_cp_end(cp);
446 }
447
448 static void
449 gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
450 uint32_t sip,
451 struct ilo_cp *cp)
452 {
453 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
454 const uint8_t cmd_len = 2;
455
456 ILO_GPE_VALID_GEN(dev, 6, 7);
457
458    ilo_cp_begin(cp, cmd_len);
459    ilo_cp_write(cp, cmd | (cmd_len - 2));
460 ilo_cp_write(cp, sip);
461 ilo_cp_end(cp);
462 }
463
464 static void
465 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
466 bool enable,
467 struct ilo_cp *cp)
468 {
469 const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
470 const uint8_t cmd_len = 1;
471
472 ILO_GPE_VALID_GEN(dev, 6, 7);
473
474 ilo_cp_begin(cp, cmd_len);
475 ilo_cp_write(cp, cmd | enable);
476 ilo_cp_end(cp);
477 }
478
479 static void
480 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
481 int pipeline,
482 struct ilo_cp *cp)
483 {
484 const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
485 const uint8_t cmd_len = 1;
486
487 ILO_GPE_VALID_GEN(dev, 6, 7);
488
489 /* 3D or media */
490 assert(pipeline == 0x0 || pipeline == 0x1);
491
492 ilo_cp_begin(cp, cmd_len);
493 ilo_cp_write(cp, cmd | pipeline);
494 ilo_cp_end(cp);
495 }
496
497 static void
498 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
499 int max_threads, int num_urb_entries,
500 int urb_entry_size,
501 struct ilo_cp *cp)
502 {
503 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
504 const uint8_t cmd_len = 8;
505 uint32_t dw2, dw4;
506
507 ILO_GPE_VALID_GEN(dev, 6, 6);
508
509 dw2 = (max_threads - 1) << 16 |
510 num_urb_entries << 8 |
511 1 << 7 | /* Reset Gateway Timer */
512 1 << 6; /* Bypass Gateway Control */
513
514 dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */
515 480; /* CURBE Allocation Size */
516
517 ilo_cp_begin(cp, cmd_len);
518 ilo_cp_write(cp, cmd | (cmd_len - 2));
519 ilo_cp_write(cp, 0); /* scratch */
520 ilo_cp_write(cp, dw2);
521 ilo_cp_write(cp, 0); /* MBZ */
522 ilo_cp_write(cp, dw4);
523 ilo_cp_write(cp, 0); /* scoreboard */
524 ilo_cp_write(cp, 0);
525 ilo_cp_write(cp, 0);
526 ilo_cp_end(cp);
527 }
528
529 static void
530 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
531 uint32_t buf, int size,
532 struct ilo_cp *cp)
533 {
534 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
535 const uint8_t cmd_len = 4;
536
537 ILO_GPE_VALID_GEN(dev, 6, 6);
538
539 assert(buf % 32 == 0);
540 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
541 size = align(size, 32);
542
543 ilo_cp_begin(cp, cmd_len);
544 ilo_cp_write(cp, cmd | (cmd_len - 2));
545 ilo_cp_write(cp, 0); /* MBZ */
546 ilo_cp_write(cp, size);
547 ilo_cp_write(cp, buf);
548 ilo_cp_end(cp);
549 }
550
551 static void
552 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
553 uint32_t offset, int num_ids,
554 struct ilo_cp *cp)
555 {
556 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
557 const uint8_t cmd_len = 4;
558
559 ILO_GPE_VALID_GEN(dev, 6, 6);
560
561 assert(offset % 32 == 0);
562
563 ilo_cp_begin(cp, cmd_len);
564 ilo_cp_write(cp, cmd | (cmd_len - 2));
565 ilo_cp_write(cp, 0); /* MBZ */
566 /* every ID has 8 DWords */
567 ilo_cp_write(cp, num_ids * 8 * 4);
568 ilo_cp_write(cp, offset);
569 ilo_cp_end(cp);
570 }
571
572 static void
573 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
574 int id, int byte, int thread_count,
575 struct ilo_cp *cp)
576 {
577 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
578 const uint8_t cmd_len = 2;
579 uint32_t dw1;
580
581 ILO_GPE_VALID_GEN(dev, 6, 6);
582
583 dw1 = id << 16 |
584 byte << 8 |
585 thread_count;
586
587 ilo_cp_begin(cp, cmd_len);
588 ilo_cp_write(cp, cmd | (cmd_len - 2));
589 ilo_cp_write(cp, dw1);
590 ilo_cp_end(cp);
591 }
592
593 static void
594 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
595 int thread_count_water_mark,
596 int barrier_mask,
597 struct ilo_cp *cp)
598 {
599 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
600 const uint8_t cmd_len = 2;
601 uint32_t dw1;
602
603 ILO_GPE_VALID_GEN(dev, 6, 6);
604
605 dw1 = thread_count_water_mark << 16 |
606 barrier_mask;
607
608 ilo_cp_begin(cp, cmd_len);
609 ilo_cp_write(cp, cmd | (cmd_len - 2));
610 ilo_cp_write(cp, dw1);
611 ilo_cp_end(cp);
612 }
613
614 static void
615 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
616 struct ilo_cp *cp)
617 {
618 assert(!"MEDIA_OBJECT_WALKER unsupported");
619 }
620
621 static void
622 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
623 uint32_t vs_binding_table,
624 uint32_t gs_binding_table,
625 uint32_t ps_binding_table,
626 struct ilo_cp *cp)
627 {
628 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
629 const uint8_t cmd_len = 4;
630
631 ILO_GPE_VALID_GEN(dev, 6, 6);
632
633 ilo_cp_begin(cp, cmd_len);
634 ilo_cp_write(cp, cmd | (cmd_len - 2) |
635 GEN6_BINDING_TABLE_MODIFY_VS |
636 GEN6_BINDING_TABLE_MODIFY_GS |
637 GEN6_BINDING_TABLE_MODIFY_PS);
638 ilo_cp_write(cp, vs_binding_table);
639 ilo_cp_write(cp, gs_binding_table);
640 ilo_cp_write(cp, ps_binding_table);
641 ilo_cp_end(cp);
642 }
643
644 static void
645 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
646 uint32_t vs_sampler_state,
647 uint32_t gs_sampler_state,
648 uint32_t ps_sampler_state,
649 struct ilo_cp *cp)
650 {
651 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
652 const uint8_t cmd_len = 4;
653
654 ILO_GPE_VALID_GEN(dev, 6, 6);
655
656 ilo_cp_begin(cp, cmd_len);
657 ilo_cp_write(cp, cmd | (cmd_len - 2) |
658 VS_SAMPLER_STATE_CHANGE |
659 GS_SAMPLER_STATE_CHANGE |
660 PS_SAMPLER_STATE_CHANGE);
661 ilo_cp_write(cp, vs_sampler_state);
662 ilo_cp_write(cp, gs_sampler_state);
663 ilo_cp_write(cp, ps_sampler_state);
664 ilo_cp_end(cp);
665 }
666
667 static void
668 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
669 int vs_total_size, int gs_total_size,
670 int vs_entry_size, int gs_entry_size,
671 struct ilo_cp *cp)
672 {
673 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
674 const uint8_t cmd_len = 3;
675 const int row_size = 128; /* 1024 bits */
676 int vs_alloc_size, gs_alloc_size;
677 int vs_num_entries, gs_num_entries;
678
679 ILO_GPE_VALID_GEN(dev, 6, 6);
680
681 /* in 1024-bit URB rows */
682 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
683 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
684
685 /* the valid range is [1, 5] */
686 if (!vs_alloc_size)
687 vs_alloc_size = 1;
688 if (!gs_alloc_size)
689 gs_alloc_size = 1;
690 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
691
692 /* the valid range is [24, 256] in multiples of 4 */
693 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
694 if (vs_num_entries > 256)
695 vs_num_entries = 256;
696 assert(vs_num_entries >= 24);
697
698 /* the valid range is [0, 256] in multiples of 4 */
699 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
700 if (gs_num_entries > 256)
701 gs_num_entries = 256;
702
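   /*
    * Worked example (hypothetical numbers): a 256-byte VS entry needs
    * vs_alloc_size = 2 rows; with vs_total_size = 64KB that gives
    * 65536 / 128 / 2 = 256 entries, already a multiple of 4 and within
    * [24, 256].
    */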
703 ilo_cp_begin(cp, cmd_len);
704 ilo_cp_write(cp, cmd | (cmd_len - 2));
705 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
706 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
707 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
708 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
709 ilo_cp_end(cp);
710 }
711
712 static void
713 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
714 const struct pipe_vertex_buffer *vbuffers,
715 uint64_t vbuffer_mask,
716 const struct ilo_ve_state *ve,
717 struct ilo_cp *cp)
718 {
719 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
720 uint8_t cmd_len;
721 unsigned hw_idx;
722
723 ILO_GPE_VALID_GEN(dev, 6, 7);
724
725 /*
726 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
727 *
728 * "From 1 to 33 VBs can be specified..."
729 */
730 assert(vbuffer_mask <= (1UL << 33));
731
732 if (!vbuffer_mask)
733 return;
734
735 cmd_len = 1;
736
737 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
738 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
739
740 if (vbuffer_mask & (1 << pipe_idx))
741 cmd_len += 4;
742 }
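   /*
    * Added note: the command is 1 header DWord plus 4 DWords per enabled
    * buffer, e.g. two enabled buffers give cmd_len = 9.
    */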
743
744 ilo_cp_begin(cp, cmd_len);
745 ilo_cp_write(cp, cmd | (cmd_len - 2));
746
747 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
748 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
749 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
750 const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
751 uint32_t dw;
752
753 if (!(vbuffer_mask & (1 << pipe_idx)))
754 continue;
755
756 dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
757
758 if (instance_divisor)
759 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
760 else
761 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
762
763 if (dev->gen >= ILO_GEN(7))
764 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
765
766 /* use null vb if there is no buffer or the stride is out of range */
767 if (vb->buffer && vb->stride <= 2048) {
768 const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
769 const uint32_t start_offset = vb->buffer_offset;
770 const uint32_t end_offset = buf->bo_size - 1;
771
772 dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
773
774 ilo_cp_write(cp, dw);
775 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
776 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
777 ilo_cp_write(cp, instance_divisor);
778 }
779 else {
780 dw |= 1 << 13;
781
782 ilo_cp_write(cp, dw);
783 ilo_cp_write(cp, 0);
784 ilo_cp_write(cp, 0);
785 ilo_cp_write(cp, instance_divisor);
786 }
787 }
788
789 ilo_cp_end(cp);
790 }
791
792 static void
793 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
794 struct ilo_ve_cso *cso)
795 {
796 int format;
797
798 ILO_GPE_VALID_GEN(dev, 6, 7);
799
800 /*
801 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
802 *
803 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
804 * valid VERTEX_ELEMENT structure.
805 *
806 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
807 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
808 *
809 * - The Source Element Format must be set to the UINT format.
810 *
811 * - [DevSNB]: Edge Flags are not supported for QUADLIST
812 * primitives. Software may elect to convert QUADLIST primitives
813 * to some set of corresponding edge-flag-supported primitive
814 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
815 */
816
817 cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
818 cso->payload[1] =
819 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
820 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
821 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
822 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
823
824 /*
825 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
826 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
827 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
828 *
829 * Since all the hardware cares about is whether the flags are zero or not,
830 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
831 */
832 format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
833 if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
834 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
835 BRW_SURFACEFORMAT_R32_FLOAT - 1);
836
837 cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
838 }
839 else {
840 assert(format == BRW_SURFACEFORMAT_R8_UINT);
841 }
842 }
843
844 static void
845 ve_init_cso_with_components(const struct ilo_dev_info *dev,
846 int comp0, int comp1, int comp2, int comp3,
847 struct ilo_ve_cso *cso)
848 {
849 ILO_GPE_VALID_GEN(dev, 6, 7);
850
851 STATIC_ASSERT(Elements(cso->payload) >= 2);
852 cso->payload[0] = GEN6_VE0_VALID;
853 cso->payload[1] =
854 comp0 << BRW_VE1_COMPONENT_0_SHIFT |
855 comp1 << BRW_VE1_COMPONENT_1_SHIFT |
856 comp2 << BRW_VE1_COMPONENT_2_SHIFT |
857 comp3 << BRW_VE1_COMPONENT_3_SHIFT;
858 }
859
860 static void
861 ve_init_cso(const struct ilo_dev_info *dev,
862 const struct pipe_vertex_element *state,
863 unsigned vb_index,
864 struct ilo_ve_cso *cso)
865 {
866 int comp[4] = {
867 BRW_VE1_COMPONENT_STORE_SRC,
868 BRW_VE1_COMPONENT_STORE_SRC,
869 BRW_VE1_COMPONENT_STORE_SRC,
870 BRW_VE1_COMPONENT_STORE_SRC,
871 };
872 int format;
873
874 ILO_GPE_VALID_GEN(dev, 6, 7);
875
876 switch (util_format_get_nr_components(state->src_format)) {
877 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
878 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
879 case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
880 BRW_VE1_COMPONENT_STORE_1_INT :
881 BRW_VE1_COMPONENT_STORE_1_FLT;
882 }
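   /*
    * Added note: the fall-through above is intentional; unused components
    * are filled with 0 and the last one with 1, so e.g. a two-component
    * format is fetched as (x, y, 0, 1).
    */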
883
884 format = ilo_translate_vertex_format(state->src_format);
885
886 STATIC_ASSERT(Elements(cso->payload) >= 2);
887 cso->payload[0] =
888 vb_index << GEN6_VE0_INDEX_SHIFT |
889 GEN6_VE0_VALID |
890 format << BRW_VE0_FORMAT_SHIFT |
891 state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
892
893 cso->payload[1] =
894 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
895 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
896 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
897 comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
898 }
899
900 void
901 ilo_gpe_init_ve(const struct ilo_dev_info *dev,
902 unsigned num_states,
903 const struct pipe_vertex_element *states,
904 struct ilo_ve_state *ve)
905 {
906 unsigned i;
907
908 ILO_GPE_VALID_GEN(dev, 6, 7);
909
910 ve->count = num_states;
911 ve->vb_count = 0;
912
913 for (i = 0; i < num_states; i++) {
914 const unsigned pipe_idx = states[i].vertex_buffer_index;
915 const unsigned instance_divisor = states[i].instance_divisor;
916 unsigned hw_idx;
917
918 /*
919 * map the pipe vb to the hardware vb, which has a fixed instance
920 * divisor
921 */
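      /*
       * Added example (hypothetical): two elements that both read pipe vb 0
       * but with instance divisors 0 and 1 end up in two distinct hardware
       * vbs, because a hardware vb carries a single divisor.
       */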
922 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
923 if (ve->vb_mapping[hw_idx] == pipe_idx &&
924 ve->instance_divisors[hw_idx] == instance_divisor)
925 break;
926 }
927
928 /* create one if there is no matching hardware vb */
929 if (hw_idx >= ve->vb_count) {
930 hw_idx = ve->vb_count++;
931
932 ve->vb_mapping[hw_idx] = pipe_idx;
933 ve->instance_divisors[hw_idx] = instance_divisor;
934 }
935
936 ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
937 }
938 }
939
940 static void
941 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
942 const struct ilo_ve_state *ve,
943 bool last_velement_edgeflag,
944 bool prepend_generated_ids,
945 struct ilo_cp *cp)
946 {
947 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
948 uint8_t cmd_len;
949 unsigned i;
950
951 ILO_GPE_VALID_GEN(dev, 6, 7);
952
953 /*
954 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
955 *
956 * "Up to 34 (DevSNB+) vertex elements are supported."
957 */
958 assert(ve->count + prepend_generated_ids <= 34);
959
960 if (!ve->count && !prepend_generated_ids) {
961 struct ilo_ve_cso dummy;
962
963 ve_init_cso_with_components(dev,
964 BRW_VE1_COMPONENT_STORE_0,
965 BRW_VE1_COMPONENT_STORE_0,
966 BRW_VE1_COMPONENT_STORE_0,
967 BRW_VE1_COMPONENT_STORE_1_FLT,
968 &dummy);
969
970 cmd_len = 3;
971 ilo_cp_begin(cp, cmd_len);
972 ilo_cp_write(cp, cmd | (cmd_len - 2));
973 ilo_cp_write_multi(cp, dummy.payload, 2);
974 ilo_cp_end(cp);
975
976 return;
977 }
978
979 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
980
981 ilo_cp_begin(cp, cmd_len);
982 ilo_cp_write(cp, cmd | (cmd_len - 2));
983
984 if (prepend_generated_ids) {
985 struct ilo_ve_cso gen_ids;
986
987 ve_init_cso_with_components(dev,
988 BRW_VE1_COMPONENT_STORE_VID,
989 BRW_VE1_COMPONENT_STORE_IID,
990 BRW_VE1_COMPONENT_NOSTORE,
991 BRW_VE1_COMPONENT_NOSTORE,
992 &gen_ids);
993
994 ilo_cp_write_multi(cp, gen_ids.payload, 2);
995 }
996
997 if (last_velement_edgeflag) {
998 struct ilo_ve_cso edgeflag;
999
1000 for (i = 0; i < ve->count - 1; i++)
1001 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
1002
1003 edgeflag = ve->cso[i];
1004 ve_set_cso_edgeflag(dev, &edgeflag);
1005 ilo_cp_write_multi(cp, edgeflag.payload, 2);
1006 }
1007 else {
1008 for (i = 0; i < ve->count; i++)
1009 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
1010 }
1011
1012 ilo_cp_end(cp);
1013 }
1014
1015 static void
1016 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
1017 const struct pipe_index_buffer *ib,
1018 bool enable_cut_index,
1019 struct ilo_cp *cp)
1020 {
1021 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
1022 const uint8_t cmd_len = 3;
1023 const struct ilo_buffer *buf = ilo_buffer(ib->buffer);
1024 uint32_t start_offset, end_offset;
1025 int format;
1026
1027 ILO_GPE_VALID_GEN(dev, 6, 7);
1028
1029 if (!buf)
1030 return;
1031
1032 format = gen6_translate_index_size(ib->index_size);
1033
1034 start_offset = ib->offset;
1035 /* start_offset must be aligned to index size */
1036 if (start_offset % ib->index_size) {
1037 /* TODO need a u_upload_mgr to upload the IB to an aligned address */
1038 assert(!"unaligned index buffer offset");
1039 start_offset -= start_offset % ib->index_size;
1040 }
1041
1042 /* end_offset must also be aligned */
1043 end_offset = buf->bo_size;
1044 end_offset -= (end_offset % ib->index_size);
1045 /* it is inclusive */
1046 end_offset -= 1;
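   /*
    * Added example (hypothetical): with 2-byte indices and bo_size = 1001,
    * end_offset is first rounded down to 1000 and then becomes 999, the
    * last addressable byte.
    */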
1047
1048 ilo_cp_begin(cp, cmd_len);
1049 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1050 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
1051 format << 8);
1052 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
1053 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
1054 ilo_cp_end(cp);
1055 }
1056
1057 static void
1058 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
1059 uint32_t clip_viewport,
1060 uint32_t sf_viewport,
1061 uint32_t cc_viewport,
1062 struct ilo_cp *cp)
1063 {
1064 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
1065 const uint8_t cmd_len = 4;
1066
1067 ILO_GPE_VALID_GEN(dev, 6, 6);
1068
1069 ilo_cp_begin(cp, cmd_len);
1070 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1071 GEN6_CLIP_VIEWPORT_MODIFY |
1072 GEN6_SF_VIEWPORT_MODIFY |
1073 GEN6_CC_VIEWPORT_MODIFY);
1074 ilo_cp_write(cp, clip_viewport);
1075 ilo_cp_write(cp, sf_viewport);
1076 ilo_cp_write(cp, cc_viewport);
1077 ilo_cp_end(cp);
1078 }
1079
1080 static void
1081 gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
1082 uint32_t blend_state,
1083 uint32_t depth_stencil_state,
1084 uint32_t color_calc_state,
1085 struct ilo_cp *cp)
1086 {
1087 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
1088 const uint8_t cmd_len = 4;
1089
1090 ILO_GPE_VALID_GEN(dev, 6, 6);
1091
1092 ilo_cp_begin(cp, cmd_len);
1093 ilo_cp_write(cp, cmd | (cmd_len - 2));
1094 ilo_cp_write(cp, blend_state | 1);
1095 ilo_cp_write(cp, depth_stencil_state | 1);
1096 ilo_cp_write(cp, color_calc_state | 1);
1097 ilo_cp_end(cp);
1098 }
1099
1100 static void
1101 gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
1102 uint32_t scissor_rect,
1103 struct ilo_cp *cp)
1104 {
1105 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
1106 const uint8_t cmd_len = 2;
1107
1108 ILO_GPE_VALID_GEN(dev, 6, 7);
1109
1110 ilo_cp_begin(cp, cmd_len);
1111 ilo_cp_write(cp, cmd | (cmd_len - 2));
1112 ilo_cp_write(cp, scissor_rect);
1113 ilo_cp_end(cp);
1114 }
1115
1116 static void
1117 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1118 const struct ilo_shader *vs,
1119 int num_samplers,
1120 struct ilo_cp *cp)
1121 {
1122 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1123 const uint8_t cmd_len = 6;
1124 uint32_t dw2, dw4, dw5;
1125 int vue_read_len, max_threads;
1126
1127 ILO_GPE_VALID_GEN(dev, 6, 7);
1128
1129 if (!vs) {
1130 ilo_cp_begin(cp, cmd_len);
1131 ilo_cp_write(cp, cmd | (cmd_len - 2));
1132 ilo_cp_write(cp, 0);
1133 ilo_cp_write(cp, 0);
1134 ilo_cp_write(cp, 0);
1135 ilo_cp_write(cp, 0);
1136 ilo_cp_write(cp, 0);
1137 ilo_cp_end(cp);
1138 return;
1139 }
1140
1141 /*
1142 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
1143 *
1144 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
1145 * 128-bit vertex elements to be passed into the payload for each
1146 * vertex."
1147 *
1148 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
1149 * data to be read and passed to the thread."
1150 */
1151 vue_read_len = (vs->in.count + 1) / 2;
1152 if (!vue_read_len)
1153 vue_read_len = 1;
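   /*
    * Added example: a VS reading 5 attributes needs (5 + 1) / 2 = 3 pairs
    * of 128-bit vertex elements.
    */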
1154
1155 switch (dev->gen) {
1156 case ILO_GEN(6):
1157 /*
1158 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
1159 *
1160 * "Device # of EUs #Threads/EU
1161 * SNB GT2 12 5
1162 * SNB GT1 6 4"
1163 */
1164 max_threads = (dev->gt == 2) ? 60 : 24;
1165 break;
1166 case ILO_GEN(7):
1167 /*
1168 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
1169 *
1170 * "Device # of EUs #Threads/EU
1171 * Ivy Bridge (GT2) 16 8
1172 * Ivy Bridge (GT1) 6 6"
1173 */
1174 max_threads = (dev->gt == 2) ? 128 : 36;
1175 break;
1176 case ILO_GEN(7.5):
1177 /* see brwCreateContext() */
1178 max_threads = (dev->gt == 2) ? 280 : 70;
1179 break;
1180 default:
1181 max_threads = 1;
1182 break;
1183 }
1184
1185 dw2 = ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1186 if (false)
1187 dw2 |= GEN6_VS_FLOATING_POINT_MODE_ALT;
1188
1189 dw4 = vs->in.start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
1190 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
1191 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
1192
1193 dw5 = GEN6_VS_STATISTICS_ENABLE |
1194 GEN6_VS_ENABLE;
1195
1196 if (dev->gen >= ILO_GEN(7.5))
1197 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
1198 else
1199 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
1200
1201 ilo_cp_begin(cp, cmd_len);
1202 ilo_cp_write(cp, cmd | (cmd_len - 2));
1203 ilo_cp_write(cp, vs->cache_offset);
1204 ilo_cp_write(cp, dw2);
1205 ilo_cp_write(cp, 0); /* scratch */
1206 ilo_cp_write(cp, dw4);
1207 ilo_cp_write(cp, dw5);
1208 ilo_cp_end(cp);
1209 }
1210
1211 static void
1212 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1213 const struct ilo_shader *gs,
1214 const struct ilo_shader *vs,
1215 uint32_t vs_offset,
1216 struct ilo_cp *cp)
1217 {
1218 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1219 const uint8_t cmd_len = 7;
1220 uint32_t dw1, dw2, dw4, dw5, dw6;
1221 int i;
1222
1223 ILO_GPE_VALID_GEN(dev, 6, 6);
1224
1225 if (!gs && (!vs || !vs->stream_output)) {
1226 dw1 = 0;
1227 dw2 = 0;
1228 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1229 dw5 = GEN6_GS_STATISTICS_ENABLE;
1230 dw6 = 0;
1231 }
1232 else {
1233 int max_threads, vue_read_len;
1234
1235 /*
1236 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
1237 *
1238 * "Maximum Number of Threads valid range is [0,27] when Rendering
1239 * Enabled bit is set."
1240 *
1241 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
1242 *
1243 * "Programming Note: If the GS stage is enabled, software must
1244 * always allocate at least one GS URB Entry. This is true even if
1245 * the GS thread never needs to output vertices to the pipeline,
1246 * e.g., when only performing stream output. This is an artifact of
1247 * the need to pass the GS thread an initial destination URB
1248 * handle."
1249 *
1250 * As such, we always enable rendering, and limit the number of threads.
1251 */
1252 if (dev->gt == 2) {
1253 /* maximum is 60, but limited to 28 */
1254 max_threads = 28;
1255 }
1256 else {
1257 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
1258 max_threads = 21;
1259 }
1260
1261 if (max_threads > 28)
1262 max_threads = 28;
1263
1264 dw2 = GEN6_GS_SPF_MODE;
1265
1266 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
1267 GEN6_GS_STATISTICS_ENABLE |
1268 GEN6_GS_SO_STATISTICS_ENABLE |
1269 GEN6_GS_RENDERING_ENABLE;
1270
1271 /*
1272 * we cannot make use of GEN6_GS_REORDER because it will reorder
1273 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
1274 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
1275 * (2N+2, 2N+1, 2N+3)).
1276 */
1277 dw6 = GEN6_GS_ENABLE;
1278
1279 if (gs) {
1280 /* VS ouputs must match GS inputs */
1281          /* VS outputs must match GS inputs */
1282 for (i = 0; i < gs->in.count; i++) {
1283 assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
1284 assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
1285 }
1286
1287 /*
1288 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1289 *
1290 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1291 * Length) to 0 indicating no Vertex URB data to be read and
1292 * passed to the thread."
1293 */
1294 vue_read_len = (gs->in.count + 1) / 2;
1295 if (!vue_read_len)
1296 vue_read_len = 1;
1297
1298 dw1 = gs->cache_offset;
1299 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1300 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1301 gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1302
1303 if (gs->in.discard_adj)
1304 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1305
1306 if (gs->stream_output) {
1307 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1308 if (gs->svbi_post_inc) {
1309 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1310 gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1311 }
1312 }
1313 }
1314 else {
1315 /*
1316 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1317 *
1318 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1319 * Length) to 0 indicating no Vertex URB data to be read and
1320 * passed to the thread."
1321 */
1322 vue_read_len = (vs->out.count + 1) / 2;
1323 if (!vue_read_len)
1324 vue_read_len = 1;
1325
1326 dw1 = vs_offset;
1327 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1328 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1329 vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1330
1331 if (vs->in.discard_adj)
1332 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1333
1334 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1335 if (vs->svbi_post_inc) {
1336 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1337 vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1338 }
1339 }
1340 }
1341
1342 ilo_cp_begin(cp, cmd_len);
1343 ilo_cp_write(cp, cmd | (cmd_len - 2));
1344 ilo_cp_write(cp, dw1);
1345 ilo_cp_write(cp, dw2);
1346 ilo_cp_write(cp, 0);
1347 ilo_cp_write(cp, dw4);
1348 ilo_cp_write(cp, dw5);
1349 ilo_cp_write(cp, dw6);
1350 ilo_cp_end(cp);
1351 }
1352
1353 void
1354 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
1355 const struct pipe_rasterizer_state *state,
1356 struct ilo_rasterizer_clip *clip)
1357 {
1358 uint32_t dw1, dw2, dw3;
1359
1360 ILO_GPE_VALID_GEN(dev, 6, 7);
1361
1362 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
1363
1364 if (dev->gen >= ILO_GEN(7)) {
1365 /*
1366 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
1367 *
1368 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
1369 * enabled only for the cases where the incoming primitive topology
1370 * into the clipper guaranteed to be Trilist."
1371 *
1372 * What does this mean?
1373 */
1374 dw1 |= 0 << 19 |
1375 GEN7_CLIP_EARLY_CULL;
1376
1377 if (state->front_ccw)
1378 dw1 |= GEN7_CLIP_WINDING_CCW;
1379
1380 switch (state->cull_face) {
1381 case PIPE_FACE_NONE:
1382 dw1 |= GEN7_CLIP_CULLMODE_NONE;
1383 break;
1384 case PIPE_FACE_FRONT:
1385 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
1386 break;
1387 case PIPE_FACE_BACK:
1388 dw1 |= GEN7_CLIP_CULLMODE_BACK;
1389 break;
1390 case PIPE_FACE_FRONT_AND_BACK:
1391 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
1392 break;
1393 }
1394 }
1395
1396 dw2 = GEN6_CLIP_ENABLE |
1397 GEN6_CLIP_XY_TEST |
1398 state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
1399 GEN6_CLIP_MODE_NORMAL;
1400
1401 if (state->clip_halfz)
1402 dw2 |= GEN6_CLIP_API_D3D;
1403 else
1404 dw2 |= GEN6_CLIP_API_OGL;
1405
1406 if (state->depth_clip)
1407 dw2 |= GEN6_CLIP_Z_TEST;
1408
1409 if (state->flatshade_first) {
1410 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1411 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1412 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1413 }
1414 else {
1415 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1416 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1417 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1418 }
1419
1420 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
1421 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
1422
1423 clip->payload[0] = dw1;
1424 clip->payload[1] = dw2;
1425 clip->payload[2] = dw3;
1426
1427 clip->can_enable_guardband = true;
1428
1429 /*
1430     * There are several reasons that the guard band test should be disabled:
1431     *
1432     *  - GL wide points (to avoid partially visible objects)
1433     *  - GL wide or AA lines (to avoid partially visible objects)
1434 */
1435 if (state->point_size_per_vertex || state->point_size > 1.0f)
1436 clip->can_enable_guardband = false;
1437 if (state->line_smooth || state->line_width > 1.0f)
1438 clip->can_enable_guardband = false;
1439 }
1440
1441 static void
1442 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1443 const struct ilo_rasterizer_state *rasterizer,
1444 bool has_linear_interp,
1445 bool enable_guardband,
1446 int num_viewports,
1447 struct ilo_cp *cp)
1448 {
1449 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1450 const uint8_t cmd_len = 4;
1451 uint32_t dw1, dw2, dw3;
1452
1453 if (rasterizer) {
1454 dw1 = rasterizer->clip.payload[0];
1455 dw2 = rasterizer->clip.payload[1];
1456 dw3 = rasterizer->clip.payload[2];
1457
1458 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1459 dw2 |= GEN6_CLIP_GB_TEST;
1460
1461 if (has_linear_interp)
1462 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1463
1464 dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1465 (num_viewports - 1);
1466 }
1467 else {
1468 dw1 = 0;
1469 dw2 = 0;
1470 dw3 = 0;
1471 }
1472
1473 ilo_cp_begin(cp, cmd_len);
1474 ilo_cp_write(cp, cmd | (cmd_len - 2));
1475 ilo_cp_write(cp, dw1);
1476 ilo_cp_write(cp, dw2);
1477 ilo_cp_write(cp, dw3);
1478 ilo_cp_end(cp);
1479 }
1480
1481 void
1482 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
1483 const struct pipe_rasterizer_state *state,
1484 struct ilo_rasterizer_sf *sf)
1485 {
1486 float offset_const, offset_scale, offset_clamp;
1487 int line_width, point_width;
1488 uint32_t dw1, dw2, dw3;
1489
1490 ILO_GPE_VALID_GEN(dev, 6, 7);
1491
1492 /*
1493 * Scale the constant term. The minimum representable value used by the HW
1494     * is not large enough to be the minimum resolvable difference.
1495 */
1496 offset_const = state->offset_units * 2.0f;
1497
1498 offset_scale = state->offset_scale;
1499 offset_clamp = state->offset_clamp;
1500
1501 /*
1502 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1503 *
1504 * "This bit (Statistics Enable) should be set whenever clipping is
1505 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
1506 * should be cleared if clipping is disabled or Statistics Enable in
1507 * CLIP_STATE is clear."
1508 */
1509 dw1 = GEN6_SF_STATISTICS_ENABLE |
1510 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
1511
1512 /* XXX GEN6 path seems to work fine for GEN7 */
1513 if (false && dev->gen >= ILO_GEN(7)) {
1514 /*
1515 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
1516 *
1517 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
1518 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
1519 * Depth Offset Enable Point) should be set whenever non zero depth
1520 * bias (Slope, Bias) values are used. Setting this bit may have
1521 * some degradation of performance for some workloads."
1522 */
1523 if (state->offset_tri || state->offset_line || state->offset_point) {
1524 /* XXX need to scale offset_const according to the depth format */
1525 dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
1526
1527 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
1528 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
1529 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1530 }
1531 else {
1532 offset_const = 0.0f;
1533 offset_scale = 0.0f;
1534 offset_clamp = 0.0f;
1535 }
1536 }
1537 else {
1538 if (state->offset_tri)
1539 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
1540 if (state->offset_line)
1541 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
1542 if (state->offset_point)
1543 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1544 }
1545
1546 switch (state->fill_front) {
1547 case PIPE_POLYGON_MODE_FILL:
1548 dw1 |= GEN6_SF_FRONT_SOLID;
1549 break;
1550 case PIPE_POLYGON_MODE_LINE:
1551 dw1 |= GEN6_SF_FRONT_WIREFRAME;
1552 break;
1553 case PIPE_POLYGON_MODE_POINT:
1554 dw1 |= GEN6_SF_FRONT_POINT;
1555 break;
1556 }
1557
1558 switch (state->fill_back) {
1559 case PIPE_POLYGON_MODE_FILL:
1560 dw1 |= GEN6_SF_BACK_SOLID;
1561 break;
1562 case PIPE_POLYGON_MODE_LINE:
1563 dw1 |= GEN6_SF_BACK_WIREFRAME;
1564 break;
1565 case PIPE_POLYGON_MODE_POINT:
1566 dw1 |= GEN6_SF_BACK_POINT;
1567 break;
1568 }
1569
1570 if (state->front_ccw)
1571 dw1 |= GEN6_SF_WINDING_CCW;
1572
1573 dw2 = 0;
1574
1575 if (state->line_smooth) {
1576 /*
1577 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1578 *
1579 * "This field (Anti-aliasing Enable) must be disabled if any of the
1580 * render targets have integer (UINT or SINT) surface format."
1581 *
1582 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1583 *
1584 * "This field (Hierarchical Depth Buffer Enable) must be disabled
1585     *      if Anti-aliasing Enable in 3DSTATE_SF is enabled."
1586 *
1587 * TODO We do not check those yet.
1588 */
1589 dw2 |= GEN6_SF_LINE_AA_ENABLE |
1590 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
1591 }
1592
1593 switch (state->cull_face) {
1594 case PIPE_FACE_NONE:
1595 dw2 |= GEN6_SF_CULL_NONE;
1596 break;
1597 case PIPE_FACE_FRONT:
1598 dw2 |= GEN6_SF_CULL_FRONT;
1599 break;
1600 case PIPE_FACE_BACK:
1601 dw2 |= GEN6_SF_CULL_BACK;
1602 break;
1603 case PIPE_FACE_FRONT_AND_BACK:
1604 dw2 |= GEN6_SF_CULL_BOTH;
1605 break;
1606 }
1607
1608 /*
1609 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
1610 * pixels in the minor direction. We have to make the lines slightly
1611     * thicker, 0.5 pixel on both sides, so that they intersect that many
1612     * pixels.
1613 *
1614 * Line width is in U3.7.
1615 */
1616 line_width = (int) ((state->line_width +
1617 (float) state->line_smooth) * 128.0f + 0.5f);
1618 line_width = CLAMP(line_width, 0, 1023);
1619
1620 if (line_width == 128 && !state->line_smooth) {
1621 /* use GIQ rules */
1622 line_width = 0;
1623 }
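   /*
    * Added example (hypothetical): a 1.5-pixel smooth line is programmed as
    * (1.5 + 1.0) * 128 = 320, i.e. 2.5 in U3.7.
    */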
1624
1625 dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1626
1627 if (state->scissor)
1628 dw2 |= GEN6_SF_SCISSOR_ENABLE;
1629
1630 dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
1631 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
1632
1633 if (state->line_last_pixel)
1634 dw3 |= 1 << 31;
1635
1636 if (state->flatshade_first) {
1637 dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
1638 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
1639 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1640 }
1641 else {
1642 dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
1643 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
1644 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1645 }
1646
1647 if (!state->point_size_per_vertex)
1648 dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
1649
1650 /* in U8.3 */
1651 point_width = (int) (state->point_size * 8.0f + 0.5f);
1652 point_width = CLAMP(point_width, 1, 2047);
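   /* added example: point_size 2.0 maps to 16, i.e. 2.0 in U8.3 */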
1653
1654 dw3 |= point_width;
1655
1656 STATIC_ASSERT(Elements(sf->payload) >= 6);
1657 sf->payload[0] = dw1;
1658 sf->payload[1] = dw2;
1659 sf->payload[2] = dw3;
1660 sf->payload[3] = fui(offset_const);
1661 sf->payload[4] = fui(offset_scale);
1662 sf->payload[5] = fui(offset_clamp);
1663
1664 if (state->multisample) {
1665 sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
1666
1667 /*
1668 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1669 *
1670 * "Software must not program a value of 0.0 when running in
1671 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
1672 * when multisampling rasterization is enabled."
1673 */
1674 if (!line_width) {
1675 line_width = 128; /* 1.0f */
1676
1677 sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1678 }
1679 }
1680 else {
1681 sf->dw_msaa = 0;
1682 }
1683 }
1684
1685 /**
1686 * Fill in DW2 to DW7 of 3DSTATE_SF.
1687 */
1688 void
1689 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
1690 const struct ilo_rasterizer_sf *sf,
1691 int num_samples,
1692 enum pipe_format depth_format,
1693 uint32_t *payload, unsigned payload_len)
1694 {
1695 assert(payload_len == Elements(sf->payload));
1696
1697 if (sf) {
1698 memcpy(payload, sf->payload, sizeof(sf->payload));
1699
1700 if (num_samples > 1)
1701 payload[1] |= sf->dw_msaa;
1702
1703 if (dev->gen >= ILO_GEN(7)) {
1704 int format;
1705
1706 /* separate stencil */
1707 switch (depth_format) {
1708 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1709 depth_format = PIPE_FORMAT_Z24X8_UNORM;
1710 break;
1711 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1712          depth_format = PIPE_FORMAT_Z32_FLOAT;
1713 break;
1714 case PIPE_FORMAT_S8_UINT:
1715 depth_format = PIPE_FORMAT_NONE;
1716 break;
1717 default:
1718 break;
1719 }
1720
1721 format = gen6_translate_depth_format(depth_format);
1722 /* FLOAT surface is assumed when there is no depth buffer */
1723 if (format < 0)
1724 format = BRW_DEPTHFORMAT_D32_FLOAT;
1725
1726 payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1727 }
1728 }
1729 else {
1730 payload[0] = 0;
1731 payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
1732 payload[2] = 0;
1733 payload[3] = 0;
1734 payload[4] = 0;
1735 payload[5] = 0;
1736 }
1737 }
1738
1739 /**
1740 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
1741 */
1742 void
1743 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
1744 const struct pipe_rasterizer_state *rasterizer,
1745 const struct ilo_shader *fs,
1746 const struct ilo_shader *last_sh,
1747 uint32_t *dw, int num_dwords)
1748 {
1749 uint32_t point_sprite_enable, const_interp_enable;
1750 uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS];
1751 int vue_offset, vue_len;
1752 int dst, max_src, i;
1753
1754 ILO_GPE_VALID_GEN(dev, 6, 7);
1755 assert(num_dwords == 13);
1756
1757 if (!fs) {
1758 if (dev->gen >= ILO_GEN(7))
1759 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
1760 else
1761 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
1762
1763 for (i = 1; i < num_dwords; i++)
1764 dw[i] = 0;
1765
1766 return;
1767 }
1768
1769 if (last_sh) {
1770 /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
1771 assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE);
1772 assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION);
1773 vue_offset = 2;
1774 vue_len = last_sh->out.count - vue_offset;
1775 }
1776 else {
1777 vue_offset = 0;
1778 vue_len = fs->in.count;
1779 }
1780
1781 point_sprite_enable = 0;
1782 const_interp_enable = 0;
1783 max_src = (last_sh) ? 0 : fs->in.count - 1;
1784
1785 for (dst = 0; dst < fs->in.count; dst++) {
1786 const int semantic = fs->in.semantic_names[dst];
1787 const int index = fs->in.semantic_indices[dst];
1788 const int interp = fs->in.interp[dst];
1789 int src;
1790 uint16_t ctrl;
1791
1792 /*
1793 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
1794 *
1795 * "This field (Point Sprite Texture Coordinate Enable) must be
1796 * programmed to 0 when non-point primitives are rendered."
1797 *
1798 * TODO We do not check that yet.
1799 */
1800 if (semantic == TGSI_SEMANTIC_GENERIC &&
1801 (rasterizer->sprite_coord_enable & (1 << index)))
1802 point_sprite_enable |= 1 << dst;
1803
1804 if (interp == TGSI_INTERPOLATE_CONSTANT ||
1805 (interp == TGSI_INTERPOLATE_COLOR && rasterizer->flatshade))
1806 const_interp_enable |= 1 << dst;
1807
1808 if (!last_sh) {
1809 attr_ctrl[dst] = 0;
1810 continue;
1811 }
1812
1813 /* find the matching VS/GS OUT for FS IN[i] */
1814 ctrl = 0;
1815 for (src = 0; src < vue_len; src++) {
1816 if (last_sh->out.semantic_names[vue_offset + src] != semantic ||
1817 last_sh->out.semantic_indices[vue_offset + src] != index)
1818 continue;
1819
1820 ctrl = src;
1821
1822 if (semantic == TGSI_SEMANTIC_COLOR && rasterizer->light_twoside &&
1823 src < vue_len - 1) {
1824 const int next = src + 1;
1825
1826 if (last_sh->out.semantic_names[vue_offset + next] ==
1827 TGSI_SEMANTIC_BCOLOR &&
1828 last_sh->out.semantic_indices[vue_offset + next] == index) {
1829 ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
1830 ATTRIBUTE_SWIZZLE_SHIFT;
1831 src++;
1832 }
1833 }
1834
1835 break;
1836 }
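      /*
       * Added example (hypothetical layout): if VUE slot 3 holds COLOR0 and
       * slot 4 holds BCOLOR0, the COLOR0 input gets attr_ctrl = 3 with the
       * INPUTATTR_FACING swizzle, so the hardware picks the front or back
       * color based on facing.
       */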
1837
1838 /* if there is no COLOR, try BCOLOR */
1839 if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) {
1840 for (src = 0; src < vue_len; src++) {
1841 if (last_sh->out.semantic_names[vue_offset + src] !=
1842 TGSI_SEMANTIC_BCOLOR ||
1843 last_sh->out.semantic_indices[vue_offset + src] != index)
1844 continue;
1845
1846 ctrl = src;
1847 break;
1848 }
1849 }
1850
1851 if (src < vue_len) {
1852 attr_ctrl[dst] = ctrl;
1853 if (max_src < src)
1854 max_src = src;
1855 }
1856 else {
1857 /*
1858 * The previous shader stage does not output this attribute. The
1859 * value is supposed to be undefined for fs, unless the attribute
1860 * goes through point sprite replacement or the attribute is
1861 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
1862 * attribute is picked.
1863 *
1864 * We should update the fs code and omit the output of
1865 * TGSI_SEMANTIC_POSITION here.
1866 */
1867 attr_ctrl[dst] = 0;
1868 }
1869 }
1870
1871 for (; dst < Elements(attr_ctrl); dst++)
1872 attr_ctrl[dst] = 0;
1873
1874 /* only the first 16 attributes can be remapped */
1875 for (dst = 16; dst < Elements(attr_ctrl); dst++)
1876 assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst);
1877
1878 /*
1879 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1880 *
1881 * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
1882 * 0 indicating no Vertex URB data to be read.
1883 *
1884 * This field should be set to the minimum length required to read the
1885 * maximum source attribute. The maximum source attribute is indicated
1886 * by the maximum value of the enabled Attribute # Source Attribute if
1887 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
1888 * enable is not set.
1889 *
1890 * read_length = ceiling((max_source_attr+1)/2)
1891 *
1892 * [errata] Corruption/Hang possible if length programmed larger than
1893 * recommended"
1894 */
1895 vue_len = max_src + 1;
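   /*
    * Added example (hypothetical): if the highest source attribute used is
    * 8, vue_len is 9 and the read length programmed below is
    * (9 + 1) / 2 = 5.
    */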
1896
1897 assert(fs->in.count <= 32);
1898 assert(vue_offset % 2 == 0);
1899
1900 if (dev->gen >= ILO_GEN(7)) {
1901 dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
1902 (vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1903 vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
1904
1905 if (last_sh)
1906 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
1907 }
1908 else {
1909 dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT |
1910 (vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
1911 vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
1912
1913 if (last_sh)
1914 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
1915 }
1916
1917 switch (rasterizer->sprite_coord_mode) {
1918 case PIPE_SPRITE_COORD_UPPER_LEFT:
1919 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
1920 break;
1921 case PIPE_SPRITE_COORD_LOWER_LEFT:
1922 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
1923 break;
1924 }
1925
1926 for (i = 0; i < 8; i++)
1927 dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i];
1928
1929 dw[9] = point_sprite_enable;
1930 dw[10] = const_interp_enable;
1931
1932 /* WrapShortest enables */
1933 dw[11] = 0;
1934 dw[12] = 0;
1935 }
1936
1937 static void
1938 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1939 const struct ilo_rasterizer_state *rasterizer,
1940 const struct ilo_shader *fs,
1941 const struct ilo_shader *last_sh,
1942 struct ilo_cp *cp)
1943 {
1944 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1945 const uint8_t cmd_len = 20;
1946 uint32_t payload_raster[6], payload_sbe[13];
1947
1948 ILO_GPE_VALID_GEN(dev, 6, 6);
1949
1950 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, &rasterizer->sf,
1951 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1952 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, &rasterizer->state,
1953 fs, last_sh, payload_sbe, Elements(payload_sbe));
1954
1955 ilo_cp_begin(cp, cmd_len);
1956 ilo_cp_write(cp, cmd | (cmd_len - 2));
1957 ilo_cp_write(cp, payload_sbe[0]);
1958 ilo_cp_write_multi(cp, payload_raster, 6);
1959 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1960 ilo_cp_end(cp);
1961 }
1962
1963 static void
1964 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1965 const struct ilo_shader *fs,
1966 int num_samplers,
1967 const struct pipe_rasterizer_state *rasterizer,
1968 bool dual_blend, bool cc_may_kill,
1969 struct ilo_cp *cp)
1970 {
1971 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1972 const uint8_t cmd_len = 9;
1973 const int num_samples = 1;
1974 uint32_t dw2, dw4, dw5, dw6;
1975 int max_threads;
1976
1977 ILO_GPE_VALID_GEN(dev, 6, 6);
1978
1979 /* see brwCreateContext() */
1980 max_threads = (dev->gt == 2) ? 80 : 40;
1981
1982 if (!fs) {
1983 ilo_cp_begin(cp, cmd_len);
1984 ilo_cp_write(cp, cmd | (cmd_len - 2));
1985 ilo_cp_write(cp, 0);
1986 ilo_cp_write(cp, 0);
1987 ilo_cp_write(cp, 0);
1988 ilo_cp_write(cp, 0);
1989 /* honor the valid range even if dispatching is disabled */
1990 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
1991 ilo_cp_write(cp, 0);
1992 ilo_cp_write(cp, 0);
1993 ilo_cp_write(cp, 0);
1994 ilo_cp_end(cp);
1995
1996 return;
1997 }
1998
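   /* the sampler count field is in units of 4 samplers, rounded up */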
1999 dw2 = (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
2000 if (false)
2001 dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
2002
2003 dw4 = fs->in.start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
2004 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
2005 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
2006
2007 if (true) {
2008 dw4 |= GEN6_WM_STATISTICS_ENABLE;
2009 }
2010 else {
2011 /*
2012 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
2013 *
2014 * "This bit (Statistics Enable) must be disabled if either of these
2015  *       bits is set: Depth Buffer Clear, Hierarchical Depth Buffer
2016 * Resolve Enable or Depth Buffer Resolve Enable."
2017 */
2018 dw4 |= GEN6_WM_DEPTH_CLEAR;
2019 dw4 |= GEN6_WM_DEPTH_RESOLVE;
2020 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
2021 }
2022
2023 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT |
2024 GEN6_WM_LINE_AA_WIDTH_2_0;
2025
2026 /*
2027 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
2028 *
2029 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
2030 * PS kernel or color calculator has the ability to kill (discard)
2031 * pixels or samples, other than due to depth or stencil testing.
2032 * This bit is required to be ENABLED in the following situations:
2033 *
2034 * The API pixel shader program contains "killpix" or "discard"
2035 * instructions, or other code in the pixel shader kernel that can
2036 * cause the final pixel mask to differ from the pixel mask received
2037 * on dispatch.
2038 *
2039 * A sampler with chroma key enabled with kill pixel mode is used by
2040 * the pixel shader.
2041 *
2042 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
2043 * enabled.
2044 *
2045 * The pixel shader kernel generates and outputs oMask.
2046 *
2047 * Note: As ClipDistance clipping is fully supported in hardware and
2048 * therefore not via PS instructions, there should be no need to
2049 * ENABLE this bit due to ClipDistance clipping."
2050 */
2051 if (fs->has_kill || cc_may_kill)
2052 dw5 |= GEN6_WM_KILL_ENABLE;
2053
2054 /*
2055 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
2056 *
2057 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
2058 * field must be set to disabled."
2059 *
2060 * TODO This is not checked yet.
2061 */
2062 if (fs->out.has_pos)
2063 dw5 |= GEN6_WM_COMPUTED_DEPTH;
2064
2065 if (fs->in.has_pos)
2066 dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
2067
2068 /*
2069 * Set this bit if
2070 *
2071 * a) fs writes colors and color is not masked, or
2072 * b) fs writes depth, or
2073 * c) fs or cc kills
2074 */
2075 if (true)
2076 dw5 |= GEN6_WM_DISPATCH_ENABLE;
2077
2078 /* same value as in 3DSTATE_SF */
2079 if (rasterizer->line_smooth)
2080 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
2081
2082 if (rasterizer->poly_stipple_enable)
2083 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
2084 if (rasterizer->line_stipple_enable)
2085 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
2086
2087 if (dual_blend)
2088 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
2089
2090 if (fs->dispatch_16)
2091 dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
2092 else
2093 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
2094
2095 dw6 = fs->in.count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
2096 GEN6_WM_POSOFFSET_NONE |
2097 GEN6_WM_POSITION_ZW_PIXEL |
2098 fs->in.barycentric_interpolation_mode <<
2099 GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
2100
2101 if (rasterizer->bottom_edge_rule)
2102 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
2103
2104 if (num_samples > 1) {
2105 if (rasterizer->multisample)
2106 dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
2107 else
2108 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
2109 dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
2110 }
2111 else {
2112 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL |
2113 GEN6_WM_MSDISPMODE_PERSAMPLE;
2114 }
2115
2116 ilo_cp_begin(cp, cmd_len);
2117 ilo_cp_write(cp, cmd | (cmd_len - 2));
2118 ilo_cp_write(cp, fs->cache_offset);
2119 ilo_cp_write(cp, dw2);
2120 ilo_cp_write(cp, 0); /* scratch */
2121 ilo_cp_write(cp, dw4);
2122 ilo_cp_write(cp, dw5);
2123 ilo_cp_write(cp, dw6);
2124 ilo_cp_write(cp, 0); /* kernel 1 */
2125 ilo_cp_write(cp, 0); /* kernel 2 */
2126 ilo_cp_end(cp);
2127 }
2128
2129 static unsigned
2130 gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
2131 const uint32_t *bufs, const int *sizes,
2132 int num_bufs, int max_read_length,
2133 uint32_t *dw, int num_dwords)
2134 {
2135 unsigned enabled = 0x0;
2136 int total_read_length, i;
2137
2138 assert(num_dwords == 4);
2139
2140 total_read_length = 0;
2141 for (i = 0; i < 4; i++) {
2142 if (i < num_bufs && sizes[i]) {
2143 /* in 256-bit units minus one */
2144 const int read_len = (sizes[i] + 31) / 32 - 1;
2145
2146 assert(bufs[i] % 32 == 0);
2147 assert(read_len < 32);
2148
2149 enabled |= 1 << i;
2150 dw[i] = bufs[i] | read_len;
2151
2152 total_read_length += read_len + 1;
2153 }
2154 else {
2155 dw[i] = 0;
2156 }
2157 }
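   /*
    * For example, a 256-byte constant buffer gives a read_len of
    * (256 + 31) / 32 - 1 == 7, i.e., eight 256-bit units, and adds 8 to
    * total_read_length.
    */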
2158
2159 assert(total_read_length <= max_read_length);
2160
2161 return enabled;
2162 }
2163
2164 static void
2165 gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
2166 const uint32_t *bufs, const int *sizes,
2167 int num_bufs,
2168 struct ilo_cp *cp)
2169 {
2170 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
2171 const uint8_t cmd_len = 5;
2172 uint32_t buf_dw[4], buf_enabled;
2173
2174 ILO_GPE_VALID_GEN(dev, 6, 6);
2175 assert(num_bufs <= 4);
2176
2177 /*
2178 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
2179 *
2180 * "The sum of all four read length fields (each incremented to
2181 * represent the actual read length) must be less than or equal to 32"
2182 */
2183 buf_enabled = gen6_fill_3dstate_constant(dev,
2184 bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
2185
2186 ilo_cp_begin(cp, cmd_len);
2187 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2188 ilo_cp_write(cp, buf_dw[0]);
2189 ilo_cp_write(cp, buf_dw[1]);
2190 ilo_cp_write(cp, buf_dw[2]);
2191 ilo_cp_write(cp, buf_dw[3]);
2192 ilo_cp_end(cp);
2193 }
2194
2195 static void
2196 gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
2197 const uint32_t *bufs, const int *sizes,
2198 int num_bufs,
2199 struct ilo_cp *cp)
2200 {
2201 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
2202 const uint8_t cmd_len = 5;
2203 uint32_t buf_dw[4], buf_enabled;
2204
2205 ILO_GPE_VALID_GEN(dev, 6, 6);
2206 assert(num_bufs <= 4);
2207
2208 /*
2209 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
2210 *
2211 * "The sum of all four read length fields (each incremented to
2212 * represent the actual read length) must be less than or equal to 64"
2213 */
2214 buf_enabled = gen6_fill_3dstate_constant(dev,
2215 bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
2216
2217 ilo_cp_begin(cp, cmd_len);
2218 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2219 ilo_cp_write(cp, buf_dw[0]);
2220 ilo_cp_write(cp, buf_dw[1]);
2221 ilo_cp_write(cp, buf_dw[2]);
2222 ilo_cp_write(cp, buf_dw[3]);
2223 ilo_cp_end(cp);
2224 }
2225
2226 static void
2227 gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
2228 const uint32_t *bufs, const int *sizes,
2229 int num_bufs,
2230 struct ilo_cp *cp)
2231 {
2232 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
2233 const uint8_t cmd_len = 5;
2234 uint32_t buf_dw[4], buf_enabled;
2235
2236 ILO_GPE_VALID_GEN(dev, 6, 6);
2237 assert(num_bufs <= 4);
2238
2239 /*
2240 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
2241 *
2242 * "The sum of all four read length fields (each incremented to
2243 * represent the actual read length) must be less than or equal to 64"
2244 */
2245 buf_enabled = gen6_fill_3dstate_constant(dev,
2246 bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
2247
2248 ilo_cp_begin(cp, cmd_len);
2249 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2250 ilo_cp_write(cp, buf_dw[0]);
2251 ilo_cp_write(cp, buf_dw[1]);
2252 ilo_cp_write(cp, buf_dw[2]);
2253 ilo_cp_write(cp, buf_dw[3]);
2254 ilo_cp_end(cp);
2255 }
2256
2257 static void
2258 gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
2259 unsigned sample_mask,
2260 struct ilo_cp *cp)
2261 {
2262 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
2263 const uint8_t cmd_len = 2;
2264 const unsigned valid_mask = 0xf;
2265
2266 ILO_GPE_VALID_GEN(dev, 6, 6);
2267
2268 sample_mask &= valid_mask;
2269
2270 ilo_cp_begin(cp, cmd_len);
2271 ilo_cp_write(cp, cmd | (cmd_len - 2));
2272 ilo_cp_write(cp, sample_mask);
2273 ilo_cp_end(cp);
2274 }
2275
2276 static void
2277 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
2278 unsigned x, unsigned y,
2279 unsigned width, unsigned height,
2280 struct ilo_cp *cp)
2281 {
2282 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
2283 const uint8_t cmd_len = 4;
2284 unsigned xmax = x + width - 1;
2285 unsigned ymax = y + height - 1;
2286 int rect_limit;
2287
2288 ILO_GPE_VALID_GEN(dev, 6, 7);
2289
2290 if (dev->gen >= ILO_GEN(7)) {
2291 rect_limit = 16383;
2292 }
2293 else {
2294 /*
2295 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
2296 *
2297 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
2298 * must be an even number"
2299 */
2300 assert(y % 2 == 0);
2301
2302 rect_limit = 8191;
2303 }
2304
2305 if (x > rect_limit) x = rect_limit;
2306 if (y > rect_limit) y = rect_limit;
2307 if (xmax > rect_limit) xmax = rect_limit;
2308 if (ymax > rect_limit) ymax = rect_limit;
2309
2310 ilo_cp_begin(cp, cmd_len);
2311 ilo_cp_write(cp, cmd | (cmd_len - 2));
2312 ilo_cp_write(cp, y << 16 | x);
2313 ilo_cp_write(cp, ymax << 16 | xmax);
2314
2315 /*
2316 * There is no need to set the origin. It is intended to support front
2317 * buffer rendering.
2318 */
2319 ilo_cp_write(cp, 0);
2320
2321 ilo_cp_end(cp);
2322 }
2323
2324 static int
2325 gen6_get_depth_buffer_format(const struct ilo_dev_info *dev,
2326 enum pipe_format format,
2327 bool hiz,
2328 bool separate_stencil,
2329 bool *has_depth,
2330 bool *has_stencil)
2331 {
2332 int depth_format;
2333
2334 ILO_GPE_VALID_GEN(dev, 6, 7);
2335
2336 *has_depth = true;
2337 *has_stencil = false;
2338
2339 /*
2340 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2341 *
2342 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
2343 * Surface Format of the depth buffer cannot be
2344 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2345 * requires the separate stencil buffer."
2346 *
2347 * From the Ironlake PRM, volume 2 part 1, page 330:
2348 *
2349 * "If this field (Separate Stencil Buffer Enable) is disabled, the
2350 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
2351 *
2352 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
2353 * is indeed used, the depth values output by the fragment shaders will
2354 * be different when read back.
2355 *
2356     * As for GEN7+, separate_stencil is always true.
2357 */
2358 switch (format) {
2359 case PIPE_FORMAT_Z16_UNORM:
2360 depth_format = BRW_DEPTHFORMAT_D16_UNORM;
2361 break;
2362 case PIPE_FORMAT_Z32_FLOAT:
2363 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2364 break;
2365 case PIPE_FORMAT_Z24X8_UNORM:
2366 depth_format = (separate_stencil) ?
2367 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2368 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2369 break;
2370 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2371 depth_format = (separate_stencil) ?
2372 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2373 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2374 *has_stencil = true;
2375 break;
2376 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2377 depth_format = (separate_stencil) ?
2378 BRW_DEPTHFORMAT_D32_FLOAT :
2379 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
2380 *has_stencil = true;
2381 break;
2382 case PIPE_FORMAT_S8_UINT:
2383 if (separate_stencil) {
2384 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2385 *has_depth = false;
2386 *has_stencil = true;
2387 break;
2388 }
2389 /* fall through */
2390 default:
2391 assert(!"unsupported depth/stencil format");
2392 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2393 *has_depth = false;
2394 *has_stencil = false;
2395 break;
2396 }
2397
2398 return depth_format;
2399 }
2400
2401 static void
2402 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2403 const struct pipe_surface *surface,
2404 struct ilo_cp *cp)
2405 {
2406 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2407 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
2408 const uint8_t cmd_len = 7;
2409 const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
2410 const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
2411 const bool hiz = false;
2412 struct ilo_texture *tex;
2413 uint32_t dw1, dw3, dw4, dw6;
2414 uint32_t slice_offset, x_offset, y_offset;
2415 int surface_type, depth_format;
2416 unsigned lod, first_layer, num_layers;
2417 unsigned width, height, depth;
2418 bool separate_stencil, has_depth, has_stencil;
2419
2420 ILO_GPE_VALID_GEN(dev, 6, 7);
2421
2422 if (dev->gen >= ILO_GEN(7)) {
2423 separate_stencil = true;
2424 }
2425 else {
2426 /*
2427 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2428 *
2429 * "This field (Separate Stencil Buffer Enable) must be set to the
2430 * same value (enabled or disabled) as Hierarchical Depth Buffer
2431 * Enable."
2432 */
2433 separate_stencil = hiz;
2434 }
2435
2436 if (surface) {
2437 depth_format = gen6_get_depth_buffer_format(dev,
2438 surface->format, hiz, separate_stencil, &has_depth, &has_stencil);
2439 }
2440 else {
2441 has_depth = false;
2442 has_stencil = false;
2443 }
2444
2445 if (!has_depth && !has_stencil) {
2446 dw1 = BRW_SURFACE_NULL << 29 |
2447 BRW_DEPTHFORMAT_D32_FLOAT << 18;
2448
2449 /* Y-tiled */
2450 if (dev->gen == ILO_GEN(6)) {
2451 dw1 |= 1 << 27 |
2452 1 << 26;
2453 }
2454
2455 ilo_cp_begin(cp, cmd_len);
2456 ilo_cp_write(cp, cmd | (cmd_len - 2));
2457 ilo_cp_write(cp, dw1);
2458 ilo_cp_write(cp, 0);
2459 ilo_cp_write(cp, 0);
2460 ilo_cp_write(cp, 0);
2461 ilo_cp_write(cp, 0);
2462 ilo_cp_write(cp, 0);
2463 ilo_cp_end(cp);
2464
2465 return;
2466 }
2467
2468 tex = ilo_texture(surface->texture);
2469
2470 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
2471 lod = surface->u.tex.level;
2472 first_layer = surface->u.tex.first_layer;
2473 num_layers = surface->u.tex.last_layer - first_layer + 1;
2474
2475 width = tex->base.width0;
2476 height = tex->base.height0;
2477 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
2478 tex->base.depth0 : num_layers;
2479
2480 if (surface_type == BRW_SURFACE_CUBE) {
2481 /*
2482 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
2483 *
2484 * "For Other Surfaces (Cube Surfaces):
2485 * This field (Minimum Array Element) is ignored."
2486 *
2487 * "For Other Surfaces (Cube Surfaces):
2488 * This field (Render Target View Extent) is ignored."
2489 *
2490 * As such, we cannot set first_layer and num_layers on cube surfaces.
2491 * To work around that, treat it as a 2D surface.
2492 */
2493 surface_type = BRW_SURFACE_2D;
2494 }
2495
2496 /*
2497 * we always treat the resource as non-mipmapped and set the slice/x/y
2498 * offsets manually
2499 */
2500 if (true) {
2501 /* no layered rendering */
2502 assert(num_layers == 1);
2503
2504 slice_offset = ilo_texture_get_slice_offset(tex,
2505 lod, first_layer, &x_offset, &y_offset);
2506
2507 /*
2508 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
2509 *
2510 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
2511 * Coordinate Offset X) must be zero to ensure correct alignment"
2512 *
2513        * XXX We skip the check on GEN6, which seems to be fine.  Eventually we
2514        * need to make sure misaligned offsets do not actually happen.
2515 */
2516 if (dev->gen >= ILO_GEN(7)) {
2517 assert((x_offset & 7) == 0 && (y_offset & 7) == 0);
2518 x_offset &= ~7;
2519 y_offset &= ~7;
2520 }
2521
2522 /* the size of the layer */
2523 width = u_minify(width, lod);
2524 height = u_minify(height, lod);
2525 if (surface_type == BRW_SURFACE_3D)
2526 depth = u_minify(depth, lod);
2527 else
2528 depth = 1;
2529
2530 lod = 0;
2531 first_layer = 0;
2532
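      /*
       * the X/Y offsets programmed below are relative to the surface origin,
       * so the programmed extent must be large enough to cover them
       */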
2533 width += x_offset;
2534 height += y_offset;
2535
2536 /* we have to treat them as 2D surfaces */
2537 if (surface_type == BRW_SURFACE_CUBE) {
2538 assert(tex->base.width0 == tex->base.height0);
2539 /* we will set slice_offset to point to the single face */
2540 surface_type = BRW_SURFACE_2D;
2541 }
2542 else if (surface_type == BRW_SURFACE_1D && height > 1) {
2543 assert(tex->base.height0 == 1);
2544 surface_type = BRW_SURFACE_2D;
2545 }
2546 }
2547 else {
2548 slice_offset = 0;
2549 x_offset = 0;
2550 y_offset = 0;
2551 }
2552
2553 /* required for GEN6+ */
2554 assert(tex->tiling == INTEL_TILING_Y);
2555 assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
2556 tex->bo_stride % 128 == 0);
2557 assert(width <= tex->bo_stride);
2558
2559 switch (surface_type) {
2560 case BRW_SURFACE_1D:
2561 assert(width <= max_2d_size && height == 1 &&
2562 depth <= max_array_size);
2563 assert(first_layer < max_array_size - 1 &&
2564 num_layers <= max_array_size);
2565 break;
2566 case BRW_SURFACE_2D:
2567 assert(width <= max_2d_size && height <= max_2d_size &&
2568 depth <= max_array_size);
2569 assert(first_layer < max_array_size - 1 &&
2570 num_layers <= max_array_size);
2571 break;
2572 case BRW_SURFACE_3D:
2573 assert(width <= 2048 && height <= 2048 && depth <= 2048);
2574 assert(first_layer < 2048 && num_layers <= max_array_size);
2575 assert(x_offset == 0 && y_offset == 0);
2576 break;
2577 case BRW_SURFACE_CUBE:
2578 assert(width <= max_2d_size && height <= max_2d_size && depth == 1);
2579 assert(first_layer == 0 && num_layers == 1);
2580 assert(width == height);
2581 assert(x_offset == 0 && y_offset == 0);
2582 break;
2583 default:
2584 assert(!"unexpected depth surface type");
2585 break;
2586 }
2587
2588 dw1 = surface_type << 29 |
2589 depth_format << 18 |
2590 (tex->bo_stride - 1);
2591
2592 if (dev->gen >= ILO_GEN(7)) {
2593 if (has_depth)
2594 dw1 |= 1 << 28;
2595
2596 if (has_stencil)
2597 dw1 |= 1 << 27;
2598
2599 if (hiz)
2600 dw1 |= 1 << 22;
2601
2602 dw3 = (height - 1) << 18 |
2603 (width - 1) << 4 |
2604 lod;
2605
2606 dw4 = (depth - 1) << 21 |
2607 first_layer << 10;
2608
2609 dw6 = (num_layers - 1) << 21;
2610 }
2611 else {
2612 dw1 |= (tex->tiling != INTEL_TILING_NONE) << 27 |
2613 (tex->tiling == INTEL_TILING_Y) << 26;
2614
2615 if (hiz) {
2616 dw1 |= 1 << 22 |
2617 1 << 21;
2618 }
2619
2620 dw3 = (height - 1) << 19 |
2621 (width - 1) << 6 |
2622 lod << 2 |
2623 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
2624
2625 dw4 = (depth - 1) << 21 |
2626 first_layer << 10 |
2627 (num_layers - 1) << 1;
2628
2629 dw6 = 0;
2630 }
2631
2632 ilo_cp_begin(cp, cmd_len);
2633 ilo_cp_write(cp, cmd | (cmd_len - 2));
2634 ilo_cp_write(cp, dw1);
2635
2636 if (has_depth) {
2637 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2638 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2639 }
2640 else {
2641 ilo_cp_write(cp, 0);
2642 }
2643
2644 ilo_cp_write(cp, dw3);
2645 ilo_cp_write(cp, dw4);
2646 ilo_cp_write(cp, y_offset << 16 | x_offset);
2647 ilo_cp_write(cp, dw6);
2648 ilo_cp_end(cp);
2649 }
2650
2651 static void
2652 gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
2653 int x_offset, int y_offset,
2654 struct ilo_cp *cp)
2655 {
2656 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
2657 const uint8_t cmd_len = 2;
2658
2659 ILO_GPE_VALID_GEN(dev, 6, 7);
2660 assert(x_offset >= 0 && x_offset <= 31);
2661 assert(y_offset >= 0 && y_offset <= 31);
2662
2663 ilo_cp_begin(cp, cmd_len);
2664 ilo_cp_write(cp, cmd | (cmd_len - 2));
2665 ilo_cp_write(cp, x_offset << 8 | y_offset);
2666 ilo_cp_end(cp);
2667 }
2668
2669 static void
2670 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
2671 const struct pipe_poly_stipple *pattern,
2672 struct ilo_cp *cp)
2673 {
2674 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
2675 const uint8_t cmd_len = 33;
2676 int i;
2677
2678 ILO_GPE_VALID_GEN(dev, 6, 7);
2679 assert(Elements(pattern->stipple) == 32);
2680
2681 ilo_cp_begin(cp, cmd_len);
2682 ilo_cp_write(cp, cmd | (cmd_len - 2));
2683 for (i = 0; i < 32; i++)
2684 ilo_cp_write(cp, pattern->stipple[i]);
2685 ilo_cp_end(cp);
2686 }
2687
2688 static void
2689 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
2690 unsigned pattern, unsigned factor,
2691 struct ilo_cp *cp)
2692 {
2693 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
2694 const uint8_t cmd_len = 3;
2695 unsigned inverse;
2696
2697 ILO_GPE_VALID_GEN(dev, 6, 7);
2698 assert((pattern & 0xffff) == pattern);
2699 assert(factor >= 1 && factor <= 256);
2700
2701 ilo_cp_begin(cp, cmd_len);
2702 ilo_cp_write(cp, cmd | (cmd_len - 2));
2703 ilo_cp_write(cp, pattern);
2704
2705 if (dev->gen >= ILO_GEN(7)) {
2706 /* in U1.16 */
2707 inverse = (unsigned) (65536.0f / factor);
2708 ilo_cp_write(cp, inverse << 15 | factor);
2709 }
2710 else {
2711 /* in U1.13 */
2712 inverse = (unsigned) (8192.0f / factor);
2713 ilo_cp_write(cp, inverse << 16 | factor);
2714 }
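   /*
    * For example, factor == 3 gives inverse == 21845 (~1/3 in U1.16) on
    * GEN7, and inverse == 2730 (~1/3 in U1.13) on GEN6.
    */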
2715
2716 ilo_cp_end(cp);
2717 }
2718
2719 static void
2720 gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
2721 struct ilo_cp *cp)
2722 {
2723 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
2724 const uint8_t cmd_len = 3;
2725
2726 ILO_GPE_VALID_GEN(dev, 6, 7);
2727
2728 ilo_cp_begin(cp, cmd_len);
2729 ilo_cp_write(cp, cmd | (cmd_len - 2));
2730 ilo_cp_write(cp, 0 << 16 | 0);
2731 ilo_cp_write(cp, 0 << 16 | 0);
2732 ilo_cp_end(cp);
2733 }
2734
2735 static void
2736 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
2737 int index, unsigned svbi,
2738 unsigned max_svbi,
2739 bool load_vertex_count,
2740 struct ilo_cp *cp)
2741 {
2742 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
2743 const uint8_t cmd_len = 4;
2744 uint32_t dw1;
2745
2746 ILO_GPE_VALID_GEN(dev, 6, 6);
2747 assert(index >= 0 && index < 4);
2748
2749 dw1 = index << SVB_INDEX_SHIFT;
2750 if (load_vertex_count)
2751 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
2752
2753 ilo_cp_begin(cp, cmd_len);
2754 ilo_cp_write(cp, cmd | (cmd_len - 2));
2755 ilo_cp_write(cp, dw1);
2756 ilo_cp_write(cp, svbi);
2757 ilo_cp_write(cp, max_svbi);
2758 ilo_cp_end(cp);
2759 }
2760
2761 static void
2762 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
2763 int num_samples,
2764 const uint32_t *packed_sample_pos,
2765 bool pixel_location_center,
2766 struct ilo_cp *cp)
2767 {
2768 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
2769 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
2770 uint32_t dw1, dw2, dw3;
2771
2772 ILO_GPE_VALID_GEN(dev, 6, 7);
2773
2774 dw1 = (pixel_location_center) ?
2775 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
2776
2777 switch (num_samples) {
2778 case 0:
2779 case 1:
2780 dw1 |= MS_NUMSAMPLES_1;
2781 dw2 = 0;
2782 dw3 = 0;
2783 break;
2784 case 4:
2785 dw1 |= MS_NUMSAMPLES_4;
2786 dw2 = packed_sample_pos[0];
2787 dw3 = 0;
2788 break;
2789 case 8:
2790 assert(dev->gen >= ILO_GEN(7));
2791 dw1 |= MS_NUMSAMPLES_8;
2792 dw2 = packed_sample_pos[0];
2793 dw3 = packed_sample_pos[1];
2794 break;
2795 default:
2796 assert(!"unsupported sample count");
2797 dw1 |= MS_NUMSAMPLES_1;
2798 dw2 = 0;
2799 dw3 = 0;
2800 break;
2801 }
2802
2803 ilo_cp_begin(cp, cmd_len);
2804 ilo_cp_write(cp, cmd | (cmd_len - 2));
2805 ilo_cp_write(cp, dw1);
2806 ilo_cp_write(cp, dw2);
2807 if (dev->gen >= ILO_GEN(7))
2808 ilo_cp_write(cp, dw3);
2809 ilo_cp_end(cp);
2810 }
2811
2812 static void
2813 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
2814 const struct pipe_surface *surface,
2815 struct ilo_cp *cp)
2816 {
2817 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2818 ILO_GPE_CMD(0x3, 0x0, 0x06) :
2819 ILO_GPE_CMD(0x3, 0x1, 0x0e);
2820 const uint8_t cmd_len = 3;
2821 struct ilo_texture *tex;
2822 uint32_t slice_offset, x_offset, y_offset;
2823 int pitch;
2824
2825 ILO_GPE_VALID_GEN(dev, 6, 7);
2826
2827 tex = (surface) ? ilo_texture(surface->texture) : NULL;
2828 if (tex && surface->format != PIPE_FORMAT_S8_UINT)
2829 tex = tex->separate_s8;
2830
2831 if (!tex) {
2832 ilo_cp_begin(cp, cmd_len);
2833 ilo_cp_write(cp, cmd | (cmd_len - 2));
2834 ilo_cp_write(cp, 0);
2835 ilo_cp_write(cp, 0);
2836 ilo_cp_end(cp);
2837
2838 return;
2839 }
2840
2841 if (true) {
2842 slice_offset = ilo_texture_get_slice_offset(tex,
2843 surface->u.tex.level, surface->u.tex.first_layer,
2844 &x_offset, &y_offset);
2845 /* XXX X/Y offsets inherit from 3DSTATE_DEPTH_BUFFER */
2846 }
2847 else {
2848 slice_offset = 0;
2849 x_offset = 0;
2850 y_offset = 0;
2851 }
2852
2853 /*
2854 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
2855 *
2856 * "The pitch must be set to 2x the value computed based on width, as
2857 * the stencil buffer is stored with two rows interleaved."
2858 *
2859 * According to the classic driver, we need to do the same for GEN7+ even
2860 * though the Ivy Bridge PRM does not say anything about it.
2861 */
2862 pitch = 2 * tex->bo_stride;
2863 assert(pitch > 0 && pitch < 128 * 1024 && pitch % 128 == 0);
2864
2865 ilo_cp_begin(cp, cmd_len);
2866 ilo_cp_write(cp, cmd | (cmd_len - 2));
2867 ilo_cp_write(cp, pitch - 1);
2868 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2869 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2870 ilo_cp_end(cp);
2871 }
2872
2873 static void
2874 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2875 const struct pipe_surface *surface,
2876 struct ilo_cp *cp)
2877 {
2878 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2879 ILO_GPE_CMD(0x3, 0x0, 0x07) :
2880 ILO_GPE_CMD(0x3, 0x1, 0x0f);
2881 const uint8_t cmd_len = 3;
2882 const bool hiz = false;
2883 struct ilo_texture *tex;
2884 uint32_t slice_offset;
2885
2886 ILO_GPE_VALID_GEN(dev, 6, 7);
2887
2888 if (!surface || !hiz) {
2889 ilo_cp_begin(cp, cmd_len);
2890 ilo_cp_write(cp, cmd | (cmd_len - 2));
2891 ilo_cp_write(cp, 0);
2892 ilo_cp_write(cp, 0);
2893 ilo_cp_end(cp);
2894
2895 return;
2896 }
2897
2898 tex = ilo_texture(surface->texture);
2899
2900 /* TODO */
2901 slice_offset = 0;
2902
2903 assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
2904 tex->bo_stride % 128 == 0);
2905
2906 ilo_cp_begin(cp, cmd_len);
2907 ilo_cp_write(cp, cmd | (cmd_len - 2));
2908 ilo_cp_write(cp, tex->bo_stride - 1);
2909 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2910 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2911 ilo_cp_end(cp);
2912 }
2913
2914 static void
2915 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
2916 uint32_t clear_val,
2917 struct ilo_cp *cp)
2918 {
2919 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
2920 const uint8_t cmd_len = 2;
2921
2922 ILO_GPE_VALID_GEN(dev, 6, 6);
2923
2924 ilo_cp_begin(cp, cmd_len);
2925 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2926 GEN5_DEPTH_CLEAR_VALID);
2927 ilo_cp_write(cp, clear_val);
2928 ilo_cp_end(cp);
2929 }
2930
2931 static void
2932 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
2933 uint32_t dw1,
2934 struct intel_bo *bo, uint32_t bo_offset,
2935 bool write_qword,
2936 struct ilo_cp *cp)
2937 {
2938 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
2939 const uint8_t cmd_len = (write_qword) ? 5 : 4;
2940 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
2941 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
2942
2943 ILO_GPE_VALID_GEN(dev, 6, 7);
2944
2945 if (dw1 & PIPE_CONTROL_CS_STALL) {
2946 /*
2947 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2948 *
2949 * "1 of the following must also be set (when CS stall is set):
2950 *
2951 * * Depth Cache Flush Enable ([0] of DW1)
2952 * * Stall at Pixel Scoreboard ([1] of DW1)
2953 * * Depth Stall ([13] of DW1)
2954 * * Post-Sync Operation ([13] of DW1)
2955 * * Render Target Cache Flush Enable ([12] of DW1)
2956 * * Notify Enable ([8] of DW1)"
2957 *
2958 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
2959 *
2960 * "One of the following must also be set (when CS stall is set):
2961 *
2962 * * Render Target Cache Flush Enable ([12] of DW1)
2963 * * Depth Cache Flush Enable ([0] of DW1)
2964 * * Stall at Pixel Scoreboard ([1] of DW1)
2965 * * Depth Stall ([13] of DW1)
2966 * * Post-Sync Operation ([13] of DW1)"
2967 */
2968 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
2969 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
2970 PIPE_CONTROL_STALL_AT_SCOREBOARD |
2971 PIPE_CONTROL_DEPTH_STALL;
2972
2973 /* post-sync op */
2974 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
2975 PIPE_CONTROL_WRITE_DEPTH_COUNT |
2976 PIPE_CONTROL_WRITE_TIMESTAMP;
2977
2978 if (dev->gen == ILO_GEN(6))
2979 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
2980
2981 assert(dw1 & bit_test);
2982 }
2983
2984 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
2985 /*
2986 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2987 *
2988 * "Following bits must be clear (when Depth Stall is set):
2989 *
2990 * * Render Target Cache Flush Enable ([12] of DW1)
2991 * * Depth Cache Flush Enable ([0] of DW1)"
2992 */
2993 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
2994 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
2995 }
2996
2997 ilo_cp_begin(cp, cmd_len);
2998 ilo_cp_write(cp, cmd | (cmd_len - 2));
2999 ilo_cp_write(cp, dw1);
3000 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
3001 ilo_cp_write(cp, 0);
3002 if (write_qword)
3003 ilo_cp_write(cp, 0);
3004 ilo_cp_end(cp);
3005 }
3006
3007 static void
3008 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
3009 const struct pipe_draw_info *info,
3010 bool rectlist,
3011 struct ilo_cp *cp)
3012 {
3013 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
3014 const uint8_t cmd_len = 6;
3015 const int prim = (rectlist) ?
3016 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
3017 const int vb_access = (info->indexed) ?
3018 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
3019 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
3020
3021 ILO_GPE_VALID_GEN(dev, 6, 6);
3022
3023 ilo_cp_begin(cp, cmd_len);
3024 ilo_cp_write(cp, cmd | (cmd_len - 2) |
3025 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
3026 vb_access);
3027 ilo_cp_write(cp, info->count);
3028 ilo_cp_write(cp, info->start);
3029 ilo_cp_write(cp, info->instance_count);
3030 ilo_cp_write(cp, info->start_instance);
3031 ilo_cp_write(cp, info->index_bias);
3032 ilo_cp_end(cp);
3033 }
3034
3035 static uint32_t
3036 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
3037 const struct ilo_shader **cs,
3038 uint32_t *sampler_state,
3039 int *num_samplers,
3040 uint32_t *binding_table_state,
3041 int *num_surfaces,
3042 int num_ids,
3043 struct ilo_cp *cp)
3044 {
3045 /*
3046 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
3047 *
3048 * "(Interface Descriptor Total Length) This field must have the same
3049 * alignment as the Interface Descriptor Data Start Address.
3050 *
3051 * It must be DQWord (32-byte) aligned..."
3052 *
3053 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
3054 *
3055 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
3056 * aligned address of the Interface Descriptor data."
3057 */
3058 const int state_align = 32 / 4;
3059 const int state_len = (32 / 4) * num_ids;
3060 uint32_t state_offset, *dw;
3061 int i;
3062
3063 ILO_GPE_VALID_GEN(dev, 6, 6);
3064
3065 dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
3066 state_len, state_align, &state_offset);
3067
3068 for (i = 0; i < num_ids; i++) {
3069 int curbe_read_len;
3070
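      /* the CURBE read length is in 256-bit (32-byte) units, rounded up */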
3071 curbe_read_len = (cs[i]->pcb.clip_state_size + 31) / 32;
3072
3073 dw[0] = cs[i]->cache_offset;
3074 dw[1] = 1 << 18; /* SPF */
3075 dw[2] = sampler_state[i] |
3076 (num_samplers[i] + 3) / 4 << 2;
3077 dw[3] = binding_table_state[i] |
3078 num_surfaces[i];
3079 dw[4] = curbe_read_len << 16 | /* CURBE Read Length */
3080 0; /* CURBE Read Offset */
3081 dw[5] = 0; /* Barrier ID */
3082 dw[6] = 0;
3083 dw[7] = 0;
3084
3085 dw += 8;
3086 }
3087
3088 return state_offset;
3089 }
3090
3091 static void
3092 viewport_get_guardband(const struct ilo_dev_info *dev,
3093 int center_x, int center_y,
3094 int *min_gbx, int *max_gbx,
3095 int *min_gby, int *max_gby)
3096 {
3097 /*
3098 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
3099 *
3100 * "Per-Device Guardband Extents
3101 *
3102 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
3103 * - Maximum Post-Clamp Delta (X or Y): 16K"
3104 *
3105 * "In addition, in order to be correctly rendered, objects must have a
3106 * screenspace bounding box not exceeding 8K in the X or Y direction.
3107 * This additional restriction must also be comprehended by software,
3108 * i.e., enforced by use of clipping."
3109 *
3110 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
3111 *
3112 * "Per-Device Guardband Extents
3113 *
3114 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
3115 * - Maximum Post-Clamp Delta (X or Y): N/A"
3116 *
3117 * "In addition, in order to be correctly rendered, objects must have a
3118 * screenspace bounding box not exceeding 8K in the X or Y direction.
3119 * This additional restriction must also be comprehended by software,
3120 * i.e., enforced by use of clipping."
3121 *
3122     * Combined, the bounding box of any object cannot exceed 8K in either
3123     * width or height.
3124     *
3125     * Below we set the guardband to a square of side length 8K, centered at
3126     * where the viewport is.  This makes sure all objects passing the GB test
3127     * are valid to the renderer, and those failing the XY clipping have a
3128     * better chance of passing the GB test.
3129 */
3130 const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
3131 const int half_len = 8192 / 2;
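   /*
    * For example, on GEN6 a viewport centered at x == 15000 is pulled back
    * to 16384 - 4096 == 12288, so that the 8K-wide guardband stays within
    * [-16K, 16K-1].
    */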
3132
3133 /* make sure the guardband is within the valid range */
3134 if (center_x - half_len < -max_extent)
3135 center_x = -max_extent + half_len;
3136 else if (center_x + half_len > max_extent - 1)
3137 center_x = max_extent - half_len;
3138
3139 if (center_y - half_len < -max_extent)
3140 center_y = -max_extent + half_len;
3141 else if (center_y + half_len > max_extent - 1)
3142 center_y = max_extent - half_len;
3143
3144    *min_gbx = center_x - half_len;
3145    *max_gbx = center_x + half_len;
3146    *min_gby = center_y - half_len;
3147    *max_gby = center_y + half_len;
3148 }
3149
3150 void
3151 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
3152 const struct pipe_viewport_state *state,
3153 struct ilo_viewport_cso *vp)
3154 {
3155 const float scale_x = fabs(state->scale[0]);
3156 const float scale_y = fabs(state->scale[1]);
3157 const float scale_z = fabs(state->scale[2]);
3158 int min_gbx, max_gbx, min_gby, max_gby;
3159
3160 ILO_GPE_VALID_GEN(dev, 6, 7);
3161
3162 viewport_get_guardband(dev,
3163 (int) state->translate[0],
3164 (int) state->translate[1],
3165 &min_gbx, &max_gbx, &min_gby, &max_gby);
3166
3167 /* matrix form */
3168 vp->m00 = state->scale[0];
3169 vp->m11 = state->scale[1];
3170 vp->m22 = state->scale[2];
3171 vp->m30 = state->translate[0];
3172 vp->m31 = state->translate[1];
3173 vp->m32 = state->translate[2];
3174
3175 /* guardband in NDC space */
3176 vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
3177 vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
3178 vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
3179 vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
3180
3181 /* viewport in screen space */
3182 vp->min_x = scale_x * -1.0f + state->translate[0];
3183 vp->max_x = scale_x * 1.0f + state->translate[0];
3184 vp->min_y = scale_y * -1.0f + state->translate[1];
3185 vp->max_y = scale_y * 1.0f + state->translate[1];
3186 vp->min_z = scale_z * -1.0f + state->translate[2];
3187 vp->max_z = scale_z * 1.0f + state->translate[2];
3188 }
3189
3190 static uint32_t
3191 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
3192 const struct ilo_viewport_cso *viewports,
3193 unsigned num_viewports,
3194 struct ilo_cp *cp)
3195 {
3196 const int state_align = 32 / 4;
3197 const int state_len = 8 * num_viewports;
3198 uint32_t state_offset, *dw;
3199 unsigned i;
3200
3201 ILO_GPE_VALID_GEN(dev, 6, 6);
3202
3203 /*
3204 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
3205 *
3206 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
3207 * stored as an array of up to 16 elements..."
3208 */
3209 assert(num_viewports && num_viewports <= 16);
3210
3211 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
3212 state_len, state_align, &state_offset);
3213
3214 for (i = 0; i < num_viewports; i++) {
3215 const struct ilo_viewport_cso *vp = &viewports[i];
3216
3217 dw[0] = fui(vp->m00);
3218 dw[1] = fui(vp->m11);
3219 dw[2] = fui(vp->m22);
3220 dw[3] = fui(vp->m30);
3221 dw[4] = fui(vp->m31);
3222 dw[5] = fui(vp->m32);
3223 dw[6] = 0;
3224 dw[7] = 0;
3225
3226 dw += 8;
3227 }
3228
3229 return state_offset;
3230 }
3231
3232 static uint32_t
3233 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
3234 const struct ilo_viewport_cso *viewports,
3235 unsigned num_viewports,
3236 struct ilo_cp *cp)
3237 {
3238 const int state_align = 32 / 4;
3239 const int state_len = 4 * num_viewports;
3240 uint32_t state_offset, *dw;
3241 unsigned i;
3242
3243 ILO_GPE_VALID_GEN(dev, 6, 6);
3244
3245 /*
3246 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
3247 *
3248 * "The viewport-related state is stored as an array of up to 16
3249 * elements..."
3250 */
3251 assert(num_viewports && num_viewports <= 16);
3252
3253 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
3254 state_len, state_align, &state_offset);
3255
3256 for (i = 0; i < num_viewports; i++) {
3257 const struct ilo_viewport_cso *vp = &viewports[i];
3258
3259 dw[0] = fui(vp->min_gbx);
3260 dw[1] = fui(vp->max_gbx);
3261 dw[2] = fui(vp->min_gby);
3262 dw[3] = fui(vp->max_gby);
3263
3264 dw += 4;
3265 }
3266
3267 return state_offset;
3268 }
3269
3270 static uint32_t
3271 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
3272 const struct ilo_viewport_cso *viewports,
3273 unsigned num_viewports,
3274 struct ilo_cp *cp)
3275 {
3276 const int state_align = 32 / 4;
3277 const int state_len = 2 * num_viewports;
3278 uint32_t state_offset, *dw;
3279 unsigned i;
3280
3281 ILO_GPE_VALID_GEN(dev, 6, 7);
3282
3283 /*
3284 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
3285 *
3286 * "The viewport state is stored as an array of up to 16 elements..."
3287 */
3288 assert(num_viewports && num_viewports <= 16);
3289
3290 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
3291 state_len, state_align, &state_offset);
3292
3293 for (i = 0; i < num_viewports; i++) {
3294 const struct ilo_viewport_cso *vp = &viewports[i];
3295
3296 dw[0] = fui(vp->min_z);
3297 dw[1] = fui(vp->max_z);
3298
3299 dw += 2;
3300 }
3301
3302 return state_offset;
3303 }
3304
3305 static uint32_t
3306 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
3307 const struct pipe_stencil_ref *stencil_ref,
3308 float alpha_ref,
3309 const struct pipe_blend_color *blend_color,
3310 struct ilo_cp *cp)
3311 {
3312 const int state_align = 64 / 4;
3313 const int state_len = 6;
3314 uint32_t state_offset, *dw;
3315
3316 ILO_GPE_VALID_GEN(dev, 6, 7);
3317
3318 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
3319 state_len, state_align, &state_offset);
3320
3321 dw[0] = stencil_ref->ref_value[0] << 24 |
3322 stencil_ref->ref_value[1] << 16 |
3323 BRW_ALPHATEST_FORMAT_UNORM8;
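   /* with BRW_ALPHATEST_FORMAT_UNORM8, the alpha reference is a UNORM8 value */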
3324 dw[1] = float_to_ubyte(alpha_ref);
3325 dw[2] = fui(blend_color->color[0]);
3326 dw[3] = fui(blend_color->color[1]);
3327 dw[4] = fui(blend_color->color[2]);
3328 dw[5] = fui(blend_color->color[3]);
3329
3330 return state_offset;
3331 }
3332
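/*
 * Return the blend factor to use when the destination alpha is forced to one:
 * DST_ALPHA becomes ONE, while INV_DST_ALPHA and SRC_ALPHA_SATURATE
 * (min(src_alpha, 1 - dst_alpha)) become ZERO.
 */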
3333 static int
3334 gen6_blend_factor_dst_alpha_forced_one(int factor)
3335 {
3336 switch (factor) {
3337 case BRW_BLENDFACTOR_DST_ALPHA:
3338 return BRW_BLENDFACTOR_ONE;
3339 case BRW_BLENDFACTOR_INV_DST_ALPHA:
3340 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
3341 return BRW_BLENDFACTOR_ZERO;
3342 default:
3343 return factor;
3344 }
3345 }
3346
3347 static uint32_t
3348 blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
3349 const struct pipe_rt_blend_state *rt,
3350 bool dst_alpha_forced_one)
3351 {
3352 int rgb_src, rgb_dst, a_src, a_dst;
3353 uint32_t dw;
3354
3355 if (!rt->blend_enable)
3356 return 0;
3357
3358 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
3359 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
3360 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
3361 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
3362
3363 if (dst_alpha_forced_one) {
3364 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
3365 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
3366 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
3367 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
3368 }
3369
3370 dw = 1 << 31 |
3371 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
3372 a_src << 20 |
3373 a_dst << 15 |
3374 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
3375 rgb_src << 5 |
3376 rgb_dst;
3377
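   /* enable independent alpha blend when the alpha function or factors differ */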
3378 if (rt->rgb_func != rt->alpha_func ||
3379 rgb_src != a_src || rgb_dst != a_dst)
3380 dw |= 1 << 30;
3381
3382 return dw;
3383 }
3384
3385 void
3386 ilo_gpe_init_blend(const struct ilo_dev_info *dev,
3387 const struct pipe_blend_state *state,
3388 struct ilo_blend_state *blend)
3389 {
3390 unsigned num_cso, i;
3391
3392 ILO_GPE_VALID_GEN(dev, 6, 7);
3393
3394 if (state->independent_blend_enable) {
3395 num_cso = Elements(blend->cso);
3396 }
3397 else {
3398 memset(blend->cso, 0, sizeof(blend->cso));
3399 num_cso = 1;
3400 }
3401
3402 blend->independent_blend_enable = state->independent_blend_enable;
3403 blend->alpha_to_coverage = state->alpha_to_coverage;
3404 blend->dual_blend = false;
3405
3406 for (i = 0; i < num_cso; i++) {
3407 const struct pipe_rt_blend_state *rt = &state->rt[i];
3408 struct ilo_blend_cso *cso = &blend->cso[i];
3409 bool dual_blend;
3410
3411 cso->payload[0] = 0;
3412 cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
3413 0x3;
3414
3415 if (!(rt->colormask & PIPE_MASK_A))
3416 cso->payload[1] |= 1 << 27;
3417 if (!(rt->colormask & PIPE_MASK_R))
3418 cso->payload[1] |= 1 << 26;
3419 if (!(rt->colormask & PIPE_MASK_G))
3420 cso->payload[1] |= 1 << 25;
3421 if (!(rt->colormask & PIPE_MASK_B))
3422 cso->payload[1] |= 1 << 24;
3423
3424 if (state->dither)
3425 cso->payload[1] |= 1 << 12;
3426
3427 /*
3428 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3429 *
3430 * "Color Buffer Blending and Logic Ops must not be enabled
3431 * simultaneously, or behavior is UNDEFINED."
3432 *
3433 * Since state->logicop_enable takes precedence over rt->blend_enable,
3434 * no special care is needed.
3435 */
3436 if (state->logicop_enable) {
3437 cso->dw_logicop = 1 << 22 |
3438 gen6_translate_pipe_logicop(state->logicop_func) << 18;
3439
3440 cso->dw_blend = 0;
3441 cso->dw_blend_dst_alpha_forced_one = 0;
3442
3443 dual_blend = false;
3444 }
3445 else {
3446 cso->dw_logicop = 0;
3447
3448 cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
3449 cso->dw_blend_dst_alpha_forced_one =
3450 blend_get_rt_blend_enable(dev, rt, true);
3451
3452 dual_blend = (rt->blend_enable &&
3453 util_blend_state_is_dual(state, i));
3454 }
3455
3456 cso->dw_alpha_mod = 0;
3457
3458 if (state->alpha_to_coverage) {
3459 cso->dw_alpha_mod |= 1 << 31;
3460
3461 if (dev->gen >= ILO_GEN(7))
3462 cso->dw_alpha_mod |= 1 << 29;
3463 }
3464
3465 /*
3466 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
3467 *
3468 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
3469 * must be disabled."
3470 */
3471 if (state->alpha_to_one && !dual_blend)
3472 cso->dw_alpha_mod |= 1 << 30;
3473
3474 if (dual_blend)
3475 blend->dual_blend = true;
3476 }
3477 }
3478
3479 static uint32_t
3480 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
3481 const struct ilo_blend_state *blend,
3482 const struct ilo_fb_state *fb,
3483 const struct pipe_alpha_state *alpha,
3484 struct ilo_cp *cp)
3485 {
3486 const int state_align = 64 / 4;
3487 int state_len;
3488 uint32_t state_offset, *dw;
3489 unsigned num_targets, i;
3490
3491 ILO_GPE_VALID_GEN(dev, 6, 7);
3492
3493 /*
3494 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
3495 *
3496 * "The blend state is stored as an array of up to 8 elements..."
3497 */
3498 num_targets = fb->state.nr_cbufs;
3499 assert(num_targets <= 8);
3500
3501 if (!num_targets) {
3502 if (!alpha->enabled)
3503 return 0;
3504 /* to be able to reference alpha func */
3505 num_targets = 1;
3506 }
3507
3508 state_len = 2 * num_targets;
3509
3510 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
3511 state_len, state_align, &state_offset);
3512
3513 for (i = 0; i < num_targets; i++) {
3514 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
3515 const struct ilo_blend_cso *cso = &blend->cso[idx];
3516 const int num_samples = fb->num_samples;
3517 const struct util_format_description *format_desc =
3518 (idx < fb->state.nr_cbufs) ?
3519 util_format_description(fb->state.cbufs[idx]->format) : NULL;
3520 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
3521
3522 rt_is_unorm = true;
3523 rt_is_pure_integer = false;
3524 rt_dst_alpha_forced_one = false;
3525
3526 if (format_desc) {
3527 int ch;
3528
3529 switch (format_desc->format) {
3530 case PIPE_FORMAT_B8G8R8X8_UNORM:
3531 /* force alpha to one when the HW format has alpha */
3532 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
3533 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
3534 rt_dst_alpha_forced_one = true;
3535 break;
3536 default:
3537 break;
3538 }
3539
3540 for (ch = 0; ch < 4; ch++) {
3541 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
3542 continue;
3543
3544 if (format_desc->channel[ch].pure_integer) {
3545 rt_is_unorm = false;
3546 rt_is_pure_integer = true;
3547 break;
3548 }
3549
3550 if (!format_desc->channel[ch].normalized ||
3551 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
3552 rt_is_unorm = false;
3553 }
3554 }
3555
3556 dw[0] = cso->payload[0];
3557 dw[1] = cso->payload[1];
3558
3559 if (!rt_is_pure_integer) {
3560 if (rt_dst_alpha_forced_one)
3561 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
3562 else
3563 dw[0] |= cso->dw_blend;
3564 }
3565
3566 /*
3567 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3568 *
3569 * "Logic Ops are only supported on *_UNORM surfaces (excluding
3570 * _SRGB variants), otherwise Logic Ops must be DISABLED."
3571 *
3572 * Since logicop is ignored for non-UNORM color buffers, no special care
3573 * is needed.
3574 */
3575 if (rt_is_unorm)
3576 dw[1] |= cso->dw_logicop;
3577
3578 /*
3579 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
3580 *
3581 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
3582 * Dither both must be disabled."
3583 *
3584        * There is no such limitation on GEN7, or for AlphaToOne.  But GL
3585        * requires them to have no effect when NumSamples = 1 anyway.
3586 */
3587 if (num_samples > 1)
3588 dw[1] |= cso->dw_alpha_mod;
3589
3590 /*
3591 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
3592 *
3593 * "Alpha Test can only be enabled if Pixel Shader outputs a float
3594 * alpha value."
3595 */
3596 if (alpha->enabled && !rt_is_pure_integer) {
3597 dw[1] |= 1 << 16 |
3598 gen6_translate_dsa_func(alpha->func) << 13;
3599 }
3600
3601 dw += 2;
3602 }
3603
3604 return state_offset;
3605 }
3606
3607 void
3608 ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
3609 const struct pipe_depth_stencil_alpha_state *state,
3610 struct ilo_dsa_state *dsa)
3611 {
3612 const struct pipe_depth_state *depth = &state->depth;
3613 const struct pipe_stencil_state *stencil0 = &state->stencil[0];
3614 const struct pipe_stencil_state *stencil1 = &state->stencil[1];
3615 uint32_t *dw;
3616
3617 ILO_GPE_VALID_GEN(dev, 6, 7);
3618
3619 /* copy alpha state for later use */
3620 dsa->alpha = state->alpha;
3621
3622 STATIC_ASSERT(Elements(dsa->payload) >= 3);
3623 dw = dsa->payload;
3624
3625 /*
3626 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
3627 *
3628 * "If the Depth Buffer is either undefined or does not have a surface
3629 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
3630 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
3631 *
3632 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
3633 *
3634 * "This field (Stencil Test Enable) cannot be enabled if
3635 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
3636 *
3637 * TODO We do not check these yet.
3638 */
3639 if (stencil0->enabled) {
3640 dw[0] = 1 << 31 |
3641 gen6_translate_dsa_func(stencil0->func) << 28 |
3642 gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
3643 gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
3644 gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
3645 if (stencil0->writemask)
3646 dw[0] |= 1 << 18;
3647
3648 dw[1] = stencil0->valuemask << 24 |
3649 stencil0->writemask << 16;
3650
3651 if (stencil1->enabled) {
3652 dw[0] |= 1 << 15 |
3653 gen6_translate_dsa_func(stencil1->func) << 12 |
3654 gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
3655 gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
3656 gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
3657 if (stencil1->writemask)
3658 dw[0] |= 1 << 18;
3659
3660 dw[1] |= stencil1->valuemask << 8 |
3661 stencil1->writemask;
3662 }
3663 }
3664 else {
3665 dw[0] = 0;
3666 dw[1] = 0;
3667 }
3668
3669 /*
3670 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
3671 *
3672 * "Enabling the Depth Test function without defining a Depth Buffer is
3673 * UNDEFINED."
3674 *
3675 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
3676 *
3677 * "A Depth Buffer must be defined before enabling writes to it, or
3678 * operation is UNDEFINED."
3679 *
3680 * TODO We do not check these yet.
3681 */
3682 dw[2] = depth->enabled << 31 |
3683 depth->writemask << 26;
3684 if (depth->enabled)
3685 dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
3686 else
3687 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
3688 }
3689
3690 static uint32_t
3691 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
3692 const struct ilo_dsa_state *dsa,
3693 struct ilo_cp *cp)
3694 {
3695 const int state_align = 64 / 4;
3696 const int state_len = 3;
3697 uint32_t state_offset, *dw;
3698
3699
3700 ILO_GPE_VALID_GEN(dev, 6, 7);
3701
3702 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
3703 state_len, state_align, &state_offset);
3704
3705 dw[0] = dsa->payload[0];
3706 dw[1] = dsa->payload[1];
3707 dw[2] = dsa->payload[2];
3708
3709 return state_offset;
3710 }
3711
3712 void
3713 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
3714 unsigned start_slot,
3715 unsigned num_states,
3716 const struct pipe_scissor_state *states,
3717 struct ilo_scissor_state *scissor)
3718 {
3719 unsigned i;
3720
3721 ILO_GPE_VALID_GEN(dev, 6, 7);
3722
3723 for (i = 0; i < num_states; i++) {
3724 uint16_t min_x, min_y, max_x, max_y;
3725
3726 /* both max and min are inclusive in SCISSOR_RECT */
3727 if (states[i].minx < states[i].maxx &&
3728 states[i].miny < states[i].maxy) {
3729 min_x = states[i].minx;
3730 min_y = states[i].miny;
3731 max_x = states[i].maxx - 1;
3732 max_y = states[i].maxy - 1;
3733 }
3734 else {
3735 /* we have to make min greater than max */
3736 min_x = 1;
3737 min_y = 1;
3738 max_x = 0;
3739 max_y = 0;
3740 }
3741
3742 scissor->payload[start_slot * 2 + 0] = min_y << 16 | min_x;
3743 scissor->payload[start_slot * 2 + 1] = max_y << 16 | max_x;
3744 start_slot++;
3745 }
3746 }
3747
3748 void
3749 ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
3750 struct ilo_scissor_state *scissor)
3751 {
3752 unsigned i;
3753
3754 for (i = 0; i < Elements(scissor->payload); i += 2) {
3755 scissor->payload[i + 0] = 1 << 16 | 1;
3756 scissor->payload[i + 1] = 0;
3757 }
3758 }
3759
3760 static uint32_t
3761 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
3762 const struct ilo_scissor_state *scissor,
3763 unsigned num_viewports,
3764 struct ilo_cp *cp)
3765 {
3766 const int state_align = 32 / 4;
3767 const int state_len = 2 * num_viewports;
3768 uint32_t state_offset, *dw;
3769
3770 ILO_GPE_VALID_GEN(dev, 6, 7);
3771
3772 /*
3773 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
3774 *
3775 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
3776 * stored as an array of up to 16 elements..."
3777 */
3778 assert(num_viewports && num_viewports <= 16);
3779
3780 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
3781 state_len, state_align, &state_offset);
3782
3783 memcpy(dw, scissor->payload, state_len * 4);
3784
3785 return state_offset;
3786 }
3787
3788 static uint32_t
3789 gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
3790 uint32_t *surface_states,
3791 int num_surface_states,
3792 struct ilo_cp *cp)
3793 {
3794 const int state_align = 32 / 4;
3795 const int state_len = num_surface_states;
3796 uint32_t state_offset, *dw;
3797
3798 ILO_GPE_VALID_GEN(dev, 6, 7);
3799
3800 /*
3801 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
3802 *
3803 * "It is stored as an array of up to 256 elements..."
3804 */
3805 assert(num_surface_states <= 256);
3806
3807 if (!num_surface_states)
3808 return 0;
3809
3810 dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
3811 state_len, state_align, &state_offset);
3812 memcpy(dw, surface_states,
3813 num_surface_states * sizeof(surface_states[0]));
3814
3815 return state_offset;
3816 }
3817
3818 void
3819 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
3820 unsigned width, unsigned height,
3821 unsigned depth, unsigned level,
3822 struct ilo_view_surface *surf)
3823 {
3824 uint32_t *dw;
3825
3826 ILO_GPE_VALID_GEN(dev, 6, 6);
3827
3828 /*
3829 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
3830 *
3831 * "A null surface will be used in instances where an actual surface is
3832 * not bound. When a write message is generated to a null surface, no
3833 * actual surface is written to. When a read message (including any
3834 * sampling engine message) is generated to a null surface, the result
3835 * is all zeros. Note that a null surface type is allowed to be used
3836  * with all messages, even if it is not specifically indicated as
3837 * supported. All of the remaining fields in surface state are ignored
3838 * for null surfaces, with the following exceptions:
3839 *
3840 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
3841 * depth buffer's corresponding state for all render target
3842 * surfaces, including null.
3843 * * Surface Format must be R8G8B8A8_UNORM."
3844 *
3845 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3846 *
3847 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
3848 * true"
3849 */
3850
3851 STATIC_ASSERT(Elements(surf->payload) >= 6);
3852 dw = surf->payload;
3853
3854 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
3855 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
3856
3857 dw[1] = 0;
3858
3859 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
3860 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
3861 level << BRW_SURFACE_LOD_SHIFT;
3862
3863 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
3864 BRW_SURFACE_TILED;
3865
3866 dw[4] = 0;
3867 dw[5] = 0;
3868
3869 surf->bo = NULL;
3870 }
3871
3872 void
3873 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
3874 const struct ilo_buffer *buf,
3875 unsigned offset, unsigned size,
3876 unsigned struct_size,
3877 enum pipe_format elem_format,
3878 bool is_rt, bool render_cache_rw,
3879 struct ilo_view_surface *surf)
3880 {
3881 const int elem_size = util_format_get_blocksize(elem_format);
3882 int width, height, depth, pitch;
3883 int surface_format, num_entries;
3884 uint32_t *dw;
3885
3886 ILO_GPE_VALID_GEN(dev, 6, 6);
3887
3888 /*
3889 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
3890 * structure in a buffer.
3891 */
3892
3893 surface_format = ilo_translate_color_format(elem_format);
3894
3895 num_entries = size / struct_size;
3896 /* see if there is enough space to fit another element */
3897 if (size % struct_size >= elem_size)
3898 num_entries++;
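/*
 * Editor's note: a worked example with made-up numbers.  For size = 100,
 * struct_size = 16, and a 4-byte elem_format, the division gives 6 whole
 * structures and the 4 leftover bytes still hold one more element, so
 * num_entries ends up being 7.
 */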
3899
3900 /*
3901 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3902 *
3903 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
3904 * Address) specifies the base address of first element of the
3905 * surface. The surface is interpreted as a simple array of that
3906 * single element type. The address must be naturally-aligned to the
3907 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
3908 * must be 16-byte aligned).
3909 *
3910 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
3911 * the base address of the first element of the surface, computed in
3912 * software by adding the surface base address to the byte offset of
3913 * the element in the buffer."
3914 */
3915 if (is_rt)
3916 assert(offset % elem_size == 0);
3917
3918 /*
3919 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
3920 *
3921 * "For buffer surfaces, the number of entries in the buffer ranges
3922 * from 1 to 2^27."
3923 */
3924 assert(num_entries >= 1 && num_entries <= 1 << 27);
3925
3926 /*
3927 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3928 *
3929 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
3930 * indicates the size of the structure."
3931 */
3932 pitch = struct_size;
3933
3934 pitch--;
3935 num_entries--;
3936 /* bits [6:0] */
3937 width = (num_entries & 0x0000007f);
3938 /* bits [19:7] */
3939 height = (num_entries & 0x000fff80) >> 7;
3940 /* bits [26:20] */
3941 depth = (num_entries & 0x07f00000) >> 20;
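/*
 * Editor's note: SURFTYPE_BUFFER has no real width/height/depth, so the
 * 27-bit (num_entries - 1) value is split across the three fields.  For
 * example, num_entries - 1 = 1000 gives width = 104, height = 7, and
 * depth = 0; the hardware reassembles 7 * 128 + 104 = 1000.
 */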
3942
3943 STATIC_ASSERT(Elements(surf->payload) >= 6);
3944 dw = surf->payload;
3945
3946 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
3947 surface_format << BRW_SURFACE_FORMAT_SHIFT;
3948 if (render_cache_rw)
3949 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
3950
3951 dw[1] = offset;
3952
3953 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
3954 width << BRW_SURFACE_WIDTH_SHIFT;
3955
3956 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
3957 pitch << BRW_SURFACE_PITCH_SHIFT;
3958
3959 dw[4] = 0;
3960 dw[5] = 0;
3961
3962 /* do not increment reference count */
3963 surf->bo = buf->bo;
3964 }
3965
3966 void
3967 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
3968 const struct ilo_texture *tex,
3969 enum pipe_format format,
3970 unsigned first_level,
3971 unsigned num_levels,
3972 unsigned first_layer,
3973 unsigned num_layers,
3974 bool is_rt, bool render_cache_rw,
3975 struct ilo_view_surface *surf)
3976 {
3977 int surface_type, surface_format;
3978 int width, height, depth, pitch, lod;
3979 unsigned layer_offset, x_offset, y_offset;
3980 uint32_t *dw;
3981
3982 ILO_GPE_VALID_GEN(dev, 6, 6);
3983
3984 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
3985 assert(surface_type != BRW_SURFACE_BUFFER);
3986
3987 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
3988 format = PIPE_FORMAT_Z32_FLOAT;
3989
3990 if (is_rt)
3991 surface_format = ilo_translate_render_format(format);
3992 else
3993 surface_format = ilo_translate_texture_format(format);
3994 assert(surface_format >= 0);
3995
3996 width = tex->base.width0;
3997 height = tex->base.height0;
3998 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
3999 tex->base.depth0 : num_layers;
4000 pitch = tex->bo_stride;
4001
4002 if (surface_type == BRW_SURFACE_CUBE) {
4003 /*
4004 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4005 *
4006 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
4007 * range of this field (Depth) is [0,84], indicating the number of
4008 * cube array elements (equal to the number of underlying 2D array
4009 * elements divided by 6). For other surfaces, this field must be
4010 * zero."
4011 *
4012 * When is_rt is true, we treat the texture as a 2D one to avoid the
4013 * restriction.
4014 */
4015 if (is_rt) {
4016 surface_type = BRW_SURFACE_2D;
4017 }
4018 else {
4019 assert(num_layers % 6 == 0);
4020 depth = num_layers / 6;
4021 }
4022 }
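/*
 * Editor's note: e.g., a plain cube map view (num_layers = 6) sampled by
 * the sampling engine gets depth = 1 here, and a hypothetical 12-layer
 * cube array view gets depth = 2.  A cube render target is instead
 * described as a 2D surface to sidestep the divide-by-6 restriction.
 */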
4023
4024 /* sanity check the size */
4025 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
4026 switch (surface_type) {
4027 case BRW_SURFACE_1D:
4028 assert(width <= 8192 && height == 1 && depth <= 512);
4029 assert(first_layer < 512 && num_layers <= 512);
4030 break;
4031 case BRW_SURFACE_2D:
4032 assert(width <= 8192 && height <= 8192 && depth <= 512);
4033 assert(first_layer < 512 && num_layers <= 512);
4034 break;
4035 case BRW_SURFACE_3D:
4036 assert(width <= 2048 && height <= 2048 && depth <= 2048);
4037 assert(first_layer < 2048 && num_layers <= 512);
4038 if (!is_rt)
4039 assert(first_layer == 0);
4040 break;
4041 case BRW_SURFACE_CUBE:
4042 assert(width <= 8192 && height <= 8192 && depth <= 85);
4043 assert(width == height);
4044 assert(first_layer < 512 && num_layers <= 512);
4045 if (is_rt)
4046 assert(first_layer == 0);
4047 break;
4048 default:
4049 assert(!"unexpected surface type");
4050 break;
4051 }
4052
4053 /* non-full array spacing is supported only on GEN7+ */
4054 assert(tex->array_spacing_full);
4055 /* non-interleaved samples are supported only on GEN7+ */
4056 if (tex->base.nr_samples > 1)
4057 assert(tex->interleaved);
4058
4059 if (is_rt) {
4060 /*
4061 * Compute the offset to the layer manually.
4062 *
4063 * For rendering, the hardware requires LOD to be the same for all
4064 * render targets and the depth buffer. We need to compute the offset
4065 * to the layer manually and always set LOD to 0.
4066 */
4067 if (true) {
4068 /* we lose the capability for layered rendering */
4069 assert(num_layers == 1);
4070
4071 layer_offset = ilo_texture_get_slice_offset(tex,
4072 first_level, first_layer, &x_offset, &y_offset);
4073
4074 assert(x_offset % 4 == 0);
4075 assert(y_offset % 2 == 0);
4076 x_offset /= 4;
4077 y_offset /= 2;
4078
4079 /* derive the size for the LOD */
4080 width = u_minify(width, first_level);
4081 height = u_minify(height, first_level);
4082 if (surface_type == BRW_SURFACE_3D)
4083 depth = u_minify(depth, first_level);
4084 else
4085 depth = 1;
4086
4087 first_level = 0;
4088 first_layer = 0;
4089 lod = 0;
4090 }
4091 else {
4092 layer_offset = 0;
4093 x_offset = 0;
4094 y_offset = 0;
4095 }
4096
4097 assert(num_levels == 1);
4098 lod = first_level;
4099 }
4100 else {
4101 layer_offset = 0;
4102 x_offset = 0;
4103 y_offset = 0;
4104
4105 lod = num_levels - 1;
4106 }
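/*
 * Editor's note on the two paths above: for render targets the slice is
 * addressed via layer_offset/x_offset/y_offset and LOD is forced to zero
 * (e.g., binding level 2 of a mipmapped texture minifies width/height with
 * u_minify(..., 2) and resets first_level and first_layer to 0), while for
 * sampling the whole view is exposed and lod holds num_levels - 1, the
 * number of mip levels available beyond first_level.
 */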
4107
4108 /*
4109 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
4110 *
4111 * "Linear render target surface base addresses must be element-size
4112 * aligned, for non-YUV surface formats, or a multiple of 2
4113 * element-sizes for YUV surface formats. Other linear surfaces have
4114 * no alignment requirements (byte alignment is sufficient.)"
4115 *
4116 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4117 *
4118 * "For linear render target surfaces, the pitch must be a multiple
4119 * of the element size for non-YUV surface formats. Pitch must be a
4120 * multiple of 2 * element size for YUV surface formats."
4121 *
4122 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
4123 *
4124 * "For linear surfaces, this field (X Offset) must be zero"
4125 */
4126 if (tex->tiling == INTEL_TILING_NONE) {
4127 if (is_rt) {
4128 const int elem_size = util_format_get_blocksize(format);
4129 assert(layer_offset % elem_size == 0);
4130 assert(pitch % elem_size == 0);
4131 }
4132
4133 assert(!x_offset);
4134 }
4135
4136 STATIC_ASSERT(Elements(surf->payload) >= 6);
4137 dw = surf->payload;
4138
4139 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
4140 surface_format << BRW_SURFACE_FORMAT_SHIFT |
4141 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
4142
4143 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
4144 dw[0] |= 1 << 9 |
4145 BRW_SURFACE_CUBEFACE_ENABLES;
4146 }
4147
4148 if (render_cache_rw)
4149 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
4150
4151 dw[1] = layer_offset;
4152
4153 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
4154 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
4155 lod << BRW_SURFACE_LOD_SHIFT;
4156
4157 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
4158 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
4159 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
4160
4161 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
4162 first_layer << 17 |
4163 (num_layers - 1) << 8 |
4164 ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
4165 BRW_SURFACE_MULTISAMPLECOUNT_1);
4166
4167 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
4168 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
4169 if (tex->valign_4)
4170 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
4171
4172 /* do not increment reference count */
4173 surf->bo = tex->bo;
4174 }
4175
4176 static uint32_t
4177 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
4178 const struct ilo_view_surface *surf,
4179 bool for_render,
4180 struct ilo_cp *cp)
4181 {
4182 const int state_align = 32 / 4;
4183 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
4184 uint32_t state_offset;
4185 uint32_t read_domains, write_domain;
4186
4187 ILO_GPE_VALID_GEN(dev, 6, 7);
4188
4189 if (for_render) {
4190 read_domains = INTEL_DOMAIN_RENDER;
4191 write_domain = INTEL_DOMAIN_RENDER;
4192 }
4193 else {
4194 read_domains = INTEL_DOMAIN_SAMPLER;
4195 write_domain = 0;
4196 }
4197
4198 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
4199
4200 STATIC_ASSERT(Elements(surf->payload) >= 8);
4201
4202 ilo_cp_write(cp, surf->payload[0]);
4203 ilo_cp_write_bo(cp, surf->payload[1],
4204 surf->bo, read_domains, write_domain);
4205 ilo_cp_write(cp, surf->payload[2]);
4206 ilo_cp_write(cp, surf->payload[3]);
4207 ilo_cp_write(cp, surf->payload[4]);
4208 ilo_cp_write(cp, surf->payload[5]);
4209
4210 if (dev->gen >= ILO_GEN(7)) {
4211 ilo_cp_write(cp, surf->payload[6]);
4212 ilo_cp_write(cp, surf->payload[7]);
4213 }
4214
4215 ilo_cp_end(cp);
4216
4217 return state_offset;
4218 }
4219
4220 static uint32_t
4221 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
4222 const struct pipe_stream_output_target *so,
4223 const struct pipe_stream_output_info *so_info,
4224 int so_index,
4225 struct ilo_cp *cp)
4226 {
4227 struct ilo_buffer *buf = ilo_buffer(so->buffer);
4228 unsigned bo_offset, struct_size;
4229 enum pipe_format elem_format;
4230 struct ilo_view_surface surf;
4231
4232 ILO_GPE_VALID_GEN(dev, 6, 6);
4233
4234 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
4235 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
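/*
 * Editor's note: dst_offset and stride are in DWords, hence the "* 4" to
 * convert to bytes.  With hypothetical values dst_offset = 2 and
 * stride = 8, the surface starts 8 bytes past buffer_offset and each
 * vertex written advances by a 32-byte structure.
 */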
4236
4237 switch (so_info->output[so_index].num_components) {
4238 case 1:
4239 elem_format = PIPE_FORMAT_R32_FLOAT;
4240 break;
4241 case 2:
4242 elem_format = PIPE_FORMAT_R32G32_FLOAT;
4243 break;
4244 case 3:
4245 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
4246 break;
4247 case 4:
4248 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
4249 break;
4250 default:
4251 assert(!"unexpected SO components length");
4252 elem_format = PIPE_FORMAT_R32_FLOAT;
4253 break;
4254 }
4255
4256 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
4257 struct_size, elem_format, false, true, &surf);
4258
4259 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
4260 }
4261
4262 static void
4263 sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
4264 const union pipe_color_union *color,
4265 uint32_t *dw, int num_dwords)
4266 {
4267 float rgba[4] = {
4268 color->f[0], color->f[1], color->f[2], color->f[3],
4269 };
4270
4271 ILO_GPE_VALID_GEN(dev, 6, 6);
4272
4273 assert(num_dwords >= 12);
4274
4275 /*
4276 * This state is not documented in the Sandy Bridge PRM, but in the
4277 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
4278 */
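/*
 * Editor's note: layout produced below, as inferred from this function
 * rather than a PRM table:
 *
 *   DW0     UNORM8   RGBA
 *   DW1-4   IEEE_FP  R, G, B, A
 *   DW5-6   FLOAT_16 RG, BA
 *   DW7-8   UNORM16  RG, BA
 *   DW9-10  SNORM16  RG, BA
 *   DW11    SNORM8   RGBA
 */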
4279
4280 /* IEEE_FP */
4281 dw[1] = fui(rgba[0]);
4282 dw[2] = fui(rgba[1]);
4283 dw[3] = fui(rgba[2]);
4284 dw[4] = fui(rgba[3]);
4285
4286 /* FLOAT_16 */
4287 dw[5] = util_float_to_half(rgba[0]) |
4288 util_float_to_half(rgba[1]) << 16;
4289 dw[6] = util_float_to_half(rgba[2]) |
4290 util_float_to_half(rgba[3]) << 16;
4291
4292 /* clamp to [-1.0f, 1.0f] */
4293 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
4294 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
4295 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
4296 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
4297
4298 /* SNORM16 */
4299 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
4300 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
4301 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
4302 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
4303
4304 /* SNORM8 */
4305 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
4306 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
4307 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
4308 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
4309
4310 /* clamp to [0.0f, 1.0f] */
4311 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
4312 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
4313 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
4314 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
4315
4316 /* UNORM8 */
4317 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
4318 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
4319 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
4320 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
4321
4322 /* UNORM16 */
4323 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
4324 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
4325 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
4326 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
4327 }
4328
4329 void
4330 ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
4331 const struct pipe_sampler_state *state,
4332 struct ilo_sampler_cso *sampler)
4333 {
4334 int mip_filter, min_filter, mag_filter, max_aniso;
4335 int lod_bias, max_lod, min_lod;
4336 int wrap_s, wrap_t, wrap_r, wrap_cube;
4337 bool clamp_is_to_edge;
4338 uint32_t dw0, dw1, dw3;
4339
4340 ILO_GPE_VALID_GEN(dev, 6, 7);
4341
4342 memset(sampler, 0, sizeof(*sampler));
4343
4344 mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
4345 min_filter = gen6_translate_tex_filter(state->min_img_filter);
4346 mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
4347
4348 sampler->anisotropic = state->max_anisotropy;
4349
4350 if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
4351 max_aniso = state->max_anisotropy / 2 - 1;
4352 else if (state->max_anisotropy > 16)
4353 max_aniso = BRW_ANISORATIO_16;
4354 else
4355 max_aniso = BRW_ANISORATIO_2;
4356
4357 /*
4358 *
4359 * Here is how the hardware calculates per-pixel LOD, from my reading of the
4360 * PRMs:
4361 *
4362 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
4363 * other ways. The number of texels is measured using level
4364 * SurfMinLod.
4365 * 2) Bias is added to LOD.
4366 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
4367 * compared with Base to determine whether magnification or
4368 * minification is needed. (if preclamp is disabled, LOD is compared
4369 * with Base before clamping)
4370 * 4) If magnification is needed, or no mipmapping is requested, LOD is
4371 * set to floor(MinLod).
4372 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
4373 *
4374 * With the Gallium interface, Base is always zero and
4375 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
4376 */
4377 if (dev->gen >= ILO_GEN(7)) {
4378 const float scale = 256.0f;
4379
4380 /* [-16.0, 16.0) in S4.8 */
4381 lod_bias = (int)
4382 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
4383 lod_bias &= 0x1fff;
4384
4385 /* [0.0, 14.0] in U4.8 */
4386 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
4387 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
4388 }
4389 else {
4390 const float scale = 64.0f;
4391
4392 /* [-16.0, 16.0) in S4.6 */
4393 lod_bias = (int)
4394 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
4395 lod_bias &= 0x7ff;
4396
4397 /* [0.0, 13.0] in U4.6 */
4398 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
4399 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
4400 }
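/*
 * Editor's note: a quick check of the fixed-point encodings with made-up
 * values.  On GEN7, lod_bias = -0.5f becomes (int) (-0.5f * 256.0f) &
 * 0x1fff = 0x1f80 (S4.8, two's complement) and max_lod = 11.0f becomes
 * 11 * 256 = 2816 (U4.8).  On GEN6 the same bias is -32 & 0x7ff = 0x7e0
 * (S4.6) and max_lod is 11 * 64 = 704 (U4.6).
 */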
4401
4402 /*
4403 * We want LOD to be clamped when determining magnification/minification,
4404 * and to be set to zero for magnification or when mipmapping is disabled.
4405 * The hardware would set LOD to floor(MinLod) and that is a problem when
4406 * MinLod is greater than or equal to 1.0f.
4407 *
4408 * With Base being zero, it is always minification when MinLod is non-zero.
4409 * To achieve our goal, we just need to set MinLod to zero and set
4410 * MagFilter to MinFilter when mipmapping is disabled.
4411 */
4412 if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
4413 min_lod = 0;
4414 mag_filter = min_filter;
4415 }
4416
4417 /*
4418 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
4419 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
4420 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
4421 * texture coordinates to [0.0, 1.0].
4422 *
4423 * The clamping will be taken care of in the shaders. There are two
4424 * filters here, but let the minification filter have the say.
4425 */
4426 clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
4427 if (!clamp_is_to_edge) {
4428 sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
4429 sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
4430 sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
4431 }
4432
4433 /* determine wrap s/t/r */
4434 wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
4435 wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
4436 wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);
4437
4438 /*
4439 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
4440 *
4441 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
4442 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
4443 * must have the same Address Control mode."
4444 *
4445 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
4446 *
4447 * "This field (Cube Surface Control Mode) must be set to
4448 * CUBECTRLMODE_PROGRAMMED"
4449 *
4450 * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
4451 * map filtering.
4452 */
4453 if (state->seamless_cube_map &&
4454 (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
4455 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
4456 wrap_cube = BRW_TEXCOORDMODE_CUBE;
4457 }
4458 else {
4459 wrap_cube = BRW_TEXCOORDMODE_CLAMP;
4460 }
4461
4462 if (!state->normalized_coords) {
4463 /*
4464 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
4465 *
4466 * "The following state must be set as indicated if this field
4467 * (Non-normalized Coordinate Enable) is enabled:
4468 *
4469 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
4470 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
4471 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
4472 * - Mag Mode Filter must be MAPFILTER_NEAREST or
4473 * MAPFILTER_LINEAR.
4474 * - Min Mode Filter must be MAPFILTER_NEAREST or
4475 * MAPFILTER_LINEAR.
4476 * - Mip Mode Filter must be MIPFILTER_NONE.
4477 * - Min LOD must be 0.
4478 * - Max LOD must be 0.
4479 * - MIP Count must be 0.
4480 * - Surface Min LOD must be 0.
4481 * - Texture LOD Bias must be 0."
4482 */
4483 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
4484 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
4485 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
4486 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
4487 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
4488 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
4489
4490 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
4491 mag_filter == BRW_MAPFILTER_LINEAR);
4492 assert(min_filter == BRW_MAPFILTER_NEAREST ||
4493 min_filter == BRW_MAPFILTER_LINEAR);
4494
4495 /* work around a bug in util_blitter */
4496 mip_filter = BRW_MIPFILTER_NONE;
4497
4498 assert(mip_filter == BRW_MIPFILTER_NONE);
4499 }
4500
4501 if (dev->gen >= ILO_GEN(7)) {
4502 dw0 = 1 << 28 |
4503 mip_filter << 20 |
4504 lod_bias << 1;
4505
4506 sampler->dw_filter = mag_filter << 17 |
4507 min_filter << 14;
4508
4509 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
4510 BRW_MAPFILTER_ANISOTROPIC << 14 |
4511 1;
4512
4513 dw1 = min_lod << 20 |
4514 max_lod << 8;
4515
4516 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
4517 dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
4518
4519 dw3 = max_aniso << 19;
4520
4521 /* round the coordinates for linear filtering */
4522 if (min_filter != BRW_MAPFILTER_NEAREST) {
4523 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4524 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4525 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4526 }
4527 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4528 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4529 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4530 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4531 }
4532
4533 if (!state->normalized_coords)
4534 dw3 |= 1 << 10;
4535
4536 sampler->dw_wrap = wrap_s << 6 |
4537 wrap_t << 3 |
4538 wrap_r;
4539
4540 /*
4541 * As noted in the classic i965 driver, the HW may still reference
4542 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
4543 * mode.
4544 */
4545 sampler->dw_wrap_1d = wrap_s << 6 |
4546 BRW_TEXCOORDMODE_WRAP << 3 |
4547 BRW_TEXCOORDMODE_WRAP;
4548
4549 sampler->dw_wrap_cube = wrap_cube << 6 |
4550 wrap_cube << 3 |
4551 wrap_cube;
4552
4553 STATIC_ASSERT(Elements(sampler->payload) >= 7);
4554
4555 sampler->payload[0] = dw0;
4556 sampler->payload[1] = dw1;
4557 sampler->payload[2] = dw3;
4558
4559 memcpy(&sampler->payload[3],
4560 state->border_color.ui, sizeof(state->border_color.ui));
4561 }
4562 else {
4563 dw0 = 1 << 28 |
4564 mip_filter << 20 |
4565 lod_bias << 3;
4566
4567 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
4568 dw0 |= gen6_translate_shadow_func(state->compare_func);
4569
4570 sampler->dw_filter = (min_filter != mag_filter) << 27 |
4571 mag_filter << 17 |
4572 min_filter << 14;
4573
4574 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
4575 BRW_MAPFILTER_ANISOTROPIC << 14;
4576
4577 dw1 = min_lod << 22 |
4578 max_lod << 12;
4579
4580 sampler->dw_wrap = wrap_s << 6 |
4581 wrap_t << 3 |
4582 wrap_r;
4583
4584 sampler->dw_wrap_1d = wrap_s << 6 |
4585 BRW_TEXCOORDMODE_WRAP << 3 |
4586 BRW_TEXCOORDMODE_WRAP;
4587
4588 sampler->dw_wrap_cube = wrap_cube << 6 |
4589 wrap_cube << 3 |
4590 wrap_cube;
4591
4592 dw3 = max_aniso << 19;
4593
4594 /* round the coordinates for linear filtering */
4595 if (min_filter != BRW_MAPFILTER_NEAREST) {
4596 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4597 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4598 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4599 }
4600 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4601 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4602 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4603 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4604 }
4605
4606 if (!state->normalized_coords)
4607 dw3 |= 1;
4608
4609 STATIC_ASSERT(Elements(sampler->payload) >= 15);
4610
4611 sampler->payload[0] = dw0;
4612 sampler->payload[1] = dw1;
4613 sampler->payload[2] = dw3;
4614
4615 sampler_init_border_color_gen6(dev,
4616 &state->border_color, &sampler->payload[3], 12);
4617 }
4618 }
4619
4620 static uint32_t
4621 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
4622 const struct ilo_sampler_cso * const *samplers,
4623 const struct pipe_sampler_view * const *views,
4624 const uint32_t *sampler_border_colors,
4625 int num_samplers,
4626 struct ilo_cp *cp)
4627 {
4628 const int state_align = 32 / 4;
4629 const int state_len = 4 * num_samplers;
4630 uint32_t state_offset, *dw;
4631 int i;
4632
4633 ILO_GPE_VALID_GEN(dev, 6, 7);
4634
4635 /*
4636 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
4637 *
4638 * "The sampler state is stored as an array of up to 16 elements..."
4639 */
4640 assert(num_samplers <= 16);
4641
4642 if (!num_samplers)
4643 return 0;
4644
4645 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
4646 state_len, state_align, &state_offset);
4647
4648 for (i = 0; i < num_samplers; i++) {
4649 const struct ilo_sampler_cso *sampler = samplers[i];
4650 const struct pipe_sampler_view *view = views[i];
4651 const uint32_t border_color = sampler_border_colors[i];
4652 uint32_t dw_filter, dw_wrap;
4653
4654 /* there may be holes */
4655 if (!sampler || !view) {
4656 /* disabled sampler */
4657 dw[0] = 1 << 31;
4658 dw[1] = 0;
4659 dw[2] = 0;
4660 dw[3] = 0;
4661 dw += 4;
4662
4663 continue;
4664 }
4665
4666 /* determine filter and wrap modes */
4667 switch (view->texture->target) {
4668 case PIPE_TEXTURE_1D:
4669 dw_filter = (sampler->anisotropic) ?
4670 sampler->dw_filter_aniso : sampler->dw_filter;
4671 dw_wrap = sampler->dw_wrap_1d;
4672 break;
4673 case PIPE_TEXTURE_3D:
4674 /*
4675 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
4676 *
4677 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
4678 * surfaces of type SURFTYPE_3D."
4679 */
4680 dw_filter = sampler->dw_filter;
4681 dw_wrap = sampler->dw_wrap;
4682 break;
4683 case PIPE_TEXTURE_CUBE:
4684 dw_filter = (sampler->anisotropic) ?
4685 sampler->dw_filter_aniso : sampler->dw_filter;
4686 dw_wrap = sampler->dw_wrap_cube;
4687 break;
4688 default:
4689 dw_filter = (sampler->anisotropic) ?
4690 sampler->dw_filter_aniso : sampler->dw_filter;
4691 dw_wrap = sampler->dw_wrap;
4692 break;
4693 }
4694
4695 dw[0] = sampler->payload[0];
4696 dw[1] = sampler->payload[1];
4697 assert(!(border_color & 0x1f));
4698 dw[2] = border_color;
4699 dw[3] = sampler->payload[2];
4700
4701 dw[0] |= dw_filter;
4702
4703 if (dev->gen >= ILO_GEN(7)) {
4704 dw[3] |= dw_wrap;
4705 }
4706 else {
4707 /*
4708 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
4709 *
4710 * "[DevSNB] Errata: Incorrect behavior is observed in cases
4711 * where the min and mag mode filters are different and
4712 * SurfMinLOD is nonzero. The determination of MagMode uses the
4713 * following equation instead of the one in the above
4714 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
4715 *
4716 * As a way to work around that, we set Base to
4717 * view->u.tex.first_level.
4718 */
4719 dw[0] |= view->u.tex.first_level << 22;
4720
4721 dw[1] |= dw_wrap;
4722 }
4723
4724 dw += 4;
4725 }
4726
4727 return state_offset;
4728 }
4729
4730 static uint32_t
4731 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
4732 const struct ilo_sampler_cso *sampler,
4733 struct ilo_cp *cp)
4734 {
4735 const int state_align = 32 / 4;
4736 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
4737 uint32_t state_offset, *dw;
4738
4739 ILO_GPE_VALID_GEN(dev, 6, 7);
4740
4741 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
4742 state_len, state_align, &state_offset);
4743
4744 memcpy(dw, &sampler->payload[3], state_len * 4);
4745
4746 return state_offset;
4747 }
4748
4749 static uint32_t
4750 gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
4751 int size, void **pcb,
4752 struct ilo_cp *cp)
4753 {
4754 /*
4755 * All VS, GS, FS, and CS push constant buffers must be aligned
4756 * to 32 bytes, and their sizes are specified in 256-bit units.
4757 */
4758 const int state_align = 32 / 4;
4759 const int state_len = align(size, 32) / 4;
4760 uint32_t state_offset;
4761 char *buf;
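/*
 * Editor's note: e.g., a 20-byte constant block is padded up to 32 bytes
 * (state_len = 8 DWords) and the 12 trailing bytes are zeroed below.
 */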
4762
4763 ILO_GPE_VALID_GEN(dev, 6, 7);
4764
4765 buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
4766 state_len, state_align, &state_offset);
4767
4768 /* zero out the unused range */
4769 if (size < state_len * 4)
4770 memset(&buf[size], 0, state_len * 4 - size);
4771
4772 if (pcb)
4773 *pcb = buf;
4774
4775 return state_offset;
4776 }
4777
4778 static int
4779 gen6_estimate_command_size(const struct ilo_dev_info *dev,
4780 enum ilo_gpe_gen6_command cmd,
4781 int arg)
4782 {
4783 static const struct {
4784 int header;
4785 int body;
4786 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
4787 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
4788 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
4789 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
4790 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
4791 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
4792 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
4793 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
4794 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
4795 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
4796 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
4797 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
4798 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
4799 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
4800 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
4801 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
4802 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
4803 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
4804 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
4805 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
4806 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
4807 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
4808 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
4809 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
4810 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
4811 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
4812 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
4813 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
4814 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
4815 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
4816 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
4817 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
4818 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
4819 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
4820 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
4821 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
4822 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
4823 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
4824 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
4825 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
4826 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
4827 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
4828 };
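/*
 * Editor's note: the estimate is simply header + body * count.  For
 * example, 3DSTATE_VERTEX_BUFFERS with arg = 3 buffers is estimated at
 * 1 + 4 * 3 = 13 DWords, and a command with arg = 0 contributes nothing.
 */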
4829 const int header = gen6_command_size_table[cmd].header;
4830 const int body = gen6_command_size_table[cmd].body;
4831 const int count = arg;
4832
4833 ILO_GPE_VALID_GEN(dev, 6, 6);
4834 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
4835
4836 return (likely(count)) ? header + body * count : 0;
4837 }
4838
4839 static int
4840 gen6_estimate_state_size(const struct ilo_dev_info *dev,
4841 enum ilo_gpe_gen6_state state,
4842 int arg)
4843 {
4844 static const struct {
4845 int alignment;
4846 int body;
4847 bool is_array;
4848 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
4849 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
4850 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
4851 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
4852 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
4853 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
4854 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
4855 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
4856 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
4857 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
4858 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
4859 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
4860 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
4861 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
4862 };
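/*
 * Editor's note: for array-like states the estimate is (alignment - 1) +
 * body * count, so e.g. 4 SAMPLER_STATE elements give 7 + 4 * 4 = 23
 * DWords.  Non-array states additionally realign each extra copy, as
 * handled below.
 */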
4863 const int alignment = gen6_state_size_table[state].alignment;
4864 const int body = gen6_state_size_table[state].body;
4865 const bool is_array = gen6_state_size_table[state].is_array;
4866 const int count = arg;
4867 int estimate;
4868
4869 ILO_GPE_VALID_GEN(dev, 6, 6);
4870 assert(state < ILO_GPE_GEN6_STATE_COUNT);
4871
4872 if (likely(count)) {
4873 if (is_array) {
4874 estimate = (alignment - 1) + body * count;
4875 }
4876 else {
4877 estimate = (alignment - 1) + body;
4878 /* all states are aligned */
4879 if (count > 1)
4880 estimate += util_align_npot(body, alignment) * (count - 1);
4881 }
4882 }
4883 else {
4884 estimate = 0;
4885 }
4886
4887 return estimate;
4888 }
4889
4890 static const struct ilo_gpe_gen6 gen6_gpe = {
4891 .estimate_command_size = gen6_estimate_command_size,
4892 .estimate_state_size = gen6_estimate_state_size,
4893
4894 #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
4895 GEN6_SET(STATE_BASE_ADDRESS),
4896 GEN6_SET(STATE_SIP),
4897 GEN6_SET(3DSTATE_VF_STATISTICS),
4898 GEN6_SET(PIPELINE_SELECT),
4899 GEN6_SET(MEDIA_VFE_STATE),
4900 GEN6_SET(MEDIA_CURBE_LOAD),
4901 GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
4902 GEN6_SET(MEDIA_GATEWAY_STATE),
4903 GEN6_SET(MEDIA_STATE_FLUSH),
4904 GEN6_SET(MEDIA_OBJECT_WALKER),
4905 GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
4906 GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
4907 GEN6_SET(3DSTATE_URB),
4908 GEN6_SET(3DSTATE_VERTEX_BUFFERS),
4909 GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
4910 GEN6_SET(3DSTATE_INDEX_BUFFER),
4911 GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
4912 GEN6_SET(3DSTATE_CC_STATE_POINTERS),
4913 GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
4914 GEN6_SET(3DSTATE_VS),
4915 GEN6_SET(3DSTATE_GS),
4916 GEN6_SET(3DSTATE_CLIP),
4917 GEN6_SET(3DSTATE_SF),
4918 GEN6_SET(3DSTATE_WM),
4919 GEN6_SET(3DSTATE_CONSTANT_VS),
4920 GEN6_SET(3DSTATE_CONSTANT_GS),
4921 GEN6_SET(3DSTATE_CONSTANT_PS),
4922 GEN6_SET(3DSTATE_SAMPLE_MASK),
4923 GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
4924 GEN6_SET(3DSTATE_DEPTH_BUFFER),
4925 GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
4926 GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
4927 GEN6_SET(3DSTATE_LINE_STIPPLE),
4928 GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
4929 GEN6_SET(3DSTATE_GS_SVB_INDEX),
4930 GEN6_SET(3DSTATE_MULTISAMPLE),
4931 GEN6_SET(3DSTATE_STENCIL_BUFFER),
4932 GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
4933 GEN6_SET(3DSTATE_CLEAR_PARAMS),
4934 GEN6_SET(PIPE_CONTROL),
4935 GEN6_SET(3DPRIMITIVE),
4936 GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
4937 GEN6_SET(SF_VIEWPORT),
4938 GEN6_SET(CLIP_VIEWPORT),
4939 GEN6_SET(CC_VIEWPORT),
4940 GEN6_SET(COLOR_CALC_STATE),
4941 GEN6_SET(BLEND_STATE),
4942 GEN6_SET(DEPTH_STENCIL_STATE),
4943 GEN6_SET(SCISSOR_RECT),
4944 GEN6_SET(BINDING_TABLE_STATE),
4945 GEN6_SET(SURFACE_STATE),
4946 GEN6_SET(so_SURFACE_STATE),
4947 GEN6_SET(SAMPLER_STATE),
4948 GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
4949 GEN6_SET(push_constant_buffer),
4950 #undef GEN6_SET
4951 };
4952
4953 const struct ilo_gpe_gen6 *
4954 ilo_gpe_gen6_get(void)
4955 {
4956 return &gen6_gpe;
4957 }