1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
32
33 #include "ilo_context.h"
34 #include "ilo_cp.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
40
41 /**
42 * Translate winsys tiling to hardware tiling.
43 */
44 int
45 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
46 {
47 switch (tiling) {
48 case INTEL_TILING_NONE:
49 return 0;
50 case INTEL_TILING_X:
51 return BRW_SURFACE_TILED;
52 case INTEL_TILING_Y:
53 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
54 default:
55 assert(!"unknown tiling");
56 return 0;
57 }
58 }
59
60 /**
61 * Translate a pipe primitive type to the matching hardware primitive type.
62 */
63 int
64 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
65 {
66 static const int prim_mapping[PIPE_PRIM_MAX] = {
67 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
68 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
69 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
70 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
71 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
72 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
73 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
74 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
75 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
76 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
77 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
78 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
79 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
80 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
81 };
82
83 assert(prim_mapping[prim]);
84
85 return prim_mapping[prim];
86 }
87
88 /**
89 * Translate a pipe texture target to the matching hardware surface type.
90 */
91 int
92 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
93 {
94 switch (target) {
95 case PIPE_BUFFER:
96 return BRW_SURFACE_BUFFER;
97 case PIPE_TEXTURE_1D:
98 case PIPE_TEXTURE_1D_ARRAY:
99 return BRW_SURFACE_1D;
100 case PIPE_TEXTURE_2D:
101 case PIPE_TEXTURE_RECT:
102 case PIPE_TEXTURE_2D_ARRAY:
103 return BRW_SURFACE_2D;
104 case PIPE_TEXTURE_3D:
105 return BRW_SURFACE_3D;
106 case PIPE_TEXTURE_CUBE:
107 case PIPE_TEXTURE_CUBE_ARRAY:
108 return BRW_SURFACE_CUBE;
109 default:
110 assert(!"unknown texture target");
111 return BRW_SURFACE_BUFFER;
112 }
113 }
114
115 /**
116 * Translate a depth/stencil pipe format to the matching hardware
117 * format. Return -1 on errors.
118 */
119 static int
120 gen6_translate_depth_format(enum pipe_format format)
121 {
122 switch (format) {
123 case PIPE_FORMAT_Z16_UNORM:
124 return BRW_DEPTHFORMAT_D16_UNORM;
125 case PIPE_FORMAT_Z32_FLOAT:
126 return BRW_DEPTHFORMAT_D32_FLOAT;
127 case PIPE_FORMAT_Z24X8_UNORM:
128 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
130 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
132 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
133 default:
134 return -1;
135 }
136 }
137
138 /**
139 * Translate a pipe logicop to the matching hardware logicop.
140 */
141 static int
142 gen6_translate_pipe_logicop(unsigned logicop)
143 {
144 switch (logicop) {
145 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
146 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
147 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
148 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
149 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
150 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
151 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
152 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
153 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
154 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
155 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
156 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
157 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
158 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
159 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
160 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
161 default:
162 assert(!"unknown logicop function");
163 return BRW_LOGICOPFUNCTION_CLEAR;
164 }
165 }
166
167 /**
168 * Translate a pipe blend function to the matching hardware blend function.
169 */
170 static int
171 gen6_translate_pipe_blend(unsigned blend)
172 {
173 switch (blend) {
174 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
175 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
176 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
177 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
178 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
179 default:
180 assert(!"unknown blend function");
181 return BRW_BLENDFUNCTION_ADD;
182 }
183 }
184
185 /**
186 * Translate a pipe blend factor to the matching hardware blend factor.
187 */
188 static int
189 gen6_translate_pipe_blendfactor(unsigned blendfactor)
190 {
191 switch (blendfactor) {
192 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
193 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
194 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
195 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
196 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
198 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
199 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
200 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
201 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
202 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
203 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
204 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
205 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
206 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
207 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
208 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
209 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
210 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
211 default:
212 assert(!"unknown blend factor");
213 return BRW_BLENDFACTOR_ONE;
214 }
215 }
216
217 /**
218 * Translate a pipe stencil op to the matching hardware stencil op.
219 */
220 static int
221 gen6_translate_pipe_stencil_op(unsigned stencil_op)
222 {
223 switch (stencil_op) {
224 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
225 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
226 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
227 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
228 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
229 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
230 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
231 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
232 default:
233 assert(!"unknown stencil op");
234 return BRW_STENCILOP_KEEP;
235 }
236 }
237
238 /**
239 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
240 */
241 static int
242 gen6_translate_tex_mipfilter(unsigned filter)
243 {
244 switch (filter) {
245 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
246 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
247 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
248 default:
249 assert(!"unknown mipfilter");
250 return BRW_MIPFILTER_NONE;
251 }
252 }
253
254 /**
255 * Translate a pipe texture filter to the matching hardware mapfilter.
256 */
257 static int
258 gen6_translate_tex_filter(unsigned filter)
259 {
260 switch (filter) {
261 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
262 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
263 default:
264 assert(!"unknown sampler filter");
265 return BRW_MAPFILTER_NEAREST;
266 }
267 }
268
269 /**
270 * Translate a pipe texture coordinate wrapping mode to the matching hardware
271 * wrapping mode.
272 */
273 static int
274 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
275 {
276 /* clamp to edge or border? */
277 if (wrap == PIPE_TEX_WRAP_CLAMP) {
278 wrap = (clamp_to_edge) ?
279 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
280 }
281
282 switch (wrap) {
283 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
284 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
285 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
286 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
287 case PIPE_TEX_WRAP_CLAMP:
288 case PIPE_TEX_WRAP_MIRROR_CLAMP:
289 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
290 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
291 default:
292 assert(!"unknown sampler wrap mode");
293 return BRW_TEXCOORDMODE_WRAP;
294 }
295 }
296
297 /**
298 * Translate a pipe DSA test function to the matching hardware compare
299 * function.
300 */
301 static int
302 gen6_translate_dsa_func(unsigned func)
303 {
304 switch (func) {
305 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
306 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
307 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
308 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
309 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
310 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
311 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
312 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
313 default:
314 assert(!"unknown depth/stencil/alpha test function");
315 return BRW_COMPAREFUNCTION_NEVER;
316 }
317 }
318
319 /**
320 * Translate a pipe shadow compare function to the matching hardware shadow
321 * function.
322 */
323 static int
324 gen6_translate_shadow_func(unsigned func)
325 {
326 /*
327 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
328 * comparison, and 1.0 is returned when the comparison is true.
329 *
330 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
331 * the comparison, and 0.0 is returned when the comparison is true.
332 */
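/*
 * Worked example of the flip: PIPE_FUNC_LESS asks for 1.0 when
 * (ref < texel).  Negating that gives (ref >= texel), i.e. (texel <= ref)
 * with the reference moved to the right-hand side, so the hardware
 * equivalent is BRW_PREFILTER_LEQUAL, which returns 0.0 when
 * (texel <= ref) and therefore 1.0 exactly when (ref < texel).
 */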
333 switch (func) {
334 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
335 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
336 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
337 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
338 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
339 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
340 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
341 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
342 default:
343 assert(!"unknown shadow compare function");
344 return BRW_PREFILTER_NEVER;
345 }
346 }
347
348 /**
349 * Translate an index size to the matching hardware index format.
350 */
351 static int
352 gen6_translate_index_size(int size)
353 {
354 switch (size) {
355 case 4: return BRW_INDEX_DWORD;
356 case 2: return BRW_INDEX_WORD;
357 case 1: return BRW_INDEX_BYTE;
358 default:
359 assert(!"unknown index size");
360 return BRW_INDEX_BYTE;
361 }
362 }
363
364 static void
365 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
366 struct intel_bo *general_state_bo,
367 struct intel_bo *surface_state_bo,
368 struct intel_bo *dynamic_state_bo,
369 struct intel_bo *indirect_object_bo,
370 struct intel_bo *instruction_bo,
371 uint32_t general_state_size,
372 uint32_t dynamic_state_size,
373 uint32_t indirect_object_size,
374 uint32_t instruction_size,
375 struct ilo_cp *cp)
376 {
377 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
378 const uint8_t cmd_len = 10;
379
380 ILO_GPE_VALID_GEN(dev, 6, 7);
381
382 /* 4K-page aligned */
383 assert(((general_state_size | dynamic_state_size |
384 indirect_object_size | instruction_size) & 0xfff) == 0);
385
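/*
 * Bit 0 of each base address and upper bound DWord written below is its
 * "Modify Enable" bit; the trailing "| 1" and "+ 1" in the writes set that
 * bit so the new values actually take effect.
 */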
386 ilo_cp_begin(cp, cmd_len);
387 ilo_cp_write(cp, cmd | (cmd_len - 2));
388
389 ilo_cp_write_bo(cp, 1, general_state_bo,
390 INTEL_DOMAIN_RENDER,
391 0);
392 ilo_cp_write_bo(cp, 1, surface_state_bo,
393 INTEL_DOMAIN_SAMPLER,
394 0);
395 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
396 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
397 0);
398 ilo_cp_write_bo(cp, 1, indirect_object_bo,
399 0,
400 0);
401 ilo_cp_write_bo(cp, 1, instruction_bo,
402 INTEL_DOMAIN_INSTRUCTION,
403 0);
404
405 if (general_state_size) {
406 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
407 INTEL_DOMAIN_RENDER,
408 0);
409 }
410 else {
411 /* skip range check */
412 ilo_cp_write(cp, 1);
413 }
414
415 if (dynamic_state_size) {
416 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
417 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
418 0);
419 }
420 else {
421 /* skip range check */
422 ilo_cp_write(cp, 0xfffff000 + 1);
423 }
424
425 if (indirect_object_size) {
426 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
427 0,
428 0);
429 }
430 else {
431 /* skip range check */
432 ilo_cp_write(cp, 0xfffff000 + 1);
433 }
434
435 if (instruction_size) {
436 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
437 INTEL_DOMAIN_INSTRUCTION,
438 0);
439 }
440 else {
441 /* skip range check */
442 ilo_cp_write(cp, 1);
443 }
444
445 ilo_cp_end(cp);
446 }
447
448 static void
449 gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
450 uint32_t sip,
451 struct ilo_cp *cp)
452 {
453 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
454 const uint8_t cmd_len = 2;
455
456 ILO_GPE_VALID_GEN(dev, 6, 7);
457
458 ilo_cp_begin(cp, cmd_len);
459 ilo_cp_write(cp, cmd | (cmd_len - 2));
460 ilo_cp_write(cp, sip);
461 ilo_cp_end(cp);
462 }
463
464 static void
465 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
466 bool enable,
467 struct ilo_cp *cp)
468 {
469 const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
470 const uint8_t cmd_len = 1;
471
472 ILO_GPE_VALID_GEN(dev, 6, 7);
473
474 ilo_cp_begin(cp, cmd_len);
475 ilo_cp_write(cp, cmd | enable);
476 ilo_cp_end(cp);
477 }
478
479 static void
480 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
481 int pipeline,
482 struct ilo_cp *cp)
483 {
484 const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
485 const uint8_t cmd_len = 1;
486
487 ILO_GPE_VALID_GEN(dev, 6, 7);
488
489 /* 3D or media */
490 assert(pipeline == 0x0 || pipeline == 0x1);
491
492 ilo_cp_begin(cp, cmd_len);
493 ilo_cp_write(cp, cmd | pipeline);
494 ilo_cp_end(cp);
495 }
496
497 static void
498 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
499 int max_threads, int num_urb_entries,
500 int urb_entry_size,
501 struct ilo_cp *cp)
502 {
503 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
504 const uint8_t cmd_len = 8;
505 uint32_t dw2, dw4;
506
507 ILO_GPE_VALID_GEN(dev, 6, 6);
508
509 dw2 = (max_threads - 1) << 16 |
510 num_urb_entries << 8 |
511 1 << 7 | /* Reset Gateway Timer */
512 1 << 6; /* Bypass Gateway Control */
513
514 dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */
515 480; /* CURBE Allocation Size */
516
517 ilo_cp_begin(cp, cmd_len);
518 ilo_cp_write(cp, cmd | (cmd_len - 2));
519 ilo_cp_write(cp, 0); /* scratch */
520 ilo_cp_write(cp, dw2);
521 ilo_cp_write(cp, 0); /* MBZ */
522 ilo_cp_write(cp, dw4);
523 ilo_cp_write(cp, 0); /* scoreboard */
524 ilo_cp_write(cp, 0);
525 ilo_cp_write(cp, 0);
526 ilo_cp_end(cp);
527 }
528
529 static void
530 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
531 uint32_t buf, int size,
532 struct ilo_cp *cp)
533 {
534 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
535 const uint8_t cmd_len = 4;
536
537 ILO_GPE_VALID_GEN(dev, 6, 6);
538
539 assert(buf % 32 == 0);
540 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
541 size = align(size, 32);
542
543 ilo_cp_begin(cp, cmd_len);
544 ilo_cp_write(cp, cmd | (cmd_len - 2));
545 ilo_cp_write(cp, 0); /* MBZ */
546 ilo_cp_write(cp, size);
547 ilo_cp_write(cp, buf);
548 ilo_cp_end(cp);
549 }
550
551 static void
552 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
553 uint32_t offset, int num_ids,
554 struct ilo_cp *cp)
555 {
556 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
557 const uint8_t cmd_len = 4;
558
559 ILO_GPE_VALID_GEN(dev, 6, 6);
560
561 assert(offset % 32 == 0);
562
563 ilo_cp_begin(cp, cmd_len);
564 ilo_cp_write(cp, cmd | (cmd_len - 2));
565 ilo_cp_write(cp, 0); /* MBZ */
566 /* every ID has 8 DWords */
567 ilo_cp_write(cp, num_ids * 8 * 4);
568 ilo_cp_write(cp, offset);
569 ilo_cp_end(cp);
570 }
571
572 static void
573 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
574 int id, int byte, int thread_count,
575 struct ilo_cp *cp)
576 {
577 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
578 const uint8_t cmd_len = 2;
579 uint32_t dw1;
580
581 ILO_GPE_VALID_GEN(dev, 6, 6);
582
583 dw1 = id << 16 |
584 byte << 8 |
585 thread_count;
586
587 ilo_cp_begin(cp, cmd_len);
588 ilo_cp_write(cp, cmd | (cmd_len - 2));
589 ilo_cp_write(cp, dw1);
590 ilo_cp_end(cp);
591 }
592
593 static void
594 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
595 int thread_count_water_mark,
596 int barrier_mask,
597 struct ilo_cp *cp)
598 {
599 const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
600 const uint8_t cmd_len = 2;
601 uint32_t dw1;
602
603 ILO_GPE_VALID_GEN(dev, 6, 6);
604
605 dw1 = thread_count_water_mark << 16 |
606 barrier_mask;
607
608 ilo_cp_begin(cp, cmd_len);
609 ilo_cp_write(cp, cmd | (cmd_len - 2));
610 ilo_cp_write(cp, dw1);
611 ilo_cp_end(cp);
612 }
613
614 static void
615 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
616 struct ilo_cp *cp)
617 {
618 assert(!"MEDIA_OBJECT_WALKER unsupported");
619 }
620
621 static void
622 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
623 uint32_t vs_binding_table,
624 uint32_t gs_binding_table,
625 uint32_t ps_binding_table,
626 struct ilo_cp *cp)
627 {
628 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
629 const uint8_t cmd_len = 4;
630
631 ILO_GPE_VALID_GEN(dev, 6, 6);
632
633 ilo_cp_begin(cp, cmd_len);
634 ilo_cp_write(cp, cmd | (cmd_len - 2) |
635 GEN6_BINDING_TABLE_MODIFY_VS |
636 GEN6_BINDING_TABLE_MODIFY_GS |
637 GEN6_BINDING_TABLE_MODIFY_PS);
638 ilo_cp_write(cp, vs_binding_table);
639 ilo_cp_write(cp, gs_binding_table);
640 ilo_cp_write(cp, ps_binding_table);
641 ilo_cp_end(cp);
642 }
643
644 static void
645 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
646 uint32_t vs_sampler_state,
647 uint32_t gs_sampler_state,
648 uint32_t ps_sampler_state,
649 struct ilo_cp *cp)
650 {
651 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
652 const uint8_t cmd_len = 4;
653
654 ILO_GPE_VALID_GEN(dev, 6, 6);
655
656 ilo_cp_begin(cp, cmd_len);
657 ilo_cp_write(cp, cmd | (cmd_len - 2) |
658 VS_SAMPLER_STATE_CHANGE |
659 GS_SAMPLER_STATE_CHANGE |
660 PS_SAMPLER_STATE_CHANGE);
661 ilo_cp_write(cp, vs_sampler_state);
662 ilo_cp_write(cp, gs_sampler_state);
663 ilo_cp_write(cp, ps_sampler_state);
664 ilo_cp_end(cp);
665 }
666
667 static void
668 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
669 int vs_total_size, int gs_total_size,
670 int vs_entry_size, int gs_entry_size,
671 struct ilo_cp *cp)
672 {
673 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
674 const uint8_t cmd_len = 3;
675 const int row_size = 128; /* 1024 bits */
676 int vs_alloc_size, gs_alloc_size;
677 int vs_num_entries, gs_num_entries;
678
679 ILO_GPE_VALID_GEN(dev, 6, 6);
680
681 /* in 1024-bit URB rows */
682 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
683 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
684
685 /* the valid range is [1, 5] */
686 if (!vs_alloc_size)
687 vs_alloc_size = 1;
688 if (!gs_alloc_size)
689 gs_alloc_size = 1;
690 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
691
692 /* the valid range is [24, 256] in multiples of 4 */
693 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
694 if (vs_num_entries > 256)
695 vs_num_entries = 256;
696 assert(vs_num_entries >= 24);
697
698 /* the valid range is [0, 256] in multiples of 4 */
699 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
700 if (gs_num_entries > 256)
701 gs_num_entries = 256;
702
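/*
 * Worked example of the math above: a vs_entry_size of 136 bytes needs
 * ceil(136 / 128) = 2 rows per entry; with a vs_total_size of 32768 bytes
 * that yields (32768 / 128 / 2) & ~3 = 128 entries, which is already a
 * multiple of 4 and within [24, 256].
 */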
703 ilo_cp_begin(cp, cmd_len);
704 ilo_cp_write(cp, cmd | (cmd_len - 2));
705 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
706 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
707 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
708 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
709 ilo_cp_end(cp);
710 }
711
712 static void
713 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
714 const struct pipe_vertex_buffer *vbuffers,
715 uint64_t vbuffer_mask,
716 const struct ilo_ve_state *ve,
717 struct ilo_cp *cp)
718 {
719 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
720 uint8_t cmd_len;
721 unsigned hw_idx;
722
723 ILO_GPE_VALID_GEN(dev, 6, 7);
724
725 /*
726 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
727 *
728 * "From 1 to 33 VBs can be specified..."
729 */
730 assert(vbuffer_mask <= (1UL << 33));
731
732 if (!vbuffer_mask)
733 return;
734
735 cmd_len = 1;
736
737 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
738 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
739
740 if (vbuffer_mask & (1 << pipe_idx))
741 cmd_len += 4;
742 }
743
744 ilo_cp_begin(cp, cmd_len);
745 ilo_cp_write(cp, cmd | (cmd_len - 2));
746
747 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
748 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
749 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
750 const struct pipe_vertex_buffer *vb = &vbuffers[pipe_idx];
751 uint32_t dw;
752
753 if (!(vbuffer_mask & (1 << pipe_idx)))
754 continue;
755
756 dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
757
758 if (instance_divisor)
759 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
760 else
761 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
762
763 if (dev->gen >= ILO_GEN(7))
764 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
765
766 /* use null vb if there is no buffer or the stride is out of range */
767 if (vb->buffer && vb->stride <= 2048) {
768 const struct ilo_buffer *buf = ilo_buffer(vb->buffer);
769 const uint32_t start_offset = vb->buffer_offset;
770 /*
771 * As noted in ilo_translate_format(), we treat some 3-component
772 * formats as 4-component formats to work around hardware
773 * limitations. Imagine the case where the vertex buffer holds a
774 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
775 * The hardware would not be able to fetch it because the vertex
776 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
777 * and that takes at least 8 bytes.
778 *
779 * For the workaround to work, we query the physical size, which is
780 * page aligned, to calculate end_offset so that the last vertex has
781 * a better chance to be fetched.
782 */
783 const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
784
785 dw |= vb->stride << BRW_VB0_PITCH_SHIFT;
786
787 ilo_cp_write(cp, dw);
788 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
789 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
790 ilo_cp_write(cp, instance_divisor);
791 }
792 else {
793 dw |= 1 << 13;
794
795 ilo_cp_write(cp, dw);
796 ilo_cp_write(cp, 0);
797 ilo_cp_write(cp, 0);
798 ilo_cp_write(cp, instance_divisor);
799 }
800 }
801
802 ilo_cp_end(cp);
803 }
804
805 static void
806 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
807 struct ilo_ve_cso *cso)
808 {
809 int format;
810
811 ILO_GPE_VALID_GEN(dev, 6, 7);
812
813 /*
814 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
815 *
816 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
817 * valid VERTEX_ELEMENT structure.
818 *
819 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
820 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
821 *
822 * - The Source Element Format must be set to the UINT format.
823 *
824 * - [DevSNB]: Edge Flags are not supported for QUADLIST
825 * primitives. Software may elect to convert QUADLIST primitives
826 * to some set of corresponding edge-flag-supported primitive
827 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
828 */
829
830 cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
831 cso->payload[1] =
832 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
833 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
834 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
835 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
836
837 /*
838 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
839 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
840 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
841 *
842 * Since all the hardware cares about is whether the flags are zero or not,
843 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
844 */
845 format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
846 if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
847 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
848 BRW_SURFACEFORMAT_R32_FLOAT - 1);
849
850 cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
851 }
852 else {
853 assert(format == BRW_SURFACEFORMAT_R8_UINT);
854 }
855 }
856
857 static void
858 ve_init_cso_with_components(const struct ilo_dev_info *dev,
859 int comp0, int comp1, int comp2, int comp3,
860 struct ilo_ve_cso *cso)
861 {
862 ILO_GPE_VALID_GEN(dev, 6, 7);
863
864 STATIC_ASSERT(Elements(cso->payload) >= 2);
865 cso->payload[0] = GEN6_VE0_VALID;
866 cso->payload[1] =
867 comp0 << BRW_VE1_COMPONENT_0_SHIFT |
868 comp1 << BRW_VE1_COMPONENT_1_SHIFT |
869 comp2 << BRW_VE1_COMPONENT_2_SHIFT |
870 comp3 << BRW_VE1_COMPONENT_3_SHIFT;
871 }
872
873 static void
874 ve_init_cso(const struct ilo_dev_info *dev,
875 const struct pipe_vertex_element *state,
876 unsigned vb_index,
877 struct ilo_ve_cso *cso)
878 {
879 int comp[4] = {
880 BRW_VE1_COMPONENT_STORE_SRC,
881 BRW_VE1_COMPONENT_STORE_SRC,
882 BRW_VE1_COMPONENT_STORE_SRC,
883 BRW_VE1_COMPONENT_STORE_SRC,
884 };
885 int format;
886
887 ILO_GPE_VALID_GEN(dev, 6, 7);
888
889 switch (util_format_get_nr_components(state->src_format)) {
890 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0; /* fall through */
891 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0; /* fall through */
892 case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
893 BRW_VE1_COMPONENT_STORE_1_INT :
894 BRW_VE1_COMPONENT_STORE_1_FLT;
895 }
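/*
 * The fall-throughs above fill in the missing components.  For example, a
 * two-component format such as PIPE_FORMAT_R32G32_FLOAT ends up with
 * comp[] = { STORE_SRC, STORE_SRC, STORE_0, STORE_1_FLT } (the
 * BRW_VE1_COMPONENT_ prefixes dropped): x/y come from the buffer while
 * z/w are forced to 0.0 and 1.0.
 */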
896
897 format = ilo_translate_vertex_format(state->src_format);
898
899 STATIC_ASSERT(Elements(cso->payload) >= 2);
900 cso->payload[0] =
901 vb_index << GEN6_VE0_INDEX_SHIFT |
902 GEN6_VE0_VALID |
903 format << BRW_VE0_FORMAT_SHIFT |
904 state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
905
906 cso->payload[1] =
907 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
908 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
909 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
910 comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
911 }
912
913 void
914 ilo_gpe_init_ve(const struct ilo_dev_info *dev,
915 unsigned num_states,
916 const struct pipe_vertex_element *states,
917 struct ilo_ve_state *ve)
918 {
919 unsigned i;
920
921 ILO_GPE_VALID_GEN(dev, 6, 7);
922
923 ve->count = num_states;
924 ve->vb_count = 0;
925
926 for (i = 0; i < num_states; i++) {
927 const unsigned pipe_idx = states[i].vertex_buffer_index;
928 const unsigned instance_divisor = states[i].instance_divisor;
929 unsigned hw_idx;
930
931 /*
932 * map the pipe vb to the hardware vb, which has a fixed instance
933 * divisor
934 */
935 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
936 if (ve->vb_mapping[hw_idx] == pipe_idx &&
937 ve->instance_divisors[hw_idx] == instance_divisor)
938 break;
939 }
940
941 /* create one if there is no matching hardware vb */
942 if (hw_idx >= ve->vb_count) {
943 hw_idx = ve->vb_count++;
944
945 ve->vb_mapping[hw_idx] = pipe_idx;
946 ve->instance_divisors[hw_idx] = instance_divisor;
947 }
948
949 ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
950 }
951 }
952
953 static void
954 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
955 const struct ilo_ve_state *ve,
956 bool last_velement_edgeflag,
957 bool prepend_generated_ids,
958 struct ilo_cp *cp)
959 {
960 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
961 uint8_t cmd_len;
962 unsigned i;
963
964 ILO_GPE_VALID_GEN(dev, 6, 7);
965
966 /*
967 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
968 *
969 * "Up to 34 (DevSNB+) vertex elements are supported."
970 */
971 assert(ve->count + prepend_generated_ids <= 34);
972
973 if (!ve->count && !prepend_generated_ids) {
974 struct ilo_ve_cso dummy;
975
976 ve_init_cso_with_components(dev,
977 BRW_VE1_COMPONENT_STORE_0,
978 BRW_VE1_COMPONENT_STORE_0,
979 BRW_VE1_COMPONENT_STORE_0,
980 BRW_VE1_COMPONENT_STORE_1_FLT,
981 &dummy);
982
983 cmd_len = 3;
984 ilo_cp_begin(cp, cmd_len);
985 ilo_cp_write(cp, cmd | (cmd_len - 2));
986 ilo_cp_write_multi(cp, dummy.payload, 2);
987 ilo_cp_end(cp);
988
989 return;
990 }
991
992 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
993
994 ilo_cp_begin(cp, cmd_len);
995 ilo_cp_write(cp, cmd | (cmd_len - 2));
996
997 if (prepend_generated_ids) {
998 struct ilo_ve_cso gen_ids;
999
1000 ve_init_cso_with_components(dev,
1001 BRW_VE1_COMPONENT_STORE_VID,
1002 BRW_VE1_COMPONENT_STORE_IID,
1003 BRW_VE1_COMPONENT_NOSTORE,
1004 BRW_VE1_COMPONENT_NOSTORE,
1005 &gen_ids);
1006
1007 ilo_cp_write_multi(cp, gen_ids.payload, 2);
1008 }
1009
1010 if (last_velement_edgeflag) {
1011 struct ilo_ve_cso edgeflag;
1012
1013 for (i = 0; i < ve->count - 1; i++)
1014 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
1015
1016 edgeflag = ve->cso[i];
1017 ve_set_cso_edgeflag(dev, &edgeflag);
1018 ilo_cp_write_multi(cp, edgeflag.payload, 2);
1019 }
1020 else {
1021 for (i = 0; i < ve->count; i++)
1022 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
1023 }
1024
1025 ilo_cp_end(cp);
1026 }
1027
1028 static void
1029 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
1030 const struct pipe_index_buffer *ib,
1031 bool enable_cut_index,
1032 struct ilo_cp *cp)
1033 {
1034 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
1035 const uint8_t cmd_len = 3;
1036 const struct ilo_buffer *buf = ilo_buffer(ib->buffer);
1037 uint32_t start_offset, end_offset;
1038 int format;
1039
1040 ILO_GPE_VALID_GEN(dev, 6, 7);
1041
1042 if (!buf)
1043 return;
1044
1045 format = gen6_translate_index_size(ib->index_size);
1046
1047 start_offset = ib->offset;
1048 /* start_offset must be aligned to index size */
1049 if (start_offset % ib->index_size) {
1050 /* TODO need a u_upload_mgr to upload the IB to an aligned address */
1051 assert(!"unaligned index buffer offset");
1052 start_offset -= start_offset % ib->index_size;
1053 }
1054
1055 /* end_offset must also be aligned */
1056 end_offset = buf->bo_size;
1057 end_offset -= (end_offset % ib->index_size);
1058 /* it is inclusive */
1059 end_offset -= 1;
1060
1061 ilo_cp_begin(cp, cmd_len);
1062 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1063 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
1064 format << 8);
1065 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
1066 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
1067 ilo_cp_end(cp);
1068 }
1069
1070 static void
1071 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
1072 uint32_t clip_viewport,
1073 uint32_t sf_viewport,
1074 uint32_t cc_viewport,
1075 struct ilo_cp *cp)
1076 {
1077 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
1078 const uint8_t cmd_len = 4;
1079
1080 ILO_GPE_VALID_GEN(dev, 6, 6);
1081
1082 ilo_cp_begin(cp, cmd_len);
1083 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1084 GEN6_CLIP_VIEWPORT_MODIFY |
1085 GEN6_SF_VIEWPORT_MODIFY |
1086 GEN6_CC_VIEWPORT_MODIFY);
1087 ilo_cp_write(cp, clip_viewport);
1088 ilo_cp_write(cp, sf_viewport);
1089 ilo_cp_write(cp, cc_viewport);
1090 ilo_cp_end(cp);
1091 }
1092
1093 static void
1094 gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
1095 uint32_t blend_state,
1096 uint32_t depth_stencil_state,
1097 uint32_t color_calc_state,
1098 struct ilo_cp *cp)
1099 {
1100 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
1101 const uint8_t cmd_len = 4;
1102
1103 ILO_GPE_VALID_GEN(dev, 6, 6);
1104
1105 ilo_cp_begin(cp, cmd_len);
1106 ilo_cp_write(cp, cmd | (cmd_len - 2));
1107 ilo_cp_write(cp, blend_state | 1);
1108 ilo_cp_write(cp, depth_stencil_state | 1);
1109 ilo_cp_write(cp, color_calc_state | 1);
1110 ilo_cp_end(cp);
1111 }
1112
1113 static void
1114 gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
1115 uint32_t scissor_rect,
1116 struct ilo_cp *cp)
1117 {
1118 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
1119 const uint8_t cmd_len = 2;
1120
1121 ILO_GPE_VALID_GEN(dev, 6, 7);
1122
1123 ilo_cp_begin(cp, cmd_len);
1124 ilo_cp_write(cp, cmd | (cmd_len - 2));
1125 ilo_cp_write(cp, scissor_rect);
1126 ilo_cp_end(cp);
1127 }
1128
1129 static void
1130 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1131 const struct ilo_shader *vs,
1132 int num_samplers,
1133 struct ilo_cp *cp)
1134 {
1135 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1136 const uint8_t cmd_len = 6;
1137 uint32_t dw2, dw4, dw5;
1138 int vue_read_len, max_threads;
1139
1140 ILO_GPE_VALID_GEN(dev, 6, 7);
1141
1142 if (!vs) {
1143 ilo_cp_begin(cp, cmd_len);
1144 ilo_cp_write(cp, cmd | (cmd_len - 2));
1145 ilo_cp_write(cp, 0);
1146 ilo_cp_write(cp, 0);
1147 ilo_cp_write(cp, 0);
1148 ilo_cp_write(cp, 0);
1149 ilo_cp_write(cp, 0);
1150 ilo_cp_end(cp);
1151 return;
1152 }
1153
1154 /*
1155 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
1156 *
1157 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
1158 * 128-bit vertex elements to be passed into the payload for each
1159 * vertex."
1160 *
1161 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
1162 * data to be read and passed to the thread."
1163 */
1164 vue_read_len = (vs->in.count + 1) / 2;
1165 if (!vue_read_len)
1166 vue_read_len = 1;
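/*
 * (count + 1) / 2 rounds up to whole pairs: e.g., 5 vertex inputs need 3
 * pairs of 128-bit elements.  A count of 0 would give 0, which the quote
 * above says is undefined, hence the clamp to 1.
 */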
1167
1168 switch (dev->gen) {
1169 case ILO_GEN(6):
1170 /*
1171 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
1172 *
1173 * "Device # of EUs #Threads/EU
1174 * SNB GT2 12 5
1175 * SNB GT1 6 4"
1176 */
1177 max_threads = (dev->gt == 2) ? 60 : 24;
1178 break;
1179 case ILO_GEN(7):
1180 /*
1181 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
1182 *
1183 * "Device # of EUs #Threads/EU
1184 * Ivy Bridge (GT2) 16 8
1185 * Ivy Bridge (GT1) 6 6"
1186 */
1187 max_threads = (dev->gt == 2) ? 128 : 36;
1188 break;
1189 case ILO_GEN(7.5):
1190 /* see brwCreateContext() */
1191 max_threads = (dev->gt == 2) ? 280 : 70;
1192 break;
1193 default:
1194 max_threads = 1;
1195 break;
1196 }
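/*
 * The max_threads values above are simply (# of EUs) * (threads/EU) from
 * the tables quoted in the comments: 12 * 5 = 60 for SNB GT2, 6 * 4 = 24
 * for SNB GT1, 16 * 8 = 128 for IVB GT2, and 6 * 6 = 36 for IVB GT1.
 */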
1197
1198 dw2 = ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1199 if (false)
1200 dw2 |= GEN6_VS_FLOATING_POINT_MODE_ALT;
1201
1202 dw4 = vs->in.start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
1203 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
1204 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
1205
1206 dw5 = GEN6_VS_STATISTICS_ENABLE |
1207 GEN6_VS_ENABLE;
1208
1209 if (dev->gen >= ILO_GEN(7.5))
1210 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
1211 else
1212 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
1213
1214 ilo_cp_begin(cp, cmd_len);
1215 ilo_cp_write(cp, cmd | (cmd_len - 2));
1216 ilo_cp_write(cp, vs->cache_offset);
1217 ilo_cp_write(cp, dw2);
1218 ilo_cp_write(cp, 0); /* scratch */
1219 ilo_cp_write(cp, dw4);
1220 ilo_cp_write(cp, dw5);
1221 ilo_cp_end(cp);
1222 }
1223
1224 static void
1225 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1226 const struct ilo_shader *gs,
1227 const struct ilo_shader *vs,
1228 uint32_t vs_offset,
1229 struct ilo_cp *cp)
1230 {
1231 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1232 const uint8_t cmd_len = 7;
1233 uint32_t dw1, dw2, dw4, dw5, dw6;
1234 int i;
1235
1236 ILO_GPE_VALID_GEN(dev, 6, 6);
1237
1238 if (!gs && (!vs || !vs->stream_output)) {
1239 dw1 = 0;
1240 dw2 = 0;
1241 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1242 dw5 = GEN6_GS_STATISTICS_ENABLE;
1243 dw6 = 0;
1244 }
1245 else {
1246 int max_threads, vue_read_len;
1247
1248 /*
1249 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
1250 *
1251 * "Maximum Number of Threads valid range is [0,27] when Rendering
1252 * Enabled bit is set."
1253 *
1254 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
1255 *
1256 * "Programming Note: If the GS stage is enabled, software must
1257 * always allocate at least one GS URB Entry. This is true even if
1258 * the GS thread never needs to output vertices to the pipeline,
1259 * e.g., when only performing stream output. This is an artifact of
1260 * the need to pass the GS thread an initial destination URB
1261 * handle."
1262 *
1263 * As such, we always enable rendering, and limit the number of threads.
1264 */
1265 if (dev->gt == 2) {
1266 /* maximum is 60, but limited to 28 */
1267 max_threads = 28;
1268 }
1269 else {
1270 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
1271 max_threads = 21;
1272 }
1273
1274 if (max_threads > 28)
1275 max_threads = 28;
1276
1277 dw2 = GEN6_GS_SPF_MODE;
1278
1279 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
1280 GEN6_GS_STATISTICS_ENABLE |
1281 GEN6_GS_SO_STATISTICS_ENABLE |
1282 GEN6_GS_RENDERING_ENABLE;
1283
1284 /*
1285 * we cannot make use of GEN6_GS_REORDER because it will reorder
1286 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
1287 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
1288 * (2N+2, 2N+1, 2N+3)).
1289 */
1290 dw6 = GEN6_GS_ENABLE;
1291
1292 if (gs) {
1293 /* VS outputs must match GS inputs */
1294 assert(gs->in.count == vs->out.count);
1295 for (i = 0; i < gs->in.count; i++) {
1296 assert(gs->in.semantic_names[i] == vs->out.semantic_names[i]);
1297 assert(gs->in.semantic_indices[i] == vs->out.semantic_indices[i]);
1298 }
1299
1300 /*
1301 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1302 *
1303 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1304 * Length) to 0 indicating no Vertex URB data to be read and
1305 * passed to the thread."
1306 */
1307 vue_read_len = (gs->in.count + 1) / 2;
1308 if (!vue_read_len)
1309 vue_read_len = 1;
1310
1311 dw1 = gs->cache_offset;
1312 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1313 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1314 gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1315
1316 if (gs->in.discard_adj)
1317 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1318
1319 if (gs->stream_output) {
1320 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1321 if (gs->svbi_post_inc) {
1322 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1323 gs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1324 }
1325 }
1326 }
1327 else {
1328 /*
1329 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1330 *
1331 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1332 * Length) to 0 indicating no Vertex URB data to be read and
1333 * passed to the thread."
1334 */
1335 vue_read_len = (vs->out.count + 1) / 2;
1336 if (!vue_read_len)
1337 vue_read_len = 1;
1338
1339 dw1 = vs_offset;
1340 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
1341 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
1342 vs->gs_start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
1343
1344 if (vs->in.discard_adj)
1345 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
1346
1347 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
1348 if (vs->svbi_post_inc) {
1349 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
1350 vs->svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
1351 }
1352 }
1353 }
1354
1355 ilo_cp_begin(cp, cmd_len);
1356 ilo_cp_write(cp, cmd | (cmd_len - 2));
1357 ilo_cp_write(cp, dw1);
1358 ilo_cp_write(cp, dw2);
1359 ilo_cp_write(cp, 0);
1360 ilo_cp_write(cp, dw4);
1361 ilo_cp_write(cp, dw5);
1362 ilo_cp_write(cp, dw6);
1363 ilo_cp_end(cp);
1364 }
1365
1366 void
1367 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
1368 const struct pipe_rasterizer_state *state,
1369 struct ilo_rasterizer_clip *clip)
1370 {
1371 uint32_t dw1, dw2, dw3;
1372
1373 ILO_GPE_VALID_GEN(dev, 6, 7);
1374
1375 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
1376
1377 if (dev->gen >= ILO_GEN(7)) {
1378 /*
1379 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
1380 *
1381 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
1382 * enabled only for the cases where the incoming primitive topology
1383 * into the clipper guaranteed to be Trilist."
1384 *
1385 * What does this mean?
1386 */
1387 dw1 |= 0 << 19 |
1388 GEN7_CLIP_EARLY_CULL;
1389
1390 if (state->front_ccw)
1391 dw1 |= GEN7_CLIP_WINDING_CCW;
1392
1393 switch (state->cull_face) {
1394 case PIPE_FACE_NONE:
1395 dw1 |= GEN7_CLIP_CULLMODE_NONE;
1396 break;
1397 case PIPE_FACE_FRONT:
1398 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
1399 break;
1400 case PIPE_FACE_BACK:
1401 dw1 |= GEN7_CLIP_CULLMODE_BACK;
1402 break;
1403 case PIPE_FACE_FRONT_AND_BACK:
1404 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
1405 break;
1406 }
1407 }
1408
1409 dw2 = GEN6_CLIP_ENABLE |
1410 GEN6_CLIP_XY_TEST |
1411 state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
1412 GEN6_CLIP_MODE_NORMAL;
1413
1414 if (state->clip_halfz)
1415 dw2 |= GEN6_CLIP_API_D3D;
1416 else
1417 dw2 |= GEN6_CLIP_API_OGL;
1418
1419 if (state->depth_clip)
1420 dw2 |= GEN6_CLIP_Z_TEST;
1421
1422 if (state->flatshade_first) {
1423 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1424 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1425 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1426 }
1427 else {
1428 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
1429 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
1430 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
1431 }
1432
1433 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
1434 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
1435
1436 clip->payload[0] = dw1;
1437 clip->payload[1] = dw2;
1438 clip->payload[2] = dw3;
1439
1440 clip->can_enable_guardband = true;
1441
1442 /*
1443 * There are several reasons that the guard band test should be disabled:
1444 *
1445 * - GL wide points (to avoid partially visible objects)
1446 * - GL wide or AA lines (to avoid partially visible objects)
1447 */
1448 if (state->point_size_per_vertex || state->point_size > 1.0f)
1449 clip->can_enable_guardband = false;
1450 if (state->line_smooth || state->line_width > 1.0f)
1451 clip->can_enable_guardband = false;
1452 }
1453
1454 static void
1455 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1456 const struct ilo_rasterizer_state *rasterizer,
1457 bool has_linear_interp,
1458 bool enable_guardband,
1459 int num_viewports,
1460 struct ilo_cp *cp)
1461 {
1462 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1463 const uint8_t cmd_len = 4;
1464 uint32_t dw1, dw2, dw3;
1465
1466 if (rasterizer) {
1467 dw1 = rasterizer->clip.payload[0];
1468 dw2 = rasterizer->clip.payload[1];
1469 dw3 = rasterizer->clip.payload[2];
1470
1471 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1472 dw2 |= GEN6_CLIP_GB_TEST;
1473
1474 if (has_linear_interp)
1475 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1476
1477 dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1478 (num_viewports - 1);
1479 }
1480 else {
1481 dw1 = 0;
1482 dw2 = 0;
1483 dw3 = 0;
1484 }
1485
1486 ilo_cp_begin(cp, cmd_len);
1487 ilo_cp_write(cp, cmd | (cmd_len - 2));
1488 ilo_cp_write(cp, dw1);
1489 ilo_cp_write(cp, dw2);
1490 ilo_cp_write(cp, dw3);
1491 ilo_cp_end(cp);
1492 }
1493
1494 void
1495 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
1496 const struct pipe_rasterizer_state *state,
1497 struct ilo_rasterizer_sf *sf)
1498 {
1499 float offset_const, offset_scale, offset_clamp;
1500 int line_width, point_width;
1501 uint32_t dw1, dw2, dw3;
1502
1503 ILO_GPE_VALID_GEN(dev, 6, 7);
1504
1505 /*
1506 * Scale the constant term. The minimum representable value used by the HW
1507 * is not large enough to be the minimum resolvable difference.
1508 */
1509 offset_const = state->offset_units * 2.0f;
1510
1511 offset_scale = state->offset_scale;
1512 offset_clamp = state->offset_clamp;
1513
1514 /*
1515 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1516 *
1517 * "This bit (Statistics Enable) should be set whenever clipping is
1518 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
1519 * should be cleared if clipping is disabled or Statistics Enable in
1520 * CLIP_STATE is clear."
1521 */
1522 dw1 = GEN6_SF_STATISTICS_ENABLE |
1523 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
1524
1525 /* XXX GEN6 path seems to work fine for GEN7 */
1526 if (false && dev->gen >= ILO_GEN(7)) {
1527 /*
1528 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
1529 *
1530 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
1531 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
1532 * Depth Offset Enable Point) should be set whenever non zero depth
1533 * bias (Slope, Bias) values are used. Setting this bit may have
1534 * some degradation of performance for some workloads."
1535 */
1536 if (state->offset_tri || state->offset_line || state->offset_point) {
1537 /* XXX need to scale offset_const according to the depth format */
1538 dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
1539
1540 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
1541 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
1542 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1543 }
1544 else {
1545 offset_const = 0.0f;
1546 offset_scale = 0.0f;
1547 offset_clamp = 0.0f;
1548 }
1549 }
1550 else {
1551 if (state->offset_tri)
1552 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
1553 if (state->offset_line)
1554 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
1555 if (state->offset_point)
1556 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
1557 }
1558
1559 switch (state->fill_front) {
1560 case PIPE_POLYGON_MODE_FILL:
1561 dw1 |= GEN6_SF_FRONT_SOLID;
1562 break;
1563 case PIPE_POLYGON_MODE_LINE:
1564 dw1 |= GEN6_SF_FRONT_WIREFRAME;
1565 break;
1566 case PIPE_POLYGON_MODE_POINT:
1567 dw1 |= GEN6_SF_FRONT_POINT;
1568 break;
1569 }
1570
1571 switch (state->fill_back) {
1572 case PIPE_POLYGON_MODE_FILL:
1573 dw1 |= GEN6_SF_BACK_SOLID;
1574 break;
1575 case PIPE_POLYGON_MODE_LINE:
1576 dw1 |= GEN6_SF_BACK_WIREFRAME;
1577 break;
1578 case PIPE_POLYGON_MODE_POINT:
1579 dw1 |= GEN6_SF_BACK_POINT;
1580 break;
1581 }
1582
1583 if (state->front_ccw)
1584 dw1 |= GEN6_SF_WINDING_CCW;
1585
1586 dw2 = 0;
1587
1588 if (state->line_smooth) {
1589 /*
1590 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1591 *
1592 * "This field (Anti-aliasing Enable) must be disabled if any of the
1593 * render targets have integer (UINT or SINT) surface format."
1594 *
1595 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1596 *
1597 * "This field (Hierarchical Depth Buffer Enable) must be disabled
1598 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
1599 *
1600 * TODO We do not check those yet.
1601 */
1602 dw2 |= GEN6_SF_LINE_AA_ENABLE |
1603 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
1604 }
1605
1606 switch (state->cull_face) {
1607 case PIPE_FACE_NONE:
1608 dw2 |= GEN6_SF_CULL_NONE;
1609 break;
1610 case PIPE_FACE_FRONT:
1611 dw2 |= GEN6_SF_CULL_FRONT;
1612 break;
1613 case PIPE_FACE_BACK:
1614 dw2 |= GEN6_SF_CULL_BACK;
1615 break;
1616 case PIPE_FACE_FRONT_AND_BACK:
1617 dw2 |= GEN6_SF_CULL_BOTH;
1618 break;
1619 }
1620
1621 /*
1622 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
1623 * pixels in the minor direction. We have to make the lines slightly
1624 * thicker, 0.5 pixel on both sides, so that they intersect that many
1625 * pixels.
1626 *
1627 * Line width is in U3.7.
1628 */
1629 line_width = (int) ((state->line_width +
1630 (float) state->line_smooth) * 128.0f + 0.5f);
1631 line_width = CLAMP(line_width, 0, 1023);
1632
1633 if (line_width == 128 && !state->line_smooth) {
1634 /* use GIQ rules */
1635 line_width = 0;
1636 }
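/*
 * Worked example of the U3.7 conversion above: a 1.5-pixel smooth line
 * becomes (int) ((1.5 + 1.0) * 128.0 + 0.5) = 320, i.e. 2.5 pixels after
 * the extra 0.5 pixel on each side.  A plain 1.0-pixel aliased line
 * converts to exactly 128 and is then reset to 0, which selects the GIQ
 * rules per the comment above.
 */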
1637
1638 dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1639
1640 if (state->scissor)
1641 dw2 |= GEN6_SF_SCISSOR_ENABLE;
1642
1643 dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
1644 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
1645
1646 if (state->line_last_pixel)
1647 dw3 |= 1 << 31;
1648
1649 if (state->flatshade_first) {
1650 dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
1651 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
1652 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1653 }
1654 else {
1655 dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
1656 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
1657 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
1658 }
1659
1660 if (!state->point_size_per_vertex)
1661 dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
1662
1663 /* in U8.3 */
1664 point_width = (int) (state->point_size * 8.0f + 0.5f);
1665 point_width = CLAMP(point_width, 1, 2047);
1666
1667 dw3 |= point_width;
1668
1669 STATIC_ASSERT(Elements(sf->payload) >= 6);
1670 sf->payload[0] = dw1;
1671 sf->payload[1] = dw2;
1672 sf->payload[2] = dw3;
1673 sf->payload[3] = fui(offset_const);
1674 sf->payload[4] = fui(offset_scale);
1675 sf->payload[5] = fui(offset_clamp);
1676
1677 if (state->multisample) {
1678 sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
1679
1680 /*
1681 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1682 *
1683 * "Software must not program a value of 0.0 when running in
1684 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
1685 * when multisampling rasterization is enabled."
1686 */
1687 if (!line_width) {
1688 line_width = 128; /* 1.0f */
1689
1690 sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
1691 }
1692 }
1693 else {
1694 sf->dw_msaa = 0;
1695 }
1696 }
1697
1698 /**
1699 * Fill in DW2 to DW7 of 3DSTATE_SF.
1700 */
1701 void
1702 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
1703 const struct ilo_rasterizer_sf *sf,
1704 int num_samples,
1705 enum pipe_format depth_format,
1706 uint32_t *payload, unsigned payload_len)
1707 {
1708 assert(payload_len == Elements(sf->payload));
1709
1710 if (sf) {
1711 memcpy(payload, sf->payload, sizeof(sf->payload));
1712
1713 if (num_samples > 1)
1714 payload[1] |= sf->dw_msaa;
1715
1716 if (dev->gen >= ILO_GEN(7)) {
1717 int format;
1718
1719 /* separate stencil */
1720 switch (depth_format) {
1721 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1722 depth_format = PIPE_FORMAT_Z24X8_UNORM;
1723 break;
1724 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1725 depth_format = PIPE_FORMAT_Z32_FLOAT;
1726 break;
1727 case PIPE_FORMAT_S8_UINT:
1728 depth_format = PIPE_FORMAT_NONE;
1729 break;
1730 default:
1731 break;
1732 }
1733
1734 format = gen6_translate_depth_format(depth_format);
1735 /* FLOAT surface is assumed when there is no depth buffer */
1736 if (format < 0)
1737 format = BRW_DEPTHFORMAT_D32_FLOAT;
1738
1739 payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
1740 }
1741 }
1742 else {
1743 payload[0] = 0;
1744 payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
1745 payload[2] = 0;
1746 payload[3] = 0;
1747 payload[4] = 0;
1748 payload[5] = 0;
1749 }
1750 }
1751
1752 /**
1753 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
1754 */
1755 void
1756 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
1757 const struct pipe_rasterizer_state *rasterizer,
1758 const struct ilo_shader *fs,
1759 const struct ilo_shader *last_sh,
1760 uint32_t *dw, int num_dwords)
1761 {
1762 uint32_t point_sprite_enable, const_interp_enable;
1763 uint16_t attr_ctrl[PIPE_MAX_SHADER_INPUTS];
1764 int vue_offset, vue_len;
1765 int dst, max_src, i;
1766
1767 ILO_GPE_VALID_GEN(dev, 6, 7);
1768 assert(num_dwords == 13);
1769
1770 if (!fs) {
1771 if (dev->gen >= ILO_GEN(7))
1772 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
1773 else
1774 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
1775
1776 for (i = 1; i < num_dwords; i++)
1777 dw[i] = 0;
1778
1779 return;
1780 }
1781
1782 if (last_sh) {
1783 /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
1784 assert(last_sh->out.semantic_names[0] == TGSI_SEMANTIC_PSIZE);
1785 assert(last_sh->out.semantic_names[1] == TGSI_SEMANTIC_POSITION);
1786 vue_offset = 2;
1787 vue_len = last_sh->out.count - vue_offset;
1788 }
1789 else {
1790 vue_offset = 0;
1791 vue_len = fs->in.count;
1792 }
1793
1794 point_sprite_enable = 0;
1795 const_interp_enable = 0;
1796 max_src = (last_sh) ? 0 : fs->in.count - 1;
1797
1798 for (dst = 0; dst < fs->in.count; dst++) {
1799 const int semantic = fs->in.semantic_names[dst];
1800 const int index = fs->in.semantic_indices[dst];
1801 const int interp = fs->in.interp[dst];
1802 int src;
1803 uint16_t ctrl;
1804
1805 /*
1806 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
1807 *
1808 * "This field (Point Sprite Texture Coordinate Enable) must be
1809 * programmed to 0 when non-point primitives are rendered."
1810 *
1811 * TODO We do not check that yet.
1812 */
1813 if (semantic == TGSI_SEMANTIC_GENERIC &&
1814 (rasterizer->sprite_coord_enable & (1 << index)))
1815 point_sprite_enable |= 1 << dst;
1816
1817 if (interp == TGSI_INTERPOLATE_CONSTANT ||
1818 (interp == TGSI_INTERPOLATE_COLOR && rasterizer->flatshade))
1819 const_interp_enable |= 1 << dst;
1820
1821 if (!last_sh) {
1822 attr_ctrl[dst] = 0;
1823 continue;
1824 }
1825
1826 /* find the matching VS/GS OUT for FS IN[i] */
1827 ctrl = 0;
1828 for (src = 0; src < vue_len; src++) {
1829 if (last_sh->out.semantic_names[vue_offset + src] != semantic ||
1830 last_sh->out.semantic_indices[vue_offset + src] != index)
1831 continue;
1832
1833 ctrl = src;
1834
1835 if (semantic == TGSI_SEMANTIC_COLOR && rasterizer->light_twoside &&
1836 src < vue_len - 1) {
1837 const int next = src + 1;
1838
1839 if (last_sh->out.semantic_names[vue_offset + next] ==
1840 TGSI_SEMANTIC_BCOLOR &&
1841 last_sh->out.semantic_indices[vue_offset + next] == index) {
1842 ctrl |= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING <<
1843 ATTRIBUTE_SWIZZLE_SHIFT;
1844 src++;
1845 }
1846 }
1847
1848 break;
1849 }
1850
1851 /* if there is no COLOR, try BCOLOR */
1852 if (src >= vue_len && semantic == TGSI_SEMANTIC_COLOR) {
1853 for (src = 0; src < vue_len; src++) {
1854 if (last_sh->out.semantic_names[vue_offset + src] !=
1855 TGSI_SEMANTIC_BCOLOR ||
1856 last_sh->out.semantic_indices[vue_offset + src] != index)
1857 continue;
1858
1859 ctrl = src;
1860 break;
1861 }
1862 }
1863
1864 if (src < vue_len) {
1865 attr_ctrl[dst] = ctrl;
1866 if (max_src < src)
1867 max_src = src;
1868 }
1869 else {
1870 /*
1871 * The previous shader stage does not output this attribute. The
1872 * value is supposed to be undefined for fs, unless the attribute
1873 * goes through point sprite replacement or the attribute is
1874 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
1875 * attribute is picked.
1876 *
1877 * We should update the fs code and omit the output of
1878 * TGSI_SEMANTIC_POSITION here.
1879 */
1880 attr_ctrl[dst] = 0;
1881 }
1882 }
1883
1884 for (; dst < Elements(attr_ctrl); dst++)
1885 attr_ctrl[dst] = 0;
1886
1887 /* only the first 16 attributes can be remapped */
1888 for (dst = 16; dst < Elements(attr_ctrl); dst++)
1889 assert(attr_ctrl[dst] == 0 || attr_ctrl[dst] == dst);
1890
1891 /*
1892 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1893 *
1894 * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
1895 * 0 indicating no Vertex URB data to be read.
1896 *
1897 * This field should be set to the minimum length required to read the
1898 * maximum source attribute. The maximum source attribute is indicated
1899 * by the maximum value of the enabled Attribute # Source Attribute if
1900 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
1901 * enable is not set.
1902 *
1903 * read_length = ceiling((max_source_attr+1)/2)
1904 *
1905 * [errata] Corruption/Hang possible if length programmed larger than
1906 * recommended"
1907 */
1908 vue_len = max_src + 1;
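   /*
    * Illustrative example of the formula above: if the highest source
    * attribute index (max_src) is 4, vue_len becomes 5 and the Vertex URB
    * Entry Read Length programmed below is (5 + 1) / 2 = 3, i.e. three
    * 256-bit rows, enough to cover attributes 0..4 at two attributes per
    * row.
    */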
1909
1910 assert(fs->in.count <= 32);
1911 assert(vue_offset % 2 == 0);
1912
1913 if (dev->gen >= ILO_GEN(7)) {
1914 dw[0] = fs->in.count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
1915 (vue_len + 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
1916 vue_offset / 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
1917
1918 if (last_sh)
1919 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
1920 }
1921 else {
1922 dw[0] = fs->in.count << GEN6_SF_NUM_OUTPUTS_SHIFT |
1923 (vue_len + 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
1924 vue_offset / 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
1925
1926 if (last_sh)
1927 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
1928 }
1929
1930 switch (rasterizer->sprite_coord_mode) {
1931 case PIPE_SPRITE_COORD_UPPER_LEFT:
1932 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
1933 break;
1934 case PIPE_SPRITE_COORD_LOWER_LEFT:
1935 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
1936 break;
1937 }
1938
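   /*
    * Pack two 16-bit attribute controls per DWord: attribute 2i goes in the
    * low half and attribute 2i+1 in the high half of dw[1 + i].
    */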
1939 for (i = 0; i < 8; i++)
1940 dw[1 + i] = attr_ctrl[2 * i + 1] << 16 | attr_ctrl[2 * i];
1941
1942 dw[9] = point_sprite_enable;
1943 dw[10] = const_interp_enable;
1944
1945 /* WrapShortest enables */
1946 dw[11] = 0;
1947 dw[12] = 0;
1948 }
1949
1950 static void
1951 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1952 const struct ilo_rasterizer_state *rasterizer,
1953 const struct ilo_shader *fs,
1954 const struct ilo_shader *last_sh,
1955 struct ilo_cp *cp)
1956 {
1957 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1958 const uint8_t cmd_len = 20;
1959 uint32_t payload_raster[6], payload_sbe[13];
1960
1961 ILO_GPE_VALID_GEN(dev, 6, 6);
1962
1963 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, &rasterizer->sf,
1964 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1965 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, &rasterizer->state,
1966 fs, last_sh, payload_sbe, Elements(payload_sbe));
1967
1968 ilo_cp_begin(cp, cmd_len);
1969 ilo_cp_write(cp, cmd | (cmd_len - 2));
1970 ilo_cp_write(cp, payload_sbe[0]);
1971 ilo_cp_write_multi(cp, payload_raster, 6);
1972 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1973 ilo_cp_end(cp);
1974 }
1975
1976 static void
1977 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1978 const struct ilo_shader *fs,
1979 int num_samplers,
1980 const struct pipe_rasterizer_state *rasterizer,
1981 bool dual_blend, bool cc_may_kill,
1982 struct ilo_cp *cp)
1983 {
1984 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1985 const uint8_t cmd_len = 9;
1986 const int num_samples = 1;
1987 uint32_t dw2, dw4, dw5, dw6;
1988 int max_threads;
1989
1990 ILO_GPE_VALID_GEN(dev, 6, 6);
1991
1992 /* see brwCreateContext() */
1993 max_threads = (dev->gt == 2) ? 80 : 40;
1994
1995 if (!fs) {
1996 ilo_cp_begin(cp, cmd_len);
1997 ilo_cp_write(cp, cmd | (cmd_len - 2));
1998 ilo_cp_write(cp, 0);
1999 ilo_cp_write(cp, 0);
2000 ilo_cp_write(cp, 0);
2001 ilo_cp_write(cp, 0);
2002 /* honor the valid range even if dispatching is disabled */
2003 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
2004 ilo_cp_write(cp, 0);
2005 ilo_cp_write(cp, 0);
2006 ilo_cp_write(cp, 0);
2007 ilo_cp_end(cp);
2008
2009 return;
2010 }
2011
2012 dw2 = (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
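   /* the sampler count field is in units of 4, e.g. 5 samplers encode as 2 */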
2013 if (false)
2014 dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
2015
2016 dw4 = fs->in.start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
2017 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
2018 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
2019
2020 if (true) {
2021 dw4 |= GEN6_WM_STATISTICS_ENABLE;
2022 }
2023 else {
2024 /*
2025 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
2026 *
2027 * "This bit (Statistics Enable) must be disabled if either of these
2028 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
2029 * Resolve Enable or Depth Buffer Resolve Enable."
2030 */
2031 dw4 |= GEN6_WM_DEPTH_CLEAR;
2032 dw4 |= GEN6_WM_DEPTH_RESOLVE;
2033 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
2034 }
2035
2036 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT |
2037 GEN6_WM_LINE_AA_WIDTH_2_0;
2038
2039 /*
2040 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
2041 *
2042 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
2043 * PS kernel or color calculator has the ability to kill (discard)
2044 * pixels or samples, other than due to depth or stencil testing.
2045 * This bit is required to be ENABLED in the following situations:
2046 *
2047 * The API pixel shader program contains "killpix" or "discard"
2048 * instructions, or other code in the pixel shader kernel that can
2049 * cause the final pixel mask to differ from the pixel mask received
2050 * on dispatch.
2051 *
2052 * A sampler with chroma key enabled with kill pixel mode is used by
2053 * the pixel shader.
2054 *
2055 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
2056 * enabled.
2057 *
2058 * The pixel shader kernel generates and outputs oMask.
2059 *
2060 * Note: As ClipDistance clipping is fully supported in hardware and
2061 * therefore not via PS instructions, there should be no need to
2062 * ENABLE this bit due to ClipDistance clipping."
2063 */
2064 if (fs->has_kill || cc_may_kill)
2065 dw5 |= GEN6_WM_KILL_ENABLE;
2066
2067 /*
2068 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
2069 *
2070 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
2071 * field must be set to disabled."
2072 *
2073 * TODO This is not checked yet.
2074 */
2075 if (fs->out.has_pos)
2076 dw5 |= GEN6_WM_COMPUTED_DEPTH;
2077
2078 if (fs->in.has_pos)
2079 dw5 |= GEN6_WM_USES_SOURCE_DEPTH | GEN6_WM_USES_SOURCE_W;
2080
2081 /*
2082 * Set this bit if
2083 *
2084 * a) fs writes colors and color is not masked, or
2085 * b) fs writes depth, or
2086 * c) fs or cc kills
2087 */
2088 if (true)
2089 dw5 |= GEN6_WM_DISPATCH_ENABLE;
2090
2091 /* same value as in 3DSTATE_SF */
2092 if (rasterizer->line_smooth)
2093 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
2094
2095 if (rasterizer->poly_stipple_enable)
2096 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
2097 if (rasterizer->line_stipple_enable)
2098 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
2099
2100 if (dual_blend)
2101 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
2102
2103 if (fs->dispatch_16)
2104 dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
2105 else
2106 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
2107
2108 dw6 = fs->in.count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
2109 GEN6_WM_POSOFFSET_NONE |
2110 GEN6_WM_POSITION_ZW_PIXEL |
2111 fs->in.barycentric_interpolation_mode <<
2112 GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
2113
2114 if (rasterizer->bottom_edge_rule)
2115 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
2116
2117 if (num_samples > 1) {
2118 if (rasterizer->multisample)
2119 dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
2120 else
2121 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
2122 dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
2123 }
2124 else {
2125 dw6 |= GEN6_WM_MSRAST_OFF_PIXEL |
2126 GEN6_WM_MSDISPMODE_PERSAMPLE;
2127 }
2128
2129 ilo_cp_begin(cp, cmd_len);
2130 ilo_cp_write(cp, cmd | (cmd_len - 2));
2131 ilo_cp_write(cp, fs->cache_offset);
2132 ilo_cp_write(cp, dw2);
2133 ilo_cp_write(cp, 0); /* scratch */
2134 ilo_cp_write(cp, dw4);
2135 ilo_cp_write(cp, dw5);
2136 ilo_cp_write(cp, dw6);
2137 ilo_cp_write(cp, 0); /* kernel 1 */
2138 ilo_cp_write(cp, 0); /* kernel 2 */
2139 ilo_cp_end(cp);
2140 }
2141
2142 static unsigned
2143 gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
2144 const uint32_t *bufs, const int *sizes,
2145 int num_bufs, int max_read_length,
2146 uint32_t *dw, int num_dwords)
2147 {
2148 unsigned enabled = 0x0;
2149 int total_read_length, i;
2150
2151 assert(num_dwords == 4);
2152
2153 total_read_length = 0;
2154 for (i = 0; i < 4; i++) {
2155 if (i < num_bufs && sizes[i]) {
2156 /* in 256-bit units minus one */
2157 const int read_len = (sizes[i] + 31) / 32 - 1;
2158
2159 assert(bufs[i] % 32 == 0);
2160 assert(read_len < 32);
2161
2162 enabled |= 1 << i;
2163 dw[i] = bufs[i] | read_len;
2164
2165 total_read_length += read_len + 1;
2166 }
2167 else {
2168 dw[i] = 0;
2169 }
2170 }
2171
2172 assert(total_read_length <= max_read_length);
2173
2174 return enabled;
2175 }
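/*
 * An illustrative example of the encoding done above: a 256-byte constant
 * buffer yields
 *
 *   read_len = (256 + 31) / 32 - 1 = 7
 *
 * i.e. eight 256-bit units, and contributes 8 to total_read_length, which
 * the callers check against the per-stage limit (32 for VS, 64 for GS/PS).
 */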
2176
2177 static void
2178 gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
2179 const uint32_t *bufs, const int *sizes,
2180 int num_bufs,
2181 struct ilo_cp *cp)
2182 {
2183 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
2184 const uint8_t cmd_len = 5;
2185 uint32_t buf_dw[4], buf_enabled;
2186
2187 ILO_GPE_VALID_GEN(dev, 6, 6);
2188 assert(num_bufs <= 4);
2189
2190 /*
2191 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
2192 *
2193 * "The sum of all four read length fields (each incremented to
2194 * represent the actual read length) must be less than or equal to 32"
2195 */
2196 buf_enabled = gen6_fill_3dstate_constant(dev,
2197 bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
2198
2199 ilo_cp_begin(cp, cmd_len);
2200 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2201 ilo_cp_write(cp, buf_dw[0]);
2202 ilo_cp_write(cp, buf_dw[1]);
2203 ilo_cp_write(cp, buf_dw[2]);
2204 ilo_cp_write(cp, buf_dw[3]);
2205 ilo_cp_end(cp);
2206 }
2207
2208 static void
2209 gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
2210 const uint32_t *bufs, const int *sizes,
2211 int num_bufs,
2212 struct ilo_cp *cp)
2213 {
2214 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
2215 const uint8_t cmd_len = 5;
2216 uint32_t buf_dw[4], buf_enabled;
2217
2218 ILO_GPE_VALID_GEN(dev, 6, 6);
2219 assert(num_bufs <= 4);
2220
2221 /*
2222 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
2223 *
2224 * "The sum of all four read length fields (each incremented to
2225 * represent the actual read length) must be less than or equal to 64"
2226 */
2227 buf_enabled = gen6_fill_3dstate_constant(dev,
2228 bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
2229
2230 ilo_cp_begin(cp, cmd_len);
2231 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2232 ilo_cp_write(cp, buf_dw[0]);
2233 ilo_cp_write(cp, buf_dw[1]);
2234 ilo_cp_write(cp, buf_dw[2]);
2235 ilo_cp_write(cp, buf_dw[3]);
2236 ilo_cp_end(cp);
2237 }
2238
2239 static void
2240 gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
2241 const uint32_t *bufs, const int *sizes,
2242 int num_bufs,
2243 struct ilo_cp *cp)
2244 {
2245 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
2246 const uint8_t cmd_len = 5;
2247 uint32_t buf_dw[4], buf_enabled;
2248
2249 ILO_GPE_VALID_GEN(dev, 6, 6);
2250 assert(num_bufs <= 4);
2251
2252 /*
2253 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
2254 *
2255 * "The sum of all four read length fields (each incremented to
2256 * represent the actual read length) must be less than or equal to 64"
2257 */
2258 buf_enabled = gen6_fill_3dstate_constant(dev,
2259 bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
2260
2261 ilo_cp_begin(cp, cmd_len);
2262 ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
2263 ilo_cp_write(cp, buf_dw[0]);
2264 ilo_cp_write(cp, buf_dw[1]);
2265 ilo_cp_write(cp, buf_dw[2]);
2266 ilo_cp_write(cp, buf_dw[3]);
2267 ilo_cp_end(cp);
2268 }
2269
2270 static void
2271 gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
2272 unsigned sample_mask,
2273 struct ilo_cp *cp)
2274 {
2275 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
2276 const uint8_t cmd_len = 2;
2277 const unsigned valid_mask = 0xf;
2278
2279 ILO_GPE_VALID_GEN(dev, 6, 6);
2280
2281 sample_mask &= valid_mask;
2282
2283 ilo_cp_begin(cp, cmd_len);
2284 ilo_cp_write(cp, cmd | (cmd_len - 2));
2285 ilo_cp_write(cp, sample_mask);
2286 ilo_cp_end(cp);
2287 }
2288
2289 static void
2290 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
2291 unsigned x, unsigned y,
2292 unsigned width, unsigned height,
2293 struct ilo_cp *cp)
2294 {
2295 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
2296 const uint8_t cmd_len = 4;
2297 unsigned xmax = x + width - 1;
2298 unsigned ymax = y + height - 1;
2299 int rect_limit;
2300
2301 ILO_GPE_VALID_GEN(dev, 6, 7);
2302
2303 if (dev->gen >= ILO_GEN(7)) {
2304 rect_limit = 16383;
2305 }
2306 else {
2307 /*
2308 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
2309 *
2310 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
2311 * must be an even number"
2312 */
2313 assert(y % 2 == 0);
2314
2315 rect_limit = 8191;
2316 }
2317
2318 if (x > rect_limit) x = rect_limit;
2319 if (y > rect_limit) y = rect_limit;
2320 if (xmax > rect_limit) xmax = rect_limit;
2321 if (ymax > rect_limit) ymax = rect_limit;
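   /*
    * For example, a 4000x3000 drawing rectangle at the origin already fits
    * within the GEN6 limit of 8191; anything extending past the limit is
    * clamped here rather than rejected.
    */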
2322
2323 ilo_cp_begin(cp, cmd_len);
2324 ilo_cp_write(cp, cmd | (cmd_len - 2));
2325 ilo_cp_write(cp, y << 16 | x);
2326 ilo_cp_write(cp, ymax << 16 | xmax);
2327
2328 /*
2329     * There is no need to set the origin; the field exists only to support
2330     * front-buffer rendering.
2331 */
2332 ilo_cp_write(cp, 0);
2333
2334 ilo_cp_end(cp);
2335 }
2336
2337 static int
2338 gen6_get_depth_buffer_format(const struct ilo_dev_info *dev,
2339 enum pipe_format format,
2340 bool hiz,
2341 bool separate_stencil,
2342 bool *has_depth,
2343 bool *has_stencil)
2344 {
2345 int depth_format;
2346
2347 ILO_GPE_VALID_GEN(dev, 6, 7);
2348
2349 *has_depth = true;
2350 *has_stencil = false;
2351
2352 /*
2353 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2354 *
2355 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
2356 * Surface Format of the depth buffer cannot be
2357 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2358 * requires the separate stencil buffer."
2359 *
2360 * From the Ironlake PRM, volume 2 part 1, page 330:
2361 *
2362 * "If this field (Separate Stencil Buffer Enable) is disabled, the
2363 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
2364 *
2365  * There is no similar restriction for GEN6, but when D24_UNORM_X8_UINT
2366  * is used, the depth values written by the fragment shader will read
2367  * back differently.
2368 *
2369 * As for GEN7+, separate_stencil_buffer is always true.
2370 */
2371 switch (format) {
2372 case PIPE_FORMAT_Z16_UNORM:
2373 depth_format = BRW_DEPTHFORMAT_D16_UNORM;
2374 break;
2375 case PIPE_FORMAT_Z32_FLOAT:
2376 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2377 break;
2378 case PIPE_FORMAT_Z24X8_UNORM:
2379 depth_format = (separate_stencil) ?
2380 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2381 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2382 break;
2383 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2384 depth_format = (separate_stencil) ?
2385 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
2386 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
2387 *has_stencil = true;
2388 break;
2389 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
2390 depth_format = (separate_stencil) ?
2391 BRW_DEPTHFORMAT_D32_FLOAT :
2392 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
2393 *has_stencil = true;
2394 break;
2395 case PIPE_FORMAT_S8_UINT:
2396 if (separate_stencil) {
2397 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2398 *has_depth = false;
2399 *has_stencil = true;
2400 break;
2401 }
2402 /* fall through */
2403 default:
2404 assert(!"unsupported depth/stencil format");
2405 depth_format = BRW_DEPTHFORMAT_D32_FLOAT;
2406 *has_depth = false;
2407 *has_stencil = false;
2408 break;
2409 }
2410
2411 return depth_format;
2412 }
2413
2414 static void
2415 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2416 const struct pipe_surface *surface,
2417 struct ilo_cp *cp)
2418 {
2419 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2420 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
2421 const uint8_t cmd_len = 7;
2422 const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
2423 const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
2424 const bool hiz = false;
2425 struct ilo_texture *tex;
2426 uint32_t dw1, dw3, dw4, dw6;
2427 uint32_t slice_offset, x_offset, y_offset;
2428 int surface_type, depth_format;
2429 unsigned lod, first_layer, num_layers;
2430 unsigned width, height, depth;
2431 bool separate_stencil, has_depth, has_stencil;
2432
2433 ILO_GPE_VALID_GEN(dev, 6, 7);
2434
2435 if (dev->gen >= ILO_GEN(7)) {
2436 separate_stencil = true;
2437 }
2438 else {
2439 /*
2440 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2441 *
2442 * "This field (Separate Stencil Buffer Enable) must be set to the
2443 * same value (enabled or disabled) as Hierarchical Depth Buffer
2444 * Enable."
2445 */
2446 separate_stencil = hiz;
2447 }
2448
2449 if (surface) {
2450 depth_format = gen6_get_depth_buffer_format(dev,
2451 surface->format, hiz, separate_stencil, &has_depth, &has_stencil);
2452 }
2453 else {
2454 has_depth = false;
2455 has_stencil = false;
2456 }
2457
2458 if (!has_depth && !has_stencil) {
2459 dw1 = BRW_SURFACE_NULL << 29 |
2460 BRW_DEPTHFORMAT_D32_FLOAT << 18;
2461
2462 /* Y-tiled */
2463 if (dev->gen == ILO_GEN(6)) {
2464 dw1 |= 1 << 27 |
2465 1 << 26;
2466 }
2467
2468 ilo_cp_begin(cp, cmd_len);
2469 ilo_cp_write(cp, cmd | (cmd_len - 2));
2470 ilo_cp_write(cp, dw1);
2471 ilo_cp_write(cp, 0);
2472 ilo_cp_write(cp, 0);
2473 ilo_cp_write(cp, 0);
2474 ilo_cp_write(cp, 0);
2475 ilo_cp_write(cp, 0);
2476 ilo_cp_end(cp);
2477
2478 return;
2479 }
2480
2481 tex = ilo_texture(surface->texture);
2482
2483 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
2484 lod = surface->u.tex.level;
2485 first_layer = surface->u.tex.first_layer;
2486 num_layers = surface->u.tex.last_layer - first_layer + 1;
2487
2488 width = tex->base.width0;
2489 height = tex->base.height0;
2490 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
2491 tex->base.depth0 : num_layers;
2492
2493 if (surface_type == BRW_SURFACE_CUBE) {
2494 /*
2495 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
2496 *
2497 * "For Other Surfaces (Cube Surfaces):
2498 * This field (Minimum Array Element) is ignored."
2499 *
2500 * "For Other Surfaces (Cube Surfaces):
2501 * This field (Render Target View Extent) is ignored."
2502 *
2503 * As such, we cannot set first_layer and num_layers on cube surfaces.
2504 * To work around that, treat it as a 2D surface.
2505 */
2506 surface_type = BRW_SURFACE_2D;
2507 }
2508
2509 /*
2510 * we always treat the resource as non-mipmapped and set the slice/x/y
2511 * offsets manually
2512 */
2513 if (true) {
2514 /* no layered rendering */
2515 assert(num_layers == 1);
2516
2517 slice_offset = ilo_texture_get_slice_offset(tex,
2518 lod, first_layer, &x_offset, &y_offset);
2519
2520 /*
2521 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
2522 *
2523 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
2524 * Coordinate Offset X) must be zero to ensure correct alignment"
2525 *
2526      * XXX We skip the check on GEN6, which appears to work.  Eventually we
2527      * should make sure such misaligned offsets never happen in the first place.
2528 */
2529 if (dev->gen >= ILO_GEN(7)) {
2530 assert((x_offset & 7) == 0 && (y_offset & 7) == 0);
2531 x_offset &= ~7;
2532 y_offset &= ~7;
2533 }
2534
2535 /* the size of the layer */
2536 width = u_minify(width, lod);
2537 height = u_minify(height, lod);
2538 if (surface_type == BRW_SURFACE_3D)
2539 depth = u_minify(depth, lod);
2540 else
2541 depth = 1;
2542
2543 lod = 0;
2544 first_layer = 0;
2545
2546 width += x_offset;
2547 height += y_offset;
2548
2549 /* we have to treat them as 2D surfaces */
2550 if (surface_type == BRW_SURFACE_CUBE) {
2551 assert(tex->base.width0 == tex->base.height0);
2552 /* we will set slice_offset to point to the single face */
2553 surface_type = BRW_SURFACE_2D;
2554 }
2555 else if (surface_type == BRW_SURFACE_1D && height > 1) {
2556 assert(tex->base.height0 == 1);
2557 surface_type = BRW_SURFACE_2D;
2558 }
2559 }
2560 else {
2561 slice_offset = 0;
2562 x_offset = 0;
2563 y_offset = 0;
2564 }
2565
2566 /* required for GEN6+ */
2567 assert(tex->tiling == INTEL_TILING_Y);
2568 assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
2569 tex->bo_stride % 128 == 0);
2570 assert(width <= tex->bo_stride);
2571
2572 switch (surface_type) {
2573 case BRW_SURFACE_1D:
2574 assert(width <= max_2d_size && height == 1 &&
2575 depth <= max_array_size);
2576 assert(first_layer < max_array_size - 1 &&
2577 num_layers <= max_array_size);
2578 break;
2579 case BRW_SURFACE_2D:
2580 assert(width <= max_2d_size && height <= max_2d_size &&
2581 depth <= max_array_size);
2582 assert(first_layer < max_array_size - 1 &&
2583 num_layers <= max_array_size);
2584 break;
2585 case BRW_SURFACE_3D:
2586 assert(width <= 2048 && height <= 2048 && depth <= 2048);
2587 assert(first_layer < 2048 && num_layers <= max_array_size);
2588 assert(x_offset == 0 && y_offset == 0);
2589 break;
2590 case BRW_SURFACE_CUBE:
2591 assert(width <= max_2d_size && height <= max_2d_size && depth == 1);
2592 assert(first_layer == 0 && num_layers == 1);
2593 assert(width == height);
2594 assert(x_offset == 0 && y_offset == 0);
2595 break;
2596 default:
2597 assert(!"unexpected depth surface type");
2598 break;
2599 }
2600
2601 dw1 = surface_type << 29 |
2602 depth_format << 18 |
2603 (tex->bo_stride - 1);
2604
2605 if (dev->gen >= ILO_GEN(7)) {
2606 if (has_depth)
2607 dw1 |= 1 << 28;
2608
2609 if (has_stencil)
2610 dw1 |= 1 << 27;
2611
2612 if (hiz)
2613 dw1 |= 1 << 22;
2614
2615 dw3 = (height - 1) << 18 |
2616 (width - 1) << 4 |
2617 lod;
2618
2619 dw4 = (depth - 1) << 21 |
2620 first_layer << 10;
2621
2622 dw6 = (num_layers - 1) << 21;
2623 }
2624 else {
2625 dw1 |= (tex->tiling != INTEL_TILING_NONE) << 27 |
2626 (tex->tiling == INTEL_TILING_Y) << 26;
2627
2628 if (hiz) {
2629 dw1 |= 1 << 22 |
2630 1 << 21;
2631 }
2632
2633 dw3 = (height - 1) << 19 |
2634 (width - 1) << 6 |
2635 lod << 2 |
2636 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
2637
2638 dw4 = (depth - 1) << 21 |
2639 first_layer << 10 |
2640 (num_layers - 1) << 1;
2641
2642 dw6 = 0;
2643 }
2644
2645 ilo_cp_begin(cp, cmd_len);
2646 ilo_cp_write(cp, cmd | (cmd_len - 2));
2647 ilo_cp_write(cp, dw1);
2648
2649 if (has_depth) {
2650 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2651 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2652 }
2653 else {
2654 ilo_cp_write(cp, 0);
2655 }
2656
2657 ilo_cp_write(cp, dw3);
2658 ilo_cp_write(cp, dw4);
2659 ilo_cp_write(cp, y_offset << 16 | x_offset);
2660 ilo_cp_write(cp, dw6);
2661 ilo_cp_end(cp);
2662 }
2663
2664 static void
2665 gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
2666 int x_offset, int y_offset,
2667 struct ilo_cp *cp)
2668 {
2669 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
2670 const uint8_t cmd_len = 2;
2671
2672 ILO_GPE_VALID_GEN(dev, 6, 7);
2673 assert(x_offset >= 0 && x_offset <= 31);
2674 assert(y_offset >= 0 && y_offset <= 31);
2675
2676 ilo_cp_begin(cp, cmd_len);
2677 ilo_cp_write(cp, cmd | (cmd_len - 2));
2678 ilo_cp_write(cp, x_offset << 8 | y_offset);
2679 ilo_cp_end(cp);
2680 }
2681
2682 static void
2683 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
2684 const struct pipe_poly_stipple *pattern,
2685 struct ilo_cp *cp)
2686 {
2687 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
2688 const uint8_t cmd_len = 33;
2689 int i;
2690
2691 ILO_GPE_VALID_GEN(dev, 6, 7);
2692 assert(Elements(pattern->stipple) == 32);
2693
2694 ilo_cp_begin(cp, cmd_len);
2695 ilo_cp_write(cp, cmd | (cmd_len - 2));
2696 for (i = 0; i < 32; i++)
2697 ilo_cp_write(cp, pattern->stipple[i]);
2698 ilo_cp_end(cp);
2699 }
2700
2701 static void
2702 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
2703 unsigned pattern, unsigned factor,
2704 struct ilo_cp *cp)
2705 {
2706 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
2707 const uint8_t cmd_len = 3;
2708 unsigned inverse;
2709
2710 ILO_GPE_VALID_GEN(dev, 6, 7);
2711 assert((pattern & 0xffff) == pattern);
2712 assert(factor >= 1 && factor <= 256);
2713
2714 ilo_cp_begin(cp, cmd_len);
2715 ilo_cp_write(cp, cmd | (cmd_len - 2));
2716 ilo_cp_write(cp, pattern);
2717
2718 if (dev->gen >= ILO_GEN(7)) {
2719 /* in U1.16 */
2720 inverse = (unsigned) (65536.0f / factor);
2721 ilo_cp_write(cp, inverse << 15 | factor);
2722 }
2723 else {
2724 /* in U1.13 */
2725 inverse = (unsigned) (8192.0f / factor);
2726 ilo_cp_write(cp, inverse << 16 | factor);
2727 }
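   /*
    * Illustrative example: for factor = 3 on GEN6, inverse is
    * (unsigned) (8192.0f / 3) = 2730, a U1.13 approximation of 1/3, placed
    * in the upper half of the DWord while the repeat count stays in the
    * lower half.
    */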
2728
2729 ilo_cp_end(cp);
2730 }
2731
2732 static void
2733 gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
2734 struct ilo_cp *cp)
2735 {
2736 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
2737 const uint8_t cmd_len = 3;
2738
2739 ILO_GPE_VALID_GEN(dev, 6, 7);
2740
2741 ilo_cp_begin(cp, cmd_len);
2742 ilo_cp_write(cp, cmd | (cmd_len - 2));
2743 ilo_cp_write(cp, 0 << 16 | 0);
2744 ilo_cp_write(cp, 0 << 16 | 0);
2745 ilo_cp_end(cp);
2746 }
2747
2748 static void
2749 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
2750 int index, unsigned svbi,
2751 unsigned max_svbi,
2752 bool load_vertex_count,
2753 struct ilo_cp *cp)
2754 {
2755 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
2756 const uint8_t cmd_len = 4;
2757 uint32_t dw1;
2758
2759 ILO_GPE_VALID_GEN(dev, 6, 6);
2760 assert(index >= 0 && index < 4);
2761
2762 dw1 = index << SVB_INDEX_SHIFT;
2763 if (load_vertex_count)
2764 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
2765
2766 ilo_cp_begin(cp, cmd_len);
2767 ilo_cp_write(cp, cmd | (cmd_len - 2));
2768 ilo_cp_write(cp, dw1);
2769 ilo_cp_write(cp, svbi);
2770 ilo_cp_write(cp, max_svbi);
2771 ilo_cp_end(cp);
2772 }
2773
2774 static void
2775 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
2776 int num_samples,
2777 const uint32_t *packed_sample_pos,
2778 bool pixel_location_center,
2779 struct ilo_cp *cp)
2780 {
2781 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
2782 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
2783 uint32_t dw1, dw2, dw3;
2784
2785 ILO_GPE_VALID_GEN(dev, 6, 7);
2786
2787 dw1 = (pixel_location_center) ?
2788 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
2789
2790 switch (num_samples) {
2791 case 0:
2792 case 1:
2793 dw1 |= MS_NUMSAMPLES_1;
2794 dw2 = 0;
2795 dw3 = 0;
2796 break;
2797 case 4:
2798 dw1 |= MS_NUMSAMPLES_4;
2799 dw2 = packed_sample_pos[0];
2800 dw3 = 0;
2801 break;
2802 case 8:
2803 assert(dev->gen >= ILO_GEN(7));
2804 dw1 |= MS_NUMSAMPLES_8;
2805 dw2 = packed_sample_pos[0];
2806 dw3 = packed_sample_pos[1];
2807 break;
2808 default:
2809 assert(!"unsupported sample count");
2810 dw1 |= MS_NUMSAMPLES_1;
2811 dw2 = 0;
2812 dw3 = 0;
2813 break;
2814 }
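   /*
    * Note that packed_sample_pos is expected to arrive already in the
    * hardware layout (presumably 4-bit X/Y offsets in 1/16-pixel units,
    * packed by the caller); the DWords are copied through unchanged below.
    */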
2815
2816 ilo_cp_begin(cp, cmd_len);
2817 ilo_cp_write(cp, cmd | (cmd_len - 2));
2818 ilo_cp_write(cp, dw1);
2819 ilo_cp_write(cp, dw2);
2820 if (dev->gen >= ILO_GEN(7))
2821 ilo_cp_write(cp, dw3);
2822 ilo_cp_end(cp);
2823 }
2824
2825 static void
2826 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
2827 const struct pipe_surface *surface,
2828 struct ilo_cp *cp)
2829 {
2830 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2831 ILO_GPE_CMD(0x3, 0x0, 0x06) :
2832 ILO_GPE_CMD(0x3, 0x1, 0x0e);
2833 const uint8_t cmd_len = 3;
2834 struct ilo_texture *tex;
2835 uint32_t slice_offset, x_offset, y_offset;
2836 int pitch;
2837
2838 ILO_GPE_VALID_GEN(dev, 6, 7);
2839
2840 tex = (surface) ? ilo_texture(surface->texture) : NULL;
2841 if (tex && surface->format != PIPE_FORMAT_S8_UINT)
2842 tex = tex->separate_s8;
2843
2844 if (!tex) {
2845 ilo_cp_begin(cp, cmd_len);
2846 ilo_cp_write(cp, cmd | (cmd_len - 2));
2847 ilo_cp_write(cp, 0);
2848 ilo_cp_write(cp, 0);
2849 ilo_cp_end(cp);
2850
2851 return;
2852 }
2853
2854 if (true) {
2855 slice_offset = ilo_texture_get_slice_offset(tex,
2856 surface->u.tex.level, surface->u.tex.first_layer,
2857 &x_offset, &y_offset);
2858 /* XXX X/Y offsets inherit from 3DSTATE_DEPTH_BUFFER */
2859 }
2860 else {
2861 slice_offset = 0;
2862 x_offset = 0;
2863 y_offset = 0;
2864 }
2865
2866 /*
2867 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
2868 *
2869 * "The pitch must be set to 2x the value computed based on width, as
2870 * the stencil buffer is stored with two rows interleaved."
2871 *
2872 * According to the classic driver, we need to do the same for GEN7+ even
2873 * though the Ivy Bridge PRM does not say anything about it.
2874 */
2875 pitch = 2 * tex->bo_stride;
2876 assert(pitch > 0 && pitch < 128 * 1024 && pitch % 128 == 0);
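   /*
    * Illustrative example: a stencil buffer with a 512-byte bo_stride is
    * programmed with a pitch of 1024 bytes, written below as
    * pitch - 1 = 1023.
    */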
2877
2878 ilo_cp_begin(cp, cmd_len);
2879 ilo_cp_write(cp, cmd | (cmd_len - 2));
2880 ilo_cp_write(cp, pitch - 1);
2881 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2882 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2883 ilo_cp_end(cp);
2884 }
2885
2886 static void
2887 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
2888 const struct pipe_surface *surface,
2889 struct ilo_cp *cp)
2890 {
2891 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
2892 ILO_GPE_CMD(0x3, 0x0, 0x07) :
2893 ILO_GPE_CMD(0x3, 0x1, 0x0f);
2894 const uint8_t cmd_len = 3;
2895 const bool hiz = false;
2896 struct ilo_texture *tex;
2897 uint32_t slice_offset;
2898
2899 ILO_GPE_VALID_GEN(dev, 6, 7);
2900
2901 if (!surface || !hiz) {
2902 ilo_cp_begin(cp, cmd_len);
2903 ilo_cp_write(cp, cmd | (cmd_len - 2));
2904 ilo_cp_write(cp, 0);
2905 ilo_cp_write(cp, 0);
2906 ilo_cp_end(cp);
2907
2908 return;
2909 }
2910
2911 tex = ilo_texture(surface->texture);
2912
2913 /* TODO */
2914 slice_offset = 0;
2915
2916 assert(tex->bo_stride > 0 && tex->bo_stride < 128 * 1024 &&
2917 tex->bo_stride % 128 == 0);
2918
2919 ilo_cp_begin(cp, cmd_len);
2920 ilo_cp_write(cp, cmd | (cmd_len - 2));
2921 ilo_cp_write(cp, tex->bo_stride - 1);
2922 ilo_cp_write_bo(cp, slice_offset, tex->bo,
2923 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
2924 ilo_cp_end(cp);
2925 }
2926
2927 static void
2928 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
2929 uint32_t clear_val,
2930 struct ilo_cp *cp)
2931 {
2932 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
2933 const uint8_t cmd_len = 2;
2934
2935 ILO_GPE_VALID_GEN(dev, 6, 6);
2936
2937 ilo_cp_begin(cp, cmd_len);
2938 ilo_cp_write(cp, cmd | (cmd_len - 2) |
2939 GEN5_DEPTH_CLEAR_VALID);
2940 ilo_cp_write(cp, clear_val);
2941 ilo_cp_end(cp);
2942 }
2943
2944 static void
2945 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
2946 uint32_t dw1,
2947 struct intel_bo *bo, uint32_t bo_offset,
2948 bool write_qword,
2949 struct ilo_cp *cp)
2950 {
2951 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
2952 const uint8_t cmd_len = (write_qword) ? 5 : 4;
2953 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
2954 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
2955
2956 ILO_GPE_VALID_GEN(dev, 6, 7);
2957
2958 if (dw1 & PIPE_CONTROL_CS_STALL) {
2959 /*
2960 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2961 *
2962 * "1 of the following must also be set (when CS stall is set):
2963 *
2964 * * Depth Cache Flush Enable ([0] of DW1)
2965 * * Stall at Pixel Scoreboard ([1] of DW1)
2966 * * Depth Stall ([13] of DW1)
2967 * * Post-Sync Operation ([13] of DW1)
2968 * * Render Target Cache Flush Enable ([12] of DW1)
2969 * * Notify Enable ([8] of DW1)"
2970 *
2971 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
2972 *
2973 * "One of the following must also be set (when CS stall is set):
2974 *
2975 * * Render Target Cache Flush Enable ([12] of DW1)
2976 * * Depth Cache Flush Enable ([0] of DW1)
2977 * * Stall at Pixel Scoreboard ([1] of DW1)
2978 * * Depth Stall ([13] of DW1)
2979 * * Post-Sync Operation ([13] of DW1)"
2980 */
2981 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
2982 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
2983 PIPE_CONTROL_STALL_AT_SCOREBOARD |
2984 PIPE_CONTROL_DEPTH_STALL;
2985
2986 /* post-sync op */
2987 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
2988 PIPE_CONTROL_WRITE_DEPTH_COUNT |
2989 PIPE_CONTROL_WRITE_TIMESTAMP;
2990
2991 if (dev->gen == ILO_GEN(6))
2992 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
2993
2994 assert(dw1 & bit_test);
2995 }
2996
2997 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
2998 /*
2999 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
3000 *
3001 * "Following bits must be clear (when Depth Stall is set):
3002 *
3003 * * Render Target Cache Flush Enable ([12] of DW1)
3004 * * Depth Cache Flush Enable ([0] of DW1)"
3005 */
3006 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
3007 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
3008 }
3009
3010 ilo_cp_begin(cp, cmd_len);
3011 ilo_cp_write(cp, cmd | (cmd_len - 2));
3012 ilo_cp_write(cp, dw1);
3013 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
3014 ilo_cp_write(cp, 0);
3015 if (write_qword)
3016 ilo_cp_write(cp, 0);
3017 ilo_cp_end(cp);
3018 }
3019
3020 static void
3021 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
3022 const struct pipe_draw_info *info,
3023 bool rectlist,
3024 struct ilo_cp *cp)
3025 {
3026 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
3027 const uint8_t cmd_len = 6;
3028 const int prim = (rectlist) ?
3029 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
3030 const int vb_access = (info->indexed) ?
3031 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
3032 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
3033
3034 ILO_GPE_VALID_GEN(dev, 6, 6);
3035
3036 ilo_cp_begin(cp, cmd_len);
3037 ilo_cp_write(cp, cmd | (cmd_len - 2) |
3038 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
3039 vb_access);
3040 ilo_cp_write(cp, info->count);
3041 ilo_cp_write(cp, info->start);
3042 ilo_cp_write(cp, info->instance_count);
3043 ilo_cp_write(cp, info->start_instance);
3044 ilo_cp_write(cp, info->index_bias);
3045 ilo_cp_end(cp);
3046 }
3047
3048 static uint32_t
3049 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
3050 const struct ilo_shader **cs,
3051 uint32_t *sampler_state,
3052 int *num_samplers,
3053 uint32_t *binding_table_state,
3054 int *num_surfaces,
3055 int num_ids,
3056 struct ilo_cp *cp)
3057 {
3058 /*
3059 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
3060 *
3061 * "(Interface Descriptor Total Length) This field must have the same
3062 * alignment as the Interface Descriptor Data Start Address.
3063 *
3064 * It must be DQWord (32-byte) aligned..."
3065 *
3066 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
3067 *
3068 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
3069 * aligned address of the Interface Descriptor data."
3070 */
3071 const int state_align = 32 / 4;
3072 const int state_len = (32 / 4) * num_ids;
3073 uint32_t state_offset, *dw;
3074 int i;
3075
3076 ILO_GPE_VALID_GEN(dev, 6, 6);
3077
3078 dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
3079 state_len, state_align, &state_offset);
3080
3081 for (i = 0; i < num_ids; i++) {
3082 int curbe_read_len;
3083
3084 curbe_read_len = (cs[i]->pcb.clip_state_size + 31) / 32;
3085
3086 dw[0] = cs[i]->cache_offset;
3087 dw[1] = 1 << 18; /* SPF */
3088 dw[2] = sampler_state[i] |
3089 (num_samplers[i] + 3) / 4 << 2;
3090 dw[3] = binding_table_state[i] |
3091 num_surfaces[i];
3092 dw[4] = curbe_read_len << 16 | /* CURBE Read Length */
3093 0; /* CURBE Read Offset */
3094 dw[5] = 0; /* Barrier ID */
3095 dw[6] = 0;
3096 dw[7] = 0;
3097
3098 dw += 8;
3099 }
3100
3101 return state_offset;
3102 }
3103
3104 static void
3105 viewport_get_guardband(const struct ilo_dev_info *dev,
3106 int center_x, int center_y,
3107 int *min_gbx, int *max_gbx,
3108 int *min_gby, int *max_gby)
3109 {
3110 /*
3111 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
3112 *
3113 * "Per-Device Guardband Extents
3114 *
3115 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
3116 * - Maximum Post-Clamp Delta (X or Y): 16K"
3117 *
3118 * "In addition, in order to be correctly rendered, objects must have a
3119 * screenspace bounding box not exceeding 8K in the X or Y direction.
3120 * This additional restriction must also be comprehended by software,
3121 * i.e., enforced by use of clipping."
3122 *
3123 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
3124 *
3125 * "Per-Device Guardband Extents
3126 *
3127 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
3128 * - Maximum Post-Clamp Delta (X or Y): N/A"
3129 *
3130 * "In addition, in order to be correctly rendered, objects must have a
3131 * screenspace bounding box not exceeding 8K in the X or Y direction.
3132 * This additional restriction must also be comprehended by software,
3133 * i.e., enforced by use of clipping."
3134 *
3135  * Taken together, the bounding box of any object must not exceed 8K in
3136  * either width or height.
3137  *
3138  * Below we set the guardband to an 8K-by-8K square centered on the
3139  * viewport.  This makes sure that all objects passing the GB test are
3140  * valid to the renderer, and that those requiring XY clipping have a
3141  * better chance of passing the GB test.
3142 */
3143 const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
3144 const int half_len = 8192 / 2;
3145
3146 /* make sure the guardband is within the valid range */
3147 if (center_x - half_len < -max_extent)
3148 center_x = -max_extent + half_len;
3149 else if (center_x + half_len > max_extent - 1)
3150 center_x = max_extent - half_len;
3151
3152 if (center_y - half_len < -max_extent)
3153 center_y = -max_extent + half_len;
3154 else if (center_y + half_len > max_extent - 1)
3155 center_y = max_extent - half_len;
3156
3157 *min_gbx = center_x - half_len;
3158 *max_gbx = center_x + half_len;
3159 *min_gby = center_y - half_len;
3160 *max_gby = center_y + half_len;
3161 }
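/*
 * A worked example of viewport_get_guardband(): with a viewport centered at
 * (1000, 500) on GEN6, half_len is 4096 and neither center is clamped, so
 * the guardband becomes [-3096, 5096] x [-3596, 4596].  A center at
 * x = 15000 would first be pulled back to 16384 - 4096 = 12288 so that the
 * band stays within [-16K, 16K-1].
 */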
3162
3163 void
3164 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
3165 const struct pipe_viewport_state *state,
3166 struct ilo_viewport_cso *vp)
3167 {
3168 const float scale_x = fabs(state->scale[0]);
3169 const float scale_y = fabs(state->scale[1]);
3170 const float scale_z = fabs(state->scale[2]);
3171 int min_gbx, max_gbx, min_gby, max_gby;
3172
3173 ILO_GPE_VALID_GEN(dev, 6, 7);
3174
3175 viewport_get_guardband(dev,
3176 (int) state->translate[0],
3177 (int) state->translate[1],
3178 &min_gbx, &max_gbx, &min_gby, &max_gby);
3179
3180 /* matrix form */
3181 vp->m00 = state->scale[0];
3182 vp->m11 = state->scale[1];
3183 vp->m22 = state->scale[2];
3184 vp->m30 = state->translate[0];
3185 vp->m31 = state->translate[1];
3186 vp->m32 = state->translate[2];
3187
3188 /* guardband in NDC space */
3189 vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
3190 vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
3191 vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
3192 vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
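   /*
    * Illustrative example: the screen-space guardband is mapped back to NDC
    * by inverting the viewport transform.  With scale_x = 400 and
    * translate_x = 400 (an 800-pixel-wide viewport at the origin), a
    * min_gbx of -3696 becomes (-3696 - 400) / 400 = -10.24 in NDC.
    */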
3193
3194 /* viewport in screen space */
3195 vp->min_x = scale_x * -1.0f + state->translate[0];
3196 vp->max_x = scale_x * 1.0f + state->translate[0];
3197 vp->min_y = scale_y * -1.0f + state->translate[1];
3198 vp->max_y = scale_y * 1.0f + state->translate[1];
3199 vp->min_z = scale_z * -1.0f + state->translate[2];
3200 vp->max_z = scale_z * 1.0f + state->translate[2];
3201 }
3202
3203 static uint32_t
3204 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
3205 const struct ilo_viewport_cso *viewports,
3206 unsigned num_viewports,
3207 struct ilo_cp *cp)
3208 {
3209 const int state_align = 32 / 4;
3210 const int state_len = 8 * num_viewports;
3211 uint32_t state_offset, *dw;
3212 unsigned i;
3213
3214 ILO_GPE_VALID_GEN(dev, 6, 6);
3215
3216 /*
3217 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
3218 *
3219 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
3220 * stored as an array of up to 16 elements..."
3221 */
3222 assert(num_viewports && num_viewports <= 16);
3223
3224 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
3225 state_len, state_align, &state_offset);
3226
3227 for (i = 0; i < num_viewports; i++) {
3228 const struct ilo_viewport_cso *vp = &viewports[i];
3229
3230 dw[0] = fui(vp->m00);
3231 dw[1] = fui(vp->m11);
3232 dw[2] = fui(vp->m22);
3233 dw[3] = fui(vp->m30);
3234 dw[4] = fui(vp->m31);
3235 dw[5] = fui(vp->m32);
3236 dw[6] = 0;
3237 dw[7] = 0;
3238
3239 dw += 8;
3240 }
3241
3242 return state_offset;
3243 }
3244
3245 static uint32_t
3246 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
3247 const struct ilo_viewport_cso *viewports,
3248 unsigned num_viewports,
3249 struct ilo_cp *cp)
3250 {
3251 const int state_align = 32 / 4;
3252 const int state_len = 4 * num_viewports;
3253 uint32_t state_offset, *dw;
3254 unsigned i;
3255
3256 ILO_GPE_VALID_GEN(dev, 6, 6);
3257
3258 /*
3259 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
3260 *
3261 * "The viewport-related state is stored as an array of up to 16
3262 * elements..."
3263 */
3264 assert(num_viewports && num_viewports <= 16);
3265
3266 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
3267 state_len, state_align, &state_offset);
3268
3269 for (i = 0; i < num_viewports; i++) {
3270 const struct ilo_viewport_cso *vp = &viewports[i];
3271
3272 dw[0] = fui(vp->min_gbx);
3273 dw[1] = fui(vp->max_gbx);
3274 dw[2] = fui(vp->min_gby);
3275 dw[3] = fui(vp->max_gby);
3276
3277 dw += 4;
3278 }
3279
3280 return state_offset;
3281 }
3282
3283 static uint32_t
3284 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
3285 const struct ilo_viewport_cso *viewports,
3286 unsigned num_viewports,
3287 struct ilo_cp *cp)
3288 {
3289 const int state_align = 32 / 4;
3290 const int state_len = 2 * num_viewports;
3291 uint32_t state_offset, *dw;
3292 unsigned i;
3293
3294 ILO_GPE_VALID_GEN(dev, 6, 7);
3295
3296 /*
3297 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
3298 *
3299 * "The viewport state is stored as an array of up to 16 elements..."
3300 */
3301 assert(num_viewports && num_viewports <= 16);
3302
3303 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
3304 state_len, state_align, &state_offset);
3305
3306 for (i = 0; i < num_viewports; i++) {
3307 const struct ilo_viewport_cso *vp = &viewports[i];
3308
3309 dw[0] = fui(vp->min_z);
3310 dw[1] = fui(vp->max_z);
3311
3312 dw += 2;
3313 }
3314
3315 return state_offset;
3316 }
3317
3318 static uint32_t
3319 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
3320 const struct pipe_stencil_ref *stencil_ref,
3321 float alpha_ref,
3322 const struct pipe_blend_color *blend_color,
3323 struct ilo_cp *cp)
3324 {
3325 const int state_align = 64 / 4;
3326 const int state_len = 6;
3327 uint32_t state_offset, *dw;
3328
3329 ILO_GPE_VALID_GEN(dev, 6, 7);
3330
3331 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
3332 state_len, state_align, &state_offset);
3333
3334 dw[0] = stencil_ref->ref_value[0] << 24 |
3335 stencil_ref->ref_value[1] << 16 |
3336 BRW_ALPHATEST_FORMAT_UNORM8;
3337 dw[1] = float_to_ubyte(alpha_ref);
3338 dw[2] = fui(blend_color->color[0]);
3339 dw[3] = fui(blend_color->color[1]);
3340 dw[4] = fui(blend_color->color[2]);
3341 dw[5] = fui(blend_color->color[3]);
3342
3343 return state_offset;
3344 }
3345
3346 static int
3347 gen6_blend_factor_dst_alpha_forced_one(int factor)
3348 {
3349 switch (factor) {
3350 case BRW_BLENDFACTOR_DST_ALPHA:
3351 return BRW_BLENDFACTOR_ONE;
3352 case BRW_BLENDFACTOR_INV_DST_ALPHA:
3353 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
3354 return BRW_BLENDFACTOR_ZERO;
3355 default:
3356 return factor;
3357 }
3358 }
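/*
 * When the color buffer has no alpha channel but the HW format does (e.g.
 * B8G8R8X8_UNORM bound as B8G8R8A8_UNORM), blending must behave as if dst
 * alpha were 1.0.  The mapping above follows from that substitution:
 * DST_ALPHA becomes ONE, INV_DST_ALPHA becomes ZERO, and
 * SRC_ALPHA_SATURATE, whose color factor is min(src alpha, 1 - dst alpha),
 * also collapses to ZERO.
 */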
3359
3360 static uint32_t
3361 blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
3362 const struct pipe_rt_blend_state *rt,
3363 bool dst_alpha_forced_one)
3364 {
3365 int rgb_src, rgb_dst, a_src, a_dst;
3366 uint32_t dw;
3367
3368 if (!rt->blend_enable)
3369 return 0;
3370
3371 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
3372 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
3373 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
3374 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
3375
3376 if (dst_alpha_forced_one) {
3377 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
3378 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
3379 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
3380 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
3381 }
3382
3383 dw = 1 << 31 |
3384 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
3385 a_src << 20 |
3386 a_dst << 15 |
3387 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
3388 rgb_src << 5 |
3389 rgb_dst;
3390
3391 if (rt->rgb_func != rt->alpha_func ||
3392 rgb_src != a_src || rgb_dst != a_dst)
3393 dw |= 1 << 30;
3394
3395 return dw;
3396 }
3397
3398 void
3399 ilo_gpe_init_blend(const struct ilo_dev_info *dev,
3400 const struct pipe_blend_state *state,
3401 struct ilo_blend_state *blend)
3402 {
3403 unsigned num_cso, i;
3404
3405 ILO_GPE_VALID_GEN(dev, 6, 7);
3406
3407 if (state->independent_blend_enable) {
3408 num_cso = Elements(blend->cso);
3409 }
3410 else {
3411 memset(blend->cso, 0, sizeof(blend->cso));
3412 num_cso = 1;
3413 }
3414
3415 blend->independent_blend_enable = state->independent_blend_enable;
3416 blend->alpha_to_coverage = state->alpha_to_coverage;
3417 blend->dual_blend = false;
3418
3419 for (i = 0; i < num_cso; i++) {
3420 const struct pipe_rt_blend_state *rt = &state->rt[i];
3421 struct ilo_blend_cso *cso = &blend->cso[i];
3422 bool dual_blend;
3423
3424 cso->payload[0] = 0;
3425 cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
3426 0x3;
3427
3428 if (!(rt->colormask & PIPE_MASK_A))
3429 cso->payload[1] |= 1 << 27;
3430 if (!(rt->colormask & PIPE_MASK_R))
3431 cso->payload[1] |= 1 << 26;
3432 if (!(rt->colormask & PIPE_MASK_G))
3433 cso->payload[1] |= 1 << 25;
3434 if (!(rt->colormask & PIPE_MASK_B))
3435 cso->payload[1] |= 1 << 24;
3436
3437 if (state->dither)
3438 cso->payload[1] |= 1 << 12;
3439
3440 /*
3441 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3442 *
3443 * "Color Buffer Blending and Logic Ops must not be enabled
3444 * simultaneously, or behavior is UNDEFINED."
3445 *
3446 * Since state->logicop_enable takes precedence over rt->blend_enable,
3447 * no special care is needed.
3448 */
3449 if (state->logicop_enable) {
3450 cso->dw_logicop = 1 << 22 |
3451 gen6_translate_pipe_logicop(state->logicop_func) << 18;
3452
3453 cso->dw_blend = 0;
3454 cso->dw_blend_dst_alpha_forced_one = 0;
3455
3456 dual_blend = false;
3457 }
3458 else {
3459 cso->dw_logicop = 0;
3460
3461 cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
3462 cso->dw_blend_dst_alpha_forced_one =
3463 blend_get_rt_blend_enable(dev, rt, true);
3464
3465 dual_blend = (rt->blend_enable &&
3466 util_blend_state_is_dual(state, i));
3467 }
3468
3469 cso->dw_alpha_mod = 0;
3470
3471 if (state->alpha_to_coverage) {
3472 cso->dw_alpha_mod |= 1 << 31;
3473
3474 if (dev->gen >= ILO_GEN(7))
3475 cso->dw_alpha_mod |= 1 << 29;
3476 }
3477
3478 /*
3479 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
3480 *
3481 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
3482 * must be disabled."
3483 */
3484 if (state->alpha_to_one && !dual_blend)
3485 cso->dw_alpha_mod |= 1 << 30;
3486
3487 if (dual_blend)
3488 blend->dual_blend = true;
3489 }
3490 }
3491
3492 static uint32_t
3493 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
3494 const struct ilo_blend_state *blend,
3495 const struct ilo_fb_state *fb,
3496 const struct pipe_alpha_state *alpha,
3497 struct ilo_cp *cp)
3498 {
3499 const int state_align = 64 / 4;
3500 int state_len;
3501 uint32_t state_offset, *dw;
3502 unsigned num_targets, i;
3503
3504 ILO_GPE_VALID_GEN(dev, 6, 7);
3505
3506 /*
3507 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
3508 *
3509 * "The blend state is stored as an array of up to 8 elements..."
3510 */
3511 num_targets = fb->state.nr_cbufs;
3512 assert(num_targets <= 8);
3513
3514 if (!num_targets) {
3515 if (!alpha->enabled)
3516 return 0;
3517 /* to be able to reference alpha func */
3518 num_targets = 1;
3519 }
3520
3521 state_len = 2 * num_targets;
3522
3523 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
3524 state_len, state_align, &state_offset);
3525
3526 for (i = 0; i < num_targets; i++) {
3527 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
3528 const struct ilo_blend_cso *cso = &blend->cso[idx];
3529 const int num_samples = fb->num_samples;
3530 const struct util_format_description *format_desc =
3531 (idx < fb->state.nr_cbufs) ?
3532 util_format_description(fb->state.cbufs[idx]->format) : NULL;
3533 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
3534
3535 rt_is_unorm = true;
3536 rt_is_pure_integer = false;
3537 rt_dst_alpha_forced_one = false;
3538
3539 if (format_desc) {
3540 int ch;
3541
3542 switch (format_desc->format) {
3543 case PIPE_FORMAT_B8G8R8X8_UNORM:
3544 /* force alpha to one when the HW format has alpha */
3545 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
3546 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
3547 rt_dst_alpha_forced_one = true;
3548 break;
3549 default:
3550 break;
3551 }
3552
3553 for (ch = 0; ch < 4; ch++) {
3554 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
3555 continue;
3556
3557 if (format_desc->channel[ch].pure_integer) {
3558 rt_is_unorm = false;
3559 rt_is_pure_integer = true;
3560 break;
3561 }
3562
3563 if (!format_desc->channel[ch].normalized ||
3564 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
3565 rt_is_unorm = false;
3566 }
3567 }
3568
3569 dw[0] = cso->payload[0];
3570 dw[1] = cso->payload[1];
3571
3572 if (!rt_is_pure_integer) {
3573 if (rt_dst_alpha_forced_one)
3574 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
3575 else
3576 dw[0] |= cso->dw_blend;
3577 }
3578
3579 /*
3580 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3581 *
3582 * "Logic Ops are only supported on *_UNORM surfaces (excluding
3583 * _SRGB variants), otherwise Logic Ops must be DISABLED."
3584 *
3585 * Since logicop is ignored for non-UNORM color buffers, no special care
3586 * is needed.
3587 */
3588 if (rt_is_unorm)
3589 dw[1] |= cso->dw_logicop;
3590
3591 /*
3592 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
3593 *
3594 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
3595 * Dither both must be disabled."
3596 *
3597  * There is no such limitation on GEN7, nor for AlphaToOne, but GL
3598  * requires the same behavior anyway.
3599 */
3600 if (num_samples > 1)
3601 dw[1] |= cso->dw_alpha_mod;
3602
3603 /*
3604 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
3605 *
3606 * "Alpha Test can only be enabled if Pixel Shader outputs a float
3607 * alpha value."
3608 */
3609 if (alpha->enabled && !rt_is_pure_integer) {
3610 dw[1] |= 1 << 16 |
3611 gen6_translate_dsa_func(alpha->func) << 13;
3612 }
3613
3614 dw += 2;
3615 }
3616
3617 return state_offset;
3618 }
3619
3620 void
3621 ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
3622 const struct pipe_depth_stencil_alpha_state *state,
3623 struct ilo_dsa_state *dsa)
3624 {
3625 const struct pipe_depth_state *depth = &state->depth;
3626 const struct pipe_stencil_state *stencil0 = &state->stencil[0];
3627 const struct pipe_stencil_state *stencil1 = &state->stencil[1];
3628 uint32_t *dw;
3629
3630 ILO_GPE_VALID_GEN(dev, 6, 7);
3631
3632 /* copy alpha state for later use */
3633 dsa->alpha = state->alpha;
3634
3635 STATIC_ASSERT(Elements(dsa->payload) >= 3);
3636 dw = dsa->payload;
3637
3638 /*
3639 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
3640 *
3641 * "If the Depth Buffer is either undefined or does not have a surface
3642 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
3643 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
3644 *
3645 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
3646 *
3647 * "This field (Stencil Test Enable) cannot be enabled if
3648 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
3649 *
3650 * TODO We do not check these yet.
3651 */
3652 if (stencil0->enabled) {
3653 dw[0] = 1 << 31 |
3654 gen6_translate_dsa_func(stencil0->func) << 28 |
3655 gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
3656 gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
3657 gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
3658 if (stencil0->writemask)
3659 dw[0] |= 1 << 18;
3660
3661 dw[1] = stencil0->valuemask << 24 |
3662 stencil0->writemask << 16;
3663
3664 if (stencil1->enabled) {
3665 dw[0] |= 1 << 15 |
3666 gen6_translate_dsa_func(stencil1->func) << 12 |
3667 gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
3668 gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
3669 gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
3670 if (stencil1->writemask)
3671 dw[0] |= 1 << 18;
3672
3673 dw[1] |= stencil1->valuemask << 8 |
3674 stencil1->writemask;
3675 }
3676 }
3677 else {
3678 dw[0] = 0;
3679 dw[1] = 0;
3680 }
3681
3682 /*
3683 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
3684 *
3685 * "Enabling the Depth Test function without defining a Depth Buffer is
3686 * UNDEFINED."
3687 *
3688 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
3689 *
3690 * "A Depth Buffer must be defined before enabling writes to it, or
3691 * operation is UNDEFINED."
3692 *
3693 * TODO We do not check these yet.
3694 */
3695 dw[2] = depth->enabled << 31 |
3696 depth->writemask << 26;
3697 if (depth->enabled)
3698 dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
3699 else
3700 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
3701 }
3702
3703 static uint32_t
3704 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
3705 const struct ilo_dsa_state *dsa,
3706 struct ilo_cp *cp)
3707 {
3708 const int state_align = 64 / 4;
3709 const int state_len = 3;
3710 uint32_t state_offset, *dw;
3711
3713 ILO_GPE_VALID_GEN(dev, 6, 7);
3714
3715 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
3716 state_len, state_align, &state_offset);
3717
3718 dw[0] = dsa->payload[0];
3719 dw[1] = dsa->payload[1];
3720 dw[2] = dsa->payload[2];
3721
3722 return state_offset;
3723 }
3724
3725 void
3726 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
3727 unsigned start_slot,
3728 unsigned num_states,
3729 const struct pipe_scissor_state *states,
3730 struct ilo_scissor_state *scissor)
3731 {
3732 unsigned i;
3733
3734 ILO_GPE_VALID_GEN(dev, 6, 7);
3735
3736 for (i = 0; i < num_states; i++) {
3737 uint16_t min_x, min_y, max_x, max_y;
3738
3739 /* both max and min are inclusive in SCISSOR_RECT */
3740 if (states[i].minx < states[i].maxx &&
3741 states[i].miny < states[i].maxy) {
3742 min_x = states[i].minx;
3743 min_y = states[i].miny;
3744 max_x = states[i].maxx - 1;
3745 max_y = states[i].maxy - 1;
3746 }
3747 else {
3748 /* we have to make min greater than max */
3749 min_x = 1;
3750 min_y = 1;
3751 max_x = 0;
3752 max_y = 0;
3753 }
3754
3755 scissor->payload[start_slot * 2 + 0] = min_y << 16 | min_x;
3756 scissor->payload[start_slot * 2 + 1] = max_y << 16 | max_x;
3757 start_slot++;
3758 }
3759 }
3760
3761 void
3762 ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
3763 struct ilo_scissor_state *scissor)
3764 {
3765 unsigned i;
3766
3767 for (i = 0; i < Elements(scissor->payload); i += 2) {
3768 scissor->payload[i + 0] = 1 << 16 | 1;
3769 scissor->payload[i + 1] = 0;
3770 }
3771 }
3772
3773 static uint32_t
3774 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
3775 const struct ilo_scissor_state *scissor,
3776 unsigned num_viewports,
3777 struct ilo_cp *cp)
3778 {
3779 const int state_align = 32 / 4;
3780 const int state_len = 2 * num_viewports;
3781 uint32_t state_offset, *dw;
3782
3783 ILO_GPE_VALID_GEN(dev, 6, 7);
3784
3785 /*
3786 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
3787 *
3788 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
3789 * stored as an array of up to 16 elements..."
3790 */
3791 assert(num_viewports && num_viewports <= 16);
3792
3793 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
3794 state_len, state_align, &state_offset);
3795
3796 memcpy(dw, scissor->payload, state_len * 4);
3797
3798 return state_offset;
3799 }
3800
3801 static uint32_t
3802 gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
3803 uint32_t *surface_states,
3804 int num_surface_states,
3805 struct ilo_cp *cp)
3806 {
3807 const int state_align = 32 / 4;
3808 const int state_len = num_surface_states;
3809 uint32_t state_offset, *dw;
3810
3811 ILO_GPE_VALID_GEN(dev, 6, 7);
3812
3813 /*
3814 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
3815 *
3816 * "It is stored as an array of up to 256 elements..."
3817 */
3818 assert(num_surface_states <= 256);
3819
3820 if (!num_surface_states)
3821 return 0;
3822
3823 dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
3824 state_len, state_align, &state_offset);
3825 memcpy(dw, surface_states,
3826 num_surface_states * sizeof(surface_states[0]));
3827
3828 return state_offset;
3829 }
3830
3831 void
3832 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
3833 unsigned width, unsigned height,
3834 unsigned depth, unsigned level,
3835 struct ilo_view_surface *surf)
3836 {
3837 uint32_t *dw;
3838
3839 ILO_GPE_VALID_GEN(dev, 6, 6);
3840
3841 /*
3842 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
3843 *
3844 * "A null surface will be used in instances where an actual surface is
3845 * not bound. When a write message is generated to a null surface, no
3846 * actual surface is written to. When a read message (including any
3847 * sampling engine message) is generated to a null surface, the result
3848 * is all zeros. Note that a null surface type is allowed to be used
3849    *     with all messages, even if it is not specifically indicated as
3850 * supported. All of the remaining fields in surface state are ignored
3851 * for null surfaces, with the following exceptions:
3852 *
3853 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
3854 * depth buffer's corresponding state for all render target
3855 * surfaces, including null.
3856 * * Surface Format must be R8G8B8A8_UNORM."
3857 *
3858 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3859 *
3860 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
3861 * true"
3862 */
3863
3864 STATIC_ASSERT(Elements(surf->payload) >= 6);
3865 dw = surf->payload;
3866
3867 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
3868 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
3869
3870 dw[1] = 0;
3871
3872 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
3873 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
3874 level << BRW_SURFACE_LOD_SHIFT;
3875
3876 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
3877 BRW_SURFACE_TILED;
3878
3879 dw[4] = 0;
3880 dw[5] = 0;
3881
3882 surf->bo = NULL;
3883 }
3884
3885 void
3886 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
3887 const struct ilo_buffer *buf,
3888 unsigned offset, unsigned size,
3889 unsigned struct_size,
3890 enum pipe_format elem_format,
3891 bool is_rt, bool render_cache_rw,
3892 struct ilo_view_surface *surf)
3893 {
3894 const int elem_size = util_format_get_blocksize(elem_format);
3895 int width, height, depth, pitch;
3896 int surface_format, num_entries;
3897 uint32_t *dw;
3898
3899 ILO_GPE_VALID_GEN(dev, 6, 6);
3900
3901 /*
3902 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
3903 * structure in a buffer.
3904 */
3905
3906 surface_format = ilo_translate_color_format(elem_format);
3907
3908 num_entries = size / struct_size;
3909 /* see if there is enough space to fit another element */
3910 if (size % struct_size >= elem_size)
3911 num_entries++;
3912
3913 /*
3914 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3915 *
3916 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
3917 * Address) specifies the base address of first element of the
3918 * surface. The surface is interpreted as a simple array of that
3919 * single element type. The address must be naturally-aligned to the
3920 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
3921 * must be 16-byte aligned).
3922 *
3923 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
3924 * the base address of the first element of the surface, computed in
3925 * software by adding the surface base address to the byte offset of
3926 * the element in the buffer."
3927 */
3928 if (is_rt)
3929 assert(offset % elem_size == 0);
3930
3931 /*
3932 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
3933 *
3934 * "For buffer surfaces, the number of entries in the buffer ranges
3935 * from 1 to 2^27."
3936 */
3937 assert(num_entries >= 1 && num_entries <= 1 << 27);
3938
3939 /*
3940 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3941 *
3942 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
3943 * indicates the size of the structure."
3944 */
3945 pitch = struct_size;
3946
3947 pitch--;
3948 num_entries--;
3949 /* bits [6:0] */
3950 width = (num_entries & 0x0000007f);
3951 /* bits [19:7] */
3952 height = (num_entries & 0x000fff80) >> 7;
3953 /* bits [26:20] */
3954 depth = (num_entries & 0x07f00000) >> 20;
3955
3956 STATIC_ASSERT(Elements(surf->payload) >= 6);
3957 dw = surf->payload;
3958
3959 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
3960 surface_format << BRW_SURFACE_FORMAT_SHIFT;
3961 if (render_cache_rw)
3962 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
3963
3964 dw[1] = offset;
3965
3966 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
3967 width << BRW_SURFACE_WIDTH_SHIFT;
3968
3969 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
3970 pitch << BRW_SURFACE_PITCH_SHIFT;
3971
3972 dw[4] = 0;
3973 dw[5] = 0;
3974
3975 /* do not increment reference count */
3976 surf->bo = buf->bo;
3977 }
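
/*
 * A worked example of the packing above (editor's illustration, numbers are
 * made up): with size = 4096, struct_size = 16, and elem_format =
 * PIPE_FORMAT_R32G32B32A32_FLOAT (elem_size = 16),
 *
 *   num_entries = 4096 / 16 = 256, minus one = 255 (0xff)
 *   width  = 0xff & 0x7f           = 127  (bits [6:0])
 *   height = (0xff & 0xfff80) >> 7 = 1    (bits [19:7])
 *   depth  = 0                            (bits [26:20])
 *   pitch  = 16 - 1 = 15
 */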
3978
3979 void
3980 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
3981 const struct ilo_texture *tex,
3982 enum pipe_format format,
3983 unsigned first_level,
3984 unsigned num_levels,
3985 unsigned first_layer,
3986 unsigned num_layers,
3987 bool is_rt, bool render_cache_rw,
3988 struct ilo_view_surface *surf)
3989 {
3990 int surface_type, surface_format;
3991 int width, height, depth, pitch, lod;
3992 unsigned layer_offset, x_offset, y_offset;
3993 uint32_t *dw;
3994
3995 ILO_GPE_VALID_GEN(dev, 6, 6);
3996
3997 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
3998 assert(surface_type != BRW_SURFACE_BUFFER);
3999
4000 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
4001 format = PIPE_FORMAT_Z32_FLOAT;
4002
4003 if (is_rt)
4004 surface_format = ilo_translate_render_format(format);
4005 else
4006 surface_format = ilo_translate_texture_format(format);
4007 assert(surface_format >= 0);
4008
4009 width = tex->base.width0;
4010 height = tex->base.height0;
4011 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
4012 tex->base.depth0 : num_layers;
4013 pitch = tex->bo_stride;
4014
4015 if (surface_type == BRW_SURFACE_CUBE) {
4016 /*
4017 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4018 *
4019 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
4020 * range of this field (Depth) is [0,84], indicating the number of
4021 * cube array elements (equal to the number of underlying 2D array
4022 * elements divided by 6). For other surfaces, this field must be
4023 * zero."
4024 *
4025 * When is_rt is true, we treat the texture as a 2D one to avoid the
4026 * restriction.
4027 */
4028 if (is_rt) {
4029 surface_type = BRW_SURFACE_2D;
4030 }
4031 else {
4032 assert(num_layers % 6 == 0);
4033 depth = num_layers / 6;
4034 }
4035 }
4036
4037 /* sanity check the size */
4038 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
4039 switch (surface_type) {
4040 case BRW_SURFACE_1D:
4041 assert(width <= 8192 && height == 1 && depth <= 512);
4042 assert(first_layer < 512 && num_layers <= 512);
4043 break;
4044 case BRW_SURFACE_2D:
4045 assert(width <= 8192 && height <= 8192 && depth <= 512);
4046 assert(first_layer < 512 && num_layers <= 512);
4047 break;
4048 case BRW_SURFACE_3D:
4049 assert(width <= 2048 && height <= 2048 && depth <= 2048);
4050 assert(first_layer < 2048 && num_layers <= 512);
4051 if (!is_rt)
4052 assert(first_layer == 0);
4053 break;
4054 case BRW_SURFACE_CUBE:
4055 assert(width <= 8192 && height <= 8192 && depth <= 85);
4056 assert(width == height);
4057 assert(first_layer < 512 && num_layers <= 512);
4058 if (is_rt)
4059 assert(first_layer == 0);
4060 break;
4061 default:
4062 assert(!"unexpected surface type");
4063 break;
4064 }
4065
4066 /* non-full array spacing is supported only on GEN7+ */
4067 assert(tex->array_spacing_full);
4068 /* non-interleaved samples are supported only on GEN7+ */
4069 if (tex->base.nr_samples > 1)
4070 assert(tex->interleaved);
4071
4072 if (is_rt) {
4073 /*
4074 * Compute the offset to the layer manually.
4075 *
4076 * For rendering, the hardware requires LOD to be the same for all
4077 * render targets and the depth buffer. We need to compute the offset
4078 * to the layer manually and always set LOD to 0.
4079 */
4080 if (true) {
4081 /* we lose the capability for layered rendering */
4082 assert(num_layers == 1);
4083
4084 layer_offset = ilo_texture_get_slice_offset(tex,
4085 first_level, first_layer, &x_offset, &y_offset);
4086
4087 assert(x_offset % 4 == 0);
4088 assert(y_offset % 2 == 0);
4089 x_offset /= 4;
4090 y_offset /= 2;
4091
4092 /* derive the size for the LOD */
4093 width = u_minify(width, first_level);
4094 height = u_minify(height, first_level);
4095 if (surface_type == BRW_SURFACE_3D)
4096 depth = u_minify(depth, first_level);
4097 else
4098 depth = 1;
4099
4100 first_level = 0;
4101 first_layer = 0;
4102 lod = 0;
4103 }
4104 else {
4105 layer_offset = 0;
4106 x_offset = 0;
4107 y_offset = 0;
4108 }
4109
4110 assert(num_levels == 1);
4111 lod = first_level;
4112 }
4113 else {
4114 layer_offset = 0;
4115 x_offset = 0;
4116 y_offset = 0;
4117
4118 lod = num_levels - 1;
4119 }
4120
4121 /*
4122 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
4123 *
4124 * "Linear render target surface base addresses must be element-size
4125 * aligned, for non-YUV surface formats, or a multiple of 2
4126 * element-sizes for YUV surface formats. Other linear surfaces have
4127 * no alignment requirements (byte alignment is sufficient.)"
4128 *
4129 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4130 *
4131 * "For linear render target surfaces, the pitch must be a multiple
4132 * of the element size for non-YUV surface formats. Pitch must be a
4133 * multiple of 2 * element size for YUV surface formats."
4134 *
4135 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
4136 *
4137 * "For linear surfaces, this field (X Offset) must be zero"
4138 */
4139 if (tex->tiling == INTEL_TILING_NONE) {
4140 if (is_rt) {
4141 const int elem_size = util_format_get_blocksize(format);
4142 assert(layer_offset % elem_size == 0);
4143 assert(pitch % elem_size == 0);
4144 }
4145
4146 assert(!x_offset);
4147 }
4148
4149 STATIC_ASSERT(Elements(surf->payload) >= 6);
4150 dw = surf->payload;
4151
4152 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
4153 surface_format << BRW_SURFACE_FORMAT_SHIFT |
4154 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
4155
4156 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
4157 dw[0] |= 1 << 9 |
4158 BRW_SURFACE_CUBEFACE_ENABLES;
4159 }
4160
4161 if (render_cache_rw)
4162 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
4163
4164 dw[1] = layer_offset;
4165
4166 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
4167 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
4168 lod << BRW_SURFACE_LOD_SHIFT;
4169
4170 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
4171 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
4172 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
4173
4174 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
4175 first_layer << 17 |
4176 (num_layers - 1) << 8 |
4177 ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
4178 BRW_SURFACE_MULTISAMPLECOUNT_1);
4179
4180 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
4181 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
4182 if (tex->valign_4)
4183 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
4184
4185 /* do not increment reference count */
4186 surf->bo = tex->bo;
4187 }
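
/*
 * Illustrative example (editor's own summary of the paths above): sampling a
 * cube map with num_layers = 6 keeps BRW_SURFACE_CUBE, sets
 * depth = 6 / 6 = 1, and uses lod = num_levels - 1.  Binding the same
 * texture as a render target instead demotes it to BRW_SURFACE_2D, fetches
 * layer_offset and the intra-tile x/y offsets from
 * ilo_texture_get_slice_offset() (divided by 4 and 2 before going into DW5),
 * and forces lod to 0.
 */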
4188
4189 static uint32_t
4190 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
4191 const struct ilo_view_surface *surf,
4192 bool for_render,
4193 struct ilo_cp *cp)
4194 {
4195 const int state_align = 32 / 4;
4196 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
4197 uint32_t state_offset;
4198 uint32_t read_domains, write_domain;
4199
4200 ILO_GPE_VALID_GEN(dev, 6, 7);
4201
4202 if (for_render) {
4203 read_domains = INTEL_DOMAIN_RENDER;
4204 write_domain = INTEL_DOMAIN_RENDER;
4205 }
4206 else {
4207 read_domains = INTEL_DOMAIN_SAMPLER;
4208 write_domain = 0;
4209 }
4210
4211 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
4212
4213 STATIC_ASSERT(Elements(surf->payload) >= 8);
4214
4215 ilo_cp_write(cp, surf->payload[0]);
4216 ilo_cp_write_bo(cp, surf->payload[1],
4217 surf->bo, read_domains, write_domain);
4218 ilo_cp_write(cp, surf->payload[2]);
4219 ilo_cp_write(cp, surf->payload[3]);
4220 ilo_cp_write(cp, surf->payload[4]);
4221 ilo_cp_write(cp, surf->payload[5]);
4222
4223 if (dev->gen >= ILO_GEN(7)) {
4224 ilo_cp_write(cp, surf->payload[6]);
4225 ilo_cp_write(cp, surf->payload[7]);
4226 }
4227
4228 ilo_cp_end(cp);
4229
4230 return state_offset;
4231 }
4232
4233 static uint32_t
4234 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
4235 const struct pipe_stream_output_target *so,
4236 const struct pipe_stream_output_info *so_info,
4237 int so_index,
4238 struct ilo_cp *cp)
4239 {
4240 struct ilo_buffer *buf = ilo_buffer(so->buffer);
4241 unsigned bo_offset, struct_size;
4242 enum pipe_format elem_format;
4243 struct ilo_view_surface surf;
4244
4245 ILO_GPE_VALID_GEN(dev, 6, 6);
4246
4247 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
4248 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
4249
4250 switch (so_info->output[so_index].num_components) {
4251 case 1:
4252 elem_format = PIPE_FORMAT_R32_FLOAT;
4253 break;
4254 case 2:
4255 elem_format = PIPE_FORMAT_R32G32_FLOAT;
4256 break;
4257 case 3:
4258 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
4259 break;
4260 case 4:
4261 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
4262 break;
4263 default:
4264 assert(!"unexpected SO components length");
4265 elem_format = PIPE_FORMAT_R32_FLOAT;
4266 break;
4267 }
4268
4269 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
4270 struct_size, elem_format, false, true, &surf);
4271
4272 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
4273 }
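
/*
 * Illustrative example (editor's own numbers): a stream output with
 * dst_offset = 2 and num_components = 3, going to a buffer whose stride[] is
 * 6 and whose buffer_offset is 0, is set up as
 *
 *   bo_offset   = 0 + 2 * 4 = 8 bytes
 *   struct_size = 6 * 4     = 24 bytes
 *   elem_format = PIPE_FORMAT_R32G32B32_FLOAT
 *
 * since dst_offset and stride[] are given in dwords by Gallium.
 */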
4274
4275 static void
4276 sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
4277 const union pipe_color_union *color,
4278 uint32_t *dw, int num_dwords)
4279 {
4280 float rgba[4] = {
4281 color->f[0], color->f[1], color->f[2], color->f[3],
4282 };
4283
4284 ILO_GPE_VALID_GEN(dev, 6, 6);
4285
4286 assert(num_dwords >= 12);
4287
4288 /*
4289 * This state is not documented in the Sandy Bridge PRM, but in the
4290 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
4291 */
4292
4293 /* IEEE_FP */
4294 dw[1] = fui(rgba[0]);
4295 dw[2] = fui(rgba[1]);
4296 dw[3] = fui(rgba[2]);
4297 dw[4] = fui(rgba[3]);
4298
4299 /* FLOAT_16 */
4300 dw[5] = util_float_to_half(rgba[0]) |
4301 util_float_to_half(rgba[1]) << 16;
4302 dw[6] = util_float_to_half(rgba[2]) |
4303 util_float_to_half(rgba[3]) << 16;
4304
4305 /* clamp to [-1.0f, 1.0f] */
4306 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
4307 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
4308 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
4309 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
4310
4311 /* SNORM16 */
4312 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
4313 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
4314 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
4315 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
4316
4317 /* SNORM8 */
4318 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
4319 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
4320 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
4321 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
4322
4323 /* clamp to [0.0f, 1.0f] */
4324 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
4325 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
4326 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
4327 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
4328
4329 /* UNORM8 */
4330 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
4331 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
4332 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
4333 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
4334
4335 /* UNORM16 */
4336 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
4337 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
4338 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
4339 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
4340 }
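
/*
 * A worked example of the packing above (editor's illustration): a border
 * color of (1.0f, 0.5f, 0.0f, 1.0f) gives
 *
 *   UNORM8  dw[0] = 255 | 128 << 8 | 0 << 16 | 255 << 24 = 0xff0080ff
 *   UNORM16 dw[7] = 65535 | 32768 << 16                  = 0x8000ffff
 *   SNORM16 dw[9] = 32767 | 16384 << 16                  = 0x40007fff
 *
 * with the IEEE_FP, FLOAT_16, SNORM8, and remaining UNORM16 dwords filled in
 * analogously.
 */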
4341
4342 void
4343 ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
4344 const struct pipe_sampler_state *state,
4345 struct ilo_sampler_cso *sampler)
4346 {
4347 int mip_filter, min_filter, mag_filter, max_aniso;
4348 int lod_bias, max_lod, min_lod;
4349 int wrap_s, wrap_t, wrap_r, wrap_cube;
4350 bool clamp_is_to_edge;
4351 uint32_t dw0, dw1, dw3;
4352
4353 ILO_GPE_VALID_GEN(dev, 6, 7);
4354
4355 memset(sampler, 0, sizeof(*sampler));
4356
4357 mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
4358 min_filter = gen6_translate_tex_filter(state->min_img_filter);
4359 mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
4360
4361 sampler->anisotropic = state->max_anisotropy;
4362
4363 if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
4364 max_aniso = state->max_anisotropy / 2 - 1;
4365 else if (state->max_anisotropy > 16)
4366 max_aniso = BRW_ANISORATIO_16;
4367 else
4368 max_aniso = BRW_ANISORATIO_2;
4369
4370 /*
4371 *
4372    * Here is how the hardware calculates per-pixel LOD, from my reading of the
4373 * PRMs:
4374 *
4375 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
4376 * other ways. The number of texels is measured using level
4377 * SurfMinLod.
4378 * 2) Bias is added to LOD.
4379 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
4380 * compared with Base to determine whether magnification or
4381 * minification is needed. (if preclamp is disabled, LOD is compared
4382 * with Base before clamping)
4383 * 4) If magnification is needed, or no mipmapping is requested, LOD is
4384 * set to floor(MinLod).
4385 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
4386 *
4387 * With Gallium interface, Base is always zero and
4388 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
4389 */
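   /*
    * As an illustration of the steps above (editor's own example): with a
    * texel-to-pixel ratio of 4, step 1 gives LOD = log2(4) = 2.  With
    * Bias = 0 and [MinLod, MaxLod] = [0, 14], step 3 leaves LOD at 2, which
    * is greater than Base = 0, so this is minification and step 4 does not
    * apply.  Step 5 clamps LOD to [0, MIPCnt] and adds SurfMinLod, i.e.
    * pipe_sampler_view::u.tex.first_level.
    */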
4390 if (dev->gen >= ILO_GEN(7)) {
4391 const float scale = 256.0f;
4392
4393 /* [-16.0, 16.0) in S4.8 */
4394 lod_bias = (int)
4395 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
4396 lod_bias &= 0x1fff;
4397
4398 /* [0.0, 14.0] in U4.8 */
4399 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
4400 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
4401 }
4402 else {
4403 const float scale = 64.0f;
4404
4405 /* [-16.0, 16.0) in S4.6 */
4406 lod_bias = (int)
4407 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
4408 lod_bias &= 0x7ff;
4409
4410 /* [0.0, 13.0] in U4.6 */
4411 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
4412 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
4413 }
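
   /*
    * For example (editor's illustration), state->lod_bias = -1.0f and
    * state->max_lod = 8.0f encode as
    *
    *   GEN7 (S4.8/U4.8): lod_bias = -256 & 0x1fff = 0x1f00, max_lod = 2048
    *   GEN6 (S4.6/U4.6): lod_bias = -64 & 0x7ff = 0x7c0, max_lod = 512
    */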
4414
4415 /*
4416 * We want LOD to be clamped to determine magnification/minification, and
4417    * to be set to zero when it is magnification or when mipmapping is disabled.
4418 * The hardware would set LOD to floor(MinLod) and that is a problem when
4419 * MinLod is greater than or equal to 1.0f.
4420 *
4421 * With Base being zero, it is always minification when MinLod is non-zero.
4422 * To achieve our goal, we just need to set MinLod to zero and set
4423 * MagFilter to MinFilter when mipmapping is disabled.
4424 */
4425 if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
4426 min_lod = 0;
4427 mag_filter = min_filter;
4428 }
4429
4430 /*
4431 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
4432 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
4433 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
4434 * texture coordinates to [0.0, 1.0].
4435 *
4436 * The clamping will be taken care of in the shaders. There are two
4437    * filters here, but we let the minification filter decide.
4438 */
4439 clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
4440 if (!clamp_is_to_edge) {
4441 sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
4442 sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
4443 sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
4444 }
4445
4446 /* determine wrap s/t/r */
4447 wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
4448 wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
4449 wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);
4450
4451 /*
4452 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
4453 *
4454 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
4455 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
4456 * must have the same Address Control mode."
4457 *
4458 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
4459 *
4460 * "This field (Cube Surface Control Mode) must be set to
4461 * CUBECTRLMODE_PROGRAMMED"
4462 *
4463    * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
4464 * map filtering.
4465 */
4466 if (state->seamless_cube_map &&
4467 (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
4468 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
4469 wrap_cube = BRW_TEXCOORDMODE_CUBE;
4470 }
4471 else {
4472 wrap_cube = BRW_TEXCOORDMODE_CLAMP;
4473 }
4474
4475 if (!state->normalized_coords) {
4476 /*
4477 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
4478 *
4479 * "The following state must be set as indicated if this field
4480 * (Non-normalized Coordinate Enable) is enabled:
4481 *
4482 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
4483 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
4484 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
4485 * - Mag Mode Filter must be MAPFILTER_NEAREST or
4486 * MAPFILTER_LINEAR.
4487 * - Min Mode Filter must be MAPFILTER_NEAREST or
4488 * MAPFILTER_LINEAR.
4489 * - Mip Mode Filter must be MIPFILTER_NONE.
4490 * - Min LOD must be 0.
4491 * - Max LOD must be 0.
4492 * - MIP Count must be 0.
4493 * - Surface Min LOD must be 0.
4494 * - Texture LOD Bias must be 0."
4495 */
4496 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
4497 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
4498 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
4499 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
4500 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
4501 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
4502
4503 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
4504 mag_filter == BRW_MAPFILTER_LINEAR);
4505 assert(min_filter == BRW_MAPFILTER_NEAREST ||
4506 min_filter == BRW_MAPFILTER_LINEAR);
4507
4508 /* work around a bug in util_blitter */
4509 mip_filter = BRW_MIPFILTER_NONE;
4510
4511 assert(mip_filter == BRW_MIPFILTER_NONE);
4512 }
4513
4514 if (dev->gen >= ILO_GEN(7)) {
4515 dw0 = 1 << 28 |
4516 mip_filter << 20 |
4517 lod_bias << 1;
4518
4519 sampler->dw_filter = mag_filter << 17 |
4520 min_filter << 14;
4521
4522 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
4523 BRW_MAPFILTER_ANISOTROPIC << 14 |
4524 1;
4525
4526 dw1 = min_lod << 20 |
4527 max_lod << 8;
4528
4529 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
4530 dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
4531
4532 dw3 = max_aniso << 19;
4533
4534 /* round the coordinates for linear filtering */
4535 if (min_filter != BRW_MAPFILTER_NEAREST) {
4536 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4537 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4538 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4539 }
4540 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4541 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4542 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4543 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4544 }
4545
4546 if (!state->normalized_coords)
4547 dw3 |= 1 << 10;
4548
4549 sampler->dw_wrap = wrap_s << 6 |
4550 wrap_t << 3 |
4551 wrap_r;
4552
4553 /*
4554 * As noted in the classic i965 driver, the HW may still reference
4555 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
4556 * mode
4557 */
4558 sampler->dw_wrap_1d = wrap_s << 6 |
4559 BRW_TEXCOORDMODE_WRAP << 3 |
4560 BRW_TEXCOORDMODE_WRAP;
4561
4562 sampler->dw_wrap_cube = wrap_cube << 6 |
4563 wrap_cube << 3 |
4564 wrap_cube;
4565
4566 STATIC_ASSERT(Elements(sampler->payload) >= 7);
4567
4568 sampler->payload[0] = dw0;
4569 sampler->payload[1] = dw1;
4570 sampler->payload[2] = dw3;
4571
4572 memcpy(&sampler->payload[3],
4573 state->border_color.ui, sizeof(state->border_color.ui));
4574 }
4575 else {
4576 dw0 = 1 << 28 |
4577 mip_filter << 20 |
4578 lod_bias << 3;
4579
4580 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
4581 dw0 |= gen6_translate_shadow_func(state->compare_func);
4582
4583 sampler->dw_filter = (min_filter != mag_filter) << 27 |
4584 mag_filter << 17 |
4585 min_filter << 14;
4586
4587 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
4588 BRW_MAPFILTER_ANISOTROPIC << 14;
4589
4590 dw1 = min_lod << 22 |
4591 max_lod << 12;
4592
4593 sampler->dw_wrap = wrap_s << 6 |
4594 wrap_t << 3 |
4595 wrap_r;
4596
4597 sampler->dw_wrap_1d = wrap_s << 6 |
4598 BRW_TEXCOORDMODE_WRAP << 3 |
4599 BRW_TEXCOORDMODE_WRAP;
4600
4601 sampler->dw_wrap_cube = wrap_cube << 6 |
4602 wrap_cube << 3 |
4603 wrap_cube;
4604
4605 dw3 = max_aniso << 19;
4606
4607 /* round the coordinates for linear filtering */
4608 if (min_filter != BRW_MAPFILTER_NEAREST) {
4609 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
4610 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
4611 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
4612 }
4613 if (mag_filter != BRW_MAPFILTER_NEAREST) {
4614 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
4615 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
4616 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
4617 }
4618
4619 if (!state->normalized_coords)
4620 dw3 |= 1;
4621
4622 STATIC_ASSERT(Elements(sampler->payload) >= 15);
4623
4624 sampler->payload[0] = dw0;
4625 sampler->payload[1] = dw1;
4626 sampler->payload[2] = dw3;
4627
4628 sampler_init_border_color_gen6(dev,
4629 &state->border_color, &sampler->payload[3], 12);
4630 }
4631 }
4632
4633 static uint32_t
4634 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
4635 const struct ilo_sampler_cso * const *samplers,
4636 const struct pipe_sampler_view * const *views,
4637 const uint32_t *sampler_border_colors,
4638 int num_samplers,
4639 struct ilo_cp *cp)
4640 {
4641 const int state_align = 32 / 4;
4642 const int state_len = 4 * num_samplers;
4643 uint32_t state_offset, *dw;
4644 int i;
4645
4646 ILO_GPE_VALID_GEN(dev, 6, 7);
4647
4648 /*
4649 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
4650 *
4651 * "The sampler state is stored as an array of up to 16 elements..."
4652 */
4653 assert(num_samplers <= 16);
4654
4655 if (!num_samplers)
4656 return 0;
4657
4658 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
4659 state_len, state_align, &state_offset);
4660
4661 for (i = 0; i < num_samplers; i++) {
4662 const struct ilo_sampler_cso *sampler = samplers[i];
4663 const struct pipe_sampler_view *view = views[i];
4664 const uint32_t border_color = sampler_border_colors[i];
4665 uint32_t dw_filter, dw_wrap;
4666
4667 /* there may be holes */
4668 if (!sampler || !view) {
4669 /* disabled sampler */
4670 dw[0] = 1 << 31;
4671 dw[1] = 0;
4672 dw[2] = 0;
4673 dw[3] = 0;
4674 dw += 4;
4675
4676 continue;
4677 }
4678
4679 /* determine filter and wrap modes */
4680 switch (view->texture->target) {
4681 case PIPE_TEXTURE_1D:
4682 dw_filter = (sampler->anisotropic) ?
4683 sampler->dw_filter_aniso : sampler->dw_filter;
4684 dw_wrap = sampler->dw_wrap_1d;
4685 break;
4686 case PIPE_TEXTURE_3D:
4687 /*
4688 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
4689 *
4690 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
4691 * surfaces of type SURFTYPE_3D."
4692 */
4693 dw_filter = sampler->dw_filter;
4694 dw_wrap = sampler->dw_wrap;
4695 break;
4696 case PIPE_TEXTURE_CUBE:
4697 dw_filter = (sampler->anisotropic) ?
4698 sampler->dw_filter_aniso : sampler->dw_filter;
4699 dw_wrap = sampler->dw_wrap_cube;
4700 break;
4701 default:
4702 dw_filter = (sampler->anisotropic) ?
4703 sampler->dw_filter_aniso : sampler->dw_filter;
4704 dw_wrap = sampler->dw_wrap;
4705 break;
4706 }
4707
4708 dw[0] = sampler->payload[0];
4709 dw[1] = sampler->payload[1];
4710 assert(!(border_color & 0x1f));
4711 dw[2] = border_color;
4712 dw[3] = sampler->payload[2];
4713
4714 dw[0] |= dw_filter;
4715
4716 if (dev->gen >= ILO_GEN(7)) {
4717 dw[3] |= dw_wrap;
4718 }
4719 else {
4720 /*
4721 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
4722 *
4723 * "[DevSNB] Errata: Incorrect behavior is observed in cases
4724 * where the min and mag mode filters are different and
4725 * SurfMinLOD is nonzero. The determination of MagMode uses the
4726 * following equation instead of the one in the above
4727 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
4728 *
4729 * As a way to work around that, we set Base to
4730 * view->u.tex.first_level.
4731 */
4732 dw[0] |= view->u.tex.first_level << 22;
4733
4734 dw[1] |= dw_wrap;
4735 }
4736
4737 dw += 4;
4738 }
4739
4740 return state_offset;
4741 }
4742
4743 static uint32_t
4744 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
4745 const struct ilo_sampler_cso *sampler,
4746 struct ilo_cp *cp)
4747 {
4748 const int state_align = 32 / 4;
4749 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
4750 uint32_t state_offset, *dw;
4751
4752 ILO_GPE_VALID_GEN(dev, 6, 7);
4753
4754 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
4755 state_len, state_align, &state_offset);
4756
4757 memcpy(dw, &sampler->payload[3], state_len * 4);
4758
4759 return state_offset;
4760 }
4761
4762 static uint32_t
4763 gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
4764 int size, void **pcb,
4765 struct ilo_cp *cp)
4766 {
4767 /*
4768    * All VS, GS, FS, and CS push constant buffers must be aligned to 32
4769    * bytes, and their sizes are specified in 256-bit units.
4770 */
4771 const int state_align = 32 / 4;
4772 const int state_len = align(size, 32) / 4;
4773 uint32_t state_offset;
4774 char *buf;
4775
4776 ILO_GPE_VALID_GEN(dev, 6, 7);
4777
4778 buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
4779 state_len, state_align, &state_offset);
4780
4781 /* zero out the unused range */
4782 if (size < state_len * 4)
4783 memset(&buf[size], 0, state_len * 4 - size);
4784
4785 if (pcb)
4786 *pcb = buf;
4787
4788 return state_offset;
4789 }
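
/*
 * For example (editor's illustration), a 52-byte constant buffer is padded
 * to align(52, 32) = 64 bytes, i.e. state_len = 16 dwords or two 256-bit
 * units; bytes 52 through 63 are zeroed here and the caller writes the first
 * 52 bytes through *pcb.
 */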
4790
4791 static int
4792 gen6_estimate_command_size(const struct ilo_dev_info *dev,
4793 enum ilo_gpe_gen6_command cmd,
4794 int arg)
4795 {
4796 static const struct {
4797 int header;
4798 int body;
4799 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
4800 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
4801 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
4802 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
4803 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
4804 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
4805 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
4806 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
4807 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
4808 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
4809 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
4810 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
4811 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
4812 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
4813 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
4814 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
4815 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
4816 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
4817 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
4818 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
4819 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
4820 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
4821 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
4822 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
4823 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
4824 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
4825 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
4826 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
4827 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
4828 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
4829 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
4830 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
4831 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
4832 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
4833 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
4834 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
4835 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
4836 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
4837 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
4838 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
4839 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
4840 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
4841 };
4842 const int header = gen6_command_size_table[cmd].header;
4843    const int body = gen6_command_size_table[cmd].body;
4844 const int count = arg;
4845
4846 ILO_GPE_VALID_GEN(dev, 6, 6);
4847 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
4848
4849 return (likely(count)) ? header + body * count : 0;
4850 }
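
/*
 * For example (editor's illustration), estimating 3DSTATE_VERTEX_BUFFERS
 * with arg = 3 buffers gives header + body * count = 1 + 4 * 3 = 13 dwords,
 * while any command with arg = 0 is estimated at zero.
 */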
4851
4852 static int
4853 gen6_estimate_state_size(const struct ilo_dev_info *dev,
4854 enum ilo_gpe_gen6_state state,
4855 int arg)
4856 {
4857 static const struct {
4858 int alignment;
4859 int body;
4860 bool is_array;
4861 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
4862 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
4863 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
4864 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
4865 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
4866 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
4867 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
4868 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
4869 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
4870 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
4871 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
4872 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
4873 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
4874 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
4875 };
4876 const int alignment = gen6_state_size_table[state].alignment;
4877 const int body = gen6_state_size_table[state].body;
4878 const bool is_array = gen6_state_size_table[state].is_array;
4879 const int count = arg;
4880 int estimate;
4881
4882 ILO_GPE_VALID_GEN(dev, 6, 6);
4883 assert(state < ILO_GPE_GEN6_STATE_COUNT);
4884
4885 if (likely(count)) {
4886 if (is_array) {
4887 estimate = (alignment - 1) + body * count;
4888 }
4889 else {
4890 estimate = (alignment - 1) + body;
4891 /* all states are aligned */
4892 if (count > 1)
4893 estimate += util_align_npot(body, alignment) * (count - 1);
4894 }
4895 }
4896 else {
4897 estimate = 0;
4898 }
4899
4900 return estimate;
4901 }
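
/*
 * For example (editor's illustration), BLEND_STATE is { 16, 2, true }, so
 * eight render targets are estimated at (16 - 1) + 2 * 8 = 31 dwords, while
 * a single COLOR_CALC_STATE ({ 16, 6, false }) is (16 - 1) + 6 = 21 dwords;
 * the extra (alignment - 1) dwords cover the worst-case padding in front of
 * the state.
 */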
4902
4903 static const struct ilo_gpe_gen6 gen6_gpe = {
4904 .estimate_command_size = gen6_estimate_command_size,
4905 .estimate_state_size = gen6_estimate_state_size,
4906
4907 #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
4908 GEN6_SET(STATE_BASE_ADDRESS),
4909 GEN6_SET(STATE_SIP),
4910 GEN6_SET(3DSTATE_VF_STATISTICS),
4911 GEN6_SET(PIPELINE_SELECT),
4912 GEN6_SET(MEDIA_VFE_STATE),
4913 GEN6_SET(MEDIA_CURBE_LOAD),
4914 GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD),
4915 GEN6_SET(MEDIA_GATEWAY_STATE),
4916 GEN6_SET(MEDIA_STATE_FLUSH),
4917 GEN6_SET(MEDIA_OBJECT_WALKER),
4918 GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS),
4919 GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS),
4920 GEN6_SET(3DSTATE_URB),
4921 GEN6_SET(3DSTATE_VERTEX_BUFFERS),
4922 GEN6_SET(3DSTATE_VERTEX_ELEMENTS),
4923 GEN6_SET(3DSTATE_INDEX_BUFFER),
4924 GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS),
4925 GEN6_SET(3DSTATE_CC_STATE_POINTERS),
4926 GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS),
4927 GEN6_SET(3DSTATE_VS),
4928 GEN6_SET(3DSTATE_GS),
4929 GEN6_SET(3DSTATE_CLIP),
4930 GEN6_SET(3DSTATE_SF),
4931 GEN6_SET(3DSTATE_WM),
4932 GEN6_SET(3DSTATE_CONSTANT_VS),
4933 GEN6_SET(3DSTATE_CONSTANT_GS),
4934 GEN6_SET(3DSTATE_CONSTANT_PS),
4935 GEN6_SET(3DSTATE_SAMPLE_MASK),
4936 GEN6_SET(3DSTATE_DRAWING_RECTANGLE),
4937 GEN6_SET(3DSTATE_DEPTH_BUFFER),
4938 GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET),
4939 GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN),
4940 GEN6_SET(3DSTATE_LINE_STIPPLE),
4941 GEN6_SET(3DSTATE_AA_LINE_PARAMETERS),
4942 GEN6_SET(3DSTATE_GS_SVB_INDEX),
4943 GEN6_SET(3DSTATE_MULTISAMPLE),
4944 GEN6_SET(3DSTATE_STENCIL_BUFFER),
4945 GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER),
4946 GEN6_SET(3DSTATE_CLEAR_PARAMS),
4947 GEN6_SET(PIPE_CONTROL),
4948 GEN6_SET(3DPRIMITIVE),
4949 GEN6_SET(INTERFACE_DESCRIPTOR_DATA),
4950 GEN6_SET(SF_VIEWPORT),
4951 GEN6_SET(CLIP_VIEWPORT),
4952 GEN6_SET(CC_VIEWPORT),
4953 GEN6_SET(COLOR_CALC_STATE),
4954 GEN6_SET(BLEND_STATE),
4955 GEN6_SET(DEPTH_STENCIL_STATE),
4956 GEN6_SET(SCISSOR_RECT),
4957 GEN6_SET(BINDING_TABLE_STATE),
4958 GEN6_SET(SURFACE_STATE),
4959 GEN6_SET(so_SURFACE_STATE),
4960 GEN6_SET(SAMPLER_STATE),
4961 GEN6_SET(SAMPLER_BORDER_COLOR_STATE),
4962 GEN6_SET(push_constant_buffer),
4963 #undef GEN6_SET
4964 };
4965
4966 const struct ilo_gpe_gen6 *
4967 ilo_gpe_gen6_get(void)
4968 {
4969 return &gen6_gpe;
4970 }