2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
33 #include "ilo_context.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
42 * Translate winsys tiling to hardware tiling.
45 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling
)
48 case INTEL_TILING_NONE
:
51 return BRW_SURFACE_TILED
;
53 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
55 assert(!"unknown tiling");
61 * Translate a pipe primitive type to the matching hardware primitive type.
64 ilo_gpe_gen6_translate_pipe_prim(unsigned prim
)
66 static const int prim_mapping
[PIPE_PRIM_MAX
] = {
67 [PIPE_PRIM_POINTS
] = _3DPRIM_POINTLIST
,
68 [PIPE_PRIM_LINES
] = _3DPRIM_LINELIST
,
69 [PIPE_PRIM_LINE_LOOP
] = _3DPRIM_LINELOOP
,
70 [PIPE_PRIM_LINE_STRIP
] = _3DPRIM_LINESTRIP
,
71 [PIPE_PRIM_TRIANGLES
] = _3DPRIM_TRILIST
,
72 [PIPE_PRIM_TRIANGLE_STRIP
] = _3DPRIM_TRISTRIP
,
73 [PIPE_PRIM_TRIANGLE_FAN
] = _3DPRIM_TRIFAN
,
74 [PIPE_PRIM_QUADS
] = _3DPRIM_QUADLIST
,
75 [PIPE_PRIM_QUAD_STRIP
] = _3DPRIM_QUADSTRIP
,
76 [PIPE_PRIM_POLYGON
] = _3DPRIM_POLYGON
,
77 [PIPE_PRIM_LINES_ADJACENCY
] = _3DPRIM_LINELIST_ADJ
,
78 [PIPE_PRIM_LINE_STRIP_ADJACENCY
] = _3DPRIM_LINESTRIP_ADJ
,
79 [PIPE_PRIM_TRIANGLES_ADJACENCY
] = _3DPRIM_TRILIST_ADJ
,
80 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
] = _3DPRIM_TRISTRIP_ADJ
,
83 assert(prim_mapping
[prim
]);
85 return prim_mapping
[prim
];
89 * Translate a pipe texture target to the matching hardware surface type.
92 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target
)
96 return BRW_SURFACE_BUFFER
;
98 case PIPE_TEXTURE_1D_ARRAY
:
99 return BRW_SURFACE_1D
;
100 case PIPE_TEXTURE_2D
:
101 case PIPE_TEXTURE_RECT
:
102 case PIPE_TEXTURE_2D_ARRAY
:
103 return BRW_SURFACE_2D
;
104 case PIPE_TEXTURE_3D
:
105 return BRW_SURFACE_3D
;
106 case PIPE_TEXTURE_CUBE
:
107 case PIPE_TEXTURE_CUBE_ARRAY
:
108 return BRW_SURFACE_CUBE
;
110 assert(!"unknown texture target");
111 return BRW_SURFACE_BUFFER
;
116 * Translate a depth/stencil pipe format to the matching hardware
117 * format. Return -1 on errors.
120 gen6_translate_depth_format(enum pipe_format format
)
123 case PIPE_FORMAT_Z16_UNORM
:
124 return BRW_DEPTHFORMAT_D16_UNORM
;
125 case PIPE_FORMAT_Z32_FLOAT
:
126 return BRW_DEPTHFORMAT_D32_FLOAT
;
127 case PIPE_FORMAT_Z24X8_UNORM
:
128 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
130 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
132 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT
;
139 * Translate a pipe logicop to the matching hardware logicop.
142 gen6_translate_pipe_logicop(unsigned logicop
)
145 case PIPE_LOGICOP_CLEAR
: return BRW_LOGICOPFUNCTION_CLEAR
;
146 case PIPE_LOGICOP_NOR
: return BRW_LOGICOPFUNCTION_NOR
;
147 case PIPE_LOGICOP_AND_INVERTED
: return BRW_LOGICOPFUNCTION_AND_INVERTED
;
148 case PIPE_LOGICOP_COPY_INVERTED
: return BRW_LOGICOPFUNCTION_COPY_INVERTED
;
149 case PIPE_LOGICOP_AND_REVERSE
: return BRW_LOGICOPFUNCTION_AND_REVERSE
;
150 case PIPE_LOGICOP_INVERT
: return BRW_LOGICOPFUNCTION_INVERT
;
151 case PIPE_LOGICOP_XOR
: return BRW_LOGICOPFUNCTION_XOR
;
152 case PIPE_LOGICOP_NAND
: return BRW_LOGICOPFUNCTION_NAND
;
153 case PIPE_LOGICOP_AND
: return BRW_LOGICOPFUNCTION_AND
;
154 case PIPE_LOGICOP_EQUIV
: return BRW_LOGICOPFUNCTION_EQUIV
;
155 case PIPE_LOGICOP_NOOP
: return BRW_LOGICOPFUNCTION_NOOP
;
156 case PIPE_LOGICOP_OR_INVERTED
: return BRW_LOGICOPFUNCTION_OR_INVERTED
;
157 case PIPE_LOGICOP_COPY
: return BRW_LOGICOPFUNCTION_COPY
;
158 case PIPE_LOGICOP_OR_REVERSE
: return BRW_LOGICOPFUNCTION_OR_REVERSE
;
159 case PIPE_LOGICOP_OR
: return BRW_LOGICOPFUNCTION_OR
;
160 case PIPE_LOGICOP_SET
: return BRW_LOGICOPFUNCTION_SET
;
162 assert(!"unknown logicop function");
163 return BRW_LOGICOPFUNCTION_CLEAR
;
168 * Translate a pipe blend function to the matching hardware blend function.
171 gen6_translate_pipe_blend(unsigned blend
)
174 case PIPE_BLEND_ADD
: return BRW_BLENDFUNCTION_ADD
;
175 case PIPE_BLEND_SUBTRACT
: return BRW_BLENDFUNCTION_SUBTRACT
;
176 case PIPE_BLEND_REVERSE_SUBTRACT
: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT
;
177 case PIPE_BLEND_MIN
: return BRW_BLENDFUNCTION_MIN
;
178 case PIPE_BLEND_MAX
: return BRW_BLENDFUNCTION_MAX
;
180 assert(!"unknown blend function");
181 return BRW_BLENDFUNCTION_ADD
;
186 * Translate a pipe blend factor to the matching hardware blend factor.
189 gen6_translate_pipe_blendfactor(unsigned blendfactor
)
191 switch (blendfactor
) {
192 case PIPE_BLENDFACTOR_ONE
: return BRW_BLENDFACTOR_ONE
;
193 case PIPE_BLENDFACTOR_SRC_COLOR
: return BRW_BLENDFACTOR_SRC_COLOR
;
194 case PIPE_BLENDFACTOR_SRC_ALPHA
: return BRW_BLENDFACTOR_SRC_ALPHA
;
195 case PIPE_BLENDFACTOR_DST_ALPHA
: return BRW_BLENDFACTOR_DST_ALPHA
;
196 case PIPE_BLENDFACTOR_DST_COLOR
: return BRW_BLENDFACTOR_DST_COLOR
;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE
;
198 case PIPE_BLENDFACTOR_CONST_COLOR
: return BRW_BLENDFACTOR_CONST_COLOR
;
199 case PIPE_BLENDFACTOR_CONST_ALPHA
: return BRW_BLENDFACTOR_CONST_ALPHA
;
200 case PIPE_BLENDFACTOR_SRC1_COLOR
: return BRW_BLENDFACTOR_SRC1_COLOR
;
201 case PIPE_BLENDFACTOR_SRC1_ALPHA
: return BRW_BLENDFACTOR_SRC1_ALPHA
;
202 case PIPE_BLENDFACTOR_ZERO
: return BRW_BLENDFACTOR_ZERO
;
203 case PIPE_BLENDFACTOR_INV_SRC_COLOR
: return BRW_BLENDFACTOR_INV_SRC_COLOR
;
204 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
: return BRW_BLENDFACTOR_INV_SRC_ALPHA
;
205 case PIPE_BLENDFACTOR_INV_DST_ALPHA
: return BRW_BLENDFACTOR_INV_DST_ALPHA
;
206 case PIPE_BLENDFACTOR_INV_DST_COLOR
: return BRW_BLENDFACTOR_INV_DST_COLOR
;
207 case PIPE_BLENDFACTOR_INV_CONST_COLOR
: return BRW_BLENDFACTOR_INV_CONST_COLOR
;
208 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
: return BRW_BLENDFACTOR_INV_CONST_ALPHA
;
209 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
: return BRW_BLENDFACTOR_INV_SRC1_COLOR
;
210 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
: return BRW_BLENDFACTOR_INV_SRC1_ALPHA
;
212 assert(!"unknown blend factor");
213 return BRW_BLENDFACTOR_ONE
;
218 * Translate a pipe stencil op to the matching hardware stencil op.
221 gen6_translate_pipe_stencil_op(unsigned stencil_op
)
223 switch (stencil_op
) {
224 case PIPE_STENCIL_OP_KEEP
: return BRW_STENCILOP_KEEP
;
225 case PIPE_STENCIL_OP_ZERO
: return BRW_STENCILOP_ZERO
;
226 case PIPE_STENCIL_OP_REPLACE
: return BRW_STENCILOP_REPLACE
;
227 case PIPE_STENCIL_OP_INCR
: return BRW_STENCILOP_INCRSAT
;
228 case PIPE_STENCIL_OP_DECR
: return BRW_STENCILOP_DECRSAT
;
229 case PIPE_STENCIL_OP_INCR_WRAP
: return BRW_STENCILOP_INCR
;
230 case PIPE_STENCIL_OP_DECR_WRAP
: return BRW_STENCILOP_DECR
;
231 case PIPE_STENCIL_OP_INVERT
: return BRW_STENCILOP_INVERT
;
233 assert(!"unknown stencil op");
234 return BRW_STENCILOP_KEEP
;
239 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
242 gen6_translate_tex_mipfilter(unsigned filter
)
245 case PIPE_TEX_MIPFILTER_NEAREST
: return BRW_MIPFILTER_NEAREST
;
246 case PIPE_TEX_MIPFILTER_LINEAR
: return BRW_MIPFILTER_LINEAR
;
247 case PIPE_TEX_MIPFILTER_NONE
: return BRW_MIPFILTER_NONE
;
249 assert(!"unknown mipfilter");
250 return BRW_MIPFILTER_NONE
;
255 * Translate a pipe texture filter to the matching hardware mapfilter.
258 gen6_translate_tex_filter(unsigned filter
)
261 case PIPE_TEX_FILTER_NEAREST
: return BRW_MAPFILTER_NEAREST
;
262 case PIPE_TEX_FILTER_LINEAR
: return BRW_MAPFILTER_LINEAR
;
264 assert(!"unknown sampler filter");
265 return BRW_MAPFILTER_NEAREST
;
270 * Translate a pipe texture coordinate wrapping mode to the matching hardware
274 gen6_translate_tex_wrap(unsigned wrap
, bool clamp_to_edge
)
276 /* clamp to edge or border? */
277 if (wrap
== PIPE_TEX_WRAP_CLAMP
) {
278 wrap
= (clamp_to_edge
) ?
279 PIPE_TEX_WRAP_CLAMP_TO_EDGE
: PIPE_TEX_WRAP_CLAMP_TO_BORDER
;
283 case PIPE_TEX_WRAP_REPEAT
: return BRW_TEXCOORDMODE_WRAP
;
284 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
: return BRW_TEXCOORDMODE_CLAMP
;
285 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
: return BRW_TEXCOORDMODE_CLAMP_BORDER
;
286 case PIPE_TEX_WRAP_MIRROR_REPEAT
: return BRW_TEXCOORDMODE_MIRROR
;
287 case PIPE_TEX_WRAP_CLAMP
:
288 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
289 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
290 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
292 assert(!"unknown sampler wrap mode");
293 return BRW_TEXCOORDMODE_WRAP
;
298 * Translate a pipe DSA test function to the matching hardware compare
302 gen6_translate_dsa_func(unsigned func
)
305 case PIPE_FUNC_NEVER
: return BRW_COMPAREFUNCTION_NEVER
;
306 case PIPE_FUNC_LESS
: return BRW_COMPAREFUNCTION_LESS
;
307 case PIPE_FUNC_EQUAL
: return BRW_COMPAREFUNCTION_EQUAL
;
308 case PIPE_FUNC_LEQUAL
: return BRW_COMPAREFUNCTION_LEQUAL
;
309 case PIPE_FUNC_GREATER
: return BRW_COMPAREFUNCTION_GREATER
;
310 case PIPE_FUNC_NOTEQUAL
: return BRW_COMPAREFUNCTION_NOTEQUAL
;
311 case PIPE_FUNC_GEQUAL
: return BRW_COMPAREFUNCTION_GEQUAL
;
312 case PIPE_FUNC_ALWAYS
: return BRW_COMPAREFUNCTION_ALWAYS
;
314 assert(!"unknown depth/stencil/alpha test function");
315 return BRW_COMPAREFUNCTION_NEVER
;
320 * Translate a pipe shadow compare function to the matching hardware shadow
324 gen6_translate_shadow_func(unsigned func
)
327 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
328 * comparison, and 1.0 is returned when the comparison is true.
330 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
331 * the comparison, and 0.0 is returned when the comparison is true.
334 case PIPE_FUNC_NEVER
: return BRW_PREFILTER_ALWAYS
;
335 case PIPE_FUNC_LESS
: return BRW_PREFILTER_LEQUAL
;
336 case PIPE_FUNC_EQUAL
: return BRW_PREFILTER_NOTEQUAL
;
337 case PIPE_FUNC_LEQUAL
: return BRW_PREFILTER_LESS
;
338 case PIPE_FUNC_GREATER
: return BRW_PREFILTER_GEQUAL
;
339 case PIPE_FUNC_NOTEQUAL
: return BRW_PREFILTER_EQUAL
;
340 case PIPE_FUNC_GEQUAL
: return BRW_PREFILTER_GREATER
;
341 case PIPE_FUNC_ALWAYS
: return BRW_PREFILTER_NEVER
;
343 assert(!"unknown shadow compare function");
344 return BRW_PREFILTER_NEVER
;
349 * Translate an index size to the matching hardware index format.
352 gen6_translate_index_size(int size
)
355 case 4: return BRW_INDEX_DWORD
;
356 case 2: return BRW_INDEX_WORD
;
357 case 1: return BRW_INDEX_BYTE
;
359 assert(!"unknown index size");
360 return BRW_INDEX_BYTE
;
365 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info
*dev
,
366 struct intel_bo
*general_state_bo
,
367 struct intel_bo
*surface_state_bo
,
368 struct intel_bo
*dynamic_state_bo
,
369 struct intel_bo
*indirect_object_bo
,
370 struct intel_bo
*instruction_bo
,
371 uint32_t general_state_size
,
372 uint32_t dynamic_state_size
,
373 uint32_t indirect_object_size
,
374 uint32_t instruction_size
,
377 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x01);
378 const uint8_t cmd_len
= 10;
380 ILO_GPE_VALID_GEN(dev
, 6, 7);
382 /* 4K-page aligned */
383 assert(((general_state_size
| dynamic_state_size
|
384 indirect_object_size
| instruction_size
) & 0xfff) == 0);
386 ilo_cp_begin(cp
, cmd_len
);
387 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
389 ilo_cp_write_bo(cp
, 1, general_state_bo
,
392 ilo_cp_write_bo(cp
, 1, surface_state_bo
,
393 INTEL_DOMAIN_SAMPLER
,
395 ilo_cp_write_bo(cp
, 1, dynamic_state_bo
,
396 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
398 ilo_cp_write_bo(cp
, 1, indirect_object_bo
,
401 ilo_cp_write_bo(cp
, 1, instruction_bo
,
402 INTEL_DOMAIN_INSTRUCTION
,
405 if (general_state_size
) {
406 ilo_cp_write_bo(cp
, general_state_size
| 1, general_state_bo
,
411 /* skip range check */
415 if (dynamic_state_size
) {
416 ilo_cp_write_bo(cp
, dynamic_state_size
| 1, dynamic_state_bo
,
417 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
421 /* skip range check */
422 ilo_cp_write(cp
, 0xfffff000 + 1);
425 if (indirect_object_size
) {
426 ilo_cp_write_bo(cp
, indirect_object_size
| 1, indirect_object_bo
,
431 /* skip range check */
432 ilo_cp_write(cp
, 0xfffff000 + 1);
435 if (instruction_size
) {
436 ilo_cp_write_bo(cp
, instruction_size
| 1, instruction_bo
,
437 INTEL_DOMAIN_INSTRUCTION
,
441 /* skip range check */
/**
 * Emit the two-DWord STATE_SIP command.
 *
 * The header DWord is followed by sip, written as-is.
 */
static void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * Same begin/header pattern as the other emitters in this file:
    * ilo_cp_begin() gets the plain command length, and the length bias
    * (cmd_len - 2) goes into the header DWord.
    */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
465 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info
*dev
,
469 const uint32_t cmd
= ILO_GPE_CMD(0x1, 0x0, 0x0b);
470 const uint8_t cmd_len
= 1;
472 ILO_GPE_VALID_GEN(dev
, 6, 7);
474 ilo_cp_begin(cp
, cmd_len
);
475 ilo_cp_write(cp
, cmd
| enable
);
480 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info
*dev
,
484 const int cmd
= ILO_GPE_CMD(0x1, 0x1, 0x04);
485 const uint8_t cmd_len
= 1;
487 ILO_GPE_VALID_GEN(dev
, 6, 7);
490 assert(pipeline
== 0x0 || pipeline
== 0x1);
492 ilo_cp_begin(cp
, cmd_len
);
493 ilo_cp_write(cp
, cmd
| pipeline
);
498 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info
*dev
,
499 int max_threads
, int num_urb_entries
,
503 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x00);
504 const uint8_t cmd_len
= 8;
507 ILO_GPE_VALID_GEN(dev
, 6, 6);
509 dw2
= (max_threads
- 1) << 16 |
510 num_urb_entries
<< 8 |
511 1 << 7 | /* Reset Gateway Timer */
512 1 << 6; /* Bypass Gateway Control */
514 dw4
= urb_entry_size
<< 16 | /* URB Entry Allocation Size */
515 480; /* CURBE Allocation Size */
517 ilo_cp_begin(cp
, cmd_len
);
518 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
519 ilo_cp_write(cp
, 0); /* scratch */
520 ilo_cp_write(cp
, dw2
);
521 ilo_cp_write(cp
, 0); /* MBZ */
522 ilo_cp_write(cp
, dw4
);
523 ilo_cp_write(cp
, 0); /* scoreboard */
530 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info
*dev
,
531 uint32_t buf
, int size
,
534 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x01);
535 const uint8_t cmd_len
= 4;
537 ILO_GPE_VALID_GEN(dev
, 6, 6);
539 assert(buf
% 32 == 0);
540 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
541 size
= align(size
, 32);
543 ilo_cp_begin(cp
, cmd_len
);
544 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
545 ilo_cp_write(cp
, 0); /* MBZ */
546 ilo_cp_write(cp
, size
);
547 ilo_cp_write(cp
, buf
);
552 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info
*dev
,
553 uint32_t offset
, int num_ids
,
556 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x02);
557 const uint8_t cmd_len
= 4;
559 ILO_GPE_VALID_GEN(dev
, 6, 6);
561 assert(offset
% 32 == 0);
563 ilo_cp_begin(cp
, cmd_len
);
564 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
565 ilo_cp_write(cp
, 0); /* MBZ */
566 /* every ID has 8 DWords */
567 ilo_cp_write(cp
, num_ids
* 8 * 4);
568 ilo_cp_write(cp
, offset
);
573 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info
*dev
,
574 int id
, int byte
, int thread_count
,
577 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x03);
578 const uint8_t cmd_len
= 2;
581 ILO_GPE_VALID_GEN(dev
, 6, 6);
587 ilo_cp_begin(cp
, cmd_len
);
588 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
589 ilo_cp_write(cp
, dw1
);
594 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info
*dev
,
595 int thread_count_water_mark
,
599 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x04);
600 const uint8_t cmd_len
= 2;
603 ILO_GPE_VALID_GEN(dev
, 6, 6);
605 dw1
= thread_count_water_mark
<< 16 |
608 ilo_cp_begin(cp
, cmd_len
);
609 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
610 ilo_cp_write(cp
, dw1
);
615 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info
*dev
,
618 assert(!"MEDIA_OBJECT_WALKER unsupported");
622 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info
*dev
,
623 uint32_t vs_binding_table
,
624 uint32_t gs_binding_table
,
625 uint32_t ps_binding_table
,
628 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x01);
629 const uint8_t cmd_len
= 4;
631 ILO_GPE_VALID_GEN(dev
, 6, 6);
633 ilo_cp_begin(cp
, cmd_len
);
634 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
635 GEN6_BINDING_TABLE_MODIFY_VS
|
636 GEN6_BINDING_TABLE_MODIFY_GS
|
637 GEN6_BINDING_TABLE_MODIFY_PS
);
638 ilo_cp_write(cp
, vs_binding_table
);
639 ilo_cp_write(cp
, gs_binding_table
);
640 ilo_cp_write(cp
, ps_binding_table
);
645 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info
*dev
,
646 uint32_t vs_sampler_state
,
647 uint32_t gs_sampler_state
,
648 uint32_t ps_sampler_state
,
651 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x02);
652 const uint8_t cmd_len
= 4;
654 ILO_GPE_VALID_GEN(dev
, 6, 6);
656 ilo_cp_begin(cp
, cmd_len
);
657 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
658 VS_SAMPLER_STATE_CHANGE
|
659 GS_SAMPLER_STATE_CHANGE
|
660 PS_SAMPLER_STATE_CHANGE
);
661 ilo_cp_write(cp
, vs_sampler_state
);
662 ilo_cp_write(cp
, gs_sampler_state
);
663 ilo_cp_write(cp
, ps_sampler_state
);
668 gen6_emit_3DSTATE_URB(const struct ilo_dev_info
*dev
,
669 int vs_total_size
, int gs_total_size
,
670 int vs_entry_size
, int gs_entry_size
,
673 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x05);
674 const uint8_t cmd_len
= 3;
675 const int row_size
= 128; /* 1024 bits */
676 int vs_alloc_size
, gs_alloc_size
;
677 int vs_num_entries
, gs_num_entries
;
679 ILO_GPE_VALID_GEN(dev
, 6, 6);
681 /* in 1024-bit URB rows */
682 vs_alloc_size
= (vs_entry_size
+ row_size
- 1) / row_size
;
683 gs_alloc_size
= (gs_entry_size
+ row_size
- 1) / row_size
;
685 /* the valid range is [1, 5] */
690 assert(vs_alloc_size
<= 5 && gs_alloc_size
<= 5);
692 /* the valid range is [24, 256] in multiples of 4 */
693 vs_num_entries
= (vs_total_size
/ row_size
/ vs_alloc_size
) & ~3;
694 if (vs_num_entries
> 256)
695 vs_num_entries
= 256;
696 assert(vs_num_entries
>= 24);
698 /* the valid range is [0, 256] in multiples of 4 */
699 gs_num_entries
= (gs_total_size
/ row_size
/ gs_alloc_size
) & ~3;
700 if (gs_num_entries
> 256)
701 gs_num_entries
= 256;
703 ilo_cp_begin(cp
, cmd_len
);
704 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
705 ilo_cp_write(cp
, (vs_alloc_size
- 1) << GEN6_URB_VS_SIZE_SHIFT
|
706 vs_num_entries
<< GEN6_URB_VS_ENTRIES_SHIFT
);
707 ilo_cp_write(cp
, gs_num_entries
<< GEN6_URB_GS_ENTRIES_SHIFT
|
708 (gs_alloc_size
- 1) << GEN6_URB_GS_SIZE_SHIFT
);
713 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info
*dev
,
714 const struct pipe_vertex_buffer
*vbuffers
,
715 const int *instance_divisors
,
716 uint32_t vbuffer_mask
,
719 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x08);
722 ILO_GPE_VALID_GEN(dev
, 6, 7);
725 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
727 * "From 1 to 33 VBs can be specified..."
729 * Because of the type of vbuffer_mask, this is always the case.
731 assert(vbuffer_mask
<= (1UL << 33));
736 cmd_len
= 4 * util_bitcount(vbuffer_mask
) + 1;
738 ilo_cp_begin(cp
, cmd_len
);
739 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
741 while (vbuffer_mask
) {
742 const int index
= u_bit_scan(&vbuffer_mask
);
743 const struct pipe_vertex_buffer
*vb
= &vbuffers
[index
];
744 const int instance_divisor
=
745 (instance_divisors
) ? instance_divisors
[index
] : 0;
748 dw
= index
<< GEN6_VB0_INDEX_SHIFT
;
750 if (instance_divisor
)
751 dw
|= GEN6_VB0_ACCESS_INSTANCEDATA
;
753 dw
|= GEN6_VB0_ACCESS_VERTEXDATA
;
755 if (dev
->gen
>= ILO_GEN(7))
756 dw
|= GEN7_VB0_ADDRESS_MODIFYENABLE
;
758 /* use null vb if there is no buffer or the stride is out of range */
759 if (vb
->buffer
&& vb
->stride
<= 2048) {
760 const struct ilo_buffer
*buf
= ilo_buffer(vb
->buffer
);
761 const uint32_t start_offset
= vb
->buffer_offset
;
762 const uint32_t end_offset
= buf
->bo
->get_size(buf
->bo
) - 1;
764 dw
|= vb
->stride
<< BRW_VB0_PITCH_SHIFT
;
766 ilo_cp_write(cp
, dw
);
767 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
768 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
769 ilo_cp_write(cp
, instance_divisor
);
774 ilo_cp_write(cp
, dw
);
777 ilo_cp_write(cp
, instance_divisor
);
785 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info
*dev
,
786 const struct pipe_vertex_element
*velements
,
788 bool last_velement_edgeflag
,
789 bool prepend_generated_ids
,
792 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x09);
796 ILO_GPE_VALID_GEN(dev
, 6, 7);
799 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
801 * "Up to 34 (DevSNB+) vertex elements are supported."
803 assert(num_velements
+ prepend_generated_ids
<= 34);
805 if (!num_velements
&& !prepend_generated_ids
) {
807 format
= BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
;
809 ilo_cp_begin(cp
, cmd_len
);
810 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
812 0 << GEN6_VE0_INDEX_SHIFT
|
814 format
<< BRW_VE0_FORMAT_SHIFT
|
815 0 << BRW_VE0_SRC_OFFSET_SHIFT
);
817 BRW_VE1_COMPONENT_STORE_0
<< BRW_VE1_COMPONENT_0_SHIFT
|
818 BRW_VE1_COMPONENT_STORE_0
<< BRW_VE1_COMPONENT_1_SHIFT
|
819 BRW_VE1_COMPONENT_STORE_0
<< BRW_VE1_COMPONENT_2_SHIFT
|
820 BRW_VE1_COMPONENT_STORE_1_FLT
<< BRW_VE1_COMPONENT_3_SHIFT
);
826 cmd_len
= 2 * (num_velements
+ prepend_generated_ids
) + 1;
828 ilo_cp_begin(cp
, cmd_len
);
829 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
831 if (prepend_generated_ids
) {
832 ilo_cp_write(cp
, GEN6_VE0_VALID
);
834 BRW_VE1_COMPONENT_STORE_VID
<< BRW_VE1_COMPONENT_0_SHIFT
|
835 BRW_VE1_COMPONENT_STORE_IID
<< BRW_VE1_COMPONENT_1_SHIFT
|
836 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_2_SHIFT
|
837 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_3_SHIFT
);
840 for (i
= 0; i
< num_velements
; i
++) {
841 const struct pipe_vertex_element
*ve
= &velements
[i
];
843 BRW_VE1_COMPONENT_STORE_SRC
,
844 BRW_VE1_COMPONENT_STORE_SRC
,
845 BRW_VE1_COMPONENT_STORE_SRC
,
846 BRW_VE1_COMPONENT_STORE_SRC
,
850 if (last_velement_edgeflag
&& i
== num_velements
- 1) {
852 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
854 * "* This bit (Edge Flag Enable) must only be ENABLED on the
855 * last valid VERTEX_ELEMENT structure.
857 * * When set, Component 0 Control must be set to
858 * VFCOMP_STORE_SRC, and Component 1-3 Control must be set to
861 * * The Source Element Format must be set to the UINT format.
863 * * [DevSNB]: Edge Flags are not supported for QUADLIST
864 * primitives. Software may elect to convert QUADLIST
865 * primitives to some set of corresponding edge-flag-supported
866 * primitive types (e.g., POLYGONs) prior to submission to the
869 * Only a limited set of primitive types could have Edge Flag Enable
870 * set. The caller should not set last_velement_edgeflag for such
873 comp
[1] = BRW_VE1_COMPONENT_NOSTORE
;
874 comp
[2] = BRW_VE1_COMPONENT_NOSTORE
;
875 comp
[3] = BRW_VE1_COMPONENT_NOSTORE
;
877 switch (ve
->src_format
) {
878 case PIPE_FORMAT_R32_FLOAT
:
879 format
= ilo_translate_vertex_format(PIPE_FORMAT_R32_UINT
);
882 assert(ve
->src_format
== PIPE_FORMAT_R8_UINT
);
883 format
= ilo_translate_vertex_format(ve
->src_format
);
887 edgeflag_enable
= GEN6_VE0_EDGE_FLAG_ENABLE
;
890 switch (util_format_get_nr_components(ve
->src_format
)) {
891 case 1: comp
[1] = BRW_VE1_COMPONENT_STORE_0
;
892 case 2: comp
[2] = BRW_VE1_COMPONENT_STORE_0
;
893 case 3: comp
[3] = (util_format_is_pure_integer(ve
->src_format
)) ?
894 BRW_VE1_COMPONENT_STORE_1_INT
:
895 BRW_VE1_COMPONENT_STORE_1_FLT
;
898 format
= ilo_translate_vertex_format(ve
->src_format
);
904 ve
->vertex_buffer_index
<< GEN6_VE0_INDEX_SHIFT
|
906 format
<< BRW_VE0_FORMAT_SHIFT
|
908 ve
->src_offset
<< BRW_VE0_SRC_OFFSET_SHIFT
);
911 comp
[0] << BRW_VE1_COMPONENT_0_SHIFT
|
912 comp
[1] << BRW_VE1_COMPONENT_1_SHIFT
|
913 comp
[2] << BRW_VE1_COMPONENT_2_SHIFT
|
914 comp
[3] << BRW_VE1_COMPONENT_3_SHIFT
);
921 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info
*dev
,
922 const struct pipe_index_buffer
*ib
,
923 bool enable_cut_index
,
926 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0a);
927 const uint8_t cmd_len
= 3;
928 const struct ilo_buffer
*buf
= ilo_buffer(ib
->buffer
);
929 uint32_t start_offset
, end_offset
;
932 ILO_GPE_VALID_GEN(dev
, 6, 7);
937 format
= gen6_translate_index_size(ib
->index_size
);
939 start_offset
= ib
->offset
;
940 /* start_offset must be aligned to index size */
941 if (start_offset
% ib
->index_size
) {
942 /* TODO need a u_upload_mgr to upload the IB to an aligned address */
943 assert(!"unaligned index buffer offset");
944 start_offset
-= start_offset
% ib
->index_size
;
947 /* end_offset must also be aligned */
948 end_offset
= buf
->bo
->get_size(buf
->bo
);
949 end_offset
-= (end_offset
% ib
->index_size
);
950 /* it is inclusive */
953 ilo_cp_begin(cp
, cmd_len
);
954 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
955 ((enable_cut_index
) ? BRW_CUT_INDEX_ENABLE
: 0) |
957 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
958 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
963 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info
*dev
,
964 uint32_t clip_viewport
,
965 uint32_t sf_viewport
,
966 uint32_t cc_viewport
,
969 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0d);
970 const uint8_t cmd_len
= 4;
972 ILO_GPE_VALID_GEN(dev
, 6, 6);
974 ilo_cp_begin(cp
, cmd_len
);
975 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
976 GEN6_CLIP_VIEWPORT_MODIFY
|
977 GEN6_SF_VIEWPORT_MODIFY
|
978 GEN6_CC_VIEWPORT_MODIFY
);
979 ilo_cp_write(cp
, clip_viewport
);
980 ilo_cp_write(cp
, sf_viewport
);
981 ilo_cp_write(cp
, cc_viewport
);
986 gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info
*dev
,
987 uint32_t blend_state
,
988 uint32_t depth_stencil_state
,
989 uint32_t color_calc_state
,
992 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0e);
993 const uint8_t cmd_len
= 4;
995 ILO_GPE_VALID_GEN(dev
, 6, 6);
997 ilo_cp_begin(cp
, cmd_len
);
998 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
999 ilo_cp_write(cp
, blend_state
| 1);
1000 ilo_cp_write(cp
, depth_stencil_state
| 1);
1001 ilo_cp_write(cp
, color_calc_state
| 1);
1006 gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info
*dev
,
1007 uint32_t scissor_rect
,
1010 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0f);
1011 const uint8_t cmd_len
= 2;
1013 ILO_GPE_VALID_GEN(dev
, 6, 7);
1015 ilo_cp_begin(cp
, cmd_len
);
1016 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1017 ilo_cp_write(cp
, scissor_rect
);
1022 gen6_emit_3DSTATE_VS(const struct ilo_dev_info
*dev
,
1023 const struct ilo_shader
*vs
,
1027 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x10);
1028 const uint8_t cmd_len
= 6;
1029 uint32_t dw2
, dw4
, dw5
;
1030 int vue_read_len
, max_threads
;
1032 ILO_GPE_VALID_GEN(dev
, 6, 7);
1035 ilo_cp_begin(cp
, cmd_len
);
1036 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1037 ilo_cp_write(cp
, 0);
1038 ilo_cp_write(cp
, 0);
1039 ilo_cp_write(cp
, 0);
1040 ilo_cp_write(cp
, 0);
1041 ilo_cp_write(cp
, 0);
1047 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
1049 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
1050 * 128-bit vertex elements to be passed into the payload for each
1053 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
1054 * data to be read and passed to the thread."
1056 vue_read_len
= (vs
->in
.count
+ 1) / 2;
1063 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
1065 * "Device # of EUs #Threads/EU
1069 max_threads
= (dev
->gt
== 2) ? 60 : 24;
1073 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
1075 * "Device # of EUs #Threads/EU
1076 * Ivy Bridge (GT2) 16 8
1077 * Ivy Bridge (GT1) 6 6"
1079 max_threads
= (dev
->gt
== 2) ? 128 : 36;
1082 /* see brwCreateContext() */
1083 max_threads
= (dev
->gt
== 2) ? 280 : 70;
1090 dw2
= ((num_samplers
+ 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT
;
1092 dw2
|= GEN6_VS_FLOATING_POINT_MODE_ALT
;
1094 dw4
= vs
->in
.start_grf
<< GEN6_VS_DISPATCH_START_GRF_SHIFT
|
1095 vue_read_len
<< GEN6_VS_URB_READ_LENGTH_SHIFT
|
1096 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT
;
1098 dw5
= GEN6_VS_STATISTICS_ENABLE
|
1101 if (dev
->gen
>= ILO_GEN(7.5))
1102 dw5
|= (max_threads
- 1) << HSW_VS_MAX_THREADS_SHIFT
;
1104 dw5
|= (max_threads
- 1) << GEN6_VS_MAX_THREADS_SHIFT
;
1106 ilo_cp_begin(cp
, cmd_len
);
1107 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1108 ilo_cp_write(cp
, vs
->cache_offset
);
1109 ilo_cp_write(cp
, dw2
);
1110 ilo_cp_write(cp
, 0); /* scratch */
1111 ilo_cp_write(cp
, dw4
);
1112 ilo_cp_write(cp
, dw5
);
1117 gen6_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
1118 const struct ilo_shader
*gs
,
1119 const struct ilo_shader
*vs
,
1123 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x11);
1124 const uint8_t cmd_len
= 7;
1125 uint32_t dw1
, dw2
, dw4
, dw5
, dw6
;
1128 ILO_GPE_VALID_GEN(dev
, 6, 6);
1130 if (!gs
&& (!vs
|| !vs
->stream_output
)) {
1133 dw4
= 1 << GEN6_GS_URB_READ_LENGTH_SHIFT
;
1134 dw5
= GEN6_GS_STATISTICS_ENABLE
;
1138 int max_threads
, vue_read_len
;
1141 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
1143 * "Maximum Number of Threads valid range is [0,27] when Rendering
1144 * Enabled bit is set."
1146 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
1148 * "Programming Note: If the GS stage is enabled, software must
1149 * always allocate at least one GS URB Entry. This is true even if
1150 * the GS thread never needs to output vertices to the pipeline,
1151 * e.g., when only performing stream output. This is an artifact of
1152 * the need to pass the GS thread an initial destination URB
1155 * As such, we always enable rendering, and limit the number of threads.
1158 /* maximum is 60, but limited to 28 */
1162 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
1166 if (max_threads
> 28)
1169 dw2
= GEN6_GS_SPF_MODE
;
1171 dw5
= (max_threads
- 1) << GEN6_GS_MAX_THREADS_SHIFT
|
1172 GEN6_GS_STATISTICS_ENABLE
|
1173 GEN6_GS_SO_STATISTICS_ENABLE
|
1174 GEN6_GS_RENDERING_ENABLE
;
1177 * we cannot make use of GEN6_GS_REORDER because it will reorder
1178 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
1179 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
1180 * (2N+2, 2N+1, 2N+3)).
1182 dw6
= GEN6_GS_ENABLE
;
1185 /* VS outputs must match GS inputs */
1186 assert(gs
->in
.count
== vs
->out
.count
);
1187 for (i
= 0; i
< gs
->in
.count
; i
++) {
1188 assert(gs
->in
.semantic_names
[i
] == vs
->out
.semantic_names
[i
]);
1189 assert(gs
->in
.semantic_indices
[i
] == vs
->out
.semantic_indices
[i
]);
1193 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1195 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1196 * Length) to 0 indicating no Vertex URB data to be read and
1197 * passed to the thread."
1199 vue_read_len
= (gs
->in
.count
+ 1) / 2;
1203 dw1
= gs
->cache_offset
;
1204 dw4
= vue_read_len
<< GEN6_GS_URB_READ_LENGTH_SHIFT
|
1205 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT
|
1206 gs
->in
.start_grf
<< GEN6_GS_DISPATCH_START_GRF_SHIFT
;
1208 if (gs
->in
.discard_adj
)
1209 dw6
|= GEN6_GS_DISCARD_ADJACENCY
;
1211 if (gs
->stream_output
) {
1212 dw6
|= GEN6_GS_SVBI_PAYLOAD_ENABLE
;
1213 if (gs
->svbi_post_inc
) {
1214 dw6
|= GEN6_GS_SVBI_POSTINCREMENT_ENABLE
|
1215 gs
->svbi_post_inc
<< GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT
;
1221 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1223 * "It is UNDEFINED to set this field (Vertex URB Entry Read
1224 * Length) to 0 indicating no Vertex URB data to be read and
1225 * passed to the thread."
1227 vue_read_len
= (vs
->out
.count
+ 1) / 2;
1232 dw4
= vue_read_len
<< GEN6_GS_URB_READ_LENGTH_SHIFT
|
1233 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT
|
1234 vs
->gs_start_grf
<< GEN6_GS_DISPATCH_START_GRF_SHIFT
;
1236 if (vs
->in
.discard_adj
)
1237 dw6
|= GEN6_GS_DISCARD_ADJACENCY
;
1239 dw6
|= GEN6_GS_SVBI_PAYLOAD_ENABLE
;
1240 if (vs
->svbi_post_inc
) {
1241 dw6
|= GEN6_GS_SVBI_POSTINCREMENT_ENABLE
|
1242 vs
->svbi_post_inc
<< GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT
;
1247 ilo_cp_begin(cp
, cmd_len
);
1248 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1249 ilo_cp_write(cp
, dw1
);
1250 ilo_cp_write(cp
, dw2
);
1251 ilo_cp_write(cp
, 0);
1252 ilo_cp_write(cp
, dw4
);
1253 ilo_cp_write(cp
, dw5
);
1254 ilo_cp_write(cp
, dw6
);
1259 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info
*dev
,
1260 const struct pipe_rasterizer_state
*rasterizer
,
1261 bool has_linear_interp
,
1262 bool enable_guardband
,
1266 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x12);
1267 const uint8_t cmd_len
= 4;
1268 uint32_t dw1
, dw2
, dw3
;
1270 ILO_GPE_VALID_GEN(dev
, 6, 7);
1273 ilo_cp_begin(cp
, cmd_len
);
1274 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1275 ilo_cp_write(cp
, 0);
1276 ilo_cp_write(cp
, 0);
1277 ilo_cp_write(cp
, 0);
1283 dw1
= GEN6_CLIP_STATISTICS_ENABLE
;
1285 if (dev
->gen
>= ILO_GEN(7)) {
1287 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
1289 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
1290 * enabled only for the cases where the incoming primitive topology
1291 * into the clipper guaranteed to be Trilist."
1293 * What does this mean?
1296 GEN7_CLIP_EARLY_CULL
;
1298 if (rasterizer
->front_ccw
)
1299 dw1
|= GEN7_CLIP_WINDING_CCW
;
1301 switch (rasterizer
->cull_face
) {
1302 case PIPE_FACE_NONE
:
1303 dw1
|= GEN7_CLIP_CULLMODE_NONE
;
1305 case PIPE_FACE_FRONT
:
1306 dw1
|= GEN7_CLIP_CULLMODE_FRONT
;
1308 case PIPE_FACE_BACK
:
1309 dw1
|= GEN7_CLIP_CULLMODE_BACK
;
1311 case PIPE_FACE_FRONT_AND_BACK
:
1312 dw1
|= GEN7_CLIP_CULLMODE_BOTH
;
1317 dw2
= GEN6_CLIP_ENABLE
|
1319 rasterizer
->clip_plane_enable
<< GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT
|
1320 GEN6_CLIP_MODE_NORMAL
;
1322 if (rasterizer
->clip_halfz
)
1323 dw2
|= GEN6_CLIP_API_D3D
;
1325 dw2
|= GEN6_CLIP_API_OGL
;
1327 if (rasterizer
->depth_clip
)
1328 dw2
|= GEN6_CLIP_Z_TEST
;
1331 * There are several reasons that guard band test should be disabled
1333 * - when the renderer does not perform 2D clipping
1334 * - GL wide points (to avoid partially visible objects)
1335 * - GL wide or AA lines (to avoid partially visible objects)
1337 if (enable_guardband
&& true /* API_GL */) {
1338 if (rasterizer
->point_size_per_vertex
|| rasterizer
->point_size
> 1.0f
)
1339 enable_guardband
= false;
1340 if (rasterizer
->line_smooth
|| rasterizer
->line_width
> 1.0f
)
1341 enable_guardband
= false;
1344 if (enable_guardband
)
1345 dw2
|= GEN6_CLIP_GB_TEST
;
1347 if (has_linear_interp
)
1348 dw2
|= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE
;
1350 if (rasterizer
->flatshade_first
) {
1351 dw2
|= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT
|
1352 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT
|
1353 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT
;
1356 dw2
|= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT
|
1357 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT
|
1358 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT
;
1361 dw3
= 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT
|
1362 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT
|
1363 GEN6_CLIP_FORCE_ZERO_RTAINDEX
|
1364 (num_viewports
- 1);
1366 ilo_cp_begin(cp
, cmd_len
);
1367 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1368 ilo_cp_write(cp
, dw1
);
1369 ilo_cp_write(cp
, dw2
);
1370 ilo_cp_write(cp
, dw3
);
1375 * Fill in DW2 to DW7 of 3DSTATE_SF.
1378 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info
*dev
,
1379 const struct pipe_rasterizer_state
*rasterizer
,
1381 enum pipe_format depth_format
,
1382 bool separate_stencil
,
1383 uint32_t *dw
, int num_dwords
)
1385 float offset_const
, offset_scale
, offset_clamp
;
1386 int format
, line_width
, point_width
;
1388 ILO_GPE_VALID_GEN(dev
, 6, 7);
1389 assert(num_dwords
== 6);
1393 dw
[1] = (num_samples
> 1) ? GEN6_SF_MSRAST_ON_PATTERN
: 0;
1403 * Scale the constant term. The minimum representable value used by the HW
1404 * is not large enough to be the minimum resolvable difference.
1406 offset_const
= rasterizer
->offset_units
* 2.0f
;
1408 offset_scale
= rasterizer
->offset_scale
;
1409 offset_clamp
= rasterizer
->offset_clamp
;
1411 if (separate_stencil
) {
1412 switch (depth_format
) {
1413 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
1414 depth_format
= PIPE_FORMAT_Z24X8_UNORM
;
1416 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
1417 depth_format
= PIPE_FORMAT_Z32_FLOAT
;;
1419 case PIPE_FORMAT_S8_UINT
:
1420 depth_format
= PIPE_FORMAT_NONE
;
1427 format
= gen6_translate_depth_format(depth_format
);
1428 /* FLOAT surface is assumed when there is no depth buffer */
1430 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
1433 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
1434 * pixels in the minor direction. We have to make the lines slightly
1435 * thicker, 0.5 pixel on both sides, so that that many pixels are
1436 * considered part of the lines.
1438 * Line width is in U3.7.
1440 line_width
= (int) ((rasterizer
->line_width
+
1441 (float) rasterizer
->line_smooth
) * 128.0f
+ 0.5f
);
1442 line_width
= CLAMP(line_width
, 0, 1023);
1445 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1447 * "Software must not program a value of 0.0 when running in
1448 * MSRASTMODE_ON_xxx modes - zero-width lines are not available when
1449 * multisampling rasterization is enabled."
1451 if (rasterizer
->multisample
) {
1453 line_width
= 128; /* 1.0f */
1455 else if (line_width
== 128 && !rasterizer
->line_smooth
) {
1461 point_width
= (int) (rasterizer
->point_size
* 8.0f
+ 0.5f
);
1462 point_width
= CLAMP(point_width
, 1, 2047);
1465 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1467 * "This bit (Statistics Enable) should be set whenever clipping is
1468 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
1469 * should be cleared if clipping is disabled or Statistics Enable in
1470 * CLIP_STATE is clear."
1472 dw
[0] = GEN6_SF_STATISTICS_ENABLE
|
1473 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE
;
1475 /* XXX GEN6 path seems to work fine for GEN7 */
1476 if (false && dev
->gen
>= ILO_GEN(7)) {
1477 dw
[0] |= format
<< GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT
;
1480 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
1482 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
1483 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
1484 * Depth Offset Enable Point) should be set whenever non zero depth
1485 * bias (Slope, Bias) values are used. Setting this bit may have
1486 * some degradation of performance for some workloads."
1488 if (rasterizer
->offset_tri
||
1489 rasterizer
->offset_line
||
1490 rasterizer
->offset_point
) {
1491 /* XXX need to scale offset_const according to the depth format */
1492 dw
[0] |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS
;
1494 dw
[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID
|
1495 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME
|
1496 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT
;
1499 offset_const
= 0.0f
;
1500 offset_scale
= 0.0f
;
1501 offset_clamp
= 0.0f
;
1505 if (dev
->gen
>= ILO_GEN(7))
1506 dw
[0] |= format
<< GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT
;
1508 if (rasterizer
->offset_tri
)
1509 dw
[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID
;
1510 if (rasterizer
->offset_line
)
1511 dw
[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME
;
1512 if (rasterizer
->offset_point
)
1513 dw
[0] |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT
;
1516 switch (rasterizer
->fill_front
) {
1517 case PIPE_POLYGON_MODE_FILL
:
1518 dw
[0] |= GEN6_SF_FRONT_SOLID
;
1520 case PIPE_POLYGON_MODE_LINE
:
1521 dw
[0] |= GEN6_SF_FRONT_WIREFRAME
;
1523 case PIPE_POLYGON_MODE_POINT
:
1524 dw
[0] |= GEN6_SF_FRONT_POINT
;
1528 switch (rasterizer
->fill_back
) {
1529 case PIPE_POLYGON_MODE_FILL
:
1530 dw
[0] |= GEN6_SF_BACK_SOLID
;
1532 case PIPE_POLYGON_MODE_LINE
:
1533 dw
[0] |= GEN6_SF_BACK_WIREFRAME
;
1535 case PIPE_POLYGON_MODE_POINT
:
1536 dw
[0] |= GEN6_SF_BACK_POINT
;
1540 if (rasterizer
->front_ccw
)
1541 dw
[0] |= GEN6_SF_WINDING_CCW
;
1545 if (rasterizer
->line_smooth
) {
1547 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1549 * "This field (Anti-aliasing Enable) must be disabled if any of the
1550 * render targets have integer (UINT or SINT) surface format."
1552 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1554 * "This field (Hierarchical Depth Buffer Enable) must be disabled
1555 * if Anti-aliasing Enable in 3DSTATE_SF is enabled."
1557 * TODO We do not check those yet.
1559 dw
[1] |= GEN6_SF_LINE_AA_ENABLE
|
1560 GEN6_SF_LINE_END_CAP_WIDTH_1_0
;
1563 switch (rasterizer
->cull_face
) {
1564 case PIPE_FACE_NONE
:
1565 dw
[1] |= GEN6_SF_CULL_NONE
;
1567 case PIPE_FACE_FRONT
:
1568 dw
[1] |= GEN6_SF_CULL_FRONT
;
1570 case PIPE_FACE_BACK
:
1571 dw
[1] |= GEN6_SF_CULL_BACK
;
1573 case PIPE_FACE_FRONT_AND_BACK
:
1574 dw
[1] |= GEN6_SF_CULL_BOTH
;
1578 dw
[1] |= line_width
<< GEN6_SF_LINE_WIDTH_SHIFT
;
1580 if (rasterizer
->scissor
)
1581 dw
[1] |= GEN6_SF_SCISSOR_ENABLE
;
1583 if (num_samples
> 1 && rasterizer
->multisample
)
1584 dw
[1] |= GEN6_SF_MSRAST_ON_PATTERN
;
1586 dw
[2] = GEN6_SF_LINE_AA_MODE_TRUE
|
1587 GEN6_SF_VERTEX_SUBPIXEL_8BITS
;
1589 if (rasterizer
->line_last_pixel
)
1592 if (rasterizer
->flatshade_first
) {
1593 dw
[2] |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT
|
1594 0 << GEN6_SF_LINE_PROVOKE_SHIFT
|
1595 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT
;
1598 dw
[2] |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT
|
1599 1 << GEN6_SF_LINE_PROVOKE_SHIFT
|
1600 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT
;
1603 if (!rasterizer
->point_size_per_vertex
)
1604 dw
[2] |= GEN6_SF_USE_STATE_POINT_WIDTH
;
1606 dw
[2] |= point_width
;
1608 dw
[3] = fui(offset_const
);
1609 dw
[4] = fui(offset_scale
);
1610 dw
[5] = fui(offset_clamp
);
1614 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
1617 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info
*dev
,
1618 const struct pipe_rasterizer_state
*rasterizer
,
1619 const struct ilo_shader
*fs
,
1620 const struct ilo_shader
*last_sh
,
1621 uint32_t *dw
, int num_dwords
)
1623 uint32_t point_sprite_enable
, const_interp_enable
;
1624 uint16_t attr_ctrl
[PIPE_MAX_SHADER_INPUTS
];
1625 int vue_offset
, vue_len
;
1626 int dst
, max_src
, i
;
1628 ILO_GPE_VALID_GEN(dev
, 6, 7);
1629 assert(num_dwords
== 13);
1632 if (dev
->gen
>= ILO_GEN(7))
1633 dw
[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
;
1635 dw
[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
;
1637 for (i
= 1; i
< num_dwords
; i
++)
1644 /* skip PSIZE and POSITION (how about the optional CLIPDISTs?) */
1645 assert(last_sh
->out
.semantic_names
[0] == TGSI_SEMANTIC_PSIZE
);
1646 assert(last_sh
->out
.semantic_names
[1] == TGSI_SEMANTIC_POSITION
);
1648 vue_len
= last_sh
->out
.count
- vue_offset
;
1652 vue_len
= fs
->in
.count
;
1655 point_sprite_enable
= 0;
1656 const_interp_enable
= 0;
1657 max_src
= (last_sh
) ? 0 : fs
->in
.count
- 1;
1659 for (dst
= 0; dst
< fs
->in
.count
; dst
++) {
1660 const int semantic
= fs
->in
.semantic_names
[dst
];
1661 const int index
= fs
->in
.semantic_indices
[dst
];
1662 const int interp
= fs
->in
.interp
[dst
];
1667 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
1669 * "This field (Point Sprite Texture Coordinate Enable) must be
1670 * programmed to 0 when non-point primitives are rendered."
1672 * TODO We do not check that yet.
1674 if (semantic
== TGSI_SEMANTIC_GENERIC
&&
1675 (rasterizer
->sprite_coord_enable
& (1 << index
)))
1676 point_sprite_enable
|= 1 << dst
;
1678 if (interp
== TGSI_INTERPOLATE_CONSTANT
||
1679 (interp
== TGSI_INTERPOLATE_COLOR
&& rasterizer
->flatshade
))
1680 const_interp_enable
|= 1 << dst
;
1687 /* find the matching VS/GS OUT for FS IN[dst] */
1689 for (src
= 0; src
< vue_len
; src
++) {
1690 if (last_sh
->out
.semantic_names
[vue_offset
+ src
] != semantic
||
1691 last_sh
->out
.semantic_indices
[vue_offset
+ src
] != index
)
1696 if (semantic
== TGSI_SEMANTIC_COLOR
&& rasterizer
->light_twoside
&&
1697 src
< vue_len
- 1) {
1698 const int next
= src
+ 1;
1700 if (last_sh
->out
.semantic_names
[vue_offset
+ next
] ==
1701 TGSI_SEMANTIC_BCOLOR
&&
1702 last_sh
->out
.semantic_indices
[vue_offset
+ next
] == index
) {
1703 ctrl
|= ATTRIBUTE_SWIZZLE_INPUTATTR_FACING
<<
1704 ATTRIBUTE_SWIZZLE_SHIFT
;
1712 /* if there is no COLOR, try BCOLOR */
1713 if (src
>= vue_len
&& semantic
== TGSI_SEMANTIC_COLOR
) {
1714 for (src
= 0; src
< vue_len
; src
++) {
1715 if (last_sh
->out
.semantic_names
[vue_offset
+ src
] !=
1716 TGSI_SEMANTIC_BCOLOR
||
1717 last_sh
->out
.semantic_indices
[vue_offset
+ src
] != index
)
1725 if (src
< vue_len
) {
1726 attr_ctrl
[dst
] = ctrl
;
1732 * The previous shader stage does not output this attribute. The
1733 * value is supposed to be undefined for fs, unless the attribute
1734 * goes through point sprite replacement or the attribute is
1735 * TGSI_SEMANTIC_POSITION. In all cases, we do not care which source
1736 * attribute is picked.
1738 * We should update the fs code and omit the output of
1739 * TGSI_SEMANTIC_POSITION here.
1745 for (; dst
< Elements(attr_ctrl
); dst
++)
1748 /* only the first 16 attributes can be remapped */
1749 for (dst
= 16; dst
< Elements(attr_ctrl
); dst
++)
1750 assert(attr_ctrl
[dst
] == 0 || attr_ctrl
[dst
] == dst
);
1753 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1755 * "It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
1756 * 0 indicating no Vertex URB data to be read.
1758 * This field should be set to the minimum length required to read the
1759 * maximum source attribute. The maximum source attribute is indicated
1760 * by the maximum value of the enabled Attribute # Source Attribute if
1761 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
1762 * enable is not set.
1764 * read_length = ceiling((max_source_attr+1)/2)
1766 * [errata] Corruption/Hang possible if length programmed larger than
1769 vue_len
= max_src
+ 1;
1771 assert(fs
->in
.count
<= 32);
1772 assert(vue_offset
% 2 == 0);
1774 if (dev
->gen
>= ILO_GEN(7)) {
1775 dw
[0] = fs
->in
.count
<< GEN7_SBE_NUM_OUTPUTS_SHIFT
|
1776 (vue_len
+ 1) / 2 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
|
1777 vue_offset
/ 2 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT
;
1780 dw
[0] |= GEN7_SBE_SWIZZLE_ENABLE
;
1783 dw
[0] = fs
->in
.count
<< GEN6_SF_NUM_OUTPUTS_SHIFT
|
1784 (vue_len
+ 1) / 2 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
|
1785 vue_offset
/ 2 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT
;
1788 dw
[0] |= GEN6_SF_SWIZZLE_ENABLE
;
1791 switch (rasterizer
->sprite_coord_mode
) {
1792 case PIPE_SPRITE_COORD_UPPER_LEFT
:
1793 dw
[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT
;
1795 case PIPE_SPRITE_COORD_LOWER_LEFT
:
1796 dw
[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT
;
1800 for (i
= 0; i
< 8; i
++)
1801 dw
[1 + i
] = attr_ctrl
[2 * i
+ 1] << 16 | attr_ctrl
[2 * i
];
1803 dw
[9] = point_sprite_enable
;
1804 dw
[10] = const_interp_enable
;
1806 /* WrapShortest enables */
1812 gen6_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
1813 const struct pipe_rasterizer_state
*rasterizer
,
1814 const struct ilo_shader
*fs
,
1815 const struct ilo_shader
*last_sh
,
1818 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x13);
1819 const uint8_t cmd_len
= 20;
1820 uint32_t dw_raster
[6], dw_sbe
[13];
1822 ILO_GPE_VALID_GEN(dev
, 6, 6);
1824 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
, rasterizer
,
1825 1, PIPE_FORMAT_NONE
, false, dw_raster
, Elements(dw_raster
));
1826 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
,
1827 fs
, last_sh
, dw_sbe
, Elements(dw_sbe
));
1829 ilo_cp_begin(cp
, cmd_len
);
1830 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1831 ilo_cp_write(cp
, dw_sbe
[0]);
1832 ilo_cp_write_multi(cp
, dw_raster
, 6);
1833 ilo_cp_write_multi(cp
, &dw_sbe
[1], 12);
1838 gen6_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
1839 const struct ilo_shader
*fs
,
1841 const struct pipe_rasterizer_state
*rasterizer
,
1842 bool dual_blend
, bool cc_may_kill
,
1845 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x14);
1846 const uint8_t cmd_len
= 9;
1847 const int num_samples
= 1;
1848 uint32_t dw2
, dw4
, dw5
, dw6
;
1851 ILO_GPE_VALID_GEN(dev
, 6, 6);
1853 /* see brwCreateContext() */
1854 max_threads
= (dev
->gt
== 2) ? 80 : 40;
1857 ilo_cp_begin(cp
, cmd_len
);
1858 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1859 ilo_cp_write(cp
, 0);
1860 ilo_cp_write(cp
, 0);
1861 ilo_cp_write(cp
, 0);
1862 ilo_cp_write(cp
, 0);
1863 /* honor the valid range even if dispatching is disabled */
1864 ilo_cp_write(cp
, (max_threads
- 1) << GEN6_WM_MAX_THREADS_SHIFT
);
1865 ilo_cp_write(cp
, 0);
1866 ilo_cp_write(cp
, 0);
1867 ilo_cp_write(cp
, 0);
1873 dw2
= (num_samplers
+ 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT
;
1875 dw2
|= GEN6_WM_FLOATING_POINT_MODE_ALT
;
1877 dw4
= fs
->in
.start_grf
<< GEN6_WM_DISPATCH_START_GRF_SHIFT_0
|
1878 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1
|
1879 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2
;
1882 dw4
|= GEN6_WM_STATISTICS_ENABLE
;
1886 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1888 * "This bit (Statistics Enable) must be disabled if either of these
1889 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1890 * Resolve Enable or Depth Buffer Resolve Enable."
1892 dw4
|= GEN6_WM_DEPTH_CLEAR
;
1893 dw4
|= GEN6_WM_DEPTH_RESOLVE
;
1894 dw4
|= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE
;
1897 dw5
= (max_threads
- 1) << GEN6_WM_MAX_THREADS_SHIFT
|
1898 GEN6_WM_LINE_AA_WIDTH_2_0
;
1901 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1903 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
1904 * PS kernel or color calculator has the ability to kill (discard)
1905 * pixels or samples, other than due to depth or stencil testing.
1906 * This bit is required to be ENABLED in the following situations:
1908 * The API pixel shader program contains "killpix" or "discard"
1909 * instructions, or other code in the pixel shader kernel that can
1910 * cause the final pixel mask to differ from the pixel mask received
1913 * A sampler with chroma key enabled with kill pixel mode is used by
1916 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
1919 * The pixel shader kernel generates and outputs oMask.
1921 * Note: As ClipDistance clipping is fully supported in hardware and
1922 * therefore not via PS instructions, there should be no need to
1923 * ENABLE this bit due to ClipDistance clipping."
1925 if (fs
->has_kill
|| cc_may_kill
)
1926 dw5
|= GEN6_WM_KILL_ENABLE
;
1929 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1931 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
1932 * field must be set to disabled."
1934 * TODO This is not checked yet.
1936 if (fs
->out
.has_pos
)
1937 dw5
|= GEN6_WM_COMPUTED_DEPTH
;
1940 dw5
|= GEN6_WM_USES_SOURCE_DEPTH
| GEN6_WM_USES_SOURCE_W
;
1945 * a) fs writes colors and color is not masked, or
1946 * b) fs writes depth, or
1950 dw5
|= GEN6_WM_DISPATCH_ENABLE
;
1952 /* same value as in 3DSTATE_SF */
1953 if (rasterizer
->line_smooth
)
1954 dw5
|= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0
;
1956 if (rasterizer
->poly_stipple_enable
)
1957 dw5
|= GEN6_WM_POLYGON_STIPPLE_ENABLE
;
1958 if (rasterizer
->line_stipple_enable
)
1959 dw5
|= GEN6_WM_LINE_STIPPLE_ENABLE
;
1962 dw5
|= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE
;
1964 if (fs
->dispatch_16
)
1965 dw5
|= GEN6_WM_16_DISPATCH_ENABLE
;
1967 dw5
|= GEN6_WM_8_DISPATCH_ENABLE
;
1969 dw6
= fs
->in
.count
<< GEN6_WM_NUM_SF_OUTPUTS_SHIFT
|
1970 GEN6_WM_POSOFFSET_NONE
|
1971 GEN6_WM_POSITION_ZW_PIXEL
|
1972 fs
->in
.barycentric_interpolation_mode
<<
1973 GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT
;
1975 if (rasterizer
->bottom_edge_rule
)
1976 dw6
|= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT
;
1978 if (num_samples
> 1) {
1979 if (rasterizer
->multisample
)
1980 dw6
|= GEN6_WM_MSRAST_ON_PATTERN
;
1982 dw6
|= GEN6_WM_MSRAST_OFF_PIXEL
;
1983 dw6
|= GEN6_WM_MSDISPMODE_PERPIXEL
;
1986 dw6
|= GEN6_WM_MSRAST_OFF_PIXEL
|
1987 GEN6_WM_MSDISPMODE_PERSAMPLE
;
1990 ilo_cp_begin(cp
, cmd_len
);
1991 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1992 ilo_cp_write(cp
, fs
->cache_offset
);
1993 ilo_cp_write(cp
, dw2
);
1994 ilo_cp_write(cp
, 0); /* scratch */
1995 ilo_cp_write(cp
, dw4
);
1996 ilo_cp_write(cp
, dw5
);
1997 ilo_cp_write(cp
, dw6
);
1998 ilo_cp_write(cp
, 0); /* kernel 1 */
1999 ilo_cp_write(cp
, 0); /* kernel 2 */
2004 gen6_fill_3dstate_constant(const struct ilo_dev_info
*dev
,
2005 const uint32_t *bufs
, const int *sizes
,
2006 int num_bufs
, int max_read_length
,
2007 uint32_t *dw
, int num_dwords
)
2009 unsigned enabled
= 0x0;
2010 int total_read_length
, i
;
2012 assert(num_dwords
== 4);
2014 total_read_length
= 0;
2015 for (i
= 0; i
< 4; i
++) {
2016 if (i
< num_bufs
&& sizes
[i
]) {
2017 /* in 256-bit units minus one */
2018 const int read_len
= (sizes
[i
] + 31) / 32 - 1;
2020 assert(bufs
[i
] % 32 == 0);
2021 assert(read_len
< 32);
2024 dw
[i
] = bufs
[i
] | read_len
;
2026 total_read_length
+= read_len
+ 1;
2033 assert(total_read_length
<= max_read_length
);
2039 gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info
*dev
,
2040 const uint32_t *bufs
, const int *sizes
,
2044 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x15);
2045 const uint8_t cmd_len
= 5;
2046 uint32_t buf_dw
[4], buf_enabled
;
2048 ILO_GPE_VALID_GEN(dev
, 6, 6);
2049 assert(num_bufs
<= 4);
2052 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
2054 * "The sum of all four read length fields (each incremented to
2055 * represent the actual read length) must be less than or equal to 32"
2057 buf_enabled
= gen6_fill_3dstate_constant(dev
,
2058 bufs
, sizes
, num_bufs
, 32, buf_dw
, Elements(buf_dw
));
2060 ilo_cp_begin(cp
, cmd_len
);
2061 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
2062 ilo_cp_write(cp
, buf_dw
[0]);
2063 ilo_cp_write(cp
, buf_dw
[1]);
2064 ilo_cp_write(cp
, buf_dw
[2]);
2065 ilo_cp_write(cp
, buf_dw
[3]);
2070 gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info
*dev
,
2071 const uint32_t *bufs
, const int *sizes
,
2075 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x16);
2076 const uint8_t cmd_len
= 5;
2077 uint32_t buf_dw
[4], buf_enabled
;
2079 ILO_GPE_VALID_GEN(dev
, 6, 6);
2080 assert(num_bufs
<= 4);
2083 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
2085 * "The sum of all four read length fields (each incremented to
2086 * represent the actual read length) must be less than or equal to 64"
2088 buf_enabled
= gen6_fill_3dstate_constant(dev
,
2089 bufs
, sizes
, num_bufs
, 64, buf_dw
, Elements(buf_dw
));
2091 ilo_cp_begin(cp
, cmd_len
);
2092 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
2093 ilo_cp_write(cp
, buf_dw
[0]);
2094 ilo_cp_write(cp
, buf_dw
[1]);
2095 ilo_cp_write(cp
, buf_dw
[2]);
2096 ilo_cp_write(cp
, buf_dw
[3]);
2101 gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info
*dev
,
2102 const uint32_t *bufs
, const int *sizes
,
2106 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x17);
2107 const uint8_t cmd_len
= 5;
2108 uint32_t buf_dw
[4], buf_enabled
;
2110 ILO_GPE_VALID_GEN(dev
, 6, 6);
2111 assert(num_bufs
<= 4);
2114 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
2116 * "The sum of all four read length fields (each incremented to
2117 * represent the actual read length) must be less than or equal to 64"
2119 buf_enabled
= gen6_fill_3dstate_constant(dev
,
2120 bufs
, sizes
, num_bufs
, 64, buf_dw
, Elements(buf_dw
));
2122 ilo_cp_begin(cp
, cmd_len
);
2123 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
2124 ilo_cp_write(cp
, buf_dw
[0]);
2125 ilo_cp_write(cp
, buf_dw
[1]);
2126 ilo_cp_write(cp
, buf_dw
[2]);
2127 ilo_cp_write(cp
, buf_dw
[3]);
2132 gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info
*dev
,
2133 unsigned sample_mask
,
2136 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x18);
2137 const uint8_t cmd_len
= 2;
2138 const unsigned valid_mask
= 0xf;
2140 ILO_GPE_VALID_GEN(dev
, 6, 6);
2142 sample_mask
&= valid_mask
;
2144 ilo_cp_begin(cp
, cmd_len
);
2145 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2146 ilo_cp_write(cp
, sample_mask
);
2151 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info
*dev
,
2152 unsigned x
, unsigned y
,
2153 unsigned width
, unsigned height
,
2156 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x00);
2157 const uint8_t cmd_len
= 4;
2158 unsigned xmax
= x
+ width
- 1;
2159 unsigned ymax
= y
+ height
- 1;
2162 ILO_GPE_VALID_GEN(dev
, 6, 7);
2164 if (dev
->gen
>= ILO_GEN(7)) {
2169 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
2171 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
2172 * must be an even number"
2179 if (x
> rect_limit
) x
= rect_limit
;
2180 if (y
> rect_limit
) y
= rect_limit
;
2181 if (xmax
> rect_limit
) xmax
= rect_limit
;
2182 if (ymax
> rect_limit
) ymax
= rect_limit
;
2184 ilo_cp_begin(cp
, cmd_len
);
2185 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2186 ilo_cp_write(cp
, y
<< 16 | x
);
2187 ilo_cp_write(cp
, ymax
<< 16 | xmax
);
2190 * There is no need to set the origin. It is intended to support front
2193 ilo_cp_write(cp
, 0);
2199 gen6_get_depth_buffer_format(const struct ilo_dev_info
*dev
,
2200 enum pipe_format format
,
2202 bool separate_stencil
,
2208 ILO_GPE_VALID_GEN(dev
, 6, 7);
2211 *has_stencil
= false;
2214 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2216 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
2217 * Surface Format of the depth buffer cannot be
2218 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2219 * requires the separate stencil buffer."
2221 * From the Ironlake PRM, volume 2 part 1, page 330:
2223 * "If this field (Separate Stencil Buffer Enable) is disabled, the
2224 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
2226 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
2227 * is indeed used, the depth values output by the fragment shaders will
2228 * be different when read back.
2230 * As for GEN7+, separate_stencil_buffer is always true.
2233 case PIPE_FORMAT_Z16_UNORM
:
2234 depth_format
= BRW_DEPTHFORMAT_D16_UNORM
;
2236 case PIPE_FORMAT_Z32_FLOAT
:
2237 depth_format
= BRW_DEPTHFORMAT_D32_FLOAT
;
2239 case PIPE_FORMAT_Z24X8_UNORM
:
2240 depth_format
= (separate_stencil
) ?
2241 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
:
2242 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
2244 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
2245 depth_format
= (separate_stencil
) ?
2246 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
:
2247 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
2248 *has_stencil
= true;
2250 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
2251 depth_format
= (separate_stencil
) ?
2252 BRW_DEPTHFORMAT_D32_FLOAT
:
2253 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT
;
2254 *has_stencil
= true;
2256 case PIPE_FORMAT_S8_UINT
:
2257 if (separate_stencil
) {
2258 depth_format
= BRW_DEPTHFORMAT_D32_FLOAT
;
2260 *has_stencil
= true;
2265 assert(!"unsupported depth/stencil format");
2266 depth_format
= BRW_DEPTHFORMAT_D32_FLOAT
;
2268 *has_stencil
= false;
2272 return depth_format
;
2276 ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
2277 const struct pipe_surface
*surface
,
2278 const struct pipe_depth_stencil_alpha_state
*dsa
,
2282 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
2283 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
2284 const uint8_t cmd_len
= 7;
2285 const int max_2d_size
= (dev
->gen
>= ILO_GEN(7)) ? 16384 : 8192;
2286 struct ilo_texture
*tex
;
2288 uint32_t slice_offset
, x_offset
, y_offset
;
2289 int surface_type
, depth_format
, width
, height
;
2290 bool separate_stencil
, has_depth
, has_stencil
;
2292 ILO_GPE_VALID_GEN(dev
, 6, 7);
2294 if (dev
->gen
>= ILO_GEN(7)) {
2295 separate_stencil
= true;
2299 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2301 * "This field (Separate Stencil Buffer Enable) must be set to the
2302 * same value (enabled or disabled) as Hierarchical Depth Buffer
2305 separate_stencil
= hiz
;
2309 depth_format
= gen6_get_depth_buffer_format(dev
,
2310 surface
->format
, hiz
, separate_stencil
, &has_depth
, &has_stencil
);
2314 has_stencil
= false;
2317 if (!has_depth
&& !has_stencil
) {
2318 dw1
= BRW_SURFACE_NULL
<< 29 |
2319 BRW_DEPTHFORMAT_D32_FLOAT
<< 18;
2322 if (dev
->gen
== ILO_GEN(6)) {
2327 ilo_cp_begin(cp
, cmd_len
);
2328 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2329 ilo_cp_write(cp
, dw1
);
2330 ilo_cp_write(cp
, 0);
2331 ilo_cp_write(cp
, 0);
2332 ilo_cp_write(cp
, 0);
2333 ilo_cp_write(cp
, 0);
2334 ilo_cp_write(cp
, 0);
2340 tex
= ilo_texture(surface
->texture
);
2341 surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
2342 width
= surface
->width
;
2343 height
= surface
->height
;
2346 * we always treat the resource as non-mipmapped and set the slice/x/y
2349 slice_offset
= ilo_texture_get_slice_offset(tex
,
2350 surface
->u
.tex
.level
, surface
->u
.tex
.first_layer
,
2351 &x_offset
, &y_offset
);
2354 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
2356 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
2357 * Coordinate Offset X) must be zero to ensure correct alignment"
2359 * XXX Skip the check for gen6, which seems to be fine. We need to make
2360 * sure that does not happen eventually.
2362 if (dev
->gen
>= ILO_GEN(7)) {
2363 assert((x_offset
& 7) == 0 && (y_offset
& 7) == 0);
2371 /* required for GEN6+ */
2372 assert(tex
->tiling
== INTEL_TILING_Y
);
2374 assert(tex
->bo_stride
> 0 && tex
->bo_stride
< 128 * 1024 &&
2375 tex
->bo_stride
% 128 == 0);
2376 assert(surface
->u
.tex
.first_layer
== surface
->u
.tex
.last_layer
);
2377 assert(width
<= tex
->bo_stride
);
2379 /* we have to treat them as 2D surfaces */
2380 if (surface_type
== BRW_SURFACE_CUBE
) {
2381 assert(surface
->width
== surface
->height
);
2382 /* we will set slice_offset to point to the single face */
2383 surface_type
= BRW_SURFACE_2D
;
2385 else if (surface_type
== BRW_SURFACE_1D
&& height
> 1) {
2386 assert(surface
->height
== 1);
2387 surface_type
= BRW_SURFACE_2D
;
2390 switch (surface_type
) {
2391 case BRW_SURFACE_1D
:
2392 assert(width
<= max_2d_size
&& height
== 1);
2394 case BRW_SURFACE_2D
:
2395 assert(width
<= max_2d_size
&& height
<= max_2d_size
);
2397 case BRW_SURFACE_3D
:
2398 assert(width
<= 2048 && height
<= 2048);
2399 assert(x_offset
== 0 && y_offset
== 0);
2401 case BRW_SURFACE_CUBE
:
2402 assert(width
<= max_2d_size
&& height
<= max_2d_size
&&
2404 assert(x_offset
== 0 && y_offset
== 0);
2407 assert(!"unexpected depth surface type");
2411 dw1
= surface_type
<< 29 |
2412 depth_format
<< 18 |
2413 (tex
->bo_stride
- 1);
2415 if (dev
->gen
>= ILO_GEN(7)) {
2417 if (dsa
->depth
.writemask
)
2424 (dsa
->stencil
[0].writemask
|| dsa
->stencil
[1].writemask
))
2427 dw3
= (height
- 1) << 18 |
2431 dw1
|= (tex
->tiling
!= INTEL_TILING_NONE
) << 27 |
2432 (tex
->tiling
== INTEL_TILING_Y
) << 26;
2439 dw3
= (height
- 1) << 19 |
2441 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< 1;
2444 ilo_cp_begin(cp
, cmd_len
);
2445 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2446 ilo_cp_write(cp
, dw1
);
2449 ilo_cp_write_bo(cp
, slice_offset
, tex
->bo
,
2450 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
2453 ilo_cp_write(cp
, 0);
2456 ilo_cp_write(cp
, dw3
);
2457 ilo_cp_write(cp
, 0);
2458 ilo_cp_write(cp
, y_offset
<< 16 | x_offset
);
2459 ilo_cp_write(cp
, 0);
2464 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
2465 const struct pipe_surface
*surface
,
2469 ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(dev
, surface
, NULL
, hiz
, cp
);
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (2 dwords).
 *
 * \param dev      device info; must be GEN6 or GEN7
 * \param x_offset stipple X offset, in [0, 31]
 * \param y_offset stipple Y offset, in [0, 31]
 * \param cp       command parser receiving the command
 */
static void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* X offset in the second byte, Y offset in the lowest byte */
   ilo_cp_write(cp, x_offset << 8 | y_offset);
   ilo_cp_end(cp);
}
2491 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info
*dev
,
2492 const struct pipe_poly_stipple
*pattern
,
2495 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x07);
2496 const uint8_t cmd_len
= 33;
2499 ILO_GPE_VALID_GEN(dev
, 6, 7);
2500 assert(Elements(pattern
->stipple
) == 32);
2502 ilo_cp_begin(cp
, cmd_len
);
2503 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2504 for (i
= 0; i
< 32; i
++)
2505 ilo_cp_write(cp
, pattern
->stipple
[i
]);
2510 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info
*dev
,
2511 unsigned pattern
, unsigned factor
,
2514 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x08);
2515 const uint8_t cmd_len
= 3;
2518 ILO_GPE_VALID_GEN(dev
, 6, 7);
2519 assert((pattern
& 0xffff) == pattern
);
2520 assert(factor
>= 1 && factor
<= 256);
2522 ilo_cp_begin(cp
, cmd_len
);
2523 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2524 ilo_cp_write(cp
, pattern
);
2526 if (dev
->gen
>= ILO_GEN(7)) {
2528 inverse
= (unsigned) (65536.0f
/ factor
);
2529 ilo_cp_write(cp
, inverse
<< 15 | factor
);
2533 inverse
= (unsigned) (8192.0f
/ factor
);
2534 ilo_cp_write(cp
, inverse
<< 16 | factor
);
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS (3 dwords) with both parameter dwords set
 * to zero.
 *
 * \param dev device info; must be GEN6 or GEN7
 * \param cp  command parser receiving the command
 */
static void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
   const uint8_t cmd_len = 3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* each dword holds two fields, split at bit 16; all are left at zero */
   ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_end(cp);
}
2557 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info
*dev
,
2558 int index
, unsigned svbi
,
2560 bool load_vertex_count
,
2563 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0b);
2564 const uint8_t cmd_len
= 4;
2567 ILO_GPE_VALID_GEN(dev
, 6, 6);
2568 assert(index
>= 0 && index
< 4);
2570 dw1
= index
<< SVB_INDEX_SHIFT
;
2571 if (load_vertex_count
)
2572 dw1
|= SVB_LOAD_INTERNAL_VERTEX_COUNT
;
2574 ilo_cp_begin(cp
, cmd_len
);
2575 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2576 ilo_cp_write(cp
, dw1
);
2577 ilo_cp_write(cp
, svbi
);
2578 ilo_cp_write(cp
, max_svbi
);
2583 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info
*dev
,
2585 const uint32_t *packed_sample_pos
,
2586 bool pixel_location_center
,
2589 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0d);
2590 const uint8_t cmd_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 3;
2591 uint32_t dw1
, dw2
, dw3
;
2593 ILO_GPE_VALID_GEN(dev
, 6, 7);
2595 dw1
= (pixel_location_center
) ?
2596 MS_PIXEL_LOCATION_CENTER
: MS_PIXEL_LOCATION_UPPER_LEFT
;
2598 switch (num_samples
) {
2601 dw1
|= MS_NUMSAMPLES_1
;
2606 dw1
|= MS_NUMSAMPLES_4
;
2607 dw2
= packed_sample_pos
[0];
2611 assert(dev
->gen
>= ILO_GEN(7));
2612 dw1
|= MS_NUMSAMPLES_8
;
2613 dw2
= packed_sample_pos
[0];
2614 dw3
= packed_sample_pos
[1];
2617 assert(!"unsupported sample count");
2618 dw1
|= MS_NUMSAMPLES_1
;
2624 ilo_cp_begin(cp
, cmd_len
);
2625 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2626 ilo_cp_write(cp
, dw1
);
2627 ilo_cp_write(cp
, dw2
);
2628 if (dev
->gen
>= ILO_GEN(7))
2629 ilo_cp_write(cp
, dw3
);
2634 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info
*dev
,
2635 const struct pipe_surface
*surface
,
2638 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
2639 ILO_GPE_CMD(0x3, 0x0, 0x06) :
2640 ILO_GPE_CMD(0x3, 0x1, 0x0e);
2641 const uint8_t cmd_len
= 3;
2642 struct ilo_texture
*tex
;
2643 uint32_t slice_offset
, x_offset
, y_offset
;
2646 ILO_GPE_VALID_GEN(dev
, 6, 7);
2648 tex
= (surface
) ? ilo_texture(surface
->texture
) : NULL
;
2649 if (tex
&& surface
->format
!= PIPE_FORMAT_S8_UINT
)
2650 tex
= tex
->separate_s8
;
2653 ilo_cp_begin(cp
, cmd_len
);
2654 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2655 ilo_cp_write(cp
, 0);
2656 ilo_cp_write(cp
, 0);
2662 slice_offset
= ilo_texture_get_slice_offset(tex
,
2663 surface
->u
.tex
.level
, surface
->u
.tex
.first_layer
,
2664 &x_offset
, &y_offset
);
2665 /* XXX X/Y offsets inherit from 3DSTATE_DEPTH_BUFFER */
2668 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
2670 * "The pitch must be set to 2x the value computed based on width, as
2671 * the stencil buffer is stored with two rows interleaved."
2673 * According to the classic driver, we need to do the same for GEN7+ even
2674 * though the Ivy Bridge PRM does not say anything about it.
2676 pitch
= 2 * tex
->bo_stride
;
2677 assert(pitch
> 0 && pitch
< 128 * 1024 && pitch
% 128 == 0);
2679 ilo_cp_begin(cp
, cmd_len
);
2680 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2681 ilo_cp_write(cp
, pitch
- 1);
2682 ilo_cp_write_bo(cp
, slice_offset
, tex
->bo
,
2683 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
2688 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
2689 const struct pipe_surface
*surface
,
2692 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
2693 ILO_GPE_CMD(0x3, 0x0, 0x07) :
2694 ILO_GPE_CMD(0x3, 0x1, 0x0f);
2695 const uint8_t cmd_len
= 3;
2696 struct ilo_texture
*tex
;
2697 uint32_t slice_offset
;
2699 ILO_GPE_VALID_GEN(dev
, 6, 7);
2702 ilo_cp_begin(cp
, cmd_len
);
2703 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2704 ilo_cp_write(cp
, 0);
2705 ilo_cp_write(cp
, 0);
2711 tex
= ilo_texture(surface
->texture
);
2716 assert(tex
->bo_stride
> 0 && tex
->bo_stride
< 128 * 1024 &&
2717 tex
->bo_stride
% 128 == 0);
2719 ilo_cp_begin(cp
, cmd_len
);
2720 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2721 ilo_cp_write(cp
, tex
->bo_stride
- 1);
2722 ilo_cp_write_bo(cp
, slice_offset
, tex
->bo
,
2723 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
2728 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
2732 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x10);
2733 const uint8_t cmd_len
= 2;
2735 ILO_GPE_VALID_GEN(dev
, 6, 6);
2737 ilo_cp_begin(cp
, cmd_len
);
2738 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
2739 GEN5_DEPTH_CLEAR_VALID
);
2740 ilo_cp_write(cp
, clear_val
);
2745 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info
*dev
,
2747 struct intel_bo
*bo
, uint32_t bo_offset
,
2751 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x2, 0x00);
2752 const uint8_t cmd_len
= (write_qword
) ? 5 : 4;
2753 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
2754 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
2756 ILO_GPE_VALID_GEN(dev
, 6, 7);
2758 if (dw1
& PIPE_CONTROL_CS_STALL
) {
2760 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2762 * "1 of the following must also be set (when CS stall is set):
2764 * * Depth Cache Flush Enable ([0] of DW1)
2765 * * Stall at Pixel Scoreboard ([1] of DW1)
2766 * * Depth Stall ([13] of DW1)
2767 * * Post-Sync Operation ([13] of DW1)
2768 * * Render Target Cache Flush Enable ([12] of DW1)
2769 * * Notify Enable ([8] of DW1)"
2771 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
2773 * "One of the following must also be set (when CS stall is set):
2775 * * Render Target Cache Flush Enable ([12] of DW1)
2776 * * Depth Cache Flush Enable ([0] of DW1)
2777 * * Stall at Pixel Scoreboard ([1] of DW1)
2778 * * Depth Stall ([13] of DW1)
2779 * * Post-Sync Operation ([13] of DW1)"
2781 uint32_t bit_test
= PIPE_CONTROL_WRITE_FLUSH
|
2782 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
2783 PIPE_CONTROL_STALL_AT_SCOREBOARD
|
2784 PIPE_CONTROL_DEPTH_STALL
;
2787 bit_test
|= PIPE_CONTROL_WRITE_IMMEDIATE
|
2788 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
2789 PIPE_CONTROL_WRITE_TIMESTAMP
;
2791 if (dev
->gen
== ILO_GEN(6))
2792 bit_test
|= PIPE_CONTROL_INTERRUPT_ENABLE
;
2794 assert(dw1
& bit_test
);
2797 if (dw1
& PIPE_CONTROL_DEPTH_STALL
) {
2799 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
2801 * "Following bits must be clear (when Depth Stall is set):
2803 * * Render Target Cache Flush Enable ([12] of DW1)
2804 * * Depth Cache Flush Enable ([0] of DW1)"
2806 assert(!(dw1
& (PIPE_CONTROL_WRITE_FLUSH
|
2807 PIPE_CONTROL_DEPTH_CACHE_FLUSH
)));
2810 ilo_cp_begin(cp
, cmd_len
);
2811 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2812 ilo_cp_write(cp
, dw1
);
2813 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
2814 ilo_cp_write(cp
, 0);
2816 ilo_cp_write(cp
, 0);
2821 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
2822 const struct pipe_draw_info
*info
,
2826 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x3, 0x00);
2827 const uint8_t cmd_len
= 6;
2828 const int prim
= (rectlist
) ?
2829 _3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
2830 const int vb_access
= (info
->indexed
) ?
2831 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM
:
2832 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL
;
2834 ILO_GPE_VALID_GEN(dev
, 6, 6);
2836 ilo_cp_begin(cp
, cmd_len
);
2837 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
2838 prim
<< GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT
|
2840 ilo_cp_write(cp
, info
->count
);
2841 ilo_cp_write(cp
, info
->start
);
2842 ilo_cp_write(cp
, info
->instance_count
);
2843 ilo_cp_write(cp
, info
->start_instance
);
2844 ilo_cp_write(cp
, info
->index_bias
);
2849 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info
*dev
,
2850 const struct ilo_shader
**cs
,
2851 uint32_t *sampler_state
,
2853 uint32_t *binding_table_state
,
2859 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
2861 * "(Interface Descriptor Total Length) This field must have the same
2862 * alignment as the Interface Descriptor Data Start Address.
2864 * It must be DQWord (32-byte) aligned..."
2866 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
2868 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
2869 * aligned address of the Interface Descriptor data."
2871 const int state_align
= 32 / 4;
2872 const int state_len
= (32 / 4) * num_ids
;
2873 uint32_t state_offset
, *dw
;
2876 ILO_GPE_VALID_GEN(dev
, 6, 6);
2878 dw
= ilo_cp_steal_ptr(cp
, "INTERFACE_DESCRIPTOR_DATA",
2879 state_len
, state_align
, &state_offset
);
2881 for (i
= 0; i
< num_ids
; i
++) {
2884 curbe_read_len
= (cs
[i
]->pcb
.clip_state_size
+ 31) / 32;
2886 dw
[0] = cs
[i
]->cache_offset
;
2887 dw
[1] = 1 << 18; /* SPF */
2888 dw
[2] = sampler_state
[i
] |
2889 (num_samplers
[i
] + 3) / 4 << 2;
2890 dw
[3] = binding_table_state
[i
] |
2892 dw
[4] = curbe_read_len
<< 16 | /* CURBE Read Length */
2893 0; /* CURBE Read Offset */
2894 dw
[5] = 0; /* Barrier ID */
2901 return state_offset
;
2905 ilo_gpe_gen6_fill_SF_VIEWPORT(const struct ilo_dev_info
*dev
,
2906 const struct pipe_viewport_state
*viewports
,
2908 uint32_t *dw
, int num_dwords
)
2912 ILO_GPE_VALID_GEN(dev
, 6, 7);
2913 assert(num_dwords
== 8 * num_viewports
);
2915 for (i
= 0; i
< num_viewports
; i
++) {
2916 const struct pipe_viewport_state
*vp
= &viewports
[i
];
2918 dw
[0] = fui(vp
->scale
[0]);
2919 dw
[1] = fui(vp
->scale
[1]);
2920 dw
[2] = fui(vp
->scale
[2]);
2921 dw
[3] = fui(vp
->translate
[0]);
2922 dw
[4] = fui(vp
->translate
[1]);
2923 dw
[5] = fui(vp
->translate
[2]);
2934 ilo_gpe_gen6_fill_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
2935 const struct pipe_viewport_state
*viewports
,
2937 uint32_t *dw
, int num_dwords
)
2941 ILO_GPE_VALID_GEN(dev
, 6, 7);
2942 assert(num_dwords
== 4 * num_viewports
);
2945 * CLIP_VIEWPORT specifies the guard band.
2947 * Clipping an object that is not entirely inside or outside the viewport
2948 * (that is, trivially accepted or rejected) is expensive. Guard band test
2949 * allows clipping to be skipped in this stage and let the renderer dicards
2950 * pixels that are outside the viewport.
2952 * The reason that we need CLIP_VIEWPORT is that the renderer has a limit
2953 * on the object size. We have to clip normally when the object exceeds
2957 for (i
= 0; i
< num_viewports
; i
++) {
2958 const struct pipe_viewport_state
*vp
= &viewports
[i
];
2960 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
2962 * "Per-Device Guardband Extents
2964 * * Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
2965 * * Maximum Post-Clamp Delta (X or Y): 16K"
2967 * "In addition, in order to be correctly rendered, objects must
2968 * have a screenspace bounding box not exceeding 8K in the X or Y
2969 * direction. This additional restriction must also be
2970 * comprehended by software, i.e., enforced by use of clipping."
2972 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
2974 * "Per-Device Guardband Extents
2976 * * Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
2977 * * Maximum Post-Clamp Delta (X or Y): N/A"
2979 * "In addition, in order to be correctly rendered, objects must
2980 * have a screenspace bounding box not exceeding 8K in the X or Y
2981 * direction. This additional restriction must also be comprehended
2982 * by software, i.e., enforced by use of clipping."
2984 * Combined, the bounding box of any object can not exceed 8K in both
2987 * Below we set the guardband as a squre of length 8K, centered at where
2988 * the viewport is. This makes sure all objects passing the GB test are
2989 * valid to the renderer, and those failing the XY clipping have a
2990 * better chance of passing the GB test.
2992 const float xscale
= fabs(vp
->scale
[0]);
2993 const float yscale
= fabs(vp
->scale
[1]);
2994 const int max_extent
= (dev
->gen
>= ILO_GEN(7)) ? 32768 : 16384;
2995 const int half_len
= 8192 / 2;
2996 int center_x
= (int) vp
->translate
[0];
2997 int center_y
= (int) vp
->translate
[1];
2998 float xmin
, xmax
, ymin
, ymax
;
3000 /* make sure the guardband is within the valid range */
3001 if (center_x
- half_len
< -max_extent
)
3002 center_x
= -max_extent
+ half_len
;
3003 else if (center_x
+ half_len
> max_extent
)
3004 center_x
= max_extent
- half_len
;
3006 if (center_y
- half_len
< -max_extent
)
3007 center_y
= -max_extent
+ half_len
;
3008 else if (center_y
+ half_len
> max_extent
)
3009 center_y
= max_extent
- half_len
;
3011 xmin
= (float) (center_x
- half_len
);
3012 xmax
= (float) (center_x
+ half_len
);
3013 ymin
= (float) (center_y
- half_len
);
3014 ymax
= (float) (center_y
+ half_len
);
3016 /* screen space to NDC space */
3017 xmin
= (xmin
- vp
->translate
[0]) / xscale
;
3018 xmax
= (xmax
- vp
->translate
[0]) / xscale
;
3019 ymin
= (ymin
- vp
->translate
[1]) / yscale
;
3020 ymax
= (ymax
- vp
->translate
[1]) / yscale
;
3032 gen6_fill_CC_VIEWPORT(const struct ilo_dev_info
*dev
,
3033 const struct pipe_viewport_state
*viewports
,
3035 uint32_t *dw
, int num_dwords
)
3039 ILO_GPE_VALID_GEN(dev
, 6, 7);
3040 assert(num_dwords
== 2 * num_viewports
);
3042 for (i
= 0; i
< num_viewports
; i
++) {
3043 const struct pipe_viewport_state
*vp
= &viewports
[i
];
3044 const float scale
= fabs(vp
->scale
[2]);
3045 const float min
= vp
->translate
[2] - scale
;
3046 const float max
= vp
->translate
[2] + scale
;
/**
 * Allocate SF_VIEWPORT state in the command parser and fill it.  GEN6 only.
 *
 * \param dev           device info; must be GEN6
 * \param viewports     array of \p num_viewports viewport states
 * \param num_viewports number of viewports, in [1, 16]
 * \param cp            command parser that owns the state buffer
 * \return the offset of the allocated state, as reported by
 *         ilo_cp_steal_ptr()
 */
static uint32_t
gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
                      const struct pipe_viewport_state *viewports,
                      int num_viewports,
                      struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 8 * num_viewports;
   uint32_t state_offset, *dw;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 262:
    *
    *     "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
    *      stored as an array of up to 16 elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
         state_len, state_align, &state_offset);

   ilo_gpe_gen6_fill_SF_VIEWPORT(dev,
         viewports, num_viewports, dw, state_len);

   return state_offset;
}
/**
 * Allocate CLIP_VIEWPORT state in the command parser and fill it.  GEN6 only.
 *
 * \param dev           device info; must be GEN6
 * \param viewports     array of \p num_viewports viewport states
 * \param num_viewports number of viewports, in [1, 16]
 * \param cp            command parser that owns the state buffer
 * \return the offset of the allocated state, as reported by
 *         ilo_cp_steal_ptr()
 */
static uint32_t
gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
                        const struct pipe_viewport_state *viewports,
                        int num_viewports,
                        struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 4 * num_viewports;
   uint32_t state_offset, *dw;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /*
    * The Sandy Bridge PRM, volume 2 part 1, page 193, states that the
    * viewport-related state is stored as an array of up to 16 elements.
    */
   assert(num_viewports && num_viewports <= 16);

   dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
         state_len, state_align, &state_offset);

   ilo_gpe_gen6_fill_CLIP_VIEWPORT(dev,
         viewports, num_viewports, dw, state_len);

   return state_offset;
}
/**
 * Allocate CC_VIEWPORT state in the command parser and fill it.
 *
 * \param dev           device info; must be GEN6 or GEN7
 * \param viewports     array of \p num_viewports viewport states
 * \param num_viewports number of viewports, in [1, 16]
 * \param cp            command parser that owns the state buffer
 * \return the offset of the allocated state, as reported by
 *         ilo_cp_steal_ptr()
 */
static uint32_t
gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
                      const struct pipe_viewport_state *viewports,
                      int num_viewports,
                      struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = 2 * num_viewports;
   uint32_t state_offset, *dw;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 385:
    *
    *     "The viewport state is stored as an array of up to 16 elements..."
    */
   assert(num_viewports && num_viewports <= 16);

   dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
         state_len, state_align, &state_offset);

   gen6_fill_CC_VIEWPORT(dev, viewports, num_viewports, dw, state_len);

   return state_offset;
}
3141 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info
*dev
,
3142 const struct pipe_stencil_ref
*stencil_ref
,
3144 const struct pipe_blend_color
*blend_color
,
3147 const int state_align
= 64 / 4;
3148 const int state_len
= 6;
3149 uint32_t state_offset
, *dw
;
3151 ILO_GPE_VALID_GEN(dev
, 6, 7);
3153 dw
= ilo_cp_steal_ptr(cp
, "COLOR_CALC_STATE",
3154 state_len
, state_align
, &state_offset
);
3156 dw
[0] = stencil_ref
->ref_value
[0] << 24 |
3157 stencil_ref
->ref_value
[1] << 16 |
3158 BRW_ALPHATEST_FORMAT_UNORM8
;
3159 dw
[1] = float_to_ubyte(alpha_ref
);
3160 dw
[2] = fui(blend_color
->color
[0]);
3161 dw
[3] = fui(blend_color
->color
[1]);
3162 dw
[4] = fui(blend_color
->color
[2]);
3163 dw
[5] = fui(blend_color
->color
[3]);
3165 return state_offset
;
3169 gen6_blend_factor_dst_alpha_forced_one(int factor
)
3172 case BRW_BLENDFACTOR_DST_ALPHA
:
3173 return BRW_BLENDFACTOR_ONE
;
3174 case BRW_BLENDFACTOR_INV_DST_ALPHA
:
3175 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE
:
3176 return BRW_BLENDFACTOR_ZERO
;
3183 gen6_emit_BLEND_STATE(const struct ilo_dev_info
*dev
,
3184 const struct pipe_blend_state
*blend
,
3185 const struct pipe_framebuffer_state
*framebuffer
,
3186 const struct pipe_alpha_state
*alpha
,
3189 const int state_align
= 64 / 4;
3191 uint32_t state_offset
, *dw
;
3194 ILO_GPE_VALID_GEN(dev
, 6, 7);
3197 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
3199 * "The blend state is stored as an array of up to 8 elements..."
3201 num_targets
= framebuffer
->nr_cbufs
;
3202 assert(num_targets
<= 8);
3205 if (!alpha
->enabled
)
3207 /* to be able to reference alpha func */
3211 state_len
= 2 * num_targets
;
3213 dw
= ilo_cp_steal_ptr(cp
, "BLEND_STATE",
3214 state_len
, state_align
, &state_offset
);
3216 for (i
= 0; i
< num_targets
; i
++) {
3217 const int target
= (blend
->independent_blend_enable
) ? i
: 0;
3218 const struct pipe_rt_blend_state
*rt
= &blend
->rt
[target
];
3219 const int num_samples
= (target
< framebuffer
->nr_cbufs
) ?
3220 framebuffer
->cbufs
[target
]->texture
->nr_samples
: 1;
3221 const struct util_format_description
*format_desc
=
3222 (target
< framebuffer
->nr_cbufs
) ?
3223 util_format_description(framebuffer
->cbufs
[target
]->format
) : NULL
;
3224 bool rt_is_unorm
, rt_is_pure_integer
, rt_dst_alpha_forced_one
;
3227 rt_is_pure_integer
= false;
3228 rt_dst_alpha_forced_one
= false;
3233 switch (format_desc
->format
) {
3234 case PIPE_FORMAT_B8G8R8X8_UNORM
:
3235 /* force alpha to one when the HW format has alpha */
3236 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM
)
3237 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM
);
3238 rt_dst_alpha_forced_one
= true;
3244 for (ch
= 0; ch
< 4; ch
++) {
3245 if (format_desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_VOID
)
3248 if (format_desc
->channel
[ch
].pure_integer
) {
3249 rt_is_unorm
= false;
3250 rt_is_pure_integer
= true;
3254 if (!format_desc
->channel
[ch
].normalized
||
3255 format_desc
->channel
[ch
].type
!= UTIL_FORMAT_TYPE_UNSIGNED
)
3256 rt_is_unorm
= false;
3261 dw
[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT
<< 2 | 0x3;
3264 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3266 * "* Color Buffer Blending and Logic Ops must not be enabled
3267 * simultaneously, or behavior is UNDEFINED.
3269 * * Logic Ops are only supported on *_UNORM surfaces (excluding
3270 * _SRGB variants), otherwise Logic Ops must be DISABLED."
3272 * Since blend->logicop_enable takes precedence over rt->blend_enable,
3273 * and logicop is ignored for non-UNORM color buffers, no special care
3276 if (blend
->logicop_enable
) {
3279 gen6_translate_pipe_logicop(blend
->logicop_func
) << 18;
3282 else if (rt
->blend_enable
&& !rt_is_pure_integer
) {
3283 int rgb_src
, rgb_dst
, a_src
, a_dst
;
3285 rgb_src
= gen6_translate_pipe_blendfactor(rt
->rgb_src_factor
);
3286 rgb_dst
= gen6_translate_pipe_blendfactor(rt
->rgb_dst_factor
);
3287 a_src
= gen6_translate_pipe_blendfactor(rt
->alpha_src_factor
);
3288 a_dst
= gen6_translate_pipe_blendfactor(rt
->alpha_dst_factor
);
3290 if (rt_dst_alpha_forced_one
) {
3291 rgb_src
= gen6_blend_factor_dst_alpha_forced_one(rgb_src
);
3292 rgb_dst
= gen6_blend_factor_dst_alpha_forced_one(rgb_dst
);
3293 a_src
= gen6_blend_factor_dst_alpha_forced_one(a_src
);
3294 a_dst
= gen6_blend_factor_dst_alpha_forced_one(a_dst
);
3298 gen6_translate_pipe_blend(rt
->alpha_func
) << 26 |
3301 gen6_translate_pipe_blend(rt
->rgb_func
) << 11 |
3305 if (rt
->rgb_func
!= rt
->alpha_func
||
3312 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
3314 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
3315 * Dither both must be disabled."
3317 * There is no such limitation on GEN7, or for AlphaToOne. But GL
3318 * requires that anyway.
3320 if (num_samples
> 1) {
3321 if (blend
->alpha_to_coverage
)
3324 if (blend
->alpha_to_one
) {
3325 const bool dual_blend
=
3326 (!blend
->logicop_enable
&& rt
->blend_enable
&&
3327 util_blend_state_is_dual(blend
, target
));
3330 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
3332 * "If Dual Source Blending is enabled, this bit (AlphaToOne
3333 * Enable) must be disabled."
3339 if (dev
->gen
>= ILO_GEN(7))
3343 if (!(rt
->colormask
& PIPE_MASK_A
))
3345 if (!(rt
->colormask
& PIPE_MASK_R
))
3347 if (!(rt
->colormask
& PIPE_MASK_G
))
3349 if (!(rt
->colormask
& PIPE_MASK_B
))
3353 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
3355 * "Alpha Test can only be enabled if Pixel Shader outputs a float
3358 if (alpha
->enabled
&& !rt_is_pure_integer
) {
3360 gen6_translate_dsa_func(alpha
->func
) << 13;
3369 return state_offset
;
3373 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info
*dev
,
3374 const struct pipe_depth_stencil_alpha_state
*dsa
,
3377 const int state_align
= 64 / 4;
3378 const int state_len
= 3;
3379 uint32_t state_offset
, *dw
;
3381 ILO_GPE_VALID_GEN(dev
, 6, 7);
3383 dw
= ilo_cp_steal_ptr(cp
, "DEPTH_STENCIL_STATE",
3384 state_len
, state_align
, &state_offset
);
3387 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
3389 * "If the Depth Buffer is either undefined or does not have a surface
3390 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
3391 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
3393 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
3395 * "This field (Stencil Test Enable) cannot be enabled if
3396 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
3398 * TODO We do not check these yet.
3400 if (dsa
->stencil
[0].enabled
) {
3401 const struct pipe_stencil_state
*stencil
= &dsa
->stencil
[0];
3404 gen6_translate_dsa_func(stencil
->func
) << 28 |
3405 gen6_translate_pipe_stencil_op(stencil
->fail_op
) << 25 |
3406 gen6_translate_pipe_stencil_op(stencil
->zfail_op
) << 22 |
3407 gen6_translate_pipe_stencil_op(stencil
->zpass_op
) << 19;
3408 if (stencil
->writemask
)
3411 dw
[1] = stencil
->valuemask
<< 24 |
3412 stencil
->writemask
<< 16;
3414 if (dsa
->stencil
[1].enabled
) {
3415 stencil
= &dsa
->stencil
[1];
3418 gen6_translate_dsa_func(stencil
->func
) << 12 |
3419 gen6_translate_pipe_stencil_op(stencil
->fail_op
) << 9 |
3420 gen6_translate_pipe_stencil_op(stencil
->zfail_op
) << 6 |
3421 gen6_translate_pipe_stencil_op(stencil
->zpass_op
) << 3;
3422 if (stencil
->writemask
)
3425 dw
[1] |= stencil
->valuemask
<< 8 |
3435 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
3437 * "Enabling the Depth Test function without defining a Depth Buffer is
3440 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
3442 * "A Depth Buffer must be defined before enabling writes to it, or
3443 * operation is UNDEFINED."
3445 * TODO We do not check these yet.
3447 dw
[2] = dsa
->depth
.enabled
<< 31 |
3448 dsa
->depth
.writemask
<< 26;
3449 if (dsa
->depth
.enabled
)
3450 dw
[2] |= gen6_translate_dsa_func(dsa
->depth
.func
) << 27;
3452 dw
[2] |= BRW_COMPAREFUNCTION_ALWAYS
<< 27;
3454 return state_offset
;
3458 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info
*dev
,
3459 const struct pipe_scissor_state
*scissors
,
3463 const int state_align
= 32 / 4;
3464 const int state_len
= 2 * num_scissors
;
3465 uint32_t state_offset
, *dw
;
3468 ILO_GPE_VALID_GEN(dev
, 6, 7);
3471 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
3473 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
3474 * stored as an array of up to 16 elements..."
3476 assert(num_scissors
&& num_scissors
<= 16);
3478 dw
= ilo_cp_steal_ptr(cp
, "SCISSOR_RECT",
3479 state_len
, state_align
, &state_offset
);
3481 for (i
= 0; i
< num_scissors
; i
++) {
3482 if (scissors
[i
].minx
< scissors
[i
].maxx
&&
3483 scissors
[i
].miny
< scissors
[i
].maxy
) {
3484 dw
[0] = scissors
[i
].miny
<< 16 | scissors
[i
].minx
;
3485 dw
[1] = (scissors
[i
].maxy
- 1) << 16 | (scissors
[i
].maxx
- 1);
3488 /* we have to make min greater than max as they are both inclusive */
3489 dw
[0] = 1 << 16 | 1;
3496 return state_offset
;
/**
 * Allocate BINDING_TABLE_STATE and copy the given surface state offsets into
 * it, one dword per entry.
 *
 * \param dev                device info; must be GEN6 or GEN7
 * \param surface_states     array of \p num_surface_states dwords to copy
 * \param num_surface_states number of entries, at most 256
 * \param cp                 command parser that owns the state buffer
 * \return the offset of the allocated state, as reported by
 *         ilo_cp_steal_ptr(), or 0 when there are no entries
 */
static uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = num_surface_states;
   uint32_t state_offset, *dw;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* nothing to allocate for an empty table */
   if (!num_surface_states)
      return 0;

   dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         state_len, state_align, &state_offset);
   memcpy(dw, surface_states,
         num_surface_states * sizeof(surface_states[0]));

   return state_offset;
}
3530 gen6_fill_null_SURFACE_STATE(const struct ilo_dev_info
*dev
,
3531 unsigned width
, unsigned height
,
3532 unsigned depth
, unsigned lod
,
3533 uint32_t *dw
, int num_dwords
)
3535 ILO_GPE_VALID_GEN(dev
, 6, 6);
3536 assert(num_dwords
== 6);
3539 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
3541 * "A null surface will be used in instances where an actual surface is
3542 * not bound. When a write message is generated to a null surface, no
3543 * actual surface is written to. When a read message (including any
3544 * sampling engine message) is generated to a null surface, the result
3545 * is all zeros. Note that a null surface type is allowed to be used
3546 * with all messages, even if it is not specificially indicated as
3547 * supported. All of the remaining fields in surface state are ignored
3548 * for null surfaces, with the following exceptions:
3550 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
3551 * depth buffer's corresponding state for all render target
3552 * surfaces, including null.
3553 * * Surface Format must be R8G8B8A8_UNORM."
3555 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3557 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
3561 dw
[0] = BRW_SURFACE_NULL
<< BRW_SURFACE_TYPE_SHIFT
|
3562 BRW_SURFACEFORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
;
3566 dw
[2] = (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
|
3567 (width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
3568 lod
<< BRW_SURFACE_LOD_SHIFT
;
3570 dw
[3] = (depth
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
3578 gen6_fill_buffer_SURFACE_STATE(const struct ilo_dev_info
*dev
,
3579 const struct ilo_buffer
*buf
,
3580 unsigned offset
, unsigned size
,
3581 unsigned struct_size
,
3582 enum pipe_format elem_format
,
3583 bool is_rt
, bool render_cache_rw
,
3584 uint32_t *dw
, int num_dwords
)
3586 const int elem_size
= util_format_get_blocksize(elem_format
);
3587 int width
, height
, depth
, pitch
;
3588 int surface_format
, num_entries
;
3590 ILO_GPE_VALID_GEN(dev
, 6, 6);
3591 assert(num_dwords
== 6);
3594 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
3595 * structure in a buffer.
3598 surface_format
= ilo_translate_color_format(elem_format
);
3600 num_entries
= size
/ struct_size
;
3601 /* see if there is enough space to fit another element */
3602 if (size
% struct_size
>= elem_size
)
3606 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3608 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
3609 * Address) specifies the base address of first element of the
3610 * surface. The surface is interpreted as a simple array of that
3611 * single element type. The address must be naturally-aligned to the
3612 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
3613 * must be 16-byte aligned).
3615 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
3616 * the base address of the first element of the surface, computed in
3617 * software by adding the surface base address to the byte offset of
3618 * the element in the buffer."
3621 assert(offset
% elem_size
== 0);
3624 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
3626 * "For buffer surfaces, the number of entries in the buffer ranges
3629 assert(num_entries
>= 1 && num_entries
<= 1 << 27);
3632 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3634 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
3635 * indicates the size of the structure."
3637 pitch
= struct_size
;
3642 width
= (num_entries
& 0x0000007f);
3644 height
= (num_entries
& 0x000fff80) >> 7;
3646 depth
= (num_entries
& 0x07f00000) >> 20;
3648 dw
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
3649 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
;
3650 if (render_cache_rw
)
3651 dw
[0] |= BRW_SURFACE_RC_READ_WRITE
;
3655 dw
[2] = height
<< BRW_SURFACE_HEIGHT_SHIFT
|
3656 width
<< BRW_SURFACE_WIDTH_SHIFT
;
3658 dw
[3] = depth
<< BRW_SURFACE_DEPTH_SHIFT
|
3659 pitch
<< BRW_SURFACE_PITCH_SHIFT
;
/*
 * Fill the six dwords of a GEN6 SURFACE_STATE for a non-buffer texture.
 *
 * NOTE(review): this listing looks like a lossy extraction.  Every line
 * carries its original line number and several lines (braces, else
 * branches, break statements, some assignments) are absent, so only what
 * is visible below is described here.
 *
 * dev              device info; ILO_GPE_VALID_GEN(dev, 6, 6) restricts it
 *                  to GEN6
 * tex              texture supplying target, width0/height0/depth0,
 *                  bo_stride, tiling and nr_samples; array_spacing_full is
 *                  asserted, and interleaved is asserted when
 *                  nr_samples > 1
 * format           PIPE_FORMAT_Z32_FLOAT_S8X24_UINT is replaced with
 *                  PIPE_FORMAT_Z32_FLOAT when tex->separate_s8 is set;
 *                  the translated hardware format is asserted to be >= 0
 * first_level,
 * num_levels       level range; lod is num_levels - 1 on one of the paths
 * first_layer,
 * num_layers       layer range; for cube targets num_layers / 6 is used as
 *                  depth on one of the paths
 * is_rt            cube face enables are only considered for cube surfaces
 *                  when this is false; presumably it also selects the
 *                  render format translation and the manual slice offset
 *                  path, but those conditions are not visible -- confirm
 * render_cache_rw  when set, BRW_SURFACE_RC_READ_WRITE is ORed into dw[0]
 * dw, num_dwords   output array; num_dwords is asserted to be 6
 *
 * Width, height, depth and pitch are asserted to be at least 1 and are
 * range checked per surface type before being packed minus one into dw[2]
 * and dw[3].  For INTEL_TILING_NONE, layer_offset and pitch are asserted
 * to be multiples of the format block size.
 */
3666 gen6_fill_normal_SURFACE_STATE(const struct ilo_dev_info
*dev
,
3667 struct ilo_texture
*tex
,
3668 enum pipe_format format
,
3669 unsigned first_level
, unsigned num_levels
,
3670 unsigned first_layer
, unsigned num_layers
,
3671 bool is_rt
, bool render_cache_rw
,
3672 uint32_t *dw
, int num_dwords
)
3674 int surface_type
, surface_format
;
3675 int width
, height
, depth
, pitch
, lod
;
3676 unsigned layer_offset
, x_offset
, y_offset
;
3678 ILO_GPE_VALID_GEN(dev
, 6, 6);
3679 assert(num_dwords
== 6);
3681 surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
3682 assert(surface_type
!= BRW_SURFACE_BUFFER
);
3684 if (format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
&& tex
->separate_s8
)
3685 format
= PIPE_FORMAT_Z32_FLOAT
;
3688 surface_format
= ilo_translate_render_format(format
);
3690 surface_format
= ilo_translate_texture_format(format
);
3691 assert(surface_format
>= 0);
3693 width
= tex
->base
.width0
;
3694 height
= tex
->base
.height0
;
3695 pitch
= tex
->bo_stride
;
3697 switch (tex
->base
.target
) {
3698 case PIPE_TEXTURE_3D
:
3699 depth
= tex
->base
.depth0
;
3701 case PIPE_TEXTURE_CUBE
:
3702 case PIPE_TEXTURE_CUBE_ARRAY
:
3704 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3706 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
3707 * range of this field (Depth) is [0,84], indicating the number of
3708 * cube array elements (equal to the number of underlying 2D array
3709 * elements divided by 6). For other surfaces, this field must be
3713 assert(num_layers
% 6 == 0);
3714 depth
= num_layers
/ 6;
3717 assert(num_layers
== 1);
3724 /* sanity check the size */
3725 assert(width
>= 1 && height
>= 1 && depth
>= 1 && pitch
>= 1);
3726 switch (surface_type
) {
3727 case BRW_SURFACE_1D
:
3728 assert(width
<= 8192 && height
== 1 && depth
<= 512);
3730 case BRW_SURFACE_2D
:
3731 assert(width
<= 8192 && height
<= 8192 && depth
<= 512);
3733 case BRW_SURFACE_3D
:
3734 assert(width
<= 2048 && height
<= 2048 && depth
<= 2048);
3736 case BRW_SURFACE_CUBE
:
3737 assert(width
<= 8192 && height
<= 8192 && depth
<= 85);
3738 assert(width
== height
);
3741 assert(!"unexpected surface type");
3745 /* non-full array spacing is supported only on GEN7+ */
3746 assert(tex
->array_spacing_full
);
3747 /* non-interleaved samples are supported only on GEN7+ */
3748 if (tex
->base
.nr_samples
> 1)
3749 assert(tex
->interleaved
);
3752 * Compute the offset to the layer manually.
3754 * For rendering, the hardware requires LOD to be the same for all render
3755 * targets and the depth buffer. We need to compute the offset to the
3756 * layer manually and always set LOD to 0.
3759 /* we lose the capability for layered rendering */
3760 assert(num_levels
== 1 && num_layers
== 1);
3762 layer_offset
= ilo_texture_get_slice_offset(tex
,
3763 first_level
, first_layer
, &x_offset
, &y_offset
);
3765 assert(x_offset
% 4 == 0);
3766 assert(y_offset
% 2 == 0);
3770 /* derive the size for the LOD */
3771 width
= u_minify(tex
->base
.width0
, first_level
);
3772 height
= u_minify(tex
->base
.height0
, first_level
);
3773 if (surface_type
== BRW_SURFACE_3D
)
3774 depth
= u_minify(tex
->base
.depth0
, first_level
);
3784 lod
= num_levels
- 1;
3788 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3790 * "Linear render target surface base addresses must be element-size
3791 * aligned, for non-YUV surface formats, or a multiple of 2
3792 * element-sizes for YUV surface formats. Other linear surfaces have
3793 * no alignment requirements (byte alignment is sufficient.)"
3795 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
3797 * "For linear render target surfaces, the pitch must be a multiple
3798 * of the element size for non-YUV surface formats. Pitch must be a
3799 * multiple of 2 * element size for YUV surface formats."
3801 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
3803 * "For linear surfaces, this field (X Offset) must be zero"
3805 if (tex
->tiling
== INTEL_TILING_NONE
) {
3807 const int elem_size
= util_format_get_blocksize(format
);
3808 assert(layer_offset
% elem_size
== 0);
3809 assert(pitch
% elem_size
== 0);
3815 dw
[0] = surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
3816 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
3817 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
;
3819 if (surface_type
== BRW_SURFACE_CUBE
&& !is_rt
) {
3821 BRW_SURFACE_CUBEFACE_ENABLES
;
3824 if (render_cache_rw
)
3825 dw
[0] |= BRW_SURFACE_RC_READ_WRITE
;
3827 dw
[1] = layer_offset
;
3829 dw
[2] = (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
|
3830 (width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
3831 lod
<< BRW_SURFACE_LOD_SHIFT
;
3833 dw
[3] = (depth
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
3834 (pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
|
3835 ilo_gpe_gen6_translate_winsys_tiling(tex
->tiling
);
3837 dw
[4] = first_level
<< BRW_SURFACE_MIN_LOD_SHIFT
|
3840 ((tex
->base
.nr_samples
> 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4
:
3841 BRW_SURFACE_MULTISAMPLECOUNT_1
);
3843 dw
[5] = x_offset
<< BRW_SURFACE_X_OFFSET_SHIFT
|
3844 y_offset
<< BRW_SURFACE_Y_OFFSET_SHIFT
;
3846 dw
[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE
;
/*
 * Write a prepared six-dword SURFACE_STATE through the command parser and
 * return the offset reported by ilo_cp_steal().
 *
 * dev          device info; restricted to GEN6 by ILO_GPE_VALID_GEN
 * bo           buffer object passed to ilo_cp_write_bo() together with
 *              dw[1] and the read/write domains
 * for_render   presumably selects between the RENDER read/write domains
 *              and the SAMPLER read domain; the selecting condition is not
 *              visible in this listing -- confirm
 * dw           the six dwords to write, in order
 * num_dwords   asserted to equal state_len (6)
 *
 * The state is requested with an alignment of 32 / 4.
 */
3850 gen6_emit_SURFACE_STATE(const struct ilo_dev_info
*dev
,
3851 struct intel_bo
*bo
, bool for_render
,
3852 const uint32_t *dw
, int num_dwords
,
3855 const int state_align
= 32 / 4;
3856 const int state_len
= 6;
3857 uint32_t state_offset
;
3858 uint32_t read_domains
, write_domain
;
3860 ILO_GPE_VALID_GEN(dev
, 6, 6);
3861 assert(num_dwords
== state_len
);
3864 read_domains
= INTEL_DOMAIN_RENDER
;
3865 write_domain
= INTEL_DOMAIN_RENDER
;
3868 read_domains
= INTEL_DOMAIN_SAMPLER
;
3872 ilo_cp_steal(cp
, "SURFACE_STATE", state_len
, state_align
, &state_offset
);
3873 ilo_cp_write(cp
, dw
[0]);
3874 ilo_cp_write_bo(cp
, dw
[1], bo
, read_domains
, write_domain
);
3875 ilo_cp_write(cp
, dw
[2]);
3876 ilo_cp_write(cp
, dw
[3]);
3877 ilo_cp_write(cp
, dw
[4]);
3878 ilo_cp_write(cp
, dw
[5]);
3881 return state_offset
;
/*
 * Emit a SURFACE_STATE for a pipe_surface and return its offset.
 *
 * When surface and surface->texture are both non-NULL, the state is filled
 * by gen6_fill_normal_SURFACE_STATE() for the single level
 * surface->u.tex.level and the layer range first_layer..last_layer, with
 * is_rt and render_cache_rw both true.  Otherwise a null surface state is
 * filled from surface->width and surface->height.  The result is emitted
 * with for_render set to true.
 *
 * NOTE(review): the guard tests surface for NULL, yet the null-surface
 * call reads surface->width and surface->height.  If surface can really be
 * NULL here, that path would dereference it -- confirm against callers.
 */
3885 gen6_emit_surf_SURFACE_STATE(const struct ilo_dev_info
*dev
,
3886 const struct pipe_surface
*surface
,
3889 struct intel_bo
*bo
;
3892 ILO_GPE_VALID_GEN(dev
, 6, 6);
3894 if (surface
&& surface
->texture
) {
3895 struct ilo_texture
*tex
= ilo_texture(surface
->texture
);
3900 * classic i965 sets render_cache_rw for constant buffers and sol
3901 * surfaces but not render buffers. Why?
3903 gen6_fill_normal_SURFACE_STATE(dev
, tex
, surface
->format
,
3904 surface
->u
.tex
.level
, 1,
3905 surface
->u
.tex
.first_layer
,
3906 surface
->u
.tex
.last_layer
- surface
->u
.tex
.first_layer
+ 1,
3907 true, true, dw
, Elements(dw
));
3911 gen6_fill_null_SURFACE_STATE(dev
,
3912 surface
->width
, surface
->height
, 1, 0, dw
, Elements(dw
));
3915 return gen6_emit_SURFACE_STATE(dev
, bo
, true, dw
, Elements(dw
), cp
);
/*
 * Emit a SURFACE_STATE for a sampler view and return its offset.
 *
 * The state covers levels first_level..last_level and layers
 * first_layer..last_layer of view->texture, is filled with is_rt and
 * render_cache_rw both false, and is emitted against tex->bo with
 * for_render set to false.  GEN6 only (ILO_GPE_VALID_GEN(dev, 6, 6)).
 */
3919 gen6_emit_view_SURFACE_STATE(const struct ilo_dev_info
*dev
,
3920 const struct pipe_sampler_view
*view
,
3923 struct ilo_texture
*tex
= ilo_texture(view
->texture
);
3926 ILO_GPE_VALID_GEN(dev
, 6, 6);
3928 gen6_fill_normal_SURFACE_STATE(dev
, tex
, view
->format
,
3929 view
->u
.tex
.first_level
,
3930 view
->u
.tex
.last_level
- view
->u
.tex
.first_level
+ 1,
3931 view
->u
.tex
.first_layer
,
3932 view
->u
.tex
.last_layer
- view
->u
.tex
.first_layer
+ 1,
3933 false, false, dw
, Elements(dw
));
3935 return gen6_emit_SURFACE_STATE(dev
, tex
->bo
, false, dw
, Elements(dw
), cp
);
/*
 * Emit a SURFACE_STATE for a constant buffer and return its offset.
 *
 * The buffer is described with PIPE_FORMAT_R32G32B32A32_FLOAT elements;
 * the block size of that format is passed to
 * gen6_fill_buffer_SURFACE_STATE() along with cbuf->buffer_offset and
 * cbuf->buffer_size.  Both boolean arguments of the fill call are false,
 * and the state is emitted against buf->bo with for_render set to false.
 * GEN6 only (ILO_GPE_VALID_GEN(dev, 6, 6)).
 */
3939 gen6_emit_cbuf_SURFACE_STATE(const struct ilo_dev_info
*dev
,
3940 const struct pipe_constant_buffer
*cbuf
,
3943 const enum pipe_format elem_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
3944 struct ilo_buffer
*buf
= ilo_buffer(cbuf
->buffer
);
3947 ILO_GPE_VALID_GEN(dev
, 6, 6);
3949 gen6_fill_buffer_SURFACE_STATE(dev
, buf
,
3950 cbuf
->buffer_offset
, cbuf
->buffer_size
,
3951 util_format_get_blocksize(elem_format
), elem_format
,
3952 false, false, dw
, Elements(dw
));
3954 return gen6_emit_SURFACE_STATE(dev
, buf
->bo
, false, dw
, Elements(dw
), cp
);
/*
 * Emit a SURFACE_STATE for one stream output register and return its
 * offset.
 *
 * bo_offset is so->buffer_offset plus the dst_offset of output so_index
 * times 4, and struct_size is the stride of that output's buffer times 4.
 * The element format is a 32-bit float format chosen from the output's
 * num_components (the case labels are not visible in this listing); an
 * unexpected count asserts and uses PIPE_FORMAT_R32_FLOAT.
 *
 * The fill call passes false and true as its two boolean arguments, and
 * the state is emitted against buf->bo with for_render set to false.
 * GEN6 only (ILO_GPE_VALID_GEN(dev, 6, 6)).
 */
3958 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info
*dev
,
3959 const struct pipe_stream_output_target
*so
,
3960 const struct pipe_stream_output_info
*so_info
,
3964 struct ilo_buffer
*buf
= ilo_buffer(so
->buffer
);
3965 unsigned bo_offset
, struct_size
;
3966 enum pipe_format elem_format
;
3969 ILO_GPE_VALID_GEN(dev
, 6, 6);
3971 bo_offset
= so
->buffer_offset
+ so_info
->output
[so_index
].dst_offset
* 4;
3972 struct_size
= so_info
->stride
[so_info
->output
[so_index
].output_buffer
] * 4;
3974 switch (so_info
->output
[so_index
].num_components
) {
3976 elem_format
= PIPE_FORMAT_R32_FLOAT
;
3979 elem_format
= PIPE_FORMAT_R32G32_FLOAT
;
3982 elem_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
3985 elem_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
3988 assert(!"unexpected SO components length");
3989 elem_format
= PIPE_FORMAT_R32_FLOAT
;
3993 gen6_fill_buffer_SURFACE_STATE(dev
, buf
, bo_offset
, so
->buffer_size
,
3994 struct_size
, elem_format
, false, true, dw
, Elements(dw
));
3996 return gen6_emit_SURFACE_STATE(dev
, buf
->bo
, false, dw
, Elements(dw
), cp
);
/*
 * Emit an array of SAMPLER_STATE entries, four dwords per sampler, and
 * return the offset of the array.
 *
 * NOTE(review): this listing looks like a lossy extraction; several lines
 * (else branches, parts of the dw[] packing, the switch header) are not
 * visible, so only what can be read below is described.
 *
 * dev                    GEN6 or GEN7 (ILO_GPE_VALID_GEN(dev, 6, 7))
 * samplers               per-slot sampler states; entries may be NULL
 * sampler_views          per-slot views; entries may be NULL
 * sampler_border_colors  per-slot border color value stored in dw[2]; its
 *                        low five bits are asserted to be zero
 * num_samplers           asserted to be at most 16; state_len is
 *                        4 * num_samplers
 *
 * Per slot, as far as visible:
 *  - a slot with a NULL sampler or view is treated as a disabled sampler
 *  - anisotropic min/mag filters are used when max_anisotropy is non-zero
 *    and the target is not PIPE_TEXTURE_3D; max_aniso is
 *    max_anisotropy / 2 - 1 for values in [2, 16], BRW_ANISORATIO_16 above
 *    16 and BRW_ANISORATIO_2 otherwise
 *  - clamp_to_edge follows the minification filter being NEAREST
 *  - cube targets use TEXCOORDMODE_CUBE for seamless cube maps with a
 *    non-nearest filter and TEXCOORDMODE_CLAMP otherwise; 1D targets force
 *    wrap_t and wrap_r to TEXCOORDMODE_WRAP
 *  - LOD bias and min/max LOD are clamped and scaled to fixed point with
 *    scale 256 on GEN7+ and scale 64 otherwise, where base_level is set
 *    to view->u.tex.first_level
 *  - with PIPE_TEX_MIPFILTER_NONE and a non-zero min_lod, mag_filter is
 *    set to min_filter
 *  - non-normalized coordinates force BRW_MIPFILTER_NONE and assert the
 *    wrap modes, the RECT target and the nearest/linear filters
 */
4000 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info
*dev
,
4001 const struct pipe_sampler_state
**samplers
,
4002 const struct pipe_sampler_view
**sampler_views
,
4003 const uint32_t *sampler_border_colors
,
4007 const int state_align
= 32 / 4;
4008 const int state_len
= 4 * num_samplers
;
4009 uint32_t state_offset
, *dw
;
4012 ILO_GPE_VALID_GEN(dev
, 6, 7);
4015 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
4017 * "The sampler state is stored as an array of up to 16 elements..."
4019 assert(num_samplers
<= 16);
4024 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_STATE",
4025 state_len
, state_align
, &state_offset
);
4027 for (i
= 0; i
< num_samplers
; i
++) {
4028 const struct pipe_sampler_state
*sampler
= samplers
[i
];
4029 const struct pipe_sampler_view
*view
= sampler_views
[i
];
4030 const uint32_t border_color
= sampler_border_colors
[i
];
4031 enum pipe_texture_target target
;
4032 int mip_filter
, min_filter
, mag_filter
, max_aniso
;
4033 int lod_bias
, max_lod
, min_lod
, base_level
;
4034 int wrap_s
, wrap_t
, wrap_r
;
4037 /* there may be holes */
4038 if (!sampler
|| !view
) {
4039 /* disabled sampler */
4049 target
= view
->texture
->target
;
4051 /* determine mip/min/mag filters */
4052 mip_filter
= gen6_translate_tex_mipfilter(sampler
->min_mip_filter
);
4055 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
4057 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
4058 * surfaces of type SURFTYPE_3D."
4060 if (sampler
->max_anisotropy
&& target
!= PIPE_TEXTURE_3D
) {
4061 min_filter
= BRW_MAPFILTER_ANISOTROPIC
;
4062 mag_filter
= BRW_MAPFILTER_ANISOTROPIC
;
4064 if (sampler
->max_anisotropy
>= 2 && sampler
->max_anisotropy
<= 16)
4065 max_aniso
= sampler
->max_anisotropy
/ 2 - 1;
4066 else if (sampler
->max_anisotropy
> 16)
4067 max_aniso
= BRW_ANISORATIO_16
;
4069 max_aniso
= BRW_ANISORATIO_2
;
4072 min_filter
= gen6_translate_tex_filter(sampler
->min_img_filter
);
4073 mag_filter
= gen6_translate_tex_filter(sampler
->mag_img_filter
);
4080 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
4081 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
4082 * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
4083 * additionally clamping the texture coordinates to [0.0, 1.0].
4085 * The clamping is taken care of in the shaders. There are two filters
4086 * here, but let the minification one has a say.
4088 clamp_to_edge
= (sampler
->min_img_filter
== PIPE_TEX_FILTER_NEAREST
);
4091 case PIPE_TEXTURE_CUBE
:
4093 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
4095 * "When using cube map texture coordinates, only
4096 * TEXCOORDMODE_CLAMP and TEXCOORDMODE_CUBE settings are valid,
4097 * and each TC component must have the same Address Control
4100 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
4102 * "This field (Cube Surface Control Mode) must be set to
4103 * CUBECTRLMODE_PROGRAMMED"
4105 * Therefore, we cannot use "Cube Surface Control Mode" for semless
4106 * cube map filtering.
4108 if (sampler
->seamless_cube_map
&&
4109 (sampler
->min_img_filter
!= PIPE_TEX_FILTER_NEAREST
||
4110 sampler
->mag_img_filter
!= PIPE_TEX_FILTER_NEAREST
)) {
4111 wrap_s
= BRW_TEXCOORDMODE_CUBE
;
4112 wrap_t
= BRW_TEXCOORDMODE_CUBE
;
4113 wrap_r
= BRW_TEXCOORDMODE_CUBE
;
4116 wrap_s
= BRW_TEXCOORDMODE_CLAMP
;
4117 wrap_t
= BRW_TEXCOORDMODE_CLAMP
;
4118 wrap_r
= BRW_TEXCOORDMODE_CLAMP
;
4121 case PIPE_TEXTURE_1D
:
4122 wrap_s
= gen6_translate_tex_wrap(sampler
->wrap_s
, clamp_to_edge
);
4124 * as noted in the classic i965 driver, the HW may look at these
4125 * values so we need to set them to a safe mode
4127 wrap_t
= BRW_TEXCOORDMODE_WRAP
;
4128 wrap_r
= BRW_TEXCOORDMODE_WRAP
;
4131 wrap_s
= gen6_translate_tex_wrap(sampler
->wrap_s
, clamp_to_edge
);
4132 wrap_t
= gen6_translate_tex_wrap(sampler
->wrap_t
, clamp_to_edge
);
4133 wrap_r
= gen6_translate_tex_wrap(sampler
->wrap_r
, clamp_to_edge
);
4138 * Here is how the hardware calculate per-pixel LOD, from my reading of
4141 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
4142 * other ways. The number of texels is measured using level
4144 * 2) Bias is added to LOD.
4145 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
4146 * compared with Base to determine whether magnification or
4147 * minification is needed.
4148 * (if preclamp is disabled, LOD is compared with Base before
4150 * 4) If magnification is needed, or no mipmapping is requested, LOD is
4151 * set to floor(MinLod).
4152 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
4154 * With Gallium interface, Base is always zero and view->u.tex.first_level
4155 * specifies SurfMinLod.
4157 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
4159 * "[DevSNB] Errata: Incorrect behavior is observed in cases where
4160 * the min and mag mode filters are different and SurfMinLOD is
4161 * nonzero. The determination of MagMode uses the following equation
4162 * instead of the one in the above pseudocode: MagMode = (LOD +
4163 * SurfMinLOD - Base <= 0)"
4165 * As a way to work around that, we set Base to view->u.tex.first_level
4168 if (dev
->gen
>= ILO_GEN(7)) {
4169 const float scale
= 256.0f
;
4171 /* [-16.0, 16.0) in S4.8 */
4173 (CLAMP(sampler
->lod_bias
, -16.0f
, 15.9f
) * scale
);
4178 /* [0.0, 14.0] in U4.8 */
4179 max_lod
= (int) (CLAMP(sampler
->max_lod
, 0.0f
, 14.0f
) * scale
);
4180 min_lod
= (int) (CLAMP(sampler
->min_lod
, 0.0f
, 14.0f
) * scale
);
4183 const float scale
= 64.0f
;
4185 /* [-16.0, 16.0) in S4.6 */
4187 (CLAMP(sampler
->lod_bias
, -16.0f
, 15.9f
) * scale
);
4190 base_level
= view
->u
.tex
.first_level
;
4192 /* [0.0, 13.0] in U4.6 */
4193 max_lod
= (int) (CLAMP(sampler
->max_lod
, 0.0f
, 13.0f
) * scale
);
4194 min_lod
= (int) (CLAMP(sampler
->min_lod
, 0.0f
, 13.0f
) * scale
);
4198 * We want LOD to be clamped to determine magnification/minification,
4199 * and get set to zero when it is magnification or when mipmapping is
4200 * disabled. The hardware would set LOD to floor(MinLod) and that is a
4201 * problem when MinLod is greater than or equal to 1.0f.
4203 * We know that with Base being zero, it is always minification when
4204 * MinLod is non-zero. To meet our need, we just need to set MinLod to
4205 * zero and set MagFilter to MinFilter when mipmapping is disabled.
4207 if (sampler
->min_mip_filter
== PIPE_TEX_MIPFILTER_NONE
&& min_lod
) {
4209 mag_filter
= min_filter
;
4212 if (!sampler
->normalized_coords
) {
4213 /* work around a bug in util_blitter */
4214 mip_filter
= BRW_MIPFILTER_NONE
;
4217 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
4219 * "The following state must be set as indicated if this field
4220 * (Non-normalized Coordinate Enable) is enabled:
4222 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
4223 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
4224 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
4225 * - Mag Mode Filter must be MAPFILTER_NEAREST or
4227 * - Min Mode Filter must be MAPFILTER_NEAREST or
4229 * - Mip Mode Filter must be MIPFILTER_NONE.
4230 * - Min LOD must be 0.
4231 * - Max LOD must be 0.
4232 * - MIP Count must be 0.
4233 * - Surface Min LOD must be 0.
4234 * - Texture LOD Bias must be 0."
4236 assert(wrap_s
== BRW_TEXCOORDMODE_CLAMP
||
4237 wrap_s
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
4238 assert(wrap_t
== BRW_TEXCOORDMODE_CLAMP
||
4239 wrap_t
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
4240 assert(wrap_r
== BRW_TEXCOORDMODE_CLAMP
||
4241 wrap_r
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
4243 assert(target
== PIPE_TEXTURE_RECT
);
4245 assert(mag_filter
== BRW_MAPFILTER_NEAREST
||
4246 mag_filter
== BRW_MAPFILTER_LINEAR
);
4247 assert(min_filter
== BRW_MAPFILTER_NEAREST
||
4248 min_filter
== BRW_MAPFILTER_LINEAR
);
4249 assert(mip_filter
== BRW_MIPFILTER_NONE
);
4252 if (dev
->gen
>= ILO_GEN(7)) {
4260 /* enable EWA filtering unconditionally breaks some piglit tests */
4261 if (sampler
->max_anisotropy
)
4264 dw
[1] = min_lod
<< 20 |
4267 if (sampler
->compare_mode
!= PIPE_TEX_COMPARE_NONE
)
4268 dw
[1] |= gen6_translate_shadow_func(sampler
->compare_func
) << 1;
4270 assert(!(border_color
& 0x1f));
4271 dw
[2] = border_color
;
4273 dw
[3] = max_aniso
<< 19 |
4278 /* round the coordinates for linear filtering */
4279 if (min_filter
!= BRW_MAPFILTER_NEAREST
) {
4280 dw
[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN
|
4281 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN
|
4282 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN
) << 13;
4284 if (mag_filter
!= BRW_MAPFILTER_NEAREST
) {
4285 dw
[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG
|
4286 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG
|
4287 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG
) << 13;
4290 if (!sampler
->normalized_coords
)
4295 (min_filter
!= mag_filter
) << 27 |
4302 if (sampler
->compare_mode
!= PIPE_TEX_COMPARE_NONE
)
4303 dw
[0] |= gen6_translate_shadow_func(sampler
->compare_func
);
4305 dw
[1] = min_lod
<< 22 |
4311 assert(!(border_color
& 0x1f));
4312 dw
[2] = border_color
;
4314 dw
[3] = max_aniso
<< 19;
4316 /* round the coordinates for linear filtering */
4317 if (min_filter
!= BRW_MAPFILTER_NEAREST
) {
4318 dw
[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN
|
4319 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN
|
4320 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN
) << 13;
4322 if (mag_filter
!= BRW_MAPFILTER_NEAREST
) {
4323 dw
[3] |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG
|
4324 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG
|
4325 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG
) << 13;
4328 if (!sampler
->normalized_coords
)
4335 return state_offset
;
/*
 * Emit a 12-dword SAMPLER_BORDER_COLOR_STATE holding one border color in
 * several encodings, and return its offset.  GEN6 only.
 *
 * The four components are taken from color->f[0..3].  As visible below:
 *   dw[1..4]   raw float bits via fui()
 *   dw[5..6]   half floats, two per dword
 *   dw[9..10]  components clamped to [-1, 1], scaled by 32767
 *   dw[11]     components clamped to [-1, 1], scaled by 127
 *   dw[0]      components clamped to [0, 1], scaled by 255
 *   dw[7..8]   components clamped to [0, 1], scaled by 65535
 * rgba[] is clamped in place, so the [0, 1] encodings are computed from
 * values that have already been clamped to [-1, 1].
 *
 * NOTE(review): in the dw[9], dw[10] and dw[11] expressions the int16_t
 * and int8_t casts are promoted back to int before the OR.  A negative
 * component therefore looks like it sign-extends into the bits of the
 * neighbouring components, and shifting a negative value left is
 * undefined in C.  Masking with 0xffff / 0xff (or casting to the unsigned
 * type) before combining would avoid that -- confirm and fix.
 */
4339 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info
*dev
,
4340 const union pipe_color_union
*color
,
4343 const int state_align
= 32 / 4;
4344 const int state_len
= 12;
4345 uint32_t state_offset
, *dw
;
4347 color
->f
[0], color
->f
[1], color
->f
[2], color
->f
[3],
4350 ILO_GPE_VALID_GEN(dev
, 6, 6);
4352 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_BORDER_COLOR_STATE",
4353 state_len
, state_align
, &state_offset
);
4356 * This state is not documented in the Sandy Bridge PRM, but in the
4357 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
4361 dw
[1] = fui(rgba
[0]);
4362 dw
[2] = fui(rgba
[1]);
4363 dw
[3] = fui(rgba
[2]);
4364 dw
[4] = fui(rgba
[3]);
4367 dw
[5] = util_float_to_half(rgba
[0]) |
4368 util_float_to_half(rgba
[1]) << 16;
4369 dw
[6] = util_float_to_half(rgba
[2]) |
4370 util_float_to_half(rgba
[3]) << 16;
4372 /* clamp to [-1.0f, 1.0f] */
4373 rgba
[0] = CLAMP(rgba
[0], -1.0f
, 1.0f
);
4374 rgba
[1] = CLAMP(rgba
[1], -1.0f
, 1.0f
);
4375 rgba
[2] = CLAMP(rgba
[2], -1.0f
, 1.0f
);
4376 rgba
[3] = CLAMP(rgba
[3], -1.0f
, 1.0f
);
4379 dw
[9] = (int16_t) util_iround(rgba
[0] * 32767.0f
) |
4380 (int16_t) util_iround(rgba
[1] * 32767.0f
) << 16;
4381 dw
[10] = (int16_t) util_iround(rgba
[2] * 32767.0f
) |
4382 (int16_t) util_iround(rgba
[3] * 32767.0f
) << 16;
4385 dw
[11] = (int8_t) util_iround(rgba
[0] * 127.0f
) |
4386 (int8_t) util_iround(rgba
[1] * 127.0f
) << 8 |
4387 (int8_t) util_iround(rgba
[2] * 127.0f
) << 16 |
4388 (int8_t) util_iround(rgba
[3] * 127.0f
) << 24;
4390 /* clamp to [0.0f, 1.0f] */
4391 rgba
[0] = CLAMP(rgba
[0], 0.0f
, 1.0f
);
4392 rgba
[1] = CLAMP(rgba
[1], 0.0f
, 1.0f
);
4393 rgba
[2] = CLAMP(rgba
[2], 0.0f
, 1.0f
);
4394 rgba
[3] = CLAMP(rgba
[3], 0.0f
, 1.0f
);
4397 dw
[0] = (uint8_t) util_iround(rgba
[0] * 255.0f
) |
4398 (uint8_t) util_iround(rgba
[1] * 255.0f
) << 8 |
4399 (uint8_t) util_iround(rgba
[2] * 255.0f
) << 16 |
4400 (uint8_t) util_iround(rgba
[3] * 255.0f
) << 24;
4403 dw
[7] = (uint16_t) util_iround(rgba
[0] * 65535.0f
) |
4404 (uint16_t) util_iround(rgba
[1] * 65535.0f
) << 16;
4405 dw
[8] = (uint16_t) util_iround(rgba
[2] * 65535.0f
) |
4406 (uint16_t) util_iround(rgba
[3] * 65535.0f
) << 16;
4408 return state_offset
;
/*
 * Reserve space for a push constant buffer and return its offset.
 *
 * size is a byte count.  It is rounded up to a multiple of 32 bytes and
 * converted to dwords for the reservation, which is requested with an
 * alignment of 32 / 4.  Any bytes between size and the rounded-up length
 * are zeroed.  Valid on GEN6 and GEN7 (ILO_GPE_VALID_GEN(dev, 6, 7)).
 *
 * NOTE(review): pcb is presumably an out parameter receiving the reserved
 * pointer, but the statement that would store it is not visible in this
 * listing -- confirm.
 */
4412 gen6_emit_push_constant_buffer(const struct ilo_dev_info
*dev
,
4413 int size
, void **pcb
,
4417 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
4418 * to 32 bytes, and their sizes are specified in 256-bit units.
4420 const int state_align
= 32 / 4;
4421 const int state_len
= align(size
, 32) / 4;
4422 uint32_t state_offset
;
4425 ILO_GPE_VALID_GEN(dev
, 6, 7);
4427 buf
= ilo_cp_steal_ptr(cp
, "PUSH_CONSTANT_BUFFER",
4428 state_len
, state_align
, &state_offset
);
4430 /* zero out the unused range */
4431 if (size
< state_len
* 4)
4432 memset(&buf
[size
], 0, state_len
* 4 - size
);
4437 return state_offset
;
4441 gen6_estimate_command_size(const struct ilo_dev_info
*dev
,
4442 enum ilo_gpe_gen6_command cmd
,
4445 static const struct {
4448 } gen6_command_size_table
[ILO_GPE_GEN6_COMMAND_COUNT
] = {
4449 [ILO_GPE_GEN6_STATE_BASE_ADDRESS
] = { 0, 10 },
4450 [ILO_GPE_GEN6_STATE_SIP
] = { 0, 2 },
4451 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS
] = { 0, 1 },
4452 [ILO_GPE_GEN6_PIPELINE_SELECT
] = { 0, 1 },
4453 [ILO_GPE_GEN6_MEDIA_VFE_STATE
] = { 0, 8 },
4454 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD
] = { 0, 4 },
4455 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD
] = { 0, 4 },
4456 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE
] = { 0, 2 },
4457 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH
] = { 0, 2 },
4458 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER
] = { 17, 1 },
4459 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS
] = { 0, 4 },
4460 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS
] = { 0, 4 },
4461 [ILO_GPE_GEN6_3DSTATE_URB
] = { 0, 3 },
4462 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS
] = { 1, 4 },
4463 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS
] = { 1, 2 },
4464 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER
] = { 0, 3 },
4465 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS
] = { 0, 4 },
4466 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS
] = { 0, 4 },
4467 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS
] = { 0, 2 },
4468 [ILO_GPE_GEN6_3DSTATE_VS
] = { 0, 6 },
4469 [ILO_GPE_GEN6_3DSTATE_GS
] = { 0, 7 },
4470 [ILO_GPE_GEN6_3DSTATE_CLIP
] = { 0, 4 },
4471 [ILO_GPE_GEN6_3DSTATE_SF
] = { 0, 20 },
4472 [ILO_GPE_GEN6_3DSTATE_WM
] = { 0, 9 },
4473 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS
] = { 0, 5 },
4474 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS
] = { 0, 5 },
4475 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS
] = { 0, 5 },
4476 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK
] = { 0, 2 },
4477 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE
] = { 0, 4 },
4478 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER
] = { 0, 7 },
4479 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET
] = { 0, 2 },
4480 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN
] = { 0, 33 },
4481 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE
] = { 0, 3 },
4482 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS
] = { 0, 3 },
4483 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX
] = { 0, 4 },
4484 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE
] = { 0, 3 },
4485 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER
] = { 0, 3 },
4486 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER
] = { 0, 3 },
4487 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS
] = { 0, 2 },
4488 [ILO_GPE_GEN6_PIPE_CONTROL
] = { 0, 5 },
4489 [ILO_GPE_GEN6_3DPRIMITIVE
] = { 0, 6 },
4491 const int header
= gen6_command_size_table
[cmd
].header
;
4492 const int body
= gen6_command_size_table
[arg
].body
;
4493 const int count
= arg
;
4495 ILO_GPE_VALID_GEN(dev
, 6, 6);
4496 assert(cmd
< ILO_GPE_GEN6_COMMAND_COUNT
);
4498 return (likely(count
)) ? header
+ body
* count
: 0;
/*
 * Estimate the size of a GEN6 dynamic or surface state.
 *
 * Each state is described by an alignment, a per-element body length and
 * an is_array flag.  state must be less than ILO_GPE_GEN6_STATE_COUNT and
 * the device must be GEN6.  arg is the element count.
 *
 * Three estimate formulas are visible below:
 *   (alignment - 1) + body * count
 *   (alignment - 1) + body
 *   an extra util_align_npot(body, alignment) * (count - 1)
 * Presumably the first applies to array states and the other two to
 * states whose elements are aligned individually, but the selecting
 * conditions and the return statement are not visible in this listing --
 * confirm.
 */
4502 gen6_estimate_state_size(const struct ilo_dev_info
*dev
,
4503 enum ilo_gpe_gen6_state state
,
4506 static const struct {
4510 } gen6_state_size_table
[ILO_GPE_GEN6_STATE_COUNT
] = {
4511 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA
] = { 8, 8, true },
4512 [ILO_GPE_GEN6_SF_VIEWPORT
] = { 8, 8, true },
4513 [ILO_GPE_GEN6_CLIP_VIEWPORT
] = { 8, 4, true },
4514 [ILO_GPE_GEN6_CC_VIEWPORT
] = { 8, 2, true },
4515 [ILO_GPE_GEN6_COLOR_CALC_STATE
] = { 16, 6, false },
4516 [ILO_GPE_GEN6_BLEND_STATE
] = { 16, 2, true },
4517 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE
] = { 16, 3, false },
4518 [ILO_GPE_GEN6_SCISSOR_RECT
] = { 8, 2, true },
4519 [ILO_GPE_GEN6_BINDING_TABLE_STATE
] = { 8, 1, true },
4520 [ILO_GPE_GEN6_SURFACE_STATE
] = { 8, 6, false },
4521 [ILO_GPE_GEN6_SAMPLER_STATE
] = { 8, 4, true },
4522 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE
] = { 8, 12, false },
4523 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER
] = { 8, 1, true },
4525 const int alignment
= gen6_state_size_table
[state
].alignment
;
4526 const int body
= gen6_state_size_table
[state
].body
;
4527 const bool is_array
= gen6_state_size_table
[state
].is_array
;
4528 const int count
= arg
;
4531 ILO_GPE_VALID_GEN(dev
, 6, 6);
4532 assert(state
< ILO_GPE_GEN6_STATE_COUNT
);
4534 if (likely(count
)) {
4536 estimate
= (alignment
- 1) + body
* count
;
4539 estimate
= (alignment
- 1) + body
;
4540 /* all states are aligned */
4542 estimate
+= util_align_npot(body
, alignment
) * (count
- 1);
/*
 * Function table of the GEN6 GPE.
 *
 * The two estimate hooks point at gen6_estimate_command_size() and
 * gen6_estimate_state_size().  Every other entry is produced by
 * GEN6_SET(name), which pastes tokens to initialize the member
 * emit_<name> with the function gen6_emit_<name>.
 */
4552 static const struct ilo_gpe_gen6 gen6_gpe
= {
4553 .estimate_command_size
= gen6_estimate_command_size
,
4554 .estimate_state_size
= gen6_estimate_state_size
,
4556 #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
4557 GEN6_SET(STATE_BASE_ADDRESS
),
4558 GEN6_SET(STATE_SIP
),
4559 GEN6_SET(3DSTATE_VF_STATISTICS
),
4560 GEN6_SET(PIPELINE_SELECT
),
4561 GEN6_SET(MEDIA_VFE_STATE
),
4562 GEN6_SET(MEDIA_CURBE_LOAD
),
4563 GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD
),
4564 GEN6_SET(MEDIA_GATEWAY_STATE
),
4565 GEN6_SET(MEDIA_STATE_FLUSH
),
4566 GEN6_SET(MEDIA_OBJECT_WALKER
),
4567 GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS
),
4568 GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS
),
4569 GEN6_SET(3DSTATE_URB
),
4570 GEN6_SET(3DSTATE_VERTEX_BUFFERS
),
4571 GEN6_SET(3DSTATE_VERTEX_ELEMENTS
),
4572 GEN6_SET(3DSTATE_INDEX_BUFFER
),
4573 GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS
),
4574 GEN6_SET(3DSTATE_CC_STATE_POINTERS
),
4575 GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS
),
4576 GEN6_SET(3DSTATE_VS
),
4577 GEN6_SET(3DSTATE_GS
),
4578 GEN6_SET(3DSTATE_CLIP
),
4579 GEN6_SET(3DSTATE_SF
),
4580 GEN6_SET(3DSTATE_WM
),
4581 GEN6_SET(3DSTATE_CONSTANT_VS
),
4582 GEN6_SET(3DSTATE_CONSTANT_GS
),
4583 GEN6_SET(3DSTATE_CONSTANT_PS
),
4584 GEN6_SET(3DSTATE_SAMPLE_MASK
),
4585 GEN6_SET(3DSTATE_DRAWING_RECTANGLE
),
4586 GEN6_SET(3DSTATE_DEPTH_BUFFER
),
4587 GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET
),
4588 GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN
),
4589 GEN6_SET(3DSTATE_LINE_STIPPLE
),
4590 GEN6_SET(3DSTATE_AA_LINE_PARAMETERS
),
4591 GEN6_SET(3DSTATE_GS_SVB_INDEX
),
4592 GEN6_SET(3DSTATE_MULTISAMPLE
),
4593 GEN6_SET(3DSTATE_STENCIL_BUFFER
),
4594 GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER
),
4595 GEN6_SET(3DSTATE_CLEAR_PARAMS
),
4596 GEN6_SET(PIPE_CONTROL
),
4597 GEN6_SET(3DPRIMITIVE
),
4598 GEN6_SET(INTERFACE_DESCRIPTOR_DATA
),
4599 GEN6_SET(SF_VIEWPORT
),
4600 GEN6_SET(CLIP_VIEWPORT
),
4601 GEN6_SET(CC_VIEWPORT
),
4602 GEN6_SET(COLOR_CALC_STATE
),
4603 GEN6_SET(BLEND_STATE
),
4604 GEN6_SET(DEPTH_STENCIL_STATE
),
4605 GEN6_SET(SCISSOR_RECT
),
4606 GEN6_SET(BINDING_TABLE_STATE
),
4607 GEN6_SET(surf_SURFACE_STATE
),
4608 GEN6_SET(view_SURFACE_STATE
),
4609 GEN6_SET(cbuf_SURFACE_STATE
),
4610 GEN6_SET(so_SURFACE_STATE
),
4611 GEN6_SET(SAMPLER_STATE
),
4612 GEN6_SET(SAMPLER_BORDER_COLOR_STATE
),
4613 GEN6_SET(push_constant_buffer
),
4617 const struct ilo_gpe_gen6
*
4618 ilo_gpe_gen6_get(void)