/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
33 #include "ilo_context.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
42 * Translate winsys tiling to hardware tiling.
45 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling
)
48 case INTEL_TILING_NONE
:
51 return BRW_SURFACE_TILED
;
53 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
55 assert(!"unknown tiling");
61 * Translate a pipe primitive type to the matching hardware primitive type.
64 ilo_gpe_gen6_translate_pipe_prim(unsigned prim
)
66 static const int prim_mapping
[PIPE_PRIM_MAX
] = {
67 [PIPE_PRIM_POINTS
] = _3DPRIM_POINTLIST
,
68 [PIPE_PRIM_LINES
] = _3DPRIM_LINELIST
,
69 [PIPE_PRIM_LINE_LOOP
] = _3DPRIM_LINELOOP
,
70 [PIPE_PRIM_LINE_STRIP
] = _3DPRIM_LINESTRIP
,
71 [PIPE_PRIM_TRIANGLES
] = _3DPRIM_TRILIST
,
72 [PIPE_PRIM_TRIANGLE_STRIP
] = _3DPRIM_TRISTRIP
,
73 [PIPE_PRIM_TRIANGLE_FAN
] = _3DPRIM_TRIFAN
,
74 [PIPE_PRIM_QUADS
] = _3DPRIM_QUADLIST
,
75 [PIPE_PRIM_QUAD_STRIP
] = _3DPRIM_QUADSTRIP
,
76 [PIPE_PRIM_POLYGON
] = _3DPRIM_POLYGON
,
77 [PIPE_PRIM_LINES_ADJACENCY
] = _3DPRIM_LINELIST_ADJ
,
78 [PIPE_PRIM_LINE_STRIP_ADJACENCY
] = _3DPRIM_LINESTRIP_ADJ
,
79 [PIPE_PRIM_TRIANGLES_ADJACENCY
] = _3DPRIM_TRILIST_ADJ
,
80 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
] = _3DPRIM_TRISTRIP_ADJ
,
83 assert(prim_mapping
[prim
]);
85 return prim_mapping
[prim
];
89 * Translate a pipe texture target to the matching hardware surface type.
92 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target
)
96 return BRW_SURFACE_BUFFER
;
98 case PIPE_TEXTURE_1D_ARRAY
:
99 return BRW_SURFACE_1D
;
100 case PIPE_TEXTURE_2D
:
101 case PIPE_TEXTURE_RECT
:
102 case PIPE_TEXTURE_2D_ARRAY
:
103 return BRW_SURFACE_2D
;
104 case PIPE_TEXTURE_3D
:
105 return BRW_SURFACE_3D
;
106 case PIPE_TEXTURE_CUBE
:
107 case PIPE_TEXTURE_CUBE_ARRAY
:
108 return BRW_SURFACE_CUBE
;
110 assert(!"unknown texture target");
111 return BRW_SURFACE_BUFFER
;
116 * Translate a depth/stencil pipe format to the matching hardware
117 * format. Return -1 on errors.
120 gen6_translate_depth_format(enum pipe_format format
)
123 case PIPE_FORMAT_Z16_UNORM
:
124 return BRW_DEPTHFORMAT_D16_UNORM
;
125 case PIPE_FORMAT_Z32_FLOAT
:
126 return BRW_DEPTHFORMAT_D32_FLOAT
;
127 case PIPE_FORMAT_Z24X8_UNORM
:
128 return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
;
129 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
130 return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
131 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
132 return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT
;
139 * Translate a pipe logicop to the matching hardware logicop.
142 gen6_translate_pipe_logicop(unsigned logicop
)
145 case PIPE_LOGICOP_CLEAR
: return BRW_LOGICOPFUNCTION_CLEAR
;
146 case PIPE_LOGICOP_NOR
: return BRW_LOGICOPFUNCTION_NOR
;
147 case PIPE_LOGICOP_AND_INVERTED
: return BRW_LOGICOPFUNCTION_AND_INVERTED
;
148 case PIPE_LOGICOP_COPY_INVERTED
: return BRW_LOGICOPFUNCTION_COPY_INVERTED
;
149 case PIPE_LOGICOP_AND_REVERSE
: return BRW_LOGICOPFUNCTION_AND_REVERSE
;
150 case PIPE_LOGICOP_INVERT
: return BRW_LOGICOPFUNCTION_INVERT
;
151 case PIPE_LOGICOP_XOR
: return BRW_LOGICOPFUNCTION_XOR
;
152 case PIPE_LOGICOP_NAND
: return BRW_LOGICOPFUNCTION_NAND
;
153 case PIPE_LOGICOP_AND
: return BRW_LOGICOPFUNCTION_AND
;
154 case PIPE_LOGICOP_EQUIV
: return BRW_LOGICOPFUNCTION_EQUIV
;
155 case PIPE_LOGICOP_NOOP
: return BRW_LOGICOPFUNCTION_NOOP
;
156 case PIPE_LOGICOP_OR_INVERTED
: return BRW_LOGICOPFUNCTION_OR_INVERTED
;
157 case PIPE_LOGICOP_COPY
: return BRW_LOGICOPFUNCTION_COPY
;
158 case PIPE_LOGICOP_OR_REVERSE
: return BRW_LOGICOPFUNCTION_OR_REVERSE
;
159 case PIPE_LOGICOP_OR
: return BRW_LOGICOPFUNCTION_OR
;
160 case PIPE_LOGICOP_SET
: return BRW_LOGICOPFUNCTION_SET
;
162 assert(!"unknown logicop function");
163 return BRW_LOGICOPFUNCTION_CLEAR
;
168 * Translate a pipe blend function to the matching hardware blend function.
171 gen6_translate_pipe_blend(unsigned blend
)
174 case PIPE_BLEND_ADD
: return BRW_BLENDFUNCTION_ADD
;
175 case PIPE_BLEND_SUBTRACT
: return BRW_BLENDFUNCTION_SUBTRACT
;
176 case PIPE_BLEND_REVERSE_SUBTRACT
: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT
;
177 case PIPE_BLEND_MIN
: return BRW_BLENDFUNCTION_MIN
;
178 case PIPE_BLEND_MAX
: return BRW_BLENDFUNCTION_MAX
;
180 assert(!"unknown blend function");
181 return BRW_BLENDFUNCTION_ADD
;
186 * Translate a pipe blend factor to the matching hardware blend factor.
189 gen6_translate_pipe_blendfactor(unsigned blendfactor
)
191 switch (blendfactor
) {
192 case PIPE_BLENDFACTOR_ONE
: return BRW_BLENDFACTOR_ONE
;
193 case PIPE_BLENDFACTOR_SRC_COLOR
: return BRW_BLENDFACTOR_SRC_COLOR
;
194 case PIPE_BLENDFACTOR_SRC_ALPHA
: return BRW_BLENDFACTOR_SRC_ALPHA
;
195 case PIPE_BLENDFACTOR_DST_ALPHA
: return BRW_BLENDFACTOR_DST_ALPHA
;
196 case PIPE_BLENDFACTOR_DST_COLOR
: return BRW_BLENDFACTOR_DST_COLOR
;
197 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE
;
198 case PIPE_BLENDFACTOR_CONST_COLOR
: return BRW_BLENDFACTOR_CONST_COLOR
;
199 case PIPE_BLENDFACTOR_CONST_ALPHA
: return BRW_BLENDFACTOR_CONST_ALPHA
;
200 case PIPE_BLENDFACTOR_SRC1_COLOR
: return BRW_BLENDFACTOR_SRC1_COLOR
;
201 case PIPE_BLENDFACTOR_SRC1_ALPHA
: return BRW_BLENDFACTOR_SRC1_ALPHA
;
202 case PIPE_BLENDFACTOR_ZERO
: return BRW_BLENDFACTOR_ZERO
;
203 case PIPE_BLENDFACTOR_INV_SRC_COLOR
: return BRW_BLENDFACTOR_INV_SRC_COLOR
;
204 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
: return BRW_BLENDFACTOR_INV_SRC_ALPHA
;
205 case PIPE_BLENDFACTOR_INV_DST_ALPHA
: return BRW_BLENDFACTOR_INV_DST_ALPHA
;
206 case PIPE_BLENDFACTOR_INV_DST_COLOR
: return BRW_BLENDFACTOR_INV_DST_COLOR
;
207 case PIPE_BLENDFACTOR_INV_CONST_COLOR
: return BRW_BLENDFACTOR_INV_CONST_COLOR
;
208 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
: return BRW_BLENDFACTOR_INV_CONST_ALPHA
;
209 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
: return BRW_BLENDFACTOR_INV_SRC1_COLOR
;
210 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
: return BRW_BLENDFACTOR_INV_SRC1_ALPHA
;
212 assert(!"unknown blend factor");
213 return BRW_BLENDFACTOR_ONE
;
218 * Translate a pipe stencil op to the matching hardware stencil op.
221 gen6_translate_pipe_stencil_op(unsigned stencil_op
)
223 switch (stencil_op
) {
224 case PIPE_STENCIL_OP_KEEP
: return BRW_STENCILOP_KEEP
;
225 case PIPE_STENCIL_OP_ZERO
: return BRW_STENCILOP_ZERO
;
226 case PIPE_STENCIL_OP_REPLACE
: return BRW_STENCILOP_REPLACE
;
227 case PIPE_STENCIL_OP_INCR
: return BRW_STENCILOP_INCRSAT
;
228 case PIPE_STENCIL_OP_DECR
: return BRW_STENCILOP_DECRSAT
;
229 case PIPE_STENCIL_OP_INCR_WRAP
: return BRW_STENCILOP_INCR
;
230 case PIPE_STENCIL_OP_DECR_WRAP
: return BRW_STENCILOP_DECR
;
231 case PIPE_STENCIL_OP_INVERT
: return BRW_STENCILOP_INVERT
;
233 assert(!"unknown stencil op");
234 return BRW_STENCILOP_KEEP
;
239 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
242 gen6_translate_tex_mipfilter(unsigned filter
)
245 case PIPE_TEX_MIPFILTER_NEAREST
: return BRW_MIPFILTER_NEAREST
;
246 case PIPE_TEX_MIPFILTER_LINEAR
: return BRW_MIPFILTER_LINEAR
;
247 case PIPE_TEX_MIPFILTER_NONE
: return BRW_MIPFILTER_NONE
;
249 assert(!"unknown mipfilter");
250 return BRW_MIPFILTER_NONE
;
255 * Translate a pipe texture filter to the matching hardware mapfilter.
258 gen6_translate_tex_filter(unsigned filter
)
261 case PIPE_TEX_FILTER_NEAREST
: return BRW_MAPFILTER_NEAREST
;
262 case PIPE_TEX_FILTER_LINEAR
: return BRW_MAPFILTER_LINEAR
;
264 assert(!"unknown sampler filter");
265 return BRW_MAPFILTER_NEAREST
;
270 * Translate a pipe texture coordinate wrapping mode to the matching hardware
274 gen6_translate_tex_wrap(unsigned wrap
, bool clamp_to_edge
)
276 /* clamp to edge or border? */
277 if (wrap
== PIPE_TEX_WRAP_CLAMP
) {
278 wrap
= (clamp_to_edge
) ?
279 PIPE_TEX_WRAP_CLAMP_TO_EDGE
: PIPE_TEX_WRAP_CLAMP_TO_BORDER
;
283 case PIPE_TEX_WRAP_REPEAT
: return BRW_TEXCOORDMODE_WRAP
;
284 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
: return BRW_TEXCOORDMODE_CLAMP
;
285 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
: return BRW_TEXCOORDMODE_CLAMP_BORDER
;
286 case PIPE_TEX_WRAP_MIRROR_REPEAT
: return BRW_TEXCOORDMODE_MIRROR
;
287 case PIPE_TEX_WRAP_CLAMP
:
288 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
289 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
290 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
292 assert(!"unknown sampler wrap mode");
293 return BRW_TEXCOORDMODE_WRAP
;
298 * Translate a pipe DSA test function to the matching hardware compare
302 gen6_translate_dsa_func(unsigned func
)
305 case PIPE_FUNC_NEVER
: return BRW_COMPAREFUNCTION_NEVER
;
306 case PIPE_FUNC_LESS
: return BRW_COMPAREFUNCTION_LESS
;
307 case PIPE_FUNC_EQUAL
: return BRW_COMPAREFUNCTION_EQUAL
;
308 case PIPE_FUNC_LEQUAL
: return BRW_COMPAREFUNCTION_LEQUAL
;
309 case PIPE_FUNC_GREATER
: return BRW_COMPAREFUNCTION_GREATER
;
310 case PIPE_FUNC_NOTEQUAL
: return BRW_COMPAREFUNCTION_NOTEQUAL
;
311 case PIPE_FUNC_GEQUAL
: return BRW_COMPAREFUNCTION_GEQUAL
;
312 case PIPE_FUNC_ALWAYS
: return BRW_COMPAREFUNCTION_ALWAYS
;
314 assert(!"unknown depth/stencil/alpha test function");
315 return BRW_COMPAREFUNCTION_NEVER
;
320 * Translate a pipe shadow compare function to the matching hardware shadow
324 gen6_translate_shadow_func(unsigned func
)
327 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
328 * comparison, and 1.0 is returned when the comparison is true.
330 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
331 * the comparison, and 0.0 is returned when the comparison is true.
334 case PIPE_FUNC_NEVER
: return BRW_PREFILTER_ALWAYS
;
335 case PIPE_FUNC_LESS
: return BRW_PREFILTER_LEQUAL
;
336 case PIPE_FUNC_EQUAL
: return BRW_PREFILTER_NOTEQUAL
;
337 case PIPE_FUNC_LEQUAL
: return BRW_PREFILTER_LESS
;
338 case PIPE_FUNC_GREATER
: return BRW_PREFILTER_GEQUAL
;
339 case PIPE_FUNC_NOTEQUAL
: return BRW_PREFILTER_EQUAL
;
340 case PIPE_FUNC_GEQUAL
: return BRW_PREFILTER_GREATER
;
341 case PIPE_FUNC_ALWAYS
: return BRW_PREFILTER_NEVER
;
343 assert(!"unknown shadow compare function");
344 return BRW_PREFILTER_NEVER
;
349 * Translate an index size to the matching hardware index format.
352 gen6_translate_index_size(int size
)
355 case 4: return BRW_INDEX_DWORD
;
356 case 2: return BRW_INDEX_WORD
;
357 case 1: return BRW_INDEX_BYTE
;
359 assert(!"unknown index size");
360 return BRW_INDEX_BYTE
;
365 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info
*dev
,
366 struct intel_bo
*general_state_bo
,
367 struct intel_bo
*surface_state_bo
,
368 struct intel_bo
*dynamic_state_bo
,
369 struct intel_bo
*indirect_object_bo
,
370 struct intel_bo
*instruction_bo
,
371 uint32_t general_state_size
,
372 uint32_t dynamic_state_size
,
373 uint32_t indirect_object_size
,
374 uint32_t instruction_size
,
377 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x01);
378 const uint8_t cmd_len
= 10;
380 ILO_GPE_VALID_GEN(dev
, 6, 7);
382 /* 4K-page aligned */
383 assert(((general_state_size
| dynamic_state_size
|
384 indirect_object_size
| instruction_size
) & 0xfff) == 0);
386 ilo_cp_begin(cp
, cmd_len
);
387 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
389 ilo_cp_write_bo(cp
, 1, general_state_bo
,
392 ilo_cp_write_bo(cp
, 1, surface_state_bo
,
393 INTEL_DOMAIN_SAMPLER
,
395 ilo_cp_write_bo(cp
, 1, dynamic_state_bo
,
396 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
398 ilo_cp_write_bo(cp
, 1, indirect_object_bo
,
401 ilo_cp_write_bo(cp
, 1, instruction_bo
,
402 INTEL_DOMAIN_INSTRUCTION
,
405 if (general_state_size
) {
406 ilo_cp_write_bo(cp
, general_state_size
| 1, general_state_bo
,
411 /* skip range check */
415 if (dynamic_state_size
) {
416 ilo_cp_write_bo(cp
, dynamic_state_size
| 1, dynamic_state_bo
,
417 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
421 /* skip range check */
422 ilo_cp_write(cp
, 0xfffff000 + 1);
425 if (indirect_object_size
) {
426 ilo_cp_write_bo(cp
, indirect_object_size
| 1, indirect_object_bo
,
431 /* skip range check */
432 ilo_cp_write(cp
, 0xfffff000 + 1);
435 if (instruction_size
) {
436 ilo_cp_write_bo(cp
, instruction_size
| 1, instruction_bo
,
437 INTEL_DOMAIN_INSTRUCTION
,
441 /* skip range check */
/**
 * Emit STATE_SIP (2 dwords): the header followed by \p sip.
 *
 * The length field (cmd_len - 2) belongs in the header dword, as in every
 * other emitter in this file; it used to be OR'ed into the size passed to
 * ilo_cp_begin() instead, which only worked because it evaluates to 0 here.
 *
 * NOTE(review): the type of "sip", the trailing "cp" parameter and the
 * closing ilo_cp_end() were reconstructed; confirm against the upstream
 * file.
 */
static void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_VF_STATISTICS, a single dword with \p enable OR'ed into the
 * header.
 *
 * NOTE(review): the type of "enable", the trailing "cp" parameter and the
 * closing ilo_cp_end() were reconstructed; confirm against the upstream
 * file.
 */
static void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | enable);
   ilo_cp_end(cp);
}
/**
 * Emit PIPELINE_SELECT, a single dword with \p pipeline OR'ed into the
 * header.
 *
 * \param pipeline  must be 0x0 or 0x1
 *
 * NOTE(review): the type of "pipeline", the trailing "cp" parameter and the
 * closing ilo_cp_end() were reconstructed; confirm against the upstream
 * file.
 */
static void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* only two pipelines can be selected */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_VFE_STATE (8 dwords).  Only valid on Gen6.
 *
 * \param max_threads      stored as (max_threads - 1) in bits 16 and up of
 *                         dword 2
 * \param num_urb_entries  stored in bits 8 and up of dword 2
 * \param urb_entry_size   URB entry allocation size, stored in bits 16 and
 *                         up of dword 4
 *
 * NOTE(review): the "urb_entry_size" and "cp" parameter declarations, the
 * two trailing zero dwords (needed to reach cmd_len) and the closing
 * ilo_cp_end() were reconstructed; confirm against the upstream file.
 */
static void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
   const uint8_t cmd_len = 8;
   uint32_t dw2, dw4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw2 = (max_threads - 1) << 16 |
         num_urb_entries << 8 |
         1 << 7 | /* Reset Gateway Timer */
         1 << 6;  /* Bypass Gateway Control */

   dw4 = urb_entry_size << 16 |  /* URB Entry Allocation Size */
         480;                    /* CURBE Allocation Size */

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, 0); /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_CURBE_LOAD (4 dwords).  Only valid on Gen6.
 *
 * \param buf   offset of the constant buffer; must be 32-byte aligned
 * \param size  size of the buffer; rounded up to a multiple of 32 before it
 *              is written
 *
 * NOTE(review): the trailing "cp" parameter and the closing ilo_cp_end()
 * were reconstructed; confirm against the upstream file.
 */
static void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);
   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   size = align(size, 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (4 dwords).  Only valid on Gen6.
 *
 * \param offset   offset of the interface descriptors; must be 32-byte
 *                 aligned
 * \param num_ids  number of interface descriptors; the total length written
 *                 is num_ids * 8 dwords, expressed in bytes
 *
 * NOTE(review): the trailing "cp" parameter and the closing ilo_cp_end()
 * were reconstructed; confirm against the upstream file.
 */
static void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   /* every ID has 8 DWords */
   ilo_cp_write(cp, num_ids * 8 * 4);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_GATEWAY_STATE (2 dwords).  Only valid on Gen6.
 *
 * NOTE(review): the packing of "id", "byte" and "thread_count" into dword 1
 * was not visible in the reviewed listing and is reconstructed here, along
 * with the trailing "cp" parameter and the closing ilo_cp_end(); confirm
 * against the upstream file.
 */
static void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
   const uint8_t cmd_len = 2;
   uint32_t dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw1 = id << 16 |
         byte << 8 |
         thread_count;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_STATE_FLUSH (2 dwords).  Only valid on Gen6.
 *
 * \param thread_count_water_mark  stored in bits 16 and up of dword 1
 *
 * NOTE(review): the second operand of dword 1 ("barrier_mask") and its
 * parameter declaration were not visible in the reviewed listing and are
 * reconstructed here, along with the trailing "cp" parameter and the closing
 * ilo_cp_end(); confirm against the upstream file.
 */
static void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
   const uint8_t cmd_len = 2;
   uint32_t dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw1 = thread_count_water_mark << 16 |
         barrier_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
/**
 * Placeholder for MEDIA_OBJECT_WALKER: the command is not supported and
 * calling this function only trips an assertion.
 *
 * NOTE(review): the trailing "cp" parameter was reconstructed; confirm
 * against the upstream file.
 */
static void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
622 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info
*dev
,
623 uint32_t vs_binding_table
,
624 uint32_t gs_binding_table
,
625 uint32_t ps_binding_table
,
628 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x01);
629 const uint8_t cmd_len
= 4;
631 ILO_GPE_VALID_GEN(dev
, 6, 6);
633 ilo_cp_begin(cp
, cmd_len
);
634 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
635 GEN6_BINDING_TABLE_MODIFY_VS
|
636 GEN6_BINDING_TABLE_MODIFY_GS
|
637 GEN6_BINDING_TABLE_MODIFY_PS
);
638 ilo_cp_write(cp
, vs_binding_table
);
639 ilo_cp_write(cp
, gs_binding_table
);
640 ilo_cp_write(cp
, ps_binding_table
);
645 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info
*dev
,
646 uint32_t vs_sampler_state
,
647 uint32_t gs_sampler_state
,
648 uint32_t ps_sampler_state
,
651 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x02);
652 const uint8_t cmd_len
= 4;
654 ILO_GPE_VALID_GEN(dev
, 6, 6);
656 ilo_cp_begin(cp
, cmd_len
);
657 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
658 VS_SAMPLER_STATE_CHANGE
|
659 GS_SAMPLER_STATE_CHANGE
|
660 PS_SAMPLER_STATE_CHANGE
);
661 ilo_cp_write(cp
, vs_sampler_state
);
662 ilo_cp_write(cp
, gs_sampler_state
);
663 ilo_cp_write(cp
, ps_sampler_state
);
668 gen6_emit_3DSTATE_URB(const struct ilo_dev_info
*dev
,
669 int vs_total_size
, int gs_total_size
,
670 int vs_entry_size
, int gs_entry_size
,
673 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x05);
674 const uint8_t cmd_len
= 3;
675 const int row_size
= 128; /* 1024 bits */
676 int vs_alloc_size
, gs_alloc_size
;
677 int vs_num_entries
, gs_num_entries
;
679 ILO_GPE_VALID_GEN(dev
, 6, 6);
681 /* in 1024-bit URB rows */
682 vs_alloc_size
= (vs_entry_size
+ row_size
- 1) / row_size
;
683 gs_alloc_size
= (gs_entry_size
+ row_size
- 1) / row_size
;
685 /* the valid range is [1, 5] */
690 assert(vs_alloc_size
<= 5 && gs_alloc_size
<= 5);
692 /* the valid range is [24, 256] in multiples of 4 */
693 vs_num_entries
= (vs_total_size
/ row_size
/ vs_alloc_size
) & ~3;
694 if (vs_num_entries
> 256)
695 vs_num_entries
= 256;
696 assert(vs_num_entries
>= 24);
698 /* the valid range is [0, 256] in multiples of 4 */
699 gs_num_entries
= (gs_total_size
/ row_size
/ gs_alloc_size
) & ~3;
700 if (gs_num_entries
> 256)
701 gs_num_entries
= 256;
703 ilo_cp_begin(cp
, cmd_len
);
704 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
705 ilo_cp_write(cp
, (vs_alloc_size
- 1) << GEN6_URB_VS_SIZE_SHIFT
|
706 vs_num_entries
<< GEN6_URB_VS_ENTRIES_SHIFT
);
707 ilo_cp_write(cp
, gs_num_entries
<< GEN6_URB_GS_ENTRIES_SHIFT
|
708 (gs_alloc_size
- 1) << GEN6_URB_GS_SIZE_SHIFT
);
713 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info
*dev
,
714 const struct pipe_vertex_buffer
*vbuffers
,
715 uint64_t vbuffer_mask
,
716 const struct ilo_ve_state
*ve
,
719 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x08);
723 ILO_GPE_VALID_GEN(dev
, 6, 7);
726 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
728 * "From 1 to 33 VBs can be specified..."
730 assert(vbuffer_mask
<= (1UL << 33));
737 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
738 const unsigned pipe_idx
= ve
->vb_mapping
[hw_idx
];
740 if (vbuffer_mask
& (1 << pipe_idx
))
744 ilo_cp_begin(cp
, cmd_len
);
745 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
747 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
748 const unsigned instance_divisor
= ve
->instance_divisors
[hw_idx
];
749 const unsigned pipe_idx
= ve
->vb_mapping
[hw_idx
];
750 const struct pipe_vertex_buffer
*vb
= &vbuffers
[pipe_idx
];
753 if (!(vbuffer_mask
& (1 << pipe_idx
)))
756 dw
= hw_idx
<< GEN6_VB0_INDEX_SHIFT
;
758 if (instance_divisor
)
759 dw
|= GEN6_VB0_ACCESS_INSTANCEDATA
;
761 dw
|= GEN6_VB0_ACCESS_VERTEXDATA
;
763 if (dev
->gen
>= ILO_GEN(7))
764 dw
|= GEN7_VB0_ADDRESS_MODIFYENABLE
;
766 /* use null vb if there is no buffer or the stride is out of range */
767 if (vb
->buffer
&& vb
->stride
<= 2048) {
768 const struct ilo_buffer
*buf
= ilo_buffer(vb
->buffer
);
769 const uint32_t start_offset
= vb
->buffer_offset
;
771 * As noted in ilo_translate_format(), we treat some 3-component
772 * formats as 4-component formats to work around hardware
773 * limitations. Imagine the case where the vertex buffer holds a
774 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
775 * The hardware would not be able to fetch it because the vertex
776 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
777 * and that takes at least 8 bytes.
779 * For the workaround to work, we query the physical size, which is
780 * page aligned, to calculate end_offset so that the last vertex has
781 * a better chance to be fetched.
783 const uint32_t end_offset
= intel_bo_get_size(buf
->bo
) - 1;
785 dw
|= vb
->stride
<< BRW_VB0_PITCH_SHIFT
;
787 ilo_cp_write(cp
, dw
);
788 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
789 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
790 ilo_cp_write(cp
, instance_divisor
);
795 ilo_cp_write(cp
, dw
);
798 ilo_cp_write(cp
, instance_divisor
);
806 ve_set_cso_edgeflag(const struct ilo_dev_info
*dev
,
807 struct ilo_ve_cso
*cso
)
811 ILO_GPE_VALID_GEN(dev
, 6, 7);
814 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
816 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
817 * valid VERTEX_ELEMENT structure.
819 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
820 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
822 * - The Source Element Format must be set to the UINT format.
824 * - [DevSNB]: Edge Flags are not supported for QUADLIST
825 * primitives. Software may elect to convert QUADLIST primitives
826 * to some set of corresponding edge-flag-supported primitive
827 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
830 cso
->payload
[0] |= GEN6_VE0_EDGE_FLAG_ENABLE
;
832 BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_0_SHIFT
|
833 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_1_SHIFT
|
834 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_2_SHIFT
|
835 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_3_SHIFT
;
838 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
839 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
840 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
842 * Since all the hardware cares about is whether the flags are zero or not,
843 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
845 format
= (cso
->payload
[0] >> BRW_VE0_FORMAT_SHIFT
) & 0x1ff;
846 if (format
== BRW_SURFACEFORMAT_R32_FLOAT
) {
847 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT
==
848 BRW_SURFACEFORMAT_R32_FLOAT
- 1);
850 cso
->payload
[0] -= (1 << BRW_VE0_FORMAT_SHIFT
);
853 assert(format
== BRW_SURFACEFORMAT_R8_UINT
);
858 ve_init_cso_with_components(const struct ilo_dev_info
*dev
,
859 int comp0
, int comp1
, int comp2
, int comp3
,
860 struct ilo_ve_cso
*cso
)
862 ILO_GPE_VALID_GEN(dev
, 6, 7);
864 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
865 cso
->payload
[0] = GEN6_VE0_VALID
;
867 comp0
<< BRW_VE1_COMPONENT_0_SHIFT
|
868 comp1
<< BRW_VE1_COMPONENT_1_SHIFT
|
869 comp2
<< BRW_VE1_COMPONENT_2_SHIFT
|
870 comp3
<< BRW_VE1_COMPONENT_3_SHIFT
;
874 ve_init_cso(const struct ilo_dev_info
*dev
,
875 const struct pipe_vertex_element
*state
,
877 struct ilo_ve_cso
*cso
)
880 BRW_VE1_COMPONENT_STORE_SRC
,
881 BRW_VE1_COMPONENT_STORE_SRC
,
882 BRW_VE1_COMPONENT_STORE_SRC
,
883 BRW_VE1_COMPONENT_STORE_SRC
,
887 ILO_GPE_VALID_GEN(dev
, 6, 7);
889 switch (util_format_get_nr_components(state
->src_format
)) {
890 case 1: comp
[1] = BRW_VE1_COMPONENT_STORE_0
;
891 case 2: comp
[2] = BRW_VE1_COMPONENT_STORE_0
;
892 case 3: comp
[3] = (util_format_is_pure_integer(state
->src_format
)) ?
893 BRW_VE1_COMPONENT_STORE_1_INT
:
894 BRW_VE1_COMPONENT_STORE_1_FLT
;
897 format
= ilo_translate_vertex_format(state
->src_format
);
899 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
901 vb_index
<< GEN6_VE0_INDEX_SHIFT
|
903 format
<< BRW_VE0_FORMAT_SHIFT
|
904 state
->src_offset
<< BRW_VE0_SRC_OFFSET_SHIFT
;
907 comp
[0] << BRW_VE1_COMPONENT_0_SHIFT
|
908 comp
[1] << BRW_VE1_COMPONENT_1_SHIFT
|
909 comp
[2] << BRW_VE1_COMPONENT_2_SHIFT
|
910 comp
[3] << BRW_VE1_COMPONENT_3_SHIFT
;
914 ilo_gpe_init_ve(const struct ilo_dev_info
*dev
,
916 const struct pipe_vertex_element
*states
,
917 struct ilo_ve_state
*ve
)
921 ILO_GPE_VALID_GEN(dev
, 6, 7);
923 ve
->count
= num_states
;
926 for (i
= 0; i
< num_states
; i
++) {
927 const unsigned pipe_idx
= states
[i
].vertex_buffer_index
;
928 const unsigned instance_divisor
= states
[i
].instance_divisor
;
932 * map the pipe vb to the hardware vb, which has a fixed instance
935 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
936 if (ve
->vb_mapping
[hw_idx
] == pipe_idx
&&
937 ve
->instance_divisors
[hw_idx
] == instance_divisor
)
941 /* create one if there is no matching hardware vb */
942 if (hw_idx
>= ve
->vb_count
) {
943 hw_idx
= ve
->vb_count
++;
945 ve
->vb_mapping
[hw_idx
] = pipe_idx
;
946 ve
->instance_divisors
[hw_idx
] = instance_divisor
;
949 ve_init_cso(dev
, &states
[i
], hw_idx
, &ve
->cso
[i
]);
954 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info
*dev
,
955 const struct ilo_ve_state
*ve
,
956 bool last_velement_edgeflag
,
957 bool prepend_generated_ids
,
960 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x09);
964 ILO_GPE_VALID_GEN(dev
, 6, 7);
967 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
969 * "Up to 34 (DevSNB+) vertex elements are supported."
971 assert(ve
->count
+ prepend_generated_ids
<= 34);
973 if (!ve
->count
&& !prepend_generated_ids
) {
974 struct ilo_ve_cso dummy
;
976 ve_init_cso_with_components(dev
,
977 BRW_VE1_COMPONENT_STORE_0
,
978 BRW_VE1_COMPONENT_STORE_0
,
979 BRW_VE1_COMPONENT_STORE_0
,
980 BRW_VE1_COMPONENT_STORE_1_FLT
,
984 ilo_cp_begin(cp
, cmd_len
);
985 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
986 ilo_cp_write_multi(cp
, dummy
.payload
, 2);
992 cmd_len
= 2 * (ve
->count
+ prepend_generated_ids
) + 1;
994 ilo_cp_begin(cp
, cmd_len
);
995 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
997 if (prepend_generated_ids
) {
998 struct ilo_ve_cso gen_ids
;
1000 ve_init_cso_with_components(dev
,
1001 BRW_VE1_COMPONENT_STORE_VID
,
1002 BRW_VE1_COMPONENT_STORE_IID
,
1003 BRW_VE1_COMPONENT_NOSTORE
,
1004 BRW_VE1_COMPONENT_NOSTORE
,
1007 ilo_cp_write_multi(cp
, gen_ids
.payload
, 2);
1010 if (last_velement_edgeflag
) {
1011 struct ilo_ve_cso edgeflag
;
1013 for (i
= 0; i
< ve
->count
- 1; i
++)
1014 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
1016 edgeflag
= ve
->cso
[i
];
1017 ve_set_cso_edgeflag(dev
, &edgeflag
);
1018 ilo_cp_write_multi(cp
, edgeflag
.payload
, 2);
1021 for (i
= 0; i
< ve
->count
; i
++)
1022 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
1029 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info
*dev
,
1030 const struct ilo_ib_state
*ib
,
1031 bool enable_cut_index
,
1034 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0a);
1035 const uint8_t cmd_len
= 3;
1036 const struct ilo_buffer
*buf
= ilo_buffer(ib
->resource
);
1037 uint32_t start_offset
, end_offset
;
1040 ILO_GPE_VALID_GEN(dev
, 6, 7);
1045 format
= gen6_translate_index_size(ib
->state
.index_size
);
1048 * set start_offset to 0 here and adjust pipe_draw_info::start with
1049 * ib->draw_start_offset in 3DPRIMITIVE
1052 end_offset
= buf
->bo_size
;
1054 /* end_offset must also be aligned and is inclusive */
1055 end_offset
-= (end_offset
% ib
->state
.index_size
);
1058 ilo_cp_begin(cp
, cmd_len
);
1059 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1060 ((enable_cut_index
) ? BRW_CUT_INDEX_ENABLE
: 0) |
1062 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
1063 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
1068 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info
*dev
,
1069 uint32_t clip_viewport
,
1070 uint32_t sf_viewport
,
1071 uint32_t cc_viewport
,
1074 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0d);
1075 const uint8_t cmd_len
= 4;
1077 ILO_GPE_VALID_GEN(dev
, 6, 6);
1079 ilo_cp_begin(cp
, cmd_len
);
1080 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1081 GEN6_CLIP_VIEWPORT_MODIFY
|
1082 GEN6_SF_VIEWPORT_MODIFY
|
1083 GEN6_CC_VIEWPORT_MODIFY
);
1084 ilo_cp_write(cp
, clip_viewport
);
1085 ilo_cp_write(cp
, sf_viewport
);
1086 ilo_cp_write(cp
, cc_viewport
);
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (4 dwords).  Only valid on Gen6.
 *
 * Bit 0 is set in each of the three pointers before it is written.
 *
 * NOTE(review): bit 0 is presumably the per-pointer modify enable; the
 * trailing "cp" parameter and the closing ilo_cp_end() were reconstructed.
 * Confirm both against the upstream file and the PRM.
 */
static void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, blend_state | 1);
   ilo_cp_write(cp, depth_stencil_state | 1);
   ilo_cp_write(cp, color_calc_state | 1);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS (2 dwords): the header followed by
 * \p scissor_rect.
 *
 * NOTE(review): the trailing "cp" parameter and the closing ilo_cp_end()
 * were reconstructed; confirm against the upstream file.
 */
static void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1127 ilo_gpe_init_vs_cso(const struct ilo_dev_info
*dev
,
1128 const struct ilo_shader_state
*vs
,
1129 struct ilo_shader_cso
*cso
)
1131 int start_grf
, vue_read_len
, max_threads
;
1132 uint32_t dw2
, dw4
, dw5
;
1134 ILO_GPE_VALID_GEN(dev
, 6, 7);
1136 start_grf
= ilo_shader_get_kernel_param(vs
, ILO_KERNEL_URB_DATA_START_REG
);
1137 vue_read_len
= ilo_shader_get_kernel_param(vs
, ILO_KERNEL_INPUT_COUNT
);
1140 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
1142 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
1143 * 128-bit vertex elements to be passed into the payload for each
1146 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
1147 * data to be read and passed to the thread."
1149 vue_read_len
= (vue_read_len
+ 1) / 2;
1156 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
1158 * "Device # of EUs #Threads/EU
1162 max_threads
= (dev
->gt
== 2) ? 60 : 24;
1166 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
1168 * "Device # of EUs #Threads/EU
1169 * Ivy Bridge (GT2) 16 8
1170 * Ivy Bridge (GT1) 6 6"
1172 max_threads
= (dev
->gt
== 2) ? 128 : 36;
1175 /* see brwCreateContext() */
1176 max_threads
= (dev
->gt
== 2) ? 280 : 70;
1183 dw2
= (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT
;
1185 dw4
= start_grf
<< GEN6_VS_DISPATCH_START_GRF_SHIFT
|
1186 vue_read_len
<< GEN6_VS_URB_READ_LENGTH_SHIFT
|
1187 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT
;
1189 dw5
= GEN6_VS_STATISTICS_ENABLE
|
1192 if (dev
->gen
>= ILO_GEN(7.5))
1193 dw5
|= (max_threads
- 1) << HSW_VS_MAX_THREADS_SHIFT
;
1195 dw5
|= (max_threads
- 1) << GEN6_VS_MAX_THREADS_SHIFT
;
1197 STATIC_ASSERT(Elements(cso
->payload
) >= 3);
1198 cso
->payload
[0] = dw2
;
1199 cso
->payload
[1] = dw4
;
1200 cso
->payload
[2] = dw5
;
1204 gen6_emit_3DSTATE_VS(const struct ilo_dev_info
*dev
,
1205 const struct ilo_shader_state
*vs
,
1209 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x10);
1210 const uint8_t cmd_len
= 6;
1211 const struct ilo_shader_cso
*cso
;
1212 uint32_t dw2
, dw4
, dw5
;
1214 ILO_GPE_VALID_GEN(dev
, 6, 7);
1217 ilo_cp_begin(cp
, cmd_len
);
1218 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1219 ilo_cp_write(cp
, 0);
1220 ilo_cp_write(cp
, 0);
1221 ilo_cp_write(cp
, 0);
1222 ilo_cp_write(cp
, 0);
1223 ilo_cp_write(cp
, 0);
1228 cso
= ilo_shader_get_kernel_cso(vs
);
1229 dw2
= cso
->payload
[0];
1230 dw4
= cso
->payload
[1];
1231 dw5
= cso
->payload
[2];
1233 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT
;
1235 ilo_cp_begin(cp
, cmd_len
);
1236 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1237 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(vs
));
1238 ilo_cp_write(cp
, dw2
);
1239 ilo_cp_write(cp
, 0); /* scratch */
1240 ilo_cp_write(cp
, dw4
);
1241 ilo_cp_write(cp
, dw5
);
1246 ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info
*dev
,
1247 const struct ilo_shader_state
*gs
,
1248 struct ilo_shader_cso
*cso
)
1250 int start_grf
, vue_read_len
, max_threads
;
1251 uint32_t dw2
, dw4
, dw5
, dw6
;
1253 ILO_GPE_VALID_GEN(dev
, 6, 6);
1255 if (ilo_shader_get_type(gs
) == PIPE_SHADER_GEOMETRY
) {
1256 start_grf
= ilo_shader_get_kernel_param(gs
,
1257 ILO_KERNEL_URB_DATA_START_REG
);
1259 vue_read_len
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_INPUT_COUNT
);
1262 start_grf
= ilo_shader_get_kernel_param(gs
,
1263 ILO_KERNEL_VS_GEN6_SO_START_REG
);
1265 vue_read_len
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_OUTPUT_COUNT
);
1269 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
1271 * "Specifies the amount of URB data read and passed in the thread
1272 * payload for each Vertex URB entry, in 256-bit register increments.
1274 * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
1275 * 0 indicating no Vertex URB data to be read and passed to the
1278 vue_read_len
= (vue_read_len
+ 1) / 2;
1283 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
1285 * "Maximum Number of Threads valid range is [0,27] when Rendering
1286 * Enabled bit is set."
1288 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
1290 * "Programming Note: If the GS stage is enabled, software must always
1291 * allocate at least one GS URB Entry. This is true even if the GS
1292 * thread never needs to output vertices to the pipeline, e.g., when
1293 * only performing stream output. This is an artifact of the need to
1294 * pass the GS thread an initial destination URB handle."
1296 * As such, we always enable rendering, and limit the number of threads.
1299 /* maximum is 60, but limited to 28 */
1303 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
1307 dw2
= GEN6_GS_SPF_MODE
;
1309 dw4
= vue_read_len
<< GEN6_GS_URB_READ_LENGTH_SHIFT
|
1310 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT
|
1311 start_grf
<< GEN6_GS_DISPATCH_START_GRF_SHIFT
;
1313 dw5
= (max_threads
- 1) << GEN6_GS_MAX_THREADS_SHIFT
|
1314 GEN6_GS_STATISTICS_ENABLE
|
1315 GEN6_GS_SO_STATISTICS_ENABLE
|
1316 GEN6_GS_RENDERING_ENABLE
;
1319 * we cannot make use of GEN6_GS_REORDER because it will reorder
1320 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
1321 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
1322 * (2N+2, 2N+1, 2N+3)).
1324 dw6
= GEN6_GS_ENABLE
;
1326 if (ilo_shader_get_kernel_param(gs
, ILO_KERNEL_GS_DISCARD_ADJACENCY
))
1327 dw6
|= GEN6_GS_DISCARD_ADJACENCY
;
1329 if (ilo_shader_get_kernel_param(gs
, ILO_KERNEL_VS_GEN6_SO
)) {
1330 const uint32_t svbi_post_inc
=
1331 ilo_shader_get_kernel_param(gs
, ILO_KERNEL_GS_GEN6_SVBI_POST_INC
);
1333 dw6
|= GEN6_GS_SVBI_PAYLOAD_ENABLE
;
1334 if (svbi_post_inc
) {
1335 dw6
|= GEN6_GS_SVBI_POSTINCREMENT_ENABLE
|
1336 svbi_post_inc
<< GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT
;
1340 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
1341 cso
->payload
[0] = dw2
;
1342 cso
->payload
[1] = dw4
;
1343 cso
->payload
[2] = dw5
;
1344 cso
->payload
[3] = dw6
;
1348 gen6_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
1349 const struct ilo_shader_state
*gs
,
1350 const struct ilo_shader_state
*vs
,
1354 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x11);
1355 const uint8_t cmd_len
= 7;
1356 uint32_t dw1
, dw2
, dw4
, dw5
, dw6
;
1358 ILO_GPE_VALID_GEN(dev
, 6, 6);
1361 const struct ilo_shader_cso
*cso
;
1363 dw1
= ilo_shader_get_kernel_offset(gs
);
1365 cso
= ilo_shader_get_kernel_cso(gs
);
1366 dw2
= cso
->payload
[0];
1367 dw4
= cso
->payload
[1];
1368 dw5
= cso
->payload
[2];
1369 dw6
= cso
->payload
[3];
1371 else if (vs
&& ilo_shader_get_kernel_param(vs
, ILO_KERNEL_VS_GEN6_SO
)) {
1372 struct ilo_shader_cso cso
;
1373 enum ilo_kernel_param param
;
1375 switch (verts_per_prim
) {
1377 param
= ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
;
1380 param
= ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
;
1383 param
= ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
;
1387 dw1
= ilo_shader_get_kernel_offset(vs
) +
1388 ilo_shader_get_kernel_param(vs
, param
);
1390 /* cannot use VS's CSO */
1391 ilo_gpe_init_gs_cso_gen6(dev
, vs
, &cso
);
1392 dw2
= cso
.payload
[0];
1393 dw4
= cso
.payload
[1];
1394 dw5
= cso
.payload
[2];
1395 dw6
= cso
.payload
[3];
1400 dw4
= 1 << GEN6_GS_URB_READ_LENGTH_SHIFT
;
1401 dw5
= GEN6_GS_STATISTICS_ENABLE
;
1405 ilo_cp_begin(cp
, cmd_len
);
1406 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1407 ilo_cp_write(cp
, dw1
);
1408 ilo_cp_write(cp
, dw2
);
1409 ilo_cp_write(cp
, 0);
1410 ilo_cp_write(cp
, dw4
);
1411 ilo_cp_write(cp
, dw5
);
1412 ilo_cp_write(cp
, dw6
);
1417 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info
*dev
,
1418 const struct pipe_rasterizer_state
*state
,
1419 struct ilo_rasterizer_clip
*clip
)
1421 uint32_t dw1
, dw2
, dw3
;
1423 ILO_GPE_VALID_GEN(dev
, 6, 7);
1425 dw1
= GEN6_CLIP_STATISTICS_ENABLE
;
1427 if (dev
->gen
>= ILO_GEN(7)) {
1429 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
1431 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
1432 * enabled only for the cases where the incoming primitive topology
1433 * into the clipper guaranteed to be Trilist."
1435 * What does this mean?
1438 GEN7_CLIP_EARLY_CULL
;
1440 if (state
->front_ccw
)
1441 dw1
|= GEN7_CLIP_WINDING_CCW
;
1443 switch (state
->cull_face
) {
1444 case PIPE_FACE_NONE
:
1445 dw1
|= GEN7_CLIP_CULLMODE_NONE
;
1447 case PIPE_FACE_FRONT
:
1448 dw1
|= GEN7_CLIP_CULLMODE_FRONT
;
1450 case PIPE_FACE_BACK
:
1451 dw1
|= GEN7_CLIP_CULLMODE_BACK
;
1453 case PIPE_FACE_FRONT_AND_BACK
:
1454 dw1
|= GEN7_CLIP_CULLMODE_BOTH
;
1459 dw2
= GEN6_CLIP_ENABLE
|
1461 state
->clip_plane_enable
<< GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT
|
1462 GEN6_CLIP_MODE_NORMAL
;
1464 if (state
->clip_halfz
)
1465 dw2
|= GEN6_CLIP_API_D3D
;
1467 dw2
|= GEN6_CLIP_API_OGL
;
1469 if (state
->depth_clip
)
1470 dw2
|= GEN6_CLIP_Z_TEST
;
1472 if (state
->flatshade_first
) {
1473 dw2
|= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT
|
1474 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT
|
1475 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT
;
1478 dw2
|= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT
|
1479 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT
|
1480 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT
;
1483 dw3
= 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT
|
1484 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT
;
1486 clip
->payload
[0] = dw1
;
1487 clip
->payload
[1] = dw2
;
1488 clip
->payload
[2] = dw3
;
1490 clip
->can_enable_guardband
= true;
1493 * There are several reasons that guard band test should be disabled
1495 * - GL wide points (to avoid partially visibie object)
1496 * - GL wide or AA lines (to avoid partially visibie object)
1498 if (state
->point_size_per_vertex
|| state
->point_size
> 1.0f
)
1499 clip
->can_enable_guardband
= false;
1500 if (state
->line_smooth
|| state
->line_width
> 1.0f
)
1501 clip
->can_enable_guardband
= false;
1505 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info
*dev
,
1506 const struct ilo_rasterizer_state
*rasterizer
,
1507 const struct ilo_shader_state
*fs
,
1508 bool enable_guardband
,
1512 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x12);
1513 const uint8_t cmd_len
= 4;
1514 uint32_t dw1
, dw2
, dw3
;
1519 dw1
= rasterizer
->clip
.payload
[0];
1520 dw2
= rasterizer
->clip
.payload
[1];
1521 dw3
= rasterizer
->clip
.payload
[2];
1523 if (enable_guardband
&& rasterizer
->clip
.can_enable_guardband
)
1524 dw2
|= GEN6_CLIP_GB_TEST
;
1526 interps
= (fs
) ? ilo_shader_get_kernel_param(fs
,
1527 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) : 0;
1529 if (interps
& (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
|
1530 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
|
1531 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC
))
1532 dw2
|= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE
;
1534 dw3
|= GEN6_CLIP_FORCE_ZERO_RTAINDEX
|
1535 (num_viewports
- 1);
1543 ilo_cp_begin(cp
, cmd_len
);
1544 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1545 ilo_cp_write(cp
, dw1
);
1546 ilo_cp_write(cp
, dw2
);
1547 ilo_cp_write(cp
, dw3
);
1552 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info
*dev
,
1553 const struct pipe_rasterizer_state
*state
,
1554 struct ilo_rasterizer_sf
*sf
)
1556 float offset_const
, offset_scale
, offset_clamp
;
1557 int line_width
, point_width
;
1558 uint32_t dw1
, dw2
, dw3
;
1560 ILO_GPE_VALID_GEN(dev
, 6, 7);
1563 * Scale the constant term. The minimum representable value used by the HW
1564 * is not large enouch to be the minimum resolvable difference.
1566 offset_const
= state
->offset_units
* 2.0f
;
1568 offset_scale
= state
->offset_scale
;
1569 offset_clamp
= state
->offset_clamp
;
1572 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1574 * "This bit (Statistics Enable) should be set whenever clipping is
1575 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
1576 * should be cleared if clipping is disabled or Statistics Enable in
1577 * CLIP_STATE is clear."
1579 dw1
= GEN6_SF_STATISTICS_ENABLE
|
1580 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE
;
1582 /* XXX GEN6 path seems to work fine for GEN7 */
1583 if (false && dev
->gen
>= ILO_GEN(7)) {
1585 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
1587 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
1588 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
1589 * Depth Offset Enable Point) should be set whenever non zero depth
1590 * bias (Slope, Bias) values are used. Setting this bit may have
1591 * some degradation of performance for some workloads."
1593 if (state
->offset_tri
|| state
->offset_line
|| state
->offset_point
) {
1594 /* XXX need to scale offset_const according to the depth format */
1595 dw1
|= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS
;
1597 dw1
|= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID
|
1598 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME
|
1599 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT
;
1602 offset_const
= 0.0f
;
1603 offset_scale
= 0.0f
;
1604 offset_clamp
= 0.0f
;
1608 if (state
->offset_tri
)
1609 dw1
|= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID
;
1610 if (state
->offset_line
)
1611 dw1
|= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME
;
1612 if (state
->offset_point
)
1613 dw1
|= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT
;
1616 switch (state
->fill_front
) {
1617 case PIPE_POLYGON_MODE_FILL
:
1618 dw1
|= GEN6_SF_FRONT_SOLID
;
1620 case PIPE_POLYGON_MODE_LINE
:
1621 dw1
|= GEN6_SF_FRONT_WIREFRAME
;
1623 case PIPE_POLYGON_MODE_POINT
:
1624 dw1
|= GEN6_SF_FRONT_POINT
;
1628 switch (state
->fill_back
) {
1629 case PIPE_POLYGON_MODE_FILL
:
1630 dw1
|= GEN6_SF_BACK_SOLID
;
1632 case PIPE_POLYGON_MODE_LINE
:
1633 dw1
|= GEN6_SF_BACK_WIREFRAME
;
1635 case PIPE_POLYGON_MODE_POINT
:
1636 dw1
|= GEN6_SF_BACK_POINT
;
1640 if (state
->front_ccw
)
1641 dw1
|= GEN6_SF_WINDING_CCW
;
1645 if (state
->line_smooth
) {
1647 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1649 * "This field (Anti-aliasing Enable) must be disabled if any of the
1650 * render targets have integer (UINT or SINT) surface format."
1652 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1654 * "This field (Hierarchical Depth Buffer Enable) must be disabled
1655 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
1657 * TODO We do not check those yet.
1659 dw2
|= GEN6_SF_LINE_AA_ENABLE
|
1660 GEN6_SF_LINE_END_CAP_WIDTH_1_0
;
1663 switch (state
->cull_face
) {
1664 case PIPE_FACE_NONE
:
1665 dw2
|= GEN6_SF_CULL_NONE
;
1667 case PIPE_FACE_FRONT
:
1668 dw2
|= GEN6_SF_CULL_FRONT
;
1670 case PIPE_FACE_BACK
:
1671 dw2
|= GEN6_SF_CULL_BACK
;
1673 case PIPE_FACE_FRONT_AND_BACK
:
1674 dw2
|= GEN6_SF_CULL_BOTH
;
1679 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
1680 * pixels in the minor direction. We have to make the lines slightly
1681 * thicker, 0.5 pixel on both sides, so that they intersect that many
1682 * pixels are considered into the lines.
1684 * Line width is in U3.7.
1686 line_width
= (int) ((state
->line_width
+
1687 (float) state
->line_smooth
) * 128.0f
+ 0.5f
);
1688 line_width
= CLAMP(line_width
, 0, 1023);
1690 if (line_width
== 128 && !state
->line_smooth
) {
1695 dw2
|= line_width
<< GEN6_SF_LINE_WIDTH_SHIFT
;
1698 dw2
|= GEN6_SF_SCISSOR_ENABLE
;
1700 dw3
= GEN6_SF_LINE_AA_MODE_TRUE
|
1701 GEN6_SF_VERTEX_SUBPIXEL_8BITS
;
1703 if (state
->line_last_pixel
)
1706 if (state
->flatshade_first
) {
1707 dw3
|= 0 << GEN6_SF_TRI_PROVOKE_SHIFT
|
1708 0 << GEN6_SF_LINE_PROVOKE_SHIFT
|
1709 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT
;
1712 dw3
|= 2 << GEN6_SF_TRI_PROVOKE_SHIFT
|
1713 1 << GEN6_SF_LINE_PROVOKE_SHIFT
|
1714 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT
;
1717 if (!state
->point_size_per_vertex
)
1718 dw3
|= GEN6_SF_USE_STATE_POINT_WIDTH
;
1721 point_width
= (int) (state
->point_size
* 8.0f
+ 0.5f
);
1722 point_width
= CLAMP(point_width
, 1, 2047);
1726 STATIC_ASSERT(Elements(sf
->payload
) >= 6);
1727 sf
->payload
[0] = dw1
;
1728 sf
->payload
[1] = dw2
;
1729 sf
->payload
[2] = dw3
;
1730 sf
->payload
[3] = fui(offset_const
);
1731 sf
->payload
[4] = fui(offset_scale
);
1732 sf
->payload
[5] = fui(offset_clamp
);
1734 if (state
->multisample
) {
1735 sf
->dw_msaa
= GEN6_SF_MSRAST_ON_PATTERN
;
1738 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
1740 * "Software must not program a value of 0.0 when running in
1741 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
1742 * when multisampling rasterization is enabled."
1745 line_width
= 128; /* 1.0f */
1747 sf
->dw_msaa
|= line_width
<< GEN6_SF_LINE_WIDTH_SHIFT
;
1756 * Fill in DW2 to DW7 of 3DSTATE_SF.
1759 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info
*dev
,
1760 const struct ilo_rasterizer_state
*rasterizer
,
1762 enum pipe_format depth_format
,
1763 uint32_t *payload
, unsigned payload_len
)
1765 const struct ilo_rasterizer_sf
*sf
= &rasterizer
->sf
;
1767 assert(payload_len
== Elements(sf
->payload
));
1770 memcpy(payload
, sf
->payload
, sizeof(sf
->payload
));
1772 if (num_samples
> 1)
1773 payload
[1] |= sf
->dw_msaa
;
1775 if (dev
->gen
>= ILO_GEN(7)) {
1778 /* separate stencil */
1779 switch (depth_format
) {
1780 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
1781 depth_format
= PIPE_FORMAT_Z24X8_UNORM
;
1783 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
1784 depth_format
= PIPE_FORMAT_Z32_FLOAT
;;
1786 case PIPE_FORMAT_S8_UINT
:
1787 depth_format
= PIPE_FORMAT_NONE
;
1793 format
= gen6_translate_depth_format(depth_format
);
1794 /* FLOAT surface is assumed when there is no depth buffer */
1796 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
1798 payload
[0] |= format
<< GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT
;
1803 payload
[1] = (num_samples
> 1) ? GEN6_SF_MSRAST_ON_PATTERN
: 0;
1812 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
1815 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info
*dev
,
1816 const struct ilo_rasterizer_state
*rasterizer
,
1817 const struct ilo_shader_state
*fs
,
1818 const struct ilo_shader_state
*last_sh
,
1819 uint32_t *dw
, int num_dwords
)
1821 int output_count
, vue_offset
, vue_len
;
1822 const struct ilo_kernel_routing
*routing
;
1824 ILO_GPE_VALID_GEN(dev
, 6, 7);
1825 assert(num_dwords
== 13);
1828 memset(dw
, 0, sizeof(dw
[0]) * num_dwords
);
1830 if (dev
->gen
>= ILO_GEN(7))
1831 dw
[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
;
1833 dw
[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
;
1838 output_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
1839 assert(output_count
<= 32);
1841 routing
= ilo_shader_get_kernel_routing(fs
);
1843 vue_offset
= routing
->source_skip
;
1844 assert(vue_offset
% 2 == 0);
1847 vue_len
= (routing
->source_len
+ 1) / 2;
1851 if (dev
->gen
>= ILO_GEN(7)) {
1852 dw
[0] = output_count
<< GEN7_SBE_NUM_OUTPUTS_SHIFT
|
1853 vue_len
<< GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
|
1854 vue_offset
<< GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT
;
1855 if (routing
->swizzle_enable
)
1856 dw
[0] |= GEN7_SBE_SWIZZLE_ENABLE
;
1859 dw
[0] = output_count
<< GEN6_SF_NUM_OUTPUTS_SHIFT
|
1860 vue_len
<< GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
|
1861 vue_offset
<< GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT
;
1862 if (routing
->swizzle_enable
)
1863 dw
[0] |= GEN6_SF_SWIZZLE_ENABLE
;
1866 switch (rasterizer
->state
.sprite_coord_mode
) {
1867 case PIPE_SPRITE_COORD_UPPER_LEFT
:
1868 dw
[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT
;
1870 case PIPE_SPRITE_COORD_LOWER_LEFT
:
1871 dw
[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT
;
1875 STATIC_ASSERT(Elements(routing
->swizzles
) >= 16);
1876 memcpy(&dw
[1], routing
->swizzles
, 2 * 16);
1879 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
1881 * "This field (Point Sprite Texture Coordinate Enable) must be
1882 * programmed to 0 when non-point primitives are rendered."
1884 * TODO We do not check that yet.
1886 dw
[9] = routing
->point_sprite_enable
;
1888 dw
[10] = routing
->const_interp_enable
;
1890 /* WrapShortest enables */
1896 gen6_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
1897 const struct ilo_rasterizer_state
*rasterizer
,
1898 const struct ilo_shader_state
*fs
,
1899 const struct ilo_shader_state
*last_sh
,
1902 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x13);
1903 const uint8_t cmd_len
= 20;
1904 uint32_t payload_raster
[6], payload_sbe
[13];
1906 ILO_GPE_VALID_GEN(dev
, 6, 6);
1908 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
, rasterizer
,
1909 1, PIPE_FORMAT_NONE
, payload_raster
, Elements(payload_raster
));
1910 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
,
1911 fs
, last_sh
, payload_sbe
, Elements(payload_sbe
));
1913 ilo_cp_begin(cp
, cmd_len
);
1914 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1915 ilo_cp_write(cp
, payload_sbe
[0]);
1916 ilo_cp_write_multi(cp
, payload_raster
, 6);
1917 ilo_cp_write_multi(cp
, &payload_sbe
[1], 12);
1922 ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info
*dev
,
1923 const struct pipe_rasterizer_state
*state
,
1924 struct ilo_rasterizer_wm
*wm
)
1928 ILO_GPE_VALID_GEN(dev
, 6, 6);
1930 /* only the FF unit states are set, as in GEN7 */
1932 dw5
= GEN6_WM_LINE_AA_WIDTH_2_0
;
1934 /* same value as in 3DSTATE_SF */
1935 if (state
->line_smooth
)
1936 dw5
|= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0
;
1938 if (state
->poly_stipple_enable
)
1939 dw5
|= GEN6_WM_POLYGON_STIPPLE_ENABLE
;
1940 if (state
->line_stipple_enable
)
1941 dw5
|= GEN6_WM_LINE_STIPPLE_ENABLE
;
1943 dw6
= GEN6_WM_POSITION_ZW_PIXEL
|
1944 GEN6_WM_MSRAST_OFF_PIXEL
|
1945 GEN6_WM_MSDISPMODE_PERSAMPLE
;
1947 if (state
->bottom_edge_rule
)
1948 dw6
|= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT
;
1951 * assertion that makes sure
1953 * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
1957 STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL
== 0 &&
1958 GEN6_WM_MSDISPMODE_PERSAMPLE
== 0);
1961 (state
->multisample
) ? GEN6_WM_MSRAST_ON_PATTERN
: 0;
1962 wm
->dw_msaa_disp
= GEN6_WM_MSDISPMODE_PERPIXEL
;
1964 STATIC_ASSERT(Elements(wm
->payload
) >= 2);
1965 wm
->payload
[0] = dw5
;
1966 wm
->payload
[1] = dw6
;
1970 ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info
*dev
,
1971 const struct ilo_shader_state
*fs
,
1972 struct ilo_shader_cso
*cso
)
1974 int start_grf
, input_count
, interps
, max_threads
;
1975 uint32_t dw2
, dw4
, dw5
, dw6
;
1977 ILO_GPE_VALID_GEN(dev
, 6, 6);
1979 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
1980 input_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
1981 interps
= ilo_shader_get_kernel_param(fs
,
1982 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
);
1984 /* see brwCreateContext() */
1985 max_threads
= (dev
->gt
== 2) ? 80 : 40;
1987 dw2
= (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT
;
1989 dw4
= start_grf
<< GEN6_WM_DISPATCH_START_GRF_SHIFT_0
|
1990 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1
|
1991 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2
;
1993 dw5
= (max_threads
- 1) << GEN6_WM_MAX_THREADS_SHIFT
;
1996 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
1998 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
1999 * PS kernel or color calculator has the ability to kill (discard)
2000 * pixels or samples, other than due to depth or stencil testing.
2001 * This bit is required to be ENABLED in the following situations:
2003 * The API pixel shader program contains "killpix" or "discard"
2004 * instructions, or other code in the pixel shader kernel that can
2005 * cause the final pixel mask to differ from the pixel mask received
2008 * A sampler with chroma key enabled with kill pixel mode is used by
2011 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
2014 * The pixel shader kernel generates and outputs oMask.
2016 * Note: As ClipDistance clipping is fully supported in hardware and
2017 * therefore not via PS instructions, there should be no need to
2018 * ENABLE this bit due to ClipDistance clipping."
2020 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
2021 dw5
|= GEN6_WM_KILL_ENABLE
;
2024 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
2026 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
2027 * field must be set to disabled."
2029 * TODO This is not checked yet.
2031 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
2032 dw5
|= GEN6_WM_COMPUTED_DEPTH
;
2034 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
2035 dw5
|= GEN6_WM_USES_SOURCE_DEPTH
;
2037 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
2038 dw5
|= GEN6_WM_USES_SOURCE_W
;
2041 * TODO set this bit only when
2043 * a) fs writes colors and color is not masked, or
2044 * b) fs writes depth, or
2048 dw5
|= GEN6_WM_DISPATCH_ENABLE
;
2050 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
2051 dw5
|= GEN6_WM_8_DISPATCH_ENABLE
;
2053 dw6
= input_count
<< GEN6_WM_NUM_SF_OUTPUTS_SHIFT
|
2054 GEN6_WM_POSOFFSET_NONE
|
2055 interps
<< GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT
;
2057 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
2058 cso
->payload
[0] = dw2
;
2059 cso
->payload
[1] = dw4
;
2060 cso
->payload
[2] = dw5
;
2061 cso
->payload
[3] = dw6
;
2065 gen6_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
2066 const struct ilo_shader_state
*fs
,
2068 const struct ilo_rasterizer_state
*rasterizer
,
2069 bool dual_blend
, bool cc_may_kill
,
2072 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x14);
2073 const uint8_t cmd_len
= 9;
2074 const int num_samples
= 1;
2075 const struct ilo_shader_cso
*fs_cso
;
2076 uint32_t dw2
, dw4
, dw5
, dw6
;
2078 ILO_GPE_VALID_GEN(dev
, 6, 6);
2081 /* see brwCreateContext() */
2082 const int max_threads
= (dev
->gt
== 2) ? 80 : 40;
2084 ilo_cp_begin(cp
, cmd_len
);
2085 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2086 ilo_cp_write(cp
, 0);
2087 ilo_cp_write(cp
, 0);
2088 ilo_cp_write(cp
, 0);
2089 ilo_cp_write(cp
, 0);
2090 /* honor the valid range even if dispatching is disabled */
2091 ilo_cp_write(cp
, (max_threads
- 1) << GEN6_WM_MAX_THREADS_SHIFT
);
2092 ilo_cp_write(cp
, 0);
2093 ilo_cp_write(cp
, 0);
2094 ilo_cp_write(cp
, 0);
2100 fs_cso
= ilo_shader_get_kernel_cso(fs
);
2101 dw2
= fs_cso
->payload
[0];
2102 dw4
= fs_cso
->payload
[1];
2103 dw5
= fs_cso
->payload
[2];
2104 dw6
= fs_cso
->payload
[3];
2106 dw2
|= (num_samplers
+ 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT
;
2109 dw4
|= GEN6_WM_STATISTICS_ENABLE
;
2113 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
2115 * "This bit (Statistics Enable) must be disabled if either of these
2116 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
2117 * Resolve Enable or Depth Buffer Resolve Enable."
2119 dw4
|= GEN6_WM_DEPTH_CLEAR
;
2120 dw4
|= GEN6_WM_DEPTH_RESOLVE
;
2121 dw4
|= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE
;
2125 dw5
|= GEN6_WM_KILL_ENABLE
|
2126 GEN6_WM_DISPATCH_ENABLE
;
2130 dw5
|= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE
;
2132 dw5
|= rasterizer
->wm
.payload
[0];
2134 dw6
|= rasterizer
->wm
.payload
[1];
2136 if (num_samples
> 1) {
2137 dw6
|= rasterizer
->wm
.dw_msaa_rast
|
2138 rasterizer
->wm
.dw_msaa_disp
;
2141 ilo_cp_begin(cp
, cmd_len
);
2142 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2143 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
2144 ilo_cp_write(cp
, dw2
);
2145 ilo_cp_write(cp
, 0); /* scratch */
2146 ilo_cp_write(cp
, dw4
);
2147 ilo_cp_write(cp
, dw5
);
2148 ilo_cp_write(cp
, dw6
);
2149 ilo_cp_write(cp
, 0); /* kernel 1 */
2150 ilo_cp_write(cp
, 0); /* kernel 2 */
/**
 * Build the four buffer dwords shared by the 3DSTATE_CONSTANT_{VS,GS,PS}
 * commands.
 *
 * For each of the four slots, a buffer with a non-zero size yields its
 * address OR'ed with its read length (in 256-bit units, minus one); any
 * other slot yields zero.
 *
 * \param dev              device info (not read)
 * \param bufs             buffer addresses; each used one must be 32-byte
 *                         aligned
 * \param sizes            buffer sizes in bytes; 0 marks an unused buffer
 * \param num_bufs         number of valid entries in \p bufs and \p sizes
 * \param max_read_length  upper bound for the sum of all read lengths, in
 *                         256-bit units
 * \param dw               receives the four dwords
 * \param num_dwords       number of dwords in \p dw; must be 4
 * \return bitmask of the enabled buffers, bit i for slot i
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enabled = 0x0;
   int total_read_length, i;

   assert(num_dwords == 4);

   total_read_length = 0;
   for (i = 0; i < 4; i++) {
      if (i < num_bufs && sizes[i]) {
         /* in 256-bit units minus one */
         const int read_len = (sizes[i] + 31) / 32 - 1;

         /* the low five address bits carry the read length */
         assert(bufs[i] % 32 == 0);
         assert(read_len < 32);

         enabled |= 1u << i;
         dw[i] = bufs[i] | read_len;

         total_read_length += read_len + 1;
      }
      else {
         dw[i] = 0;
      }
   }

   assert(total_read_length <= max_read_length);

   return enabled;
}
/**
 * Emit the GEN6 3DSTATE_CONSTANT_VS.
 *
 * \param dev       device info; must be GEN6
 * \param bufs      constant buffer addresses
 * \param sizes     constant buffer sizes in bytes
 * \param num_bufs  number of buffers; at most 4
 * \param cp        command parser the five dwords are written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the buffer enable mask is carried in the header, starting at bit 12 */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit the GEN6 3DSTATE_CONSTANT_GS.
 *
 * \param dev       device info; must be GEN6
 * \param bufs      constant buffer addresses
 * \param sizes     constant buffer sizes in bytes
 * \param num_bufs  number of buffers; at most 4
 * \param cp        command parser the five dwords are written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the buffer enable mask is carried in the header, starting at bit 12 */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit the GEN6 3DSTATE_CONSTANT_PS.
 *
 * \param dev       device info; must be GEN6
 * \param bufs      constant buffer addresses
 * \param sizes     constant buffer sizes in bytes
 * \param num_bufs  number of buffers; at most 4
 * \param cp        command parser the five dwords are written to
 */
static void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the buffer enable mask is carried in the header, starting at bit 12 */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit the GEN6 3DSTATE_SAMPLE_MASK.
 *
 * \param dev          device info; must be GEN6
 * \param sample_mask  sample mask; only the low four bits are kept
 * \param cp           command parser the two dwords are written to
 */
static void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = 0xf;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   /* drop any bits outside the valid range */
   sample_mask &= valid_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sample_mask);
   ilo_cp_end(cp);
}
2302 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info
*dev
,
2303 unsigned x
, unsigned y
,
2304 unsigned width
, unsigned height
,
2307 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x00);
2308 const uint8_t cmd_len
= 4;
2309 unsigned xmax
= x
+ width
- 1;
2310 unsigned ymax
= y
+ height
- 1;
2313 ILO_GPE_VALID_GEN(dev
, 6, 7);
2315 if (dev
->gen
>= ILO_GEN(7)) {
2320 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
2322 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
2323 * must be an even number"
2330 if (x
> rect_limit
) x
= rect_limit
;
2331 if (y
> rect_limit
) y
= rect_limit
;
2332 if (xmax
> rect_limit
) xmax
= rect_limit
;
2333 if (ymax
> rect_limit
) ymax
= rect_limit
;
2335 ilo_cp_begin(cp
, cmd_len
);
2336 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2337 ilo_cp_write(cp
, y
<< 16 | x
);
2338 ilo_cp_write(cp
, ymax
<< 16 | xmax
);
2341 * There is no need to set the origin. It is intended to support front
2344 ilo_cp_write(cp
, 0);
2349 struct ilo_zs_surface_info
{
2354 struct intel_bo
*bo
;
2356 enum intel_tiling_mode tiling
;
2360 unsigned width
, height
, depth
;
2361 unsigned lod
, first_layer
, num_layers
;
2362 uint32_t x_offset
, y_offset
;
2366 zs_init_info_null(const struct ilo_dev_info
*dev
,
2367 struct ilo_zs_surface_info
*info
)
2369 ILO_GPE_VALID_GEN(dev
, 6, 7);
2371 memset(info
, 0, sizeof(*info
));
2373 info
->surface_type
= BRW_SURFACE_NULL
;
2374 info
->format
= BRW_DEPTHFORMAT_D32_FLOAT
;
2378 info
->num_layers
= 1;
2382 zs_init_info(const struct ilo_dev_info
*dev
,
2383 const struct ilo_texture
*tex
,
2384 enum pipe_format format
,
2386 unsigned first_layer
, unsigned num_layers
,
2387 struct ilo_zs_surface_info
*info
)
2389 const bool rebase_layer
= true;
2390 struct intel_bo
* const hiz_bo
= NULL
;
2391 bool separate_stencil
;
2392 uint32_t x_offset
[3], y_offset
[3];
2394 ILO_GPE_VALID_GEN(dev
, 6, 7);
2396 memset(info
, 0, sizeof(*info
));
2398 info
->surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
2400 if (info
->surface_type
== BRW_SURFACE_CUBE
) {
2402 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
2404 * "For Other Surfaces (Cube Surfaces):
2405 * This field (Minimum Array Element) is ignored."
2407 * "For Other Surfaces (Cube Surfaces):
2408 * This field (Render Target View Extent) is ignored."
2410 * As such, we cannot set first_layer and num_layers on cube surfaces.
2411 * To work around that, treat it as a 2D surface.
2413 info
->surface_type
= BRW_SURFACE_2D
;
2416 if (dev
->gen
>= ILO_GEN(7)) {
2417 separate_stencil
= true;
2421 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2423 * "This field (Separate Stencil Buffer Enable) must be set to the
2424 * same value (enabled or disabled) as Hierarchical Depth Buffer
2427 separate_stencil
= (hiz_bo
!= NULL
);
2431 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
2433 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
2434 * Surface Format of the depth buffer cannot be
2435 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
2436 * requires the separate stencil buffer."
2438 * From the Ironlake PRM, volume 2 part 1, page 330:
2440 * "If this field (Separate Stencil Buffer Enable) is disabled, the
2441 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
2443 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
2444 * is indeed used, the depth values output by the fragment shaders will
2445 * be different when read back.
2447 * As for GEN7+, separate_stencil is always true.
2450 case PIPE_FORMAT_Z16_UNORM
:
2451 info
->format
= BRW_DEPTHFORMAT_D16_UNORM
;
2453 case PIPE_FORMAT_Z32_FLOAT
:
2454 info
->format
= BRW_DEPTHFORMAT_D32_FLOAT
;
2456 case PIPE_FORMAT_Z24X8_UNORM
:
2457 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
2458 info
->format
= (separate_stencil
) ?
2459 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
:
2460 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
2462 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
2463 info
->format
= (separate_stencil
) ?
2464 BRW_DEPTHFORMAT_D32_FLOAT
:
2465 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT
;
2467 case PIPE_FORMAT_S8_UINT
:
2468 if (separate_stencil
) {
2469 info
->format
= BRW_DEPTHFORMAT_D32_FLOAT
;
2474 assert(!"unsupported depth/stencil format");
2475 zs_init_info_null(dev
, info
);
2480 if (format
!= PIPE_FORMAT_S8_UINT
) {
2481 info
->zs
.bo
= tex
->bo
;
2482 info
->zs
.stride
= tex
->bo_stride
;
2483 info
->zs
.tiling
= tex
->tiling
;
2486 info
->zs
.offset
= ilo_texture_get_slice_offset(tex
,
2487 level
, first_layer
, &x_offset
[0], &y_offset
[0]);
2491 if (tex
->separate_s8
|| format
== PIPE_FORMAT_S8_UINT
) {
2492 const struct ilo_texture
*s8_tex
=
2493 (tex
->separate_s8
) ? tex
->separate_s8
: tex
;
2495 info
->stencil
.bo
= s8_tex
->bo
;
2498 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
2500 * "The pitch must be set to 2x the value computed based on width,
2501 * as the stencil buffer is stored with two rows interleaved."
2503 * According to the classic driver, we need to do the same for GEN7+
2504 * even though the Ivy Bridge PRM does not say anything about it.
2506 info
->stencil
.stride
= s8_tex
->bo_stride
* 2;
2508 info
->stencil
.tiling
= s8_tex
->tiling
;
2511 info
->stencil
.offset
= ilo_texture_get_slice_offset(s8_tex
,
2512 level
, first_layer
, &x_offset
[1], &y_offset
[1]);
2517 info
->hiz
.bo
= hiz_bo
;
2518 info
->hiz
.stride
= 0;
2519 info
->hiz
.tiling
= 0;
2520 info
->hiz
.offset
= 0;
2525 info
->width
= tex
->base
.width0
;
2526 info
->height
= tex
->base
.height0
;
2527 info
->depth
= (tex
->base
.target
== PIPE_TEXTURE_3D
) ?
2528 tex
->base
.depth0
: num_layers
;
2531 info
->first_layer
= first_layer
;
2532 info
->num_layers
= num_layers
;
2535 /* the size of the layer */
2536 info
->width
= u_minify(info
->width
, level
);
2537 info
->height
= u_minify(info
->height
, level
);
2538 if (info
->surface_type
== BRW_SURFACE_3D
)
2539 info
->depth
= u_minify(info
->depth
, level
);
2543 /* no layered rendering */
2544 assert(num_layers
== 1);
2547 info
->first_layer
= 0;
2548 info
->num_layers
= 1;
2550 /* all three share the same X/Y offsets */
2552 if (info
->stencil
.bo
) {
2553 assert(x_offset
[0] == x_offset
[1]);
2554 assert(y_offset
[0] == y_offset
[1]);
2557 info
->x_offset
= x_offset
[0];
2558 info
->y_offset
= y_offset
[0];
2561 assert(info
->stencil
.bo
);
2563 info
->x_offset
= x_offset
[1];
2564 info
->y_offset
= y_offset
[1];
2568 assert(info
->x_offset
== x_offset
[2]);
2569 assert(info
->y_offset
== y_offset
[2]);
2573 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
2575 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
2576 * Coordinate Offset X) must be zero to ensure correct alignment"
2578 * XXX Skip the check for gen6, which seems to be fine. We need to make
2579 * sure that does not happen eventually.
2581 if (dev
->gen
>= ILO_GEN(7)) {
2582 assert((info
->x_offset
& 7) == 0 && (info
->y_offset
& 7) == 0);
2583 info
->x_offset
&= ~7;
2584 info
->y_offset
&= ~7;
2587 info
->width
+= info
->x_offset
;
2588 info
->height
+= info
->y_offset
;
2590 /* we have to treat them as 2D surfaces */
2591 if (info
->surface_type
== BRW_SURFACE_CUBE
) {
2592 assert(tex
->base
.width0
== tex
->base
.height0
);
2593 /* we will set slice_offset to point to the single face */
2594 info
->surface_type
= BRW_SURFACE_2D
;
2596 else if (info
->surface_type
== BRW_SURFACE_1D
&& info
->height
> 1) {
2597 assert(tex
->base
.height0
== 1);
2598 info
->surface_type
= BRW_SURFACE_2D
;
2604 ilo_gpe_init_zs_surface(const struct ilo_dev_info
*dev
,
2605 const struct ilo_texture
*tex
,
2606 enum pipe_format format
,
2608 unsigned first_layer
, unsigned num_layers
,
2609 struct ilo_zs_surface
*zs
)
2611 const int max_2d_size
= (dev
->gen
>= ILO_GEN(7)) ? 16384 : 8192;
2612 const int max_array_size
= (dev
->gen
>= ILO_GEN(7)) ? 2048 : 512;
2613 struct ilo_zs_surface_info info
;
2614 uint32_t dw1
, dw2
, dw3
, dw4
, dw5
, dw6
;
2616 ILO_GPE_VALID_GEN(dev
, 6, 7);
2619 zs_init_info(dev
, tex
, format
, level
, first_layer
, num_layers
, &info
);
2621 zs_init_info_null(dev
, &info
);
2623 switch (info
.surface_type
) {
2624 case BRW_SURFACE_NULL
:
2626 case BRW_SURFACE_1D
:
2627 assert(info
.width
<= max_2d_size
&& info
.height
== 1 &&
2628 info
.depth
<= max_array_size
);
2629 assert(info
.first_layer
< max_array_size
- 1 &&
2630 info
.num_layers
<= max_array_size
);
2632 case BRW_SURFACE_2D
:
2633 assert(info
.width
<= max_2d_size
&& info
.height
<= max_2d_size
&&
2634 info
.depth
<= max_array_size
);
2635 assert(info
.first_layer
< max_array_size
- 1 &&
2636 info
.num_layers
<= max_array_size
);
2638 case BRW_SURFACE_3D
:
2639 assert(info
.width
<= 2048 && info
.height
<= 2048 && info
.depth
<= 2048);
2640 assert(info
.first_layer
< 2048 && info
.num_layers
<= max_array_size
);
2641 assert(info
.x_offset
== 0 && info
.y_offset
== 0);
2643 case BRW_SURFACE_CUBE
:
2644 assert(info
.width
<= max_2d_size
&& info
.height
<= max_2d_size
&&
2646 assert(info
.first_layer
== 0 && info
.num_layers
== 1);
2647 assert(info
.width
== info
.height
);
2648 assert(info
.x_offset
== 0 && info
.y_offset
== 0);
2651 assert(!"unexpected depth surface type");
2655 dw1
= info
.surface_type
<< 29 |
2659 /* required for GEN6+ */
2660 assert(info
.zs
.tiling
== INTEL_TILING_Y
);
2661 assert(info
.zs
.stride
> 0 && info
.zs
.stride
< 128 * 1024 &&
2662 info
.zs
.stride
% 128 == 0);
2663 assert(info
.width
<= info
.zs
.stride
);
2665 dw1
|= (info
.zs
.stride
- 1);
2666 dw2
= info
.zs
.offset
;
2672 if (dev
->gen
>= ILO_GEN(7)) {
2676 if (info
.stencil
.bo
)
2682 dw3
= (info
.height
- 1) << 18 |
2683 (info
.width
- 1) << 4 |
2686 dw4
= (info
.depth
- 1) << 21 |
2687 info
.first_layer
<< 10;
2689 dw5
= info
.y_offset
<< 16 | info
.x_offset
;
2691 dw6
= (info
.num_layers
- 1) << 21;
2694 /* always Y-tiled */
2703 dw3
= (info
.height
- 1) << 19 |
2704 (info
.width
- 1) << 6 |
2706 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< 1;
2708 dw4
= (info
.depth
- 1) << 21 |
2709 info
.first_layer
<< 10 |
2710 (info
.num_layers
- 1) << 1;
2712 dw5
= info
.y_offset
<< 16 | info
.x_offset
;
2717 STATIC_ASSERT(Elements(zs
->payload
) >= 10);
2719 zs
->payload
[0] = dw1
;
2720 zs
->payload
[1] = dw2
;
2721 zs
->payload
[2] = dw3
;
2722 zs
->payload
[3] = dw4
;
2723 zs
->payload
[4] = dw5
;
2724 zs
->payload
[5] = dw6
;
2726 /* do not increment reference count */
2727 zs
->bo
= info
.zs
.bo
;
2729 /* separate stencil */
2730 if (info
.stencil
.bo
) {
2731 assert(info
.stencil
.stride
> 0 && info
.stencil
.stride
< 128 * 1024 &&
2732 info
.stencil
.stride
% 128 == 0);
2734 zs
->payload
[6] = info
.stencil
.stride
- 1;
2735 zs
->payload
[7] = info
.stencil
.offset
;
2737 /* do not increment reference count */
2738 zs
->separate_s8_bo
= info
.stencil
.bo
;
2743 zs
->separate_s8_bo
= NULL
;
2748 zs
->payload
[8] = info
.hiz
.stride
- 1;
2749 zs
->payload
[9] = info
.hiz
.offset
;
2751 /* do not increment reference count */
2752 zs
->hiz_bo
= info
.hiz
.bo
;
2762 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
2763 const struct ilo_zs_surface
*zs
,
2766 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
2767 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
2768 const uint8_t cmd_len
= 7;
2770 ILO_GPE_VALID_GEN(dev
, 6, 7);
2772 ilo_cp_begin(cp
, cmd_len
);
2773 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2774 ilo_cp_write(cp
, zs
->payload
[0]);
2775 ilo_cp_write_bo(cp
, zs
->payload
[1], zs
->bo
,
2776 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
2777 ilo_cp_write(cp
, zs
->payload
[2]);
2778 ilo_cp_write(cp
, zs
->payload
[3]);
2779 ilo_cp_write(cp
, zs
->payload
[4]);
2780 ilo_cp_write(cp
, zs
->payload
[5]);
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET.
 *
 * Both offsets are asserted to lie in [0, 31].  They share the single
 * payload dword: x_offset starts at bit 8 and y_offset at bit 0.
 */
static void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x06) | (cmd_len - 2);
   int packed;

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   packed = (x_offset << 8) | y_offset;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, packed);
   ilo_cp_end(cp);
}
2803 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info
*dev
,
2804 const struct pipe_poly_stipple
*pattern
,
2807 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x07);
2808 const uint8_t cmd_len
= 33;
2811 ILO_GPE_VALID_GEN(dev
, 6, 7);
2812 assert(Elements(pattern
->stipple
) == 32);
2814 ilo_cp_begin(cp
, cmd_len
);
2815 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2816 for (i
= 0; i
< 32; i
++)
2817 ilo_cp_write(cp
, pattern
->stipple
[i
]);
2822 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info
*dev
,
2823 unsigned pattern
, unsigned factor
,
2826 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x08);
2827 const uint8_t cmd_len
= 3;
2830 ILO_GPE_VALID_GEN(dev
, 6, 7);
2831 assert((pattern
& 0xffff) == pattern
);
2832 assert(factor
>= 1 && factor
<= 256);
2834 ilo_cp_begin(cp
, cmd_len
);
2835 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2836 ilo_cp_write(cp
, pattern
);
2838 if (dev
->gen
>= ILO_GEN(7)) {
2840 inverse
= (unsigned) (65536.0f
/ factor
);
2841 ilo_cp_write(cp
, inverse
<< 15 | factor
);
2845 inverse
= (unsigned) (8192.0f
/ factor
);
2846 ilo_cp_write(cp
, inverse
<< 16 | factor
);
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS with both payload dwords set to zero.
 */
static void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x0a) | (cmd_len - 2);
   int remaining;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* every field in the two payload dwords is left at zero */
   for (remaining = cmd_len - 1; remaining > 0; remaining--)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
2869 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info
*dev
,
2870 int index
, unsigned svbi
,
2872 bool load_vertex_count
,
2875 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0b);
2876 const uint8_t cmd_len
= 4;
2879 ILO_GPE_VALID_GEN(dev
, 6, 6);
2880 assert(index
>= 0 && index
< 4);
2882 dw1
= index
<< SVB_INDEX_SHIFT
;
2883 if (load_vertex_count
)
2884 dw1
|= SVB_LOAD_INTERNAL_VERTEX_COUNT
;
2886 ilo_cp_begin(cp
, cmd_len
);
2887 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2888 ilo_cp_write(cp
, dw1
);
2889 ilo_cp_write(cp
, svbi
);
2890 ilo_cp_write(cp
, max_svbi
);
2895 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info
*dev
,
2897 const uint32_t *packed_sample_pos
,
2898 bool pixel_location_center
,
2901 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0d);
2902 const uint8_t cmd_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 3;
2903 uint32_t dw1
, dw2
, dw3
;
2905 ILO_GPE_VALID_GEN(dev
, 6, 7);
2907 dw1
= (pixel_location_center
) ?
2908 MS_PIXEL_LOCATION_CENTER
: MS_PIXEL_LOCATION_UPPER_LEFT
;
2910 switch (num_samples
) {
2913 dw1
|= MS_NUMSAMPLES_1
;
2918 dw1
|= MS_NUMSAMPLES_4
;
2919 dw2
= packed_sample_pos
[0];
2923 assert(dev
->gen
>= ILO_GEN(7));
2924 dw1
|= MS_NUMSAMPLES_8
;
2925 dw2
= packed_sample_pos
[0];
2926 dw3
= packed_sample_pos
[1];
2929 assert(!"unsupported sample count");
2930 dw1
|= MS_NUMSAMPLES_1
;
2936 ilo_cp_begin(cp
, cmd_len
);
2937 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2938 ilo_cp_write(cp
, dw1
);
2939 ilo_cp_write(cp
, dw2
);
2940 if (dev
->gen
>= ILO_GEN(7))
2941 ilo_cp_write(cp
, dw3
);
2946 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info
*dev
,
2947 const struct ilo_zs_surface
*zs
,
2950 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
2951 ILO_GPE_CMD(0x3, 0x0, 0x06) :
2952 ILO_GPE_CMD(0x3, 0x1, 0x0e);
2953 const uint8_t cmd_len
= 3;
2955 ILO_GPE_VALID_GEN(dev
, 6, 7);
2957 ilo_cp_begin(cp
, cmd_len
);
2958 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2959 /* see ilo_gpe_init_zs_surface() */
2960 ilo_cp_write(cp
, zs
->payload
[6]);
2961 ilo_cp_write_bo(cp
, zs
->payload
[7], zs
->separate_s8_bo
,
2962 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
2967 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
2968 const struct ilo_zs_surface
*zs
,
2971 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
2972 ILO_GPE_CMD(0x3, 0x0, 0x07) :
2973 ILO_GPE_CMD(0x3, 0x1, 0x0f);
2974 const uint8_t cmd_len
= 3;
2976 ILO_GPE_VALID_GEN(dev
, 6, 7);
2978 ilo_cp_begin(cp
, cmd_len
);
2979 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
2980 /* see ilo_gpe_init_zs_surface() */
2981 ilo_cp_write(cp
, zs
->payload
[8]);
2982 ilo_cp_write_bo(cp
, zs
->payload
[9], zs
->hiz_bo
,
2983 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
2988 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
2992 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x10);
2993 const uint8_t cmd_len
= 2;
2995 ILO_GPE_VALID_GEN(dev
, 6, 6);
2997 ilo_cp_begin(cp
, cmd_len
);
2998 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
2999 GEN5_DEPTH_CLEAR_VALID
);
3000 ilo_cp_write(cp
, clear_val
);
3005 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info
*dev
,
3007 struct intel_bo
*bo
, uint32_t bo_offset
,
3011 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x2, 0x00);
3012 const uint8_t cmd_len
= (write_qword
) ? 5 : 4;
3013 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
3014 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
3016 ILO_GPE_VALID_GEN(dev
, 6, 7);
3018 if (dw1
& PIPE_CONTROL_CS_STALL
) {
3020 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
3022 * "1 of the following must also be set (when CS stall is set):
3024 * * Depth Cache Flush Enable ([0] of DW1)
3025 * * Stall at Pixel Scoreboard ([1] of DW1)
3026 * * Depth Stall ([13] of DW1)
3027 * * Post-Sync Operation ([13] of DW1)
3028 * * Render Target Cache Flush Enable ([12] of DW1)
3029 * * Notify Enable ([8] of DW1)"
3031 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
3033 * "One of the following must also be set (when CS stall is set):
3035 * * Render Target Cache Flush Enable ([12] of DW1)
3036 * * Depth Cache Flush Enable ([0] of DW1)
3037 * * Stall at Pixel Scoreboard ([1] of DW1)
3038 * * Depth Stall ([13] of DW1)
3039 * * Post-Sync Operation ([13] of DW1)"
3041 uint32_t bit_test
= PIPE_CONTROL_WRITE_FLUSH
|
3042 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
3043 PIPE_CONTROL_STALL_AT_SCOREBOARD
|
3044 PIPE_CONTROL_DEPTH_STALL
;
3047 bit_test
|= PIPE_CONTROL_WRITE_IMMEDIATE
|
3048 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
3049 PIPE_CONTROL_WRITE_TIMESTAMP
;
3051 if (dev
->gen
== ILO_GEN(6))
3052 bit_test
|= PIPE_CONTROL_INTERRUPT_ENABLE
;
3054 assert(dw1
& bit_test
);
3057 if (dw1
& PIPE_CONTROL_DEPTH_STALL
) {
3059 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
3061 * "Following bits must be clear (when Depth Stall is set):
3063 * * Render Target Cache Flush Enable ([12] of DW1)
3064 * * Depth Cache Flush Enable ([0] of DW1)"
3066 assert(!(dw1
& (PIPE_CONTROL_WRITE_FLUSH
|
3067 PIPE_CONTROL_DEPTH_CACHE_FLUSH
)));
3070 ilo_cp_begin(cp
, cmd_len
);
3071 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
3072 ilo_cp_write(cp
, dw1
);
3073 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
3074 ilo_cp_write(cp
, 0);
3076 ilo_cp_write(cp
, 0);
3081 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
3082 const struct pipe_draw_info
*info
,
3083 const struct ilo_ib_state
*ib
,
3087 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x3, 0x00);
3088 const uint8_t cmd_len
= 6;
3089 const int prim
= (rectlist
) ?
3090 _3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
3091 const int vb_access
= (info
->indexed
) ?
3092 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM
:
3093 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL
;
3094 const uint32_t vb_start
= info
->start
+
3095 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
3097 ILO_GPE_VALID_GEN(dev
, 6, 6);
3099 ilo_cp_begin(cp
, cmd_len
);
3100 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
3101 prim
<< GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT
|
3103 ilo_cp_write(cp
, info
->count
);
3104 ilo_cp_write(cp
, vb_start
);
3105 ilo_cp_write(cp
, info
->instance_count
);
3106 ilo_cp_write(cp
, info
->start_instance
);
3107 ilo_cp_write(cp
, info
->index_bias
);
3112 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info
*dev
,
3113 const struct ilo_shader_state
**cs
,
3114 uint32_t *sampler_state
,
3116 uint32_t *binding_table_state
,
3122 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
3124 * "(Interface Descriptor Total Length) This field must have the same
3125 * alignment as the Interface Descriptor Data Start Address.
3127 * It must be DQWord (32-byte) aligned..."
3129 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
3131 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
3132 * aligned address of the Interface Descriptor data."
3134 const int state_align
= 32 / 4;
3135 const int state_len
= (32 / 4) * num_ids
;
3136 uint32_t state_offset
, *dw
;
3139 ILO_GPE_VALID_GEN(dev
, 6, 6);
3141 dw
= ilo_cp_steal_ptr(cp
, "INTERFACE_DESCRIPTOR_DATA",
3142 state_len
, state_align
, &state_offset
);
3144 for (i
= 0; i
< num_ids
; i
++) {
3145 dw
[0] = ilo_shader_get_kernel_offset(cs
[i
]);
3146 dw
[1] = 1 << 18; /* SPF */
3147 dw
[2] = sampler_state
[i
] |
3148 (num_samplers
[i
] + 3) / 4 << 2;
3149 dw
[3] = binding_table_state
[i
] |
3151 dw
[4] = 0 << 16 | /* CURBE Read Length */
3152 0; /* CURBE Read Offset */
3153 dw
[5] = 0; /* Barrier ID */
3160 return state_offset
;
3164 viewport_get_guardband(const struct ilo_dev_info
*dev
,
3165 int center_x
, int center_y
,
3166 int *min_gbx
, int *max_gbx
,
3167 int *min_gby
, int *max_gby
)
3170 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
3172 * "Per-Device Guardband Extents
3174 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
3175 * - Maximum Post-Clamp Delta (X or Y): 16K"
3177 * "In addition, in order to be correctly rendered, objects must have a
3178 * screenspace bounding box not exceeding 8K in the X or Y direction.
3179 * This additional restriction must also be comprehended by software,
3180 * i.e., enforced by use of clipping."
3182 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
3184 * "Per-Device Guardband Extents
3186 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
3187 * - Maximum Post-Clamp Delta (X or Y): N/A"
3189 * "In addition, in order to be correctly rendered, objects must have a
3190 * screenspace bounding box not exceeding 8K in the X or Y direction.
3191 * This additional restriction must also be comprehended by software,
3192 * i.e., enforced by use of clipping."
3194 * Combined, the bounding box of any object can not exceed 8K in both
3197 * Below we set the guardband as a squre of length 8K, centered at where
3198 * the viewport is. This makes sure all objects passing the GB test are
3199 * valid to the renderer, and those failing the XY clipping have a
3200 * better chance of passing the GB test.
3202 const int max_extent
= (dev
->gen
>= ILO_GEN(7)) ? 32768 : 16384;
3203 const int half_len
= 8192 / 2;
3205 /* make sure the guardband is within the valid range */
3206 if (center_x
- half_len
< -max_extent
)
3207 center_x
= -max_extent
+ half_len
;
3208 else if (center_x
+ half_len
> max_extent
- 1)
3209 center_x
= max_extent
- half_len
;
3211 if (center_y
- half_len
< -max_extent
)
3212 center_y
= -max_extent
+ half_len
;
3213 else if (center_y
+ half_len
> max_extent
- 1)
3214 center_y
= max_extent
- half_len
;
3216 *min_gbx
= (float) (center_x
- half_len
);
3217 *max_gbx
= (float) (center_x
+ half_len
);
3218 *min_gby
= (float) (center_y
- half_len
);
3219 *max_gby
= (float) (center_y
+ half_len
);
3223 ilo_gpe_set_viewport_cso(const struct ilo_dev_info
*dev
,
3224 const struct pipe_viewport_state
*state
,
3225 struct ilo_viewport_cso
*vp
)
3227 const float scale_x
= fabs(state
->scale
[0]);
3228 const float scale_y
= fabs(state
->scale
[1]);
3229 const float scale_z
= fabs(state
->scale
[2]);
3230 int min_gbx
, max_gbx
, min_gby
, max_gby
;
3232 ILO_GPE_VALID_GEN(dev
, 6, 7);
3234 viewport_get_guardband(dev
,
3235 (int) state
->translate
[0],
3236 (int) state
->translate
[1],
3237 &min_gbx
, &max_gbx
, &min_gby
, &max_gby
);
3240 vp
->m00
= state
->scale
[0];
3241 vp
->m11
= state
->scale
[1];
3242 vp
->m22
= state
->scale
[2];
3243 vp
->m30
= state
->translate
[0];
3244 vp
->m31
= state
->translate
[1];
3245 vp
->m32
= state
->translate
[2];
3247 /* guardband in NDC space */
3248 vp
->min_gbx
= ((float) min_gbx
- state
->translate
[0]) / scale_x
;
3249 vp
->max_gbx
= ((float) max_gbx
- state
->translate
[0]) / scale_x
;
3250 vp
->min_gby
= ((float) min_gby
- state
->translate
[1]) / scale_y
;
3251 vp
->max_gby
= ((float) max_gby
- state
->translate
[1]) / scale_y
;
3253 /* viewport in screen space */
3254 vp
->min_x
= scale_x
* -1.0f
+ state
->translate
[0];
3255 vp
->max_x
= scale_x
* 1.0f
+ state
->translate
[0];
3256 vp
->min_y
= scale_y
* -1.0f
+ state
->translate
[1];
3257 vp
->max_y
= scale_y
* 1.0f
+ state
->translate
[1];
3258 vp
->min_z
= scale_z
* -1.0f
+ state
->translate
[2];
3259 vp
->max_z
= scale_z
* 1.0f
+ state
->translate
[2];
3263 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info
*dev
,
3264 const struct ilo_viewport_cso
*viewports
,
3265 unsigned num_viewports
,
3268 const int state_align
= 32 / 4;
3269 const int state_len
= 8 * num_viewports
;
3270 uint32_t state_offset
, *dw
;
3273 ILO_GPE_VALID_GEN(dev
, 6, 6);
3276 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
3278 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
3279 * stored as an array of up to 16 elements..."
3281 assert(num_viewports
&& num_viewports
<= 16);
3283 dw
= ilo_cp_steal_ptr(cp
, "SF_VIEWPORT",
3284 state_len
, state_align
, &state_offset
);
3286 for (i
= 0; i
< num_viewports
; i
++) {
3287 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
3289 dw
[0] = fui(vp
->m00
);
3290 dw
[1] = fui(vp
->m11
);
3291 dw
[2] = fui(vp
->m22
);
3292 dw
[3] = fui(vp
->m30
);
3293 dw
[4] = fui(vp
->m31
);
3294 dw
[5] = fui(vp
->m32
);
3301 return state_offset
;
3305 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
3306 const struct ilo_viewport_cso
*viewports
,
3307 unsigned num_viewports
,
3310 const int state_align
= 32 / 4;
3311 const int state_len
= 4 * num_viewports
;
3312 uint32_t state_offset
, *dw
;
3315 ILO_GPE_VALID_GEN(dev
, 6, 6);
3318 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
3320 * "The viewport-related state is stored as an array of up to 16
3323 assert(num_viewports
&& num_viewports
<= 16);
3325 dw
= ilo_cp_steal_ptr(cp
, "CLIP_VIEWPORT",
3326 state_len
, state_align
, &state_offset
);
3328 for (i
= 0; i
< num_viewports
; i
++) {
3329 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
3331 dw
[0] = fui(vp
->min_gbx
);
3332 dw
[1] = fui(vp
->max_gbx
);
3333 dw
[2] = fui(vp
->min_gby
);
3334 dw
[3] = fui(vp
->max_gby
);
3339 return state_offset
;
3343 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info
*dev
,
3344 const struct ilo_viewport_cso
*viewports
,
3345 unsigned num_viewports
,
3348 const int state_align
= 32 / 4;
3349 const int state_len
= 2 * num_viewports
;
3350 uint32_t state_offset
, *dw
;
3353 ILO_GPE_VALID_GEN(dev
, 6, 7);
3356 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
3358 * "The viewport state is stored as an array of up to 16 elements..."
3360 assert(num_viewports
&& num_viewports
<= 16);
3362 dw
= ilo_cp_steal_ptr(cp
, "CC_VIEWPORT",
3363 state_len
, state_align
, &state_offset
);
3365 for (i
= 0; i
< num_viewports
; i
++) {
3366 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
3368 dw
[0] = fui(vp
->min_z
);
3369 dw
[1] = fui(vp
->max_z
);
3374 return state_offset
;
3378 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info
*dev
,
3379 const struct pipe_stencil_ref
*stencil_ref
,
3381 const struct pipe_blend_color
*blend_color
,
3384 const int state_align
= 64 / 4;
3385 const int state_len
= 6;
3386 uint32_t state_offset
, *dw
;
3388 ILO_GPE_VALID_GEN(dev
, 6, 7);
3390 dw
= ilo_cp_steal_ptr(cp
, "COLOR_CALC_STATE",
3391 state_len
, state_align
, &state_offset
);
3393 dw
[0] = stencil_ref
->ref_value
[0] << 24 |
3394 stencil_ref
->ref_value
[1] << 16 |
3395 BRW_ALPHATEST_FORMAT_UNORM8
;
3396 dw
[1] = float_to_ubyte(alpha_ref
);
3397 dw
[2] = fui(blend_color
->color
[0]);
3398 dw
[3] = fui(blend_color
->color
[1]);
3399 dw
[4] = fui(blend_color
->color
[2]);
3400 dw
[5] = fui(blend_color
->color
[3]);
3402 return state_offset
;
3406 gen6_blend_factor_dst_alpha_forced_one(int factor
)
3409 case BRW_BLENDFACTOR_DST_ALPHA
:
3410 return BRW_BLENDFACTOR_ONE
;
3411 case BRW_BLENDFACTOR_INV_DST_ALPHA
:
3412 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE
:
3413 return BRW_BLENDFACTOR_ZERO
;
3420 blend_get_rt_blend_enable(const struct ilo_dev_info
*dev
,
3421 const struct pipe_rt_blend_state
*rt
,
3422 bool dst_alpha_forced_one
)
3424 int rgb_src
, rgb_dst
, a_src
, a_dst
;
3427 if (!rt
->blend_enable
)
3430 rgb_src
= gen6_translate_pipe_blendfactor(rt
->rgb_src_factor
);
3431 rgb_dst
= gen6_translate_pipe_blendfactor(rt
->rgb_dst_factor
);
3432 a_src
= gen6_translate_pipe_blendfactor(rt
->alpha_src_factor
);
3433 a_dst
= gen6_translate_pipe_blendfactor(rt
->alpha_dst_factor
);
3435 if (dst_alpha_forced_one
) {
3436 rgb_src
= gen6_blend_factor_dst_alpha_forced_one(rgb_src
);
3437 rgb_dst
= gen6_blend_factor_dst_alpha_forced_one(rgb_dst
);
3438 a_src
= gen6_blend_factor_dst_alpha_forced_one(a_src
);
3439 a_dst
= gen6_blend_factor_dst_alpha_forced_one(a_dst
);
3443 gen6_translate_pipe_blend(rt
->alpha_func
) << 26 |
3446 gen6_translate_pipe_blend(rt
->rgb_func
) << 11 |
3450 if (rt
->rgb_func
!= rt
->alpha_func
||
3451 rgb_src
!= a_src
|| rgb_dst
!= a_dst
)
3458 ilo_gpe_init_blend(const struct ilo_dev_info
*dev
,
3459 const struct pipe_blend_state
*state
,
3460 struct ilo_blend_state
*blend
)
3462 unsigned num_cso
, i
;
3464 ILO_GPE_VALID_GEN(dev
, 6, 7);
3466 if (state
->independent_blend_enable
) {
3467 num_cso
= Elements(blend
->cso
);
3470 memset(blend
->cso
, 0, sizeof(blend
->cso
));
3474 blend
->independent_blend_enable
= state
->independent_blend_enable
;
3475 blend
->alpha_to_coverage
= state
->alpha_to_coverage
;
3476 blend
->dual_blend
= false;
3478 for (i
= 0; i
< num_cso
; i
++) {
3479 const struct pipe_rt_blend_state
*rt
= &state
->rt
[i
];
3480 struct ilo_blend_cso
*cso
= &blend
->cso
[i
];
3483 cso
->payload
[0] = 0;
3484 cso
->payload
[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT
<< 2 |
3487 if (!(rt
->colormask
& PIPE_MASK_A
))
3488 cso
->payload
[1] |= 1 << 27;
3489 if (!(rt
->colormask
& PIPE_MASK_R
))
3490 cso
->payload
[1] |= 1 << 26;
3491 if (!(rt
->colormask
& PIPE_MASK_G
))
3492 cso
->payload
[1] |= 1 << 25;
3493 if (!(rt
->colormask
& PIPE_MASK_B
))
3494 cso
->payload
[1] |= 1 << 24;
3497 cso
->payload
[1] |= 1 << 12;
3500 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3502 * "Color Buffer Blending and Logic Ops must not be enabled
3503 * simultaneously, or behavior is UNDEFINED."
3505 * Since state->logicop_enable takes precedence over rt->blend_enable,
3506 * no special care is needed.
3508 if (state
->logicop_enable
) {
3509 cso
->dw_logicop
= 1 << 22 |
3510 gen6_translate_pipe_logicop(state
->logicop_func
) << 18;
3513 cso
->dw_blend_dst_alpha_forced_one
= 0;
3518 cso
->dw_logicop
= 0;
3520 cso
->dw_blend
= blend_get_rt_blend_enable(dev
, rt
, false);
3521 cso
->dw_blend_dst_alpha_forced_one
=
3522 blend_get_rt_blend_enable(dev
, rt
, true);
3524 dual_blend
= (rt
->blend_enable
&&
3525 util_blend_state_is_dual(state
, i
));
3528 cso
->dw_alpha_mod
= 0;
3530 if (state
->alpha_to_coverage
) {
3531 cso
->dw_alpha_mod
|= 1 << 31;
3533 if (dev
->gen
>= ILO_GEN(7))
3534 cso
->dw_alpha_mod
|= 1 << 29;
3538 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
3540 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
3541 * must be disabled."
3543 if (state
->alpha_to_one
&& !dual_blend
)
3544 cso
->dw_alpha_mod
|= 1 << 30;
3547 blend
->dual_blend
= true;
3552 gen6_emit_BLEND_STATE(const struct ilo_dev_info
*dev
,
3553 const struct ilo_blend_state
*blend
,
3554 const struct ilo_fb_state
*fb
,
3555 const struct pipe_alpha_state
*alpha
,
3558 const int state_align
= 64 / 4;
3560 uint32_t state_offset
, *dw
;
3561 unsigned num_targets
, i
;
3563 ILO_GPE_VALID_GEN(dev
, 6, 7);
3566 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
3568 * "The blend state is stored as an array of up to 8 elements..."
3570 num_targets
= fb
->state
.nr_cbufs
;
3571 assert(num_targets
<= 8);
3574 if (!alpha
->enabled
)
3576 /* to be able to reference alpha func */
3580 state_len
= 2 * num_targets
;
3582 dw
= ilo_cp_steal_ptr(cp
, "BLEND_STATE",
3583 state_len
, state_align
, &state_offset
);
3585 for (i
= 0; i
< num_targets
; i
++) {
3586 const unsigned idx
= (blend
->independent_blend_enable
) ? i
: 0;
3587 const struct ilo_blend_cso
*cso
= &blend
->cso
[idx
];
3588 const int num_samples
= fb
->num_samples
;
3589 const struct util_format_description
*format_desc
=
3590 (idx
< fb
->state
.nr_cbufs
) ?
3591 util_format_description(fb
->state
.cbufs
[idx
]->format
) : NULL
;
3592 bool rt_is_unorm
, rt_is_pure_integer
, rt_dst_alpha_forced_one
;
3595 rt_is_pure_integer
= false;
3596 rt_dst_alpha_forced_one
= false;
3601 switch (format_desc
->format
) {
3602 case PIPE_FORMAT_B8G8R8X8_UNORM
:
3603 /* force alpha to one when the HW format has alpha */
3604 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM
)
3605 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM
);
3606 rt_dst_alpha_forced_one
= true;
3612 for (ch
= 0; ch
< 4; ch
++) {
3613 if (format_desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_VOID
)
3616 if (format_desc
->channel
[ch
].pure_integer
) {
3617 rt_is_unorm
= false;
3618 rt_is_pure_integer
= true;
3622 if (!format_desc
->channel
[ch
].normalized
||
3623 format_desc
->channel
[ch
].type
!= UTIL_FORMAT_TYPE_UNSIGNED
)
3624 rt_is_unorm
= false;
3628 dw
[0] = cso
->payload
[0];
3629 dw
[1] = cso
->payload
[1];
3631 if (!rt_is_pure_integer
) {
3632 if (rt_dst_alpha_forced_one
)
3633 dw
[0] |= cso
->dw_blend_dst_alpha_forced_one
;
3635 dw
[0] |= cso
->dw_blend
;
3639 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
3641 * "Logic Ops are only supported on *_UNORM surfaces (excluding
3642 * _SRGB variants), otherwise Logic Ops must be DISABLED."
3644 * Since logicop is ignored for non-UNORM color buffers, no special care
3648 dw
[1] |= cso
->dw_logicop
;
3651 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
3653 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
3654 * Dither both must be disabled."
3656 * There is no such limitation on GEN7, or for AlphaToOne. But GL
3657 * requires that anyway.
3659 if (num_samples
> 1)
3660 dw
[1] |= cso
->dw_alpha_mod
;
3663 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
3665 * "Alpha Test can only be enabled if Pixel Shader outputs a float
3668 if (alpha
->enabled
&& !rt_is_pure_integer
) {
3670 gen6_translate_dsa_func(alpha
->func
) << 13;
3676 return state_offset
;
3680 ilo_gpe_init_dsa(const struct ilo_dev_info
*dev
,
3681 const struct pipe_depth_stencil_alpha_state
*state
,
3682 struct ilo_dsa_state
*dsa
)
3684 const struct pipe_depth_state
*depth
= &state
->depth
;
3685 const struct pipe_stencil_state
*stencil0
= &state
->stencil
[0];
3686 const struct pipe_stencil_state
*stencil1
= &state
->stencil
[1];
3689 ILO_GPE_VALID_GEN(dev
, 6, 7);
3691 /* copy alpha state for later use */
3692 dsa
->alpha
= state
->alpha
;
3694 STATIC_ASSERT(Elements(dsa
->payload
) >= 3);
3698 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
3700 * "If the Depth Buffer is either undefined or does not have a surface
3701 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
3702 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
3704 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
3706 * "This field (Stencil Test Enable) cannot be enabled if
3707 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
3709 * TODO We do not check these yet.
3711 if (stencil0
->enabled
) {
3713 gen6_translate_dsa_func(stencil0
->func
) << 28 |
3714 gen6_translate_pipe_stencil_op(stencil0
->fail_op
) << 25 |
3715 gen6_translate_pipe_stencil_op(stencil0
->zfail_op
) << 22 |
3716 gen6_translate_pipe_stencil_op(stencil0
->zpass_op
) << 19;
3717 if (stencil0
->writemask
)
3720 dw
[1] = stencil0
->valuemask
<< 24 |
3721 stencil0
->writemask
<< 16;
3723 if (stencil1
->enabled
) {
3725 gen6_translate_dsa_func(stencil1
->func
) << 12 |
3726 gen6_translate_pipe_stencil_op(stencil1
->fail_op
) << 9 |
3727 gen6_translate_pipe_stencil_op(stencil1
->zfail_op
) << 6 |
3728 gen6_translate_pipe_stencil_op(stencil1
->zpass_op
) << 3;
3729 if (stencil1
->writemask
)
3732 dw
[1] |= stencil1
->valuemask
<< 8 |
3733 stencil1
->writemask
;
3742 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
3744 * "Enabling the Depth Test function without defining a Depth Buffer is
3747 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
3749 * "A Depth Buffer must be defined before enabling writes to it, or
3750 * operation is UNDEFINED."
3752 * TODO We do not check these yet.
3754 dw
[2] = depth
->enabled
<< 31 |
3755 depth
->writemask
<< 26;
3757 dw
[2] |= gen6_translate_dsa_func(depth
->func
) << 27;
3759 dw
[2] |= BRW_COMPAREFUNCTION_ALWAYS
<< 27;
3763 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info
*dev
,
3764 const struct ilo_dsa_state
*dsa
,
3767 const int state_align
= 64 / 4;
3768 const int state_len
= 3;
3769 uint32_t state_offset
, *dw
;
3772 ILO_GPE_VALID_GEN(dev
, 6, 7);
3774 dw
= ilo_cp_steal_ptr(cp
, "DEPTH_STENCIL_STATE",
3775 state_len
, state_align
, &state_offset
);
3777 dw
[0] = dsa
->payload
[0];
3778 dw
[1] = dsa
->payload
[1];
3779 dw
[2] = dsa
->payload
[2];
3781 return state_offset
;
3785 ilo_gpe_set_scissor(const struct ilo_dev_info
*dev
,
3786 unsigned start_slot
,
3787 unsigned num_states
,
3788 const struct pipe_scissor_state
*states
,
3789 struct ilo_scissor_state
*scissor
)
3793 ILO_GPE_VALID_GEN(dev
, 6, 7);
3795 for (i
= 0; i
< num_states
; i
++) {
3796 uint16_t min_x
, min_y
, max_x
, max_y
;
3798 /* both max and min are inclusive in SCISSOR_RECT */
3799 if (states
[i
].minx
< states
[i
].maxx
&&
3800 states
[i
].miny
< states
[i
].maxy
) {
3801 min_x
= states
[i
].minx
;
3802 min_y
= states
[i
].miny
;
3803 max_x
= states
[i
].maxx
- 1;
3804 max_y
= states
[i
].maxy
- 1;
3807 /* we have to make min greater than max */
3814 scissor
->payload
[(start_slot
+ i
) * 2 + 0] = min_y
<< 16 | min_x
;
3815 scissor
->payload
[(start_slot
+ i
) * 2 + 1] = max_y
<< 16 | max_x
;
3818 if (!start_slot
&& num_states
)
3819 scissor
->scissor0
= states
[0];
3823 ilo_gpe_set_scissor_null(const struct ilo_dev_info
*dev
,
3824 struct ilo_scissor_state
*scissor
)
3828 for (i
= 0; i
< Elements(scissor
->payload
); i
+= 2) {
3829 scissor
->payload
[i
+ 0] = 1 << 16 | 1;
3830 scissor
->payload
[i
+ 1] = 0;
3835 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info
*dev
,
3836 const struct ilo_scissor_state
*scissor
,
3837 unsigned num_viewports
,
3840 const int state_align
= 32 / 4;
3841 const int state_len
= 2 * num_viewports
;
3842 uint32_t state_offset
, *dw
;
3844 ILO_GPE_VALID_GEN(dev
, 6, 7);
3847 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
3849 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
3850 * stored as an array of up to 16 elements..."
3852 assert(num_viewports
&& num_viewports
<= 16);
3854 dw
= ilo_cp_steal_ptr(cp
, "SCISSOR_RECT",
3855 state_len
, state_align
, &state_offset
);
3857 memcpy(dw
, scissor
->payload
, state_len
* 4);
3859 return state_offset
;
/**
 * Emit BINDING_TABLE_STATE.
 *
 * Copies \p num_surface_states entries of \p surface_states into space
 * reserved through ilo_cp_steal_ptr() and returns the offset reported by it.
 * Nothing is reserved and 0 is returned when there are no entries.
 */
static uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   /* one dword per entry; length and alignment are passed in dwords */
   const int alignment = 32 / 4;
   const int num_dwords = num_surface_states;
   uint32_t offset;
   uint32_t *table;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* an empty table needs no space */
   if (!num_surface_states)
      return 0;

   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         num_dwords, alignment, &offset);

   memcpy(table, surface_states,
         num_surface_states * sizeof(surface_states[0]));

   return offset;
}
3893 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info
*dev
,
3894 unsigned width
, unsigned height
,
3895 unsigned depth
, unsigned level
,
3896 struct ilo_view_surface
*surf
)
3900 ILO_GPE_VALID_GEN(dev
, 6, 6);
3903 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
3905 * "A null surface will be used in instances where an actual surface is
3906 * not bound. When a write message is generated to a null surface, no
3907 * actual surface is written to. When a read message (including any
3908 * sampling engine message) is generated to a null surface, the result
3909 * is all zeros. Note that a null surface type is allowed to be used
3910 * with all messages, even if it is not specificially indicated as
3911 * supported. All of the remaining fields in surface state are ignored
3912 * for null surfaces, with the following exceptions:
3914 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
3915 * depth buffer's corresponding state for all render target
3916 * surfaces, including null.
3917 * * Surface Format must be R8G8B8A8_UNORM."
3919 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
3921 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
3925 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
3928 dw
[0] = BRW_SURFACE_NULL
<< BRW_SURFACE_TYPE_SHIFT
|
3929 BRW_SURFACEFORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
;
3933 dw
[2] = (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
|
3934 (width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
3935 level
<< BRW_SURFACE_LOD_SHIFT
;
3937 dw
[3] = (depth
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
3947 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info
*dev
,
3948 const struct ilo_buffer
*buf
,
3949 unsigned offset
, unsigned size
,
3950 unsigned struct_size
,
3951 enum pipe_format elem_format
,
3952 bool is_rt
, bool render_cache_rw
,
3953 struct ilo_view_surface
*surf
)
3955 const int elem_size
= util_format_get_blocksize(elem_format
);
3956 int width
, height
, depth
, pitch
;
3957 int surface_format
, num_entries
;
3960 ILO_GPE_VALID_GEN(dev
, 6, 6);
3963 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
3964 * structure in a buffer.
3967 surface_format
= ilo_translate_color_format(elem_format
);
3969 num_entries
= size
/ struct_size
;
3970 /* see if there is enough space to fit another element */
3971 if (size
% struct_size
>= elem_size
)
3975 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
3977 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
3978 * Address) specifies the base address of first element of the
3979 * surface. The surface is interpreted as a simple array of that
3980 * single element type. The address must be naturally-aligned to the
3981 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
3982 * must be 16-byte aligned).
3984 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
3985 * the base address of the first element of the surface, computed in
3986 * software by adding the surface base address to the byte offset of
3987 * the element in the buffer."
3990 assert(offset
% elem_size
== 0);
3993 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
3995 * "For buffer surfaces, the number of entries in the buffer ranges
3998 assert(num_entries
>= 1 && num_entries
<= 1 << 27);
4001 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4003 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
4004 * indicates the size of the structure."
4006 pitch
= struct_size
;
4011 width
= (num_entries
& 0x0000007f);
4013 height
= (num_entries
& 0x000fff80) >> 7;
4015 depth
= (num_entries
& 0x07f00000) >> 20;
4017 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
4020 dw
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
4021 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
;
4022 if (render_cache_rw
)
4023 dw
[0] |= BRW_SURFACE_RC_READ_WRITE
;
4027 dw
[2] = height
<< BRW_SURFACE_HEIGHT_SHIFT
|
4028 width
<< BRW_SURFACE_WIDTH_SHIFT
;
4030 dw
[3] = depth
<< BRW_SURFACE_DEPTH_SHIFT
|
4031 pitch
<< BRW_SURFACE_PITCH_SHIFT
;
4036 /* do not increment reference count */
4041 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info
*dev
,
4042 const struct ilo_texture
*tex
,
4043 enum pipe_format format
,
4044 unsigned first_level
,
4045 unsigned num_levels
,
4046 unsigned first_layer
,
4047 unsigned num_layers
,
4048 bool is_rt
, bool render_cache_rw
,
4049 struct ilo_view_surface
*surf
)
4051 int surface_type
, surface_format
;
4052 int width
, height
, depth
, pitch
, lod
;
4053 unsigned layer_offset
, x_offset
, y_offset
;
4056 ILO_GPE_VALID_GEN(dev
, 6, 6);
4058 surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
4059 assert(surface_type
!= BRW_SURFACE_BUFFER
);
4061 if (format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
&& tex
->separate_s8
)
4062 format
= PIPE_FORMAT_Z32_FLOAT
;
4065 surface_format
= ilo_translate_render_format(format
);
4067 surface_format
= ilo_translate_texture_format(format
);
4068 assert(surface_format
>= 0);
4070 width
= tex
->base
.width0
;
4071 height
= tex
->base
.height0
;
4072 depth
= (tex
->base
.target
== PIPE_TEXTURE_3D
) ?
4073 tex
->base
.depth0
: num_layers
;
4074 pitch
= tex
->bo_stride
;
4076 if (surface_type
== BRW_SURFACE_CUBE
) {
4078 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4080 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
4081 * range of this field (Depth) is [0,84], indicating the number of
4082 * cube array elements (equal to the number of underlying 2D array
4083 * elements divided by 6). For other surfaces, this field must be
4086 * When is_rt is true, we treat the texture as a 2D one to avoid the
4090 surface_type
= BRW_SURFACE_2D
;
4093 assert(num_layers
% 6 == 0);
4094 depth
= num_layers
/ 6;
4098 /* sanity check the size */
4099 assert(width
>= 1 && height
>= 1 && depth
>= 1 && pitch
>= 1);
4100 switch (surface_type
) {
4101 case BRW_SURFACE_1D
:
4102 assert(width
<= 8192 && height
== 1 && depth
<= 512);
4103 assert(first_layer
< 512 && num_layers
<= 512);
4105 case BRW_SURFACE_2D
:
4106 assert(width
<= 8192 && height
<= 8192 && depth
<= 512);
4107 assert(first_layer
< 512 && num_layers
<= 512);
4109 case BRW_SURFACE_3D
:
4110 assert(width
<= 2048 && height
<= 2048 && depth
<= 2048);
4111 assert(first_layer
< 2048 && num_layers
<= 512);
4113 assert(first_layer
== 0);
4115 case BRW_SURFACE_CUBE
:
4116 assert(width
<= 8192 && height
<= 8192 && depth
<= 85);
4117 assert(width
== height
);
4118 assert(first_layer
< 512 && num_layers
<= 512);
4120 assert(first_layer
== 0);
4123 assert(!"unexpected surface type");
4127 /* non-full array spacing is supported only on GEN7+ */
4128 assert(tex
->array_spacing_full
);
4129 /* non-interleaved samples are supported only on GEN7+ */
4130 if (tex
->base
.nr_samples
> 1)
4131 assert(tex
->interleaved
);
4135 * Compute the offset to the layer manually.
4137 * For rendering, the hardware requires LOD to be the same for all
4138 * render targets and the depth buffer. We need to compute the offset
4139 * to the layer manually and always set LOD to 0.
4142 /* we lose the capability for layered rendering */
4143 assert(num_layers
== 1);
4145 layer_offset
= ilo_texture_get_slice_offset(tex
,
4146 first_level
, first_layer
, &x_offset
, &y_offset
);
4148 assert(x_offset
% 4 == 0);
4149 assert(y_offset
% 2 == 0);
4153 /* derive the size for the LOD */
4154 width
= u_minify(width
, first_level
);
4155 height
= u_minify(height
, first_level
);
4156 if (surface_type
== BRW_SURFACE_3D
)
4157 depth
= u_minify(depth
, first_level
);
4171 assert(num_levels
== 1);
4179 lod
= num_levels
- 1;
4183 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
4185 * "Linear render target surface base addresses must be element-size
4186 * aligned, for non-YUV surface formats, or a multiple of 2
4187 * element-sizes for YUV surface formats. Other linear surfaces have
4188 * no alignment requirements (byte alignment is sufficient.)"
4190 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
4192 * "For linear render target surfaces, the pitch must be a multiple
4193 * of the element size for non-YUV surface formats. Pitch must be a
4194 * multiple of 2 * element size for YUV surface formats."
4196 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
4198 * "For linear surfaces, this field (X Offset) must be zero"
4200 if (tex
->tiling
== INTEL_TILING_NONE
) {
4202 const int elem_size
= util_format_get_blocksize(format
);
4203 assert(layer_offset
% elem_size
== 0);
4204 assert(pitch
% elem_size
== 0);
4210 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
4213 dw
[0] = surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
4214 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
4215 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
;
4217 if (surface_type
== BRW_SURFACE_CUBE
&& !is_rt
) {
4219 BRW_SURFACE_CUBEFACE_ENABLES
;
4222 if (render_cache_rw
)
4223 dw
[0] |= BRW_SURFACE_RC_READ_WRITE
;
4225 dw
[1] = layer_offset
;
4227 dw
[2] = (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
|
4228 (width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
4229 lod
<< BRW_SURFACE_LOD_SHIFT
;
4231 dw
[3] = (depth
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
4232 (pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
|
4233 ilo_gpe_gen6_translate_winsys_tiling(tex
->tiling
);
4235 dw
[4] = first_level
<< BRW_SURFACE_MIN_LOD_SHIFT
|
4237 (num_layers
- 1) << 8 |
4238 ((tex
->base
.nr_samples
> 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4
:
4239 BRW_SURFACE_MULTISAMPLECOUNT_1
);
4241 dw
[5] = x_offset
<< BRW_SURFACE_X_OFFSET_SHIFT
|
4242 y_offset
<< BRW_SURFACE_Y_OFFSET_SHIFT
;
4244 dw
[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE
;
4246 /* do not increment reference count */
4251 gen6_emit_SURFACE_STATE(const struct ilo_dev_info
*dev
,
4252 const struct ilo_view_surface
*surf
,
4256 const int state_align
= 32 / 4;
4257 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 8 : 6;
4258 uint32_t state_offset
;
4259 uint32_t read_domains
, write_domain
;
4261 ILO_GPE_VALID_GEN(dev
, 6, 7);
4264 read_domains
= INTEL_DOMAIN_RENDER
;
4265 write_domain
= INTEL_DOMAIN_RENDER
;
4268 read_domains
= INTEL_DOMAIN_SAMPLER
;
4272 ilo_cp_steal(cp
, "SURFACE_STATE", state_len
, state_align
, &state_offset
);
4274 STATIC_ASSERT(Elements(surf
->payload
) >= 8);
4276 ilo_cp_write(cp
, surf
->payload
[0]);
4277 ilo_cp_write_bo(cp
, surf
->payload
[1],
4278 surf
->bo
, read_domains
, write_domain
);
4279 ilo_cp_write(cp
, surf
->payload
[2]);
4280 ilo_cp_write(cp
, surf
->payload
[3]);
4281 ilo_cp_write(cp
, surf
->payload
[4]);
4282 ilo_cp_write(cp
, surf
->payload
[5]);
4284 if (dev
->gen
>= ILO_GEN(7)) {
4285 ilo_cp_write(cp
, surf
->payload
[6]);
4286 ilo_cp_write(cp
, surf
->payload
[7]);
4291 return state_offset
;
4295 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info
*dev
,
4296 const struct pipe_stream_output_target
*so
,
4297 const struct pipe_stream_output_info
*so_info
,
4301 struct ilo_buffer
*buf
= ilo_buffer(so
->buffer
);
4302 unsigned bo_offset
, struct_size
;
4303 enum pipe_format elem_format
;
4304 struct ilo_view_surface surf
;
4306 ILO_GPE_VALID_GEN(dev
, 6, 6);
4308 bo_offset
= so
->buffer_offset
+ so_info
->output
[so_index
].dst_offset
* 4;
4309 struct_size
= so_info
->stride
[so_info
->output
[so_index
].output_buffer
] * 4;
4311 switch (so_info
->output
[so_index
].num_components
) {
4313 elem_format
= PIPE_FORMAT_R32_FLOAT
;
4316 elem_format
= PIPE_FORMAT_R32G32_FLOAT
;
4319 elem_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
4322 elem_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
4325 assert(!"unexpected SO components length");
4326 elem_format
= PIPE_FORMAT_R32_FLOAT
;
4330 ilo_gpe_init_view_surface_for_buffer_gen6(dev
, buf
, bo_offset
, so
->buffer_size
,
4331 struct_size
, elem_format
, false, true, &surf
);
4333 return gen6_emit_SURFACE_STATE(dev
, &surf
, false, cp
);
4337 sampler_init_border_color_gen6(const struct ilo_dev_info
*dev
,
4338 const union pipe_color_union
*color
,
4339 uint32_t *dw
, int num_dwords
)
4342 color
->f
[0], color
->f
[1], color
->f
[2], color
->f
[3],
4345 ILO_GPE_VALID_GEN(dev
, 6, 6);
4347 assert(num_dwords
>= 12);
4350 * This state is not documented in the Sandy Bridge PRM, but in the
4351 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
4355 dw
[1] = fui(rgba
[0]);
4356 dw
[2] = fui(rgba
[1]);
4357 dw
[3] = fui(rgba
[2]);
4358 dw
[4] = fui(rgba
[3]);
4361 dw
[5] = util_float_to_half(rgba
[0]) |
4362 util_float_to_half(rgba
[1]) << 16;
4363 dw
[6] = util_float_to_half(rgba
[2]) |
4364 util_float_to_half(rgba
[3]) << 16;
4366 /* clamp to [-1.0f, 1.0f] */
4367 rgba
[0] = CLAMP(rgba
[0], -1.0f
, 1.0f
);
4368 rgba
[1] = CLAMP(rgba
[1], -1.0f
, 1.0f
);
4369 rgba
[2] = CLAMP(rgba
[2], -1.0f
, 1.0f
);
4370 rgba
[3] = CLAMP(rgba
[3], -1.0f
, 1.0f
);
4373 dw
[9] = (int16_t) util_iround(rgba
[0] * 32767.0f
) |
4374 (int16_t) util_iround(rgba
[1] * 32767.0f
) << 16;
4375 dw
[10] = (int16_t) util_iround(rgba
[2] * 32767.0f
) |
4376 (int16_t) util_iround(rgba
[3] * 32767.0f
) << 16;
4379 dw
[11] = (int8_t) util_iround(rgba
[0] * 127.0f
) |
4380 (int8_t) util_iround(rgba
[1] * 127.0f
) << 8 |
4381 (int8_t) util_iround(rgba
[2] * 127.0f
) << 16 |
4382 (int8_t) util_iround(rgba
[3] * 127.0f
) << 24;
4384 /* clamp to [0.0f, 1.0f] */
4385 rgba
[0] = CLAMP(rgba
[0], 0.0f
, 1.0f
);
4386 rgba
[1] = CLAMP(rgba
[1], 0.0f
, 1.0f
);
4387 rgba
[2] = CLAMP(rgba
[2], 0.0f
, 1.0f
);
4388 rgba
[3] = CLAMP(rgba
[3], 0.0f
, 1.0f
);
4391 dw
[0] = (uint8_t) util_iround(rgba
[0] * 255.0f
) |
4392 (uint8_t) util_iround(rgba
[1] * 255.0f
) << 8 |
4393 (uint8_t) util_iround(rgba
[2] * 255.0f
) << 16 |
4394 (uint8_t) util_iround(rgba
[3] * 255.0f
) << 24;
4397 dw
[7] = (uint16_t) util_iround(rgba
[0] * 65535.0f
) |
4398 (uint16_t) util_iround(rgba
[1] * 65535.0f
) << 16;
4399 dw
[8] = (uint16_t) util_iround(rgba
[2] * 65535.0f
) |
4400 (uint16_t) util_iround(rgba
[3] * 65535.0f
) << 16;
4404 ilo_gpe_init_sampler_cso(const struct ilo_dev_info
*dev
,
4405 const struct pipe_sampler_state
*state
,
4406 struct ilo_sampler_cso
*sampler
)
4408 int mip_filter
, min_filter
, mag_filter
, max_aniso
;
4409 int lod_bias
, max_lod
, min_lod
;
4410 int wrap_s
, wrap_t
, wrap_r
, wrap_cube
;
4411 bool clamp_is_to_edge
;
4412 uint32_t dw0
, dw1
, dw3
;
4414 ILO_GPE_VALID_GEN(dev
, 6, 7);
4416 memset(sampler
, 0, sizeof(*sampler
));
4418 mip_filter
= gen6_translate_tex_mipfilter(state
->min_mip_filter
);
4419 min_filter
= gen6_translate_tex_filter(state
->min_img_filter
);
4420 mag_filter
= gen6_translate_tex_filter(state
->mag_img_filter
);
4422 sampler
->anisotropic
= state
->max_anisotropy
;
4424 if (state
->max_anisotropy
>= 2 && state
->max_anisotropy
<= 16)
4425 max_aniso
= state
->max_anisotropy
/ 2 - 1;
4426 else if (state
->max_anisotropy
> 16)
4427 max_aniso
= BRW_ANISORATIO_16
;
4429 max_aniso
= BRW_ANISORATIO_2
;
4433 * Here is how the hardware calculates per-pixel LOD, from my reading of the
4436 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
4437 * other ways. The number of texels is measured using level
4439 * 2) Bias is added to LOD.
4440 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
4441 * compared with Base to determine whether magnification or
4442 * minification is needed. (if preclamp is disabled, LOD is compared
4443 * with Base before clamping)
4444 * 4) If magnification is needed, or no mipmapping is requested, LOD is
4445 * set to floor(MinLod).
4446 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
4448 * With Gallium interface, Base is always zero and
4449 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
4451 if (dev
->gen
>= ILO_GEN(7)) {
4452 const float scale
= 256.0f
;
4454 /* [-16.0, 16.0) in S4.8 */
4456 (CLAMP(state
->lod_bias
, -16.0f
, 15.9f
) * scale
);
4459 /* [0.0, 14.0] in U4.8 */
4460 max_lod
= (int) (CLAMP(state
->max_lod
, 0.0f
, 14.0f
) * scale
);
4461 min_lod
= (int) (CLAMP(state
->min_lod
, 0.0f
, 14.0f
) * scale
);
4464 const float scale
= 64.0f
;
4466 /* [-16.0, 16.0) in S4.6 */
4468 (CLAMP(state
->lod_bias
, -16.0f
, 15.9f
) * scale
);
4471 /* [0.0, 13.0] in U4.6 */
4472 max_lod
= (int) (CLAMP(state
->max_lod
, 0.0f
, 13.0f
) * scale
);
4473 min_lod
= (int) (CLAMP(state
->min_lod
, 0.0f
, 13.0f
) * scale
);
4477 * We want LOD to be clamped to determine magnification/minification, and
4478 * get set to zero when it is magnification or when mipmapping is disabled.
4479 * The hardware would set LOD to floor(MinLod) and that is a problem when
4480 * MinLod is greater than or equal to 1.0f.
4482 * With Base being zero, it is always minification when MinLod is non-zero.
4483 * To achieve our goal, we just need to set MinLod to zero and set
4484 * MagFilter to MinFilter when mipmapping is disabled.
4486 if (state
->min_mip_filter
== PIPE_TEX_MIPFILTER_NONE
&& min_lod
) {
4488 mag_filter
= min_filter
;
4492 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
4493 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
4494 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
4495 * texture coordinates to [0.0, 1.0].
4497 * The clamping will be taken care of in the shaders. There are two
4498 * filters here, but let the minification one have a say.
4500 clamp_is_to_edge
= (state
->min_img_filter
== PIPE_TEX_FILTER_NEAREST
);
4501 if (!clamp_is_to_edge
) {
4502 sampler
->saturate_s
= (state
->wrap_s
== PIPE_TEX_WRAP_CLAMP
);
4503 sampler
->saturate_t
= (state
->wrap_t
== PIPE_TEX_WRAP_CLAMP
);
4504 sampler
->saturate_r
= (state
->wrap_r
== PIPE_TEX_WRAP_CLAMP
);
4507 /* determine wrap s/t/r */
4508 wrap_s
= gen6_translate_tex_wrap(state
->wrap_s
, clamp_is_to_edge
);
4509 wrap_t
= gen6_translate_tex_wrap(state
->wrap_t
, clamp_is_to_edge
);
4510 wrap_r
= gen6_translate_tex_wrap(state
->wrap_r
, clamp_is_to_edge
);
4513 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
4515 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
4516 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
4517 * must have the same Address Control mode."
4519 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
4521 * "This field (Cube Surface Control Mode) must be set to
4522 * CUBECTRLMODE_PROGRAMMED"
4524 * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
4527 if (state
->seamless_cube_map
&&
4528 (state
->min_img_filter
!= PIPE_TEX_FILTER_NEAREST
||
4529 state
->mag_img_filter
!= PIPE_TEX_FILTER_NEAREST
)) {
4530 wrap_cube
= BRW_TEXCOORDMODE_CUBE
;
4533 wrap_cube
= BRW_TEXCOORDMODE_CLAMP
;
4536 if (!state
->normalized_coords
) {
4538 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
4540 * "The following state must be set as indicated if this field
4541 * (Non-normalized Coordinate Enable) is enabled:
4543 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
4544 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
4545 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
4546 * - Mag Mode Filter must be MAPFILTER_NEAREST or
4548 * - Min Mode Filter must be MAPFILTER_NEAREST or
4550 * - Mip Mode Filter must be MIPFILTER_NONE.
4551 * - Min LOD must be 0.
4552 * - Max LOD must be 0.
4553 * - MIP Count must be 0.
4554 * - Surface Min LOD must be 0.
4555 * - Texture LOD Bias must be 0."
4557 assert(wrap_s
== BRW_TEXCOORDMODE_CLAMP
||
4558 wrap_s
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
4559 assert(wrap_t
== BRW_TEXCOORDMODE_CLAMP
||
4560 wrap_t
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
4561 assert(wrap_r
== BRW_TEXCOORDMODE_CLAMP
||
4562 wrap_r
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
4564 assert(mag_filter
== BRW_MAPFILTER_NEAREST
||
4565 mag_filter
== BRW_MAPFILTER_LINEAR
);
4566 assert(min_filter
== BRW_MAPFILTER_NEAREST
||
4567 min_filter
== BRW_MAPFILTER_LINEAR
);
4569 /* work around a bug in util_blitter */
4570 mip_filter
= BRW_MIPFILTER_NONE
;
4572 assert(mip_filter
== BRW_MIPFILTER_NONE
);
4575 if (dev
->gen
>= ILO_GEN(7)) {
4580 sampler
->dw_filter
= mag_filter
<< 17 |
4583 sampler
->dw_filter_aniso
= BRW_MAPFILTER_ANISOTROPIC
<< 17 |
4584 BRW_MAPFILTER_ANISOTROPIC
<< 14 |
4587 dw1
= min_lod
<< 20 |
4590 if (state
->compare_mode
!= PIPE_TEX_COMPARE_NONE
)
4591 dw1
|= gen6_translate_shadow_func(state
->compare_func
) << 1;
4593 dw3
= max_aniso
<< 19;
4595 /* round the coordinates for linear filtering */
4596 if (min_filter
!= BRW_MAPFILTER_NEAREST
) {
4597 dw3
|= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN
|
4598 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN
|
4599 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN
) << 13;
4601 if (mag_filter
!= BRW_MAPFILTER_NEAREST
) {
4602 dw3
|= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG
|
4603 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG
|
4604 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG
) << 13;
4607 if (!state
->normalized_coords
)
4610 sampler
->dw_wrap
= wrap_s
<< 6 |
4615 * As noted in the classic i965 driver, the HW may still reference
4616 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
4619 sampler
->dw_wrap_1d
= wrap_s
<< 6 |
4620 BRW_TEXCOORDMODE_WRAP
<< 3 |
4621 BRW_TEXCOORDMODE_WRAP
;
4623 sampler
->dw_wrap_cube
= wrap_cube
<< 6 |
4627 STATIC_ASSERT(Elements(sampler
->payload
) >= 7);
4629 sampler
->payload
[0] = dw0
;
4630 sampler
->payload
[1] = dw1
;
4631 sampler
->payload
[2] = dw3
;
4633 memcpy(&sampler
->payload
[3],
4634 state
->border_color
.ui
, sizeof(state
->border_color
.ui
));
4641 if (state
->compare_mode
!= PIPE_TEX_COMPARE_NONE
)
4642 dw0
|= gen6_translate_shadow_func(state
->compare_func
);
4644 sampler
->dw_filter
= (min_filter
!= mag_filter
) << 27 |
4648 sampler
->dw_filter_aniso
= BRW_MAPFILTER_ANISOTROPIC
<< 17 |
4649 BRW_MAPFILTER_ANISOTROPIC
<< 14;
4651 dw1
= min_lod
<< 22 |
4654 sampler
->dw_wrap
= wrap_s
<< 6 |
4658 sampler
->dw_wrap_1d
= wrap_s
<< 6 |
4659 BRW_TEXCOORDMODE_WRAP
<< 3 |
4660 BRW_TEXCOORDMODE_WRAP
;
4662 sampler
->dw_wrap_cube
= wrap_cube
<< 6 |
4666 dw3
= max_aniso
<< 19;
4668 /* round the coordinates for linear filtering */
4669 if (min_filter
!= BRW_MAPFILTER_NEAREST
) {
4670 dw3
|= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN
|
4671 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN
|
4672 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN
) << 13;
4674 if (mag_filter
!= BRW_MAPFILTER_NEAREST
) {
4675 dw3
|= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG
|
4676 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG
|
4677 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG
) << 13;
4680 if (!state
->normalized_coords
)
4683 STATIC_ASSERT(Elements(sampler
->payload
) >= 15);
4685 sampler
->payload
[0] = dw0
;
4686 sampler
->payload
[1] = dw1
;
4687 sampler
->payload
[2] = dw3
;
4689 sampler_init_border_color_gen6(dev
,
4690 &state
->border_color
, &sampler
->payload
[3], 12);
4695 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info
*dev
,
4696 const struct ilo_sampler_cso
* const *samplers
,
4697 const struct pipe_sampler_view
* const *views
,
4698 const uint32_t *sampler_border_colors
,
4702 const int state_align
= 32 / 4;
4703 const int state_len
= 4 * num_samplers
;
4704 uint32_t state_offset
, *dw
;
4707 ILO_GPE_VALID_GEN(dev
, 6, 7);
4710 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
4712 * "The sampler state is stored as an array of up to 16 elements..."
4714 assert(num_samplers
<= 16);
4719 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_STATE",
4720 state_len
, state_align
, &state_offset
);
4722 for (i
= 0; i
< num_samplers
; i
++) {
4723 const struct ilo_sampler_cso
*sampler
= samplers
[i
];
4724 const struct pipe_sampler_view
*view
= views
[i
];
4725 const uint32_t border_color
= sampler_border_colors
[i
];
4726 uint32_t dw_filter
, dw_wrap
;
4728 /* there may be holes */
4729 if (!sampler
|| !view
) {
4730 /* disabled sampler */
4740 /* determine filter and wrap modes */
4741 switch (view
->texture
->target
) {
4742 case PIPE_TEXTURE_1D
:
4743 dw_filter
= (sampler
->anisotropic
) ?
4744 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
4745 dw_wrap
= sampler
->dw_wrap_1d
;
4747 case PIPE_TEXTURE_3D
:
4749 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
4751 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
4752 * surfaces of type SURFTYPE_3D."
4754 dw_filter
= sampler
->dw_filter
;
4755 dw_wrap
= sampler
->dw_wrap
;
4757 case PIPE_TEXTURE_CUBE
:
4758 dw_filter
= (sampler
->anisotropic
) ?
4759 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
4760 dw_wrap
= sampler
->dw_wrap_cube
;
4763 dw_filter
= (sampler
->anisotropic
) ?
4764 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
4765 dw_wrap
= sampler
->dw_wrap
;
4769 dw
[0] = sampler
->payload
[0];
4770 dw
[1] = sampler
->payload
[1];
4771 assert(!(border_color
& 0x1f));
4772 dw
[2] = border_color
;
4773 dw
[3] = sampler
->payload
[2];
4777 if (dev
->gen
>= ILO_GEN(7)) {
4782 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
4784 * "[DevSNB] Errata: Incorrect behavior is observed in cases
4785 * where the min and mag mode filters are different and
4786 * SurfMinLOD is nonzero. The determination of MagMode uses the
4787 * following equation instead of the one in the above
4788 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
4790 * As a way to work around that, we set Base to
4791 * view->u.tex.first_level.
4793 dw
[0] |= view
->u
.tex
.first_level
<< 22;
4801 return state_offset
;
4805 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info
*dev
,
4806 const struct ilo_sampler_cso
*sampler
,
4809 const int state_align
= 32 / 4;
4810 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 12;
4811 uint32_t state_offset
, *dw
;
4813 ILO_GPE_VALID_GEN(dev
, 6, 7);
4815 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_BORDER_COLOR_STATE",
4816 state_len
, state_align
, &state_offset
);
4818 /* see ilo_gpe_init_sampler_cso() */
4819 memcpy(dw
, &sampler
->payload
[3], state_len
* 4);
4821 return state_offset
;
/**
 * Reserve space for a push constant buffer of \p size bytes.
 *
 * The reserved length is \p size rounded up to a multiple of 32 bytes, and
 * the bytes past \p size are zeroed.  The constants themselves are not
 * written here; the address of the reserved space is stored to \p pcb when
 * \p pcb is not NULL.  Returns the offset reported by ilo_cp_steal_ptr().
 */
static uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int alignment = 32 / 4;
   const int num_dwords = align(size, 32) / 4;
   const int num_bytes = num_dwords * 4;
   uint32_t offset;
   char *bytes;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   bytes = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         num_dwords, alignment, &offset);

   /* zero out the unused range */
   if (size < num_bytes)
      memset(&bytes[size], 0, num_bytes - size);

   if (pcb)
      *pcb = bytes;

   return offset;
}
4854 gen6_estimate_command_size(const struct ilo_dev_info
*dev
,
4855 enum ilo_gpe_gen6_command cmd
,
4858 static const struct {
4861 } gen6_command_size_table
[ILO_GPE_GEN6_COMMAND_COUNT
] = {
4862 [ILO_GPE_GEN6_STATE_BASE_ADDRESS
] = { 0, 10 },
4863 [ILO_GPE_GEN6_STATE_SIP
] = { 0, 2 },
4864 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS
] = { 0, 1 },
4865 [ILO_GPE_GEN6_PIPELINE_SELECT
] = { 0, 1 },
4866 [ILO_GPE_GEN6_MEDIA_VFE_STATE
] = { 0, 8 },
4867 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD
] = { 0, 4 },
4868 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD
] = { 0, 4 },
4869 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE
] = { 0, 2 },
4870 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH
] = { 0, 2 },
4871 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER
] = { 17, 1 },
4872 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS
] = { 0, 4 },
4873 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS
] = { 0, 4 },
4874 [ILO_GPE_GEN6_3DSTATE_URB
] = { 0, 3 },
4875 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS
] = { 1, 4 },
4876 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS
] = { 1, 2 },
4877 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER
] = { 0, 3 },
4878 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS
] = { 0, 4 },
4879 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS
] = { 0, 4 },
4880 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS
] = { 0, 2 },
4881 [ILO_GPE_GEN6_3DSTATE_VS
] = { 0, 6 },
4882 [ILO_GPE_GEN6_3DSTATE_GS
] = { 0, 7 },
4883 [ILO_GPE_GEN6_3DSTATE_CLIP
] = { 0, 4 },
4884 [ILO_GPE_GEN6_3DSTATE_SF
] = { 0, 20 },
4885 [ILO_GPE_GEN6_3DSTATE_WM
] = { 0, 9 },
4886 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS
] = { 0, 5 },
4887 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS
] = { 0, 5 },
4888 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS
] = { 0, 5 },
4889 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK
] = { 0, 2 },
4890 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE
] = { 0, 4 },
4891 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER
] = { 0, 7 },
4892 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET
] = { 0, 2 },
4893 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN
] = { 0, 33 },
4894 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE
] = { 0, 3 },
4895 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS
] = { 0, 3 },
4896 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX
] = { 0, 4 },
4897 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE
] = { 0, 3 },
4898 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER
] = { 0, 3 },
4899 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER
] = { 0, 3 },
4900 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS
] = { 0, 2 },
4901 [ILO_GPE_GEN6_PIPE_CONTROL
] = { 0, 5 },
4902 [ILO_GPE_GEN6_3DPRIMITIVE
] = { 0, 6 },
4904 const int header
= gen6_command_size_table
[cmd
].header
;
4905 const int body
= gen6_command_size_table
[arg
].body
;
4906 const int count
= arg
;
4908 ILO_GPE_VALID_GEN(dev
, 6, 6);
4909 assert(cmd
< ILO_GPE_GEN6_COMMAND_COUNT
);
4911 return (likely(count
)) ? header
+ body
* count
: 0;
/*
 * Estimate the size of a GEN6 state object, or of several of them.
 *
 * Every state has a table entry holding three values that are read back
 * below as .alignment, .body and .is_array; "arg" is used as the element
 * count.  Two formulas are visible:
 *
 *   (alignment - 1) + body * count
 *   (alignment - 1) + body + util_align_npot(body, alignment) * (count - 1)
 *
 * NOTE(review): this listing is missing lines (the embedded numbering skips
 * 4917-4918, 4920-4922, 4942, 4948, 4950-4951, 4954 and 4956-4963), so the
 * return type, the "arg" parameter, the struct field declarations, the
 * declaration of "estimate", the conditions that select between the two
 * formulas, the zero-count path and the return statement are not visible.
 * Presumably the first formula applies when is_array is true and the second
 * otherwise -- confirm against the complete file.
 */
4915 gen6_estimate_state_size(const struct ilo_dev_info
*dev
,
4916 enum ilo_gpe_gen6_state state
,
4919 static const struct {
4923 } gen6_state_size_table
[ILO_GPE_GEN6_STATE_COUNT
] = {
4924 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA
] = { 8, 8, true },
4925 [ILO_GPE_GEN6_SF_VIEWPORT
] = { 8, 8, true },
4926 [ILO_GPE_GEN6_CLIP_VIEWPORT
] = { 8, 4, true },
4927 [ILO_GPE_GEN6_CC_VIEWPORT
] = { 8, 2, true },
4928 [ILO_GPE_GEN6_COLOR_CALC_STATE
] = { 16, 6, false },
4929 [ILO_GPE_GEN6_BLEND_STATE
] = { 16, 2, true },
4930 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE
] = { 16, 3, false },
4931 [ILO_GPE_GEN6_SCISSOR_RECT
] = { 8, 2, true },
4932 [ILO_GPE_GEN6_BINDING_TABLE_STATE
] = { 8, 1, true },
4933 [ILO_GPE_GEN6_SURFACE_STATE
] = { 8, 6, false },
4934 [ILO_GPE_GEN6_SAMPLER_STATE
] = { 8, 4, true },
4935 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE
] = { 8, 12, false },
4936 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER
] = { 8, 1, true },
/* fetch the parameters of the requested state; note that the table is read
 * before the range assertion on "state" further down is evaluated */
4938 const int alignment
= gen6_state_size_table
[state
].alignment
;
4939 const int body
= gen6_state_size_table
[state
].body
;
4940 const bool is_array
= gen6_state_size_table
[state
].is_array
;
4941 const int count
= arg
;
4944 ILO_GPE_VALID_GEN(dev
, 6, 6);
4945 assert(state
< ILO_GPE_GEN6_STATE_COUNT
);
4947 if (likely(count
)) {
/* first formula: (alignment - 1) plus "count" bodies with no padding in
 * between (NOTE(review): presumably the is_array case -- the condition is
 * not visible in this listing) */
4949 estimate
= (alignment
- 1) + body
* count
;
/* second formula: one body, then each additional element is counted at the
 * body size rounded up to the alignment */
4952 estimate
= (alignment
- 1) + body
;
4953 /* all states are aligned */
4955 estimate
+= util_align_npot(body
, alignment
) * (count
- 1);
4965 static const struct ilo_gpe_gen6 gen6_gpe
= {
4966 .estimate_command_size
= gen6_estimate_command_size
,
4967 .estimate_state_size
= gen6_estimate_state_size
,
4969 #define GEN6_SET(name) .emit_ ## name = gen6_emit_ ## name
4970 GEN6_SET(STATE_BASE_ADDRESS
),
4971 GEN6_SET(STATE_SIP
),
4972 GEN6_SET(3DSTATE_VF_STATISTICS
),
4973 GEN6_SET(PIPELINE_SELECT
),
4974 GEN6_SET(MEDIA_VFE_STATE
),
4975 GEN6_SET(MEDIA_CURBE_LOAD
),
4976 GEN6_SET(MEDIA_INTERFACE_DESCRIPTOR_LOAD
),
4977 GEN6_SET(MEDIA_GATEWAY_STATE
),
4978 GEN6_SET(MEDIA_STATE_FLUSH
),
4979 GEN6_SET(MEDIA_OBJECT_WALKER
),
4980 GEN6_SET(3DSTATE_BINDING_TABLE_POINTERS
),
4981 GEN6_SET(3DSTATE_SAMPLER_STATE_POINTERS
),
4982 GEN6_SET(3DSTATE_URB
),
4983 GEN6_SET(3DSTATE_VERTEX_BUFFERS
),
4984 GEN6_SET(3DSTATE_VERTEX_ELEMENTS
),
4985 GEN6_SET(3DSTATE_INDEX_BUFFER
),
4986 GEN6_SET(3DSTATE_VIEWPORT_STATE_POINTERS
),
4987 GEN6_SET(3DSTATE_CC_STATE_POINTERS
),
4988 GEN6_SET(3DSTATE_SCISSOR_STATE_POINTERS
),
4989 GEN6_SET(3DSTATE_VS
),
4990 GEN6_SET(3DSTATE_GS
),
4991 GEN6_SET(3DSTATE_CLIP
),
4992 GEN6_SET(3DSTATE_SF
),
4993 GEN6_SET(3DSTATE_WM
),
4994 GEN6_SET(3DSTATE_CONSTANT_VS
),
4995 GEN6_SET(3DSTATE_CONSTANT_GS
),
4996 GEN6_SET(3DSTATE_CONSTANT_PS
),
4997 GEN6_SET(3DSTATE_SAMPLE_MASK
),
4998 GEN6_SET(3DSTATE_DRAWING_RECTANGLE
),
4999 GEN6_SET(3DSTATE_DEPTH_BUFFER
),
5000 GEN6_SET(3DSTATE_POLY_STIPPLE_OFFSET
),
5001 GEN6_SET(3DSTATE_POLY_STIPPLE_PATTERN
),
5002 GEN6_SET(3DSTATE_LINE_STIPPLE
),
5003 GEN6_SET(3DSTATE_AA_LINE_PARAMETERS
),
5004 GEN6_SET(3DSTATE_GS_SVB_INDEX
),
5005 GEN6_SET(3DSTATE_MULTISAMPLE
),
5006 GEN6_SET(3DSTATE_STENCIL_BUFFER
),
5007 GEN6_SET(3DSTATE_HIER_DEPTH_BUFFER
),
5008 GEN6_SET(3DSTATE_CLEAR_PARAMS
),
5009 GEN6_SET(PIPE_CONTROL
),
5010 GEN6_SET(3DPRIMITIVE
),
5011 GEN6_SET(INTERFACE_DESCRIPTOR_DATA
),
5012 GEN6_SET(SF_VIEWPORT
),
5013 GEN6_SET(CLIP_VIEWPORT
),
5014 GEN6_SET(CC_VIEWPORT
),
5015 GEN6_SET(COLOR_CALC_STATE
),
5016 GEN6_SET(BLEND_STATE
),
5017 GEN6_SET(DEPTH_STENCIL_STATE
),
5018 GEN6_SET(SCISSOR_RECT
),
5019 GEN6_SET(BINDING_TABLE_STATE
),
5020 GEN6_SET(SURFACE_STATE
),
5021 GEN6_SET(so_SURFACE_STATE
),
5022 GEN6_SET(SAMPLER_STATE
),
5023 GEN6_SET(SAMPLER_BORDER_COLOR_STATE
),
5024 GEN6_SET(push_constant_buffer
),
5028 const struct ilo_gpe_gen6
*
5029 ilo_gpe_gen6_get(void)