2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
31 #include "brw_defines.h"
32 #include "intel_reg.h"
33 #include "intel_winsys.h"
35 #include "ilo_common.h"
37 #include "ilo_format.h"
38 #include "ilo_resource.h"
39 #include "ilo_shader.h"
/*
 * Assert that the device generation lies in [min_gen, max_gen], inclusive.
 * Both bounds are wrapped with ILO_GEN() before being compared against
 * (dev)->gen.  The macro expands to a bare assert() expression, so the
 * caller supplies the trailing semicolon.
 */
42 #define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
43 assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
/*
 * Build the header DWord of a GPE command: a fixed 0x3 shifted to bit 29,
 * then pipeline at bit 27, op at bit 24 and subop at bit 16.  The emitters
 * in this file OR the length field (cmd_len - 2) and per-command flags into
 * the result.
 */
45 #define ILO_GPE_CMD(pipeline, op, subop) \
46 (0x3 << 29 | (pipeline) << 27 | (op) << 24 | (subop) << 16)
49 * Commands that GEN6 GPE could emit.
/*
 * Identifiers for the commands the GEN6 GPE can emit.  The trailing comment
 * on each enumerator lists three hex values; for the emitters visible in
 * this file they match the (pipeline, op, subop) arguments given to
 * ILO_GPE_CMD().  ILO_GPE_GEN6_COMMAND_COUNT is the last enumerator --
 * presumably the number of commands.
 * NOTE(review): the closing of the enum is not visible in this extract.
 */
51 enum ilo_gpe_gen6_command
{
52 ILO_GPE_GEN6_STATE_BASE_ADDRESS
, /* (0x0, 0x1, 0x01) */
53 ILO_GPE_GEN6_STATE_SIP
, /* (0x0, 0x1, 0x02) */
54 ILO_GPE_GEN6_3DSTATE_VF_STATISTICS
, /* (0x1, 0x0, 0x0b) */
55 ILO_GPE_GEN6_PIPELINE_SELECT
, /* (0x1, 0x1, 0x04) */
56 ILO_GPE_GEN6_MEDIA_VFE_STATE
, /* (0x2, 0x0, 0x00) */
57 ILO_GPE_GEN6_MEDIA_CURBE_LOAD
, /* (0x2, 0x0, 0x01) */
58 ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD
, /* (0x2, 0x0, 0x02) */
59 ILO_GPE_GEN6_MEDIA_GATEWAY_STATE
, /* (0x2, 0x0, 0x03) */
60 ILO_GPE_GEN6_MEDIA_STATE_FLUSH
, /* (0x2, 0x0, 0x04) */
61 ILO_GPE_GEN6_MEDIA_OBJECT_WALKER
, /* (0x2, 0x1, 0x03) */
62 ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS
, /* (0x3, 0x0, 0x01) */
63 ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS
, /* (0x3, 0x0, 0x02) */
64 ILO_GPE_GEN6_3DSTATE_URB
, /* (0x3, 0x0, 0x05) */
65 ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS
, /* (0x3, 0x0, 0x08) */
66 ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS
, /* (0x3, 0x0, 0x09) */
67 ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER
, /* (0x3, 0x0, 0x0a) */
68 ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS
, /* (0x3, 0x0, 0x0d) */
69 ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS
, /* (0x3, 0x0, 0x0e) */
70 ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS
, /* (0x3, 0x0, 0x0f) */
71 ILO_GPE_GEN6_3DSTATE_VS
, /* (0x3, 0x0, 0x10) */
72 ILO_GPE_GEN6_3DSTATE_GS
, /* (0x3, 0x0, 0x11) */
73 ILO_GPE_GEN6_3DSTATE_CLIP
, /* (0x3, 0x0, 0x12) */
74 ILO_GPE_GEN6_3DSTATE_SF
, /* (0x3, 0x0, 0x13) */
75 ILO_GPE_GEN6_3DSTATE_WM
, /* (0x3, 0x0, 0x14) */
76 ILO_GPE_GEN6_3DSTATE_CONSTANT_VS
, /* (0x3, 0x0, 0x15) */
77 ILO_GPE_GEN6_3DSTATE_CONSTANT_GS
, /* (0x3, 0x0, 0x16) */
78 ILO_GPE_GEN6_3DSTATE_CONSTANT_PS
, /* (0x3, 0x0, 0x17) */
79 ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK
, /* (0x3, 0x0, 0x18) */
80 ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE
, /* (0x3, 0x1, 0x00) */
81 ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER
, /* (0x3, 0x1, 0x05) */
82 ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET
, /* (0x3, 0x1, 0x06) */
83 ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN
, /* (0x3, 0x1, 0x07) */
84 ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE
, /* (0x3, 0x1, 0x08) */
85 ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS
, /* (0x3, 0x1, 0x0a) */
86 ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX
, /* (0x3, 0x1, 0x0b) */
87 ILO_GPE_GEN6_3DSTATE_MULTISAMPLE
, /* (0x3, 0x1, 0x0d) */
88 ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER
, /* (0x3, 0x1, 0x0e) */
89 ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER
, /* (0x3, 0x1, 0x0f) */
90 ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS
, /* (0x3, 0x1, 0x10) */
91 ILO_GPE_GEN6_PIPE_CONTROL
, /* (0x3, 0x2, 0x00) */
92 ILO_GPE_GEN6_3DPRIMITIVE
, /* (0x3, 0x3, 0x00) */
94 ILO_GPE_GEN6_COMMAND_COUNT
,
98 * Indirect states that GEN6 GPE could emit.
/*
 * Identifiers for the indirect states the GEN6 GPE can emit.
 * ILO_GPE_GEN6_STATE_COUNT is the last enumerator -- presumably the number
 * of states.
 * NOTE(review): the closing of the enum is not visible in this extract.
 */
100 enum ilo_gpe_gen6_state
{
101 ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA
,
102 ILO_GPE_GEN6_SF_VIEWPORT
,
103 ILO_GPE_GEN6_CLIP_VIEWPORT
,
104 ILO_GPE_GEN6_CC_VIEWPORT
,
105 ILO_GPE_GEN6_COLOR_CALC_STATE
,
106 ILO_GPE_GEN6_BLEND_STATE
,
107 ILO_GPE_GEN6_DEPTH_STENCIL_STATE
,
108 ILO_GPE_GEN6_SCISSOR_RECT
,
109 ILO_GPE_GEN6_BINDING_TABLE_STATE
,
110 ILO_GPE_GEN6_SURFACE_STATE
,
111 ILO_GPE_GEN6_SAMPLER_STATE
,
112 ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE
,
113 ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER
,
115 ILO_GPE_GEN6_STATE_COUNT
,
/*
 * Declaration of the command-size estimator.  It takes the device info and
 * an ilo_gpe_gen6_command identifier.
 * NOTE(review): the return type and any trailing parameters are not visible
 * in this extract; the unit of the estimate cannot be confirmed from here.
 */
119 ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info
*dev
,
120 enum ilo_gpe_gen6_command cmd
,
/*
 * Declaration of the indirect-state-size estimator.  It takes the device
 * info and an ilo_gpe_gen6_state identifier.
 * NOTE(review): the return type and any trailing parameters are not visible
 * in this extract; the unit of the estimate cannot be confirmed from here.
 */
124 ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info
*dev
,
125 enum ilo_gpe_gen6_state state
,
129 * Translate winsys tiling to hardware tiling.
/*
 * Map a winsys tiling mode to hardware surface tiling bits.  The visible
 * results are BRW_SURFACE_TILED alone and
 * BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y; an unrecognized mode trips the
 * "unknown tiling" assert.
 * NOTE(review): the case labels selecting those two results and the value
 * returned for INTEL_TILING_NONE are not visible in this extract.
 */
132 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling
)
135 case INTEL_TILING_NONE
:
138 return BRW_SURFACE_TILED
;
140 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
142 assert(!"unknown tiling");
148 * Translate a pipe primitive type to the matching hardware primitive type.
/*
 * Look up the hardware primitive type for a pipe primitive type.
 *
 * The mapping lives in a function-local static table of PIPE_PRIM_MAX
 * entries, filled with designated initializers keyed by PIPE_PRIM_*.  The
 * assert expects the selected entry to be non-zero, and that entry is
 * returned.
 * NOTE(review): prim is used as an index without a visible range check
 * against PIPE_PRIM_MAX -- confirm callers guarantee it is in range.
 */
151 ilo_gpe_gen6_translate_pipe_prim(unsigned prim
)
153 static const int prim_mapping
[PIPE_PRIM_MAX
] = {
154 [PIPE_PRIM_POINTS
] = _3DPRIM_POINTLIST
,
155 [PIPE_PRIM_LINES
] = _3DPRIM_LINELIST
,
156 [PIPE_PRIM_LINE_LOOP
] = _3DPRIM_LINELOOP
,
157 [PIPE_PRIM_LINE_STRIP
] = _3DPRIM_LINESTRIP
,
158 [PIPE_PRIM_TRIANGLES
] = _3DPRIM_TRILIST
,
159 [PIPE_PRIM_TRIANGLE_STRIP
] = _3DPRIM_TRISTRIP
,
160 [PIPE_PRIM_TRIANGLE_FAN
] = _3DPRIM_TRIFAN
,
161 [PIPE_PRIM_QUADS
] = _3DPRIM_QUADLIST
,
162 [PIPE_PRIM_QUAD_STRIP
] = _3DPRIM_QUADSTRIP
,
163 [PIPE_PRIM_POLYGON
] = _3DPRIM_POLYGON
,
164 [PIPE_PRIM_LINES_ADJACENCY
] = _3DPRIM_LINELIST_ADJ
,
165 [PIPE_PRIM_LINE_STRIP_ADJACENCY
] = _3DPRIM_LINESTRIP_ADJ
,
166 [PIPE_PRIM_TRIANGLES_ADJACENCY
] = _3DPRIM_TRILIST_ADJ
,
167 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
] = _3DPRIM_TRISTRIP_ADJ
,
170 assert(prim_mapping
[prim
]);
172 return prim_mapping
[prim
];
176 * Translate a pipe texture target to the matching hardware surface type.
/*
 * Map a pipe texture target to a hardware surface type.
 *
 * Array targets share the surface type of their non-array counterpart:
 * 1D and 1D_ARRAY give BRW_SURFACE_1D; 2D, RECT and 2D_ARRAY give
 * BRW_SURFACE_2D; CUBE and CUBE_ARRAY give BRW_SURFACE_CUBE; 3D gives
 * BRW_SURFACE_3D.  An unrecognized target trips the assert and then
 * returns BRW_SURFACE_BUFFER.
 * NOTE(review): the case label for the first BRW_SURFACE_BUFFER return is
 * not visible in this extract.
 */
179 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target
)
183 return BRW_SURFACE_BUFFER
;
184 case PIPE_TEXTURE_1D
:
185 case PIPE_TEXTURE_1D_ARRAY
:
186 return BRW_SURFACE_1D
;
187 case PIPE_TEXTURE_2D
:
188 case PIPE_TEXTURE_RECT
:
189 case PIPE_TEXTURE_2D_ARRAY
:
190 return BRW_SURFACE_2D
;
191 case PIPE_TEXTURE_3D
:
192 return BRW_SURFACE_3D
;
193 case PIPE_TEXTURE_CUBE
:
194 case PIPE_TEXTURE_CUBE_ARRAY
:
195 return BRW_SURFACE_CUBE
;
197 assert(!"unknown texture target");
198 return BRW_SURFACE_BUFFER
;
203 * Fill in DW2 to DW7 of 3DSTATE_SF.
/*
 * Fill the rasterizer part of 3DSTATE_SF into payload[].
 *
 * Visible behaviour: payload_len must equal the element count of the
 * rasterizer's precomputed sf->payload, which is copied into payload; then
 * sf->dw_msaa is ORed into payload[1].  On GEN7 and later a hardware depth
 * format is chosen from depth_format (Z16 gives D16_UNORM, Z32_FLOAT
 * variants give D32_FLOAT, Z24 variants give D24_UNORM_X8_UINT, otherwise
 * D32_FLOAT) and ORed into payload[0] at
 * GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT.  A separate path sets
 * payload[1] to GEN6_SF_MSRAST_ON_PATTERN when num_samples > 1, else 0.
 *
 * NOTE(review): the num_samples parameter, the conditions guarding the
 * dw_msaa OR and the final payload[1] assignment, and the switch's
 * break/default lines are not visible in this extract.
 */
206 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info
*dev
,
207 const struct ilo_rasterizer_state
*rasterizer
,
209 enum pipe_format depth_format
,
210 uint32_t *payload
, unsigned payload_len
)
212 const struct ilo_rasterizer_sf
*sf
= &rasterizer
->sf
;
214 assert(payload_len
== Elements(sf
->payload
));
217 memcpy(payload
, sf
->payload
, sizeof(sf
->payload
));
220 payload
[1] |= sf
->dw_msaa
;
222 if (dev
->gen
>= ILO_GEN(7)) {
225 /* separate stencil */
226 switch (depth_format
) {
227 case PIPE_FORMAT_Z16_UNORM
:
228 format
= BRW_DEPTHFORMAT_D16_UNORM
;
230 case PIPE_FORMAT_Z32_FLOAT
:
231 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
232 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
234 case PIPE_FORMAT_Z24X8_UNORM
:
235 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
236 format
= BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
;
239 /* FLOAT surface is assumed when there is no depth buffer */
240 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
244 payload
[0] |= format
<< GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT
;
249 payload
[1] = (num_samples
> 1) ? GEN6_SF_MSRAST_ON_PATTERN
: 0;
258 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
/*
 * Fill the setup-backend (SBE) part of 3DSTATE_SF into dw[], which must
 * hold exactly 13 DWords (asserted).  Valid for GEN6 and GEN7.
 *
 * Visible behaviour:
 *  - dw[] is zeroed, and one path sets dw[0] to a URB entry read length of
 *    1 using the GEN7 or GEN6 shift.
 *  - Otherwise the output count (the FS ILO_KERNEL_INPUT_COUNT parameter,
 *    at most 32), the VUE read length ((source_len + 1) / 2) and the VUE
 *    read offset (source_skip, asserted to be even) are packed into dw[0]
 *    with the per-generation shifts, plus the swizzle-enable bit when the
 *    routing asks for it.
 *  - The point sprite origin bit is chosen from the rasterizer's
 *    sprite_coord_mode.
 *  - 2 * 16 bytes of routing->swizzles are copied into dw[1] onward.
 *  - dw[9] and dw[10] take the point-sprite and constant-interpolation
 *    enable masks from the routing.
 *
 * NOTE(review): the condition selecting the early "read length 1" path,
 * the else keywords, switch breaks and any write to vue_offset between the
 * assert and its use are not visible in this extract.
 */
261 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info
*dev
,
262 const struct ilo_rasterizer_state
*rasterizer
,
263 const struct ilo_shader_state
*fs
,
264 uint32_t *dw
, int num_dwords
)
266 int output_count
, vue_offset
, vue_len
;
267 const struct ilo_kernel_routing
*routing
;
269 ILO_GPE_VALID_GEN(dev
, 6, 7);
270 assert(num_dwords
== 13);
273 memset(dw
, 0, sizeof(dw
[0]) * num_dwords
);
275 if (dev
->gen
>= ILO_GEN(7))
276 dw
[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
;
278 dw
[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
;
283 output_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
284 assert(output_count
<= 32);
286 routing
= ilo_shader_get_kernel_routing(fs
);
288 vue_offset
= routing
->source_skip
;
289 assert(vue_offset
% 2 == 0);
292 vue_len
= (routing
->source_len
+ 1) / 2;
296 if (dev
->gen
>= ILO_GEN(7)) {
297 dw
[0] = output_count
<< GEN7_SBE_NUM_OUTPUTS_SHIFT
|
298 vue_len
<< GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
|
299 vue_offset
<< GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT
;
300 if (routing
->swizzle_enable
)
301 dw
[0] |= GEN7_SBE_SWIZZLE_ENABLE
;
304 dw
[0] = output_count
<< GEN6_SF_NUM_OUTPUTS_SHIFT
|
305 vue_len
<< GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
|
306 vue_offset
<< GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT
;
307 if (routing
->swizzle_enable
)
308 dw
[0] |= GEN6_SF_SWIZZLE_ENABLE
;
311 switch (rasterizer
->state
.sprite_coord_mode
) {
312 case PIPE_SPRITE_COORD_UPPER_LEFT
:
313 dw
[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT
;
315 case PIPE_SPRITE_COORD_LOWER_LEFT
:
316 dw
[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT
;
320 STATIC_ASSERT(Elements(routing
->swizzles
) >= 16);
321 memcpy(&dw
[1], routing
->swizzles
, 2 * 16);
324 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
326 * "This field (Point Sprite Texture Coordinate Enable) must be
327 * programmed to 0 when non-point primitives are rendered."
329 * TODO We do not check that yet.
331 dw
[9] = routing
->point_sprite_enable
;
333 dw
[10] = routing
->const_interp_enable
;
335 /* WrapShortest enables */
/*
 * Emit STATE_BASE_ADDRESS (10 DWords) for GEN6/GEN7.
 *
 * All four size arguments must be multiples of 4K (asserted on their OR).
 * After the header, five ilo_cp_write_bo() calls write the general,
 * surface, dynamic, indirect-object and instruction buffers with a value
 * of 1 (presumably the modify-enable bit -- confirm against the PRM).
 * Then, for each of the four sizes, a non-zero size is written as
 * (size | 1) against the matching buffer, while the alternative path
 * skips the range check.
 *
 * NOTE(review): the cp parameter, several domain arguments of the
 * ilo_cp_write_bo() calls, the else keywords and the "skip range check"
 * writes for the general-state and instruction bounds are not visible in
 * this extract.
 */
341 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info
*dev
,
342 struct intel_bo
*general_state_bo
,
343 struct intel_bo
*surface_state_bo
,
344 struct intel_bo
*dynamic_state_bo
,
345 struct intel_bo
*indirect_object_bo
,
346 struct intel_bo
*instruction_bo
,
347 uint32_t general_state_size
,
348 uint32_t dynamic_state_size
,
349 uint32_t indirect_object_size
,
350 uint32_t instruction_size
,
353 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x01);
354 const uint8_t cmd_len
= 10;
356 ILO_GPE_VALID_GEN(dev
, 6, 7);
358 /* 4K-page aligned */
359 assert(((general_state_size
| dynamic_state_size
|
360 indirect_object_size
| instruction_size
) & 0xfff) == 0);
362 ilo_cp_begin(cp
, cmd_len
);
363 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
365 ilo_cp_write_bo(cp
, 1, general_state_bo
,
368 ilo_cp_write_bo(cp
, 1, surface_state_bo
,
369 INTEL_DOMAIN_SAMPLER
,
371 ilo_cp_write_bo(cp
, 1, dynamic_state_bo
,
372 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
374 ilo_cp_write_bo(cp
, 1, indirect_object_bo
,
377 ilo_cp_write_bo(cp
, 1, instruction_bo
,
378 INTEL_DOMAIN_INSTRUCTION
,
381 if (general_state_size
) {
382 ilo_cp_write_bo(cp
, general_state_size
| 1, general_state_bo
,
387 /* skip range check */
391 if (dynamic_state_size
) {
392 ilo_cp_write_bo(cp
, dynamic_state_size
| 1, dynamic_state_bo
,
393 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
397 /* skip range check */
398 ilo_cp_write(cp
, 0xfffff000 + 1);
401 if (indirect_object_size
) {
402 ilo_cp_write_bo(cp
, indirect_object_size
| 1, indirect_object_bo
,
407 /* skip range check */
408 ilo_cp_write(cp
, 0xfffff000 + 1);
411 if (instruction_size
) {
412 ilo_cp_write_bo(cp
, instruction_size
| 1, instruction_bo
,
413 INTEL_DOMAIN_INSTRUCTION
,
417 /* skip range check */
/*
 * Emit STATE_SIP (2 DWords) for GEN6/GEN7: the header followed by the sip
 * value.
 * NOTE(review): the declarations of the sip and cp parameters are not
 * visible in this extract.
 */
425 gen6_emit_STATE_SIP(const struct ilo_dev_info
*dev
,
429 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x02);
430 const uint8_t cmd_len
= 2;
432 ILO_GPE_VALID_GEN(dev
, 6, 7);
434 ilo_cp_begin(cp
, cmd_len
);
435 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
436 ilo_cp_write(cp
, sip
);
/*
 * Emit 3DSTATE_VF_STATISTICS for GEN6/GEN7.  This is a single-DWord
 * command: the header is ORed with enable directly and no length field is
 * added.
 * NOTE(review): the declarations of the enable and cp parameters are not
 * visible in this extract.
 */
441 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info
*dev
,
445 const uint32_t cmd
= ILO_GPE_CMD(0x1, 0x0, 0x0b);
446 const uint8_t cmd_len
= 1;
448 ILO_GPE_VALID_GEN(dev
, 6, 7);
450 ilo_cp_begin(cp
, cmd_len
);
451 ilo_cp_write(cp
, cmd
| enable
);
/*
 * Emit PIPELINE_SELECT for GEN6/GEN7.  This is a single-DWord command: the
 * header is ORed with pipeline, which must be 0x0 or 0x1 (asserted).
 * NOTE(review): cmd is declared as const int here, whereas the other
 * emitters in this file use const uint32_t.  The declarations of the
 * pipeline and cp parameters are not visible in this extract.
 */
456 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info
*dev
,
460 const int cmd
= ILO_GPE_CMD(0x1, 0x1, 0x04);
461 const uint8_t cmd_len
= 1;
463 ILO_GPE_VALID_GEN(dev
, 6, 7);
466 assert(pipeline
== 0x0 || pipeline
== 0x1);
468 ilo_cp_begin(cp
, cmd_len
);
469 ilo_cp_write(cp
, cmd
| pipeline
);
/*
 * Emit MEDIA_VFE_STATE (8 DWords), GEN6 only.
 *
 * dw2 packs (max_threads - 1) at bit 16 and num_urb_entries at bit 8, and
 * sets bits 7 and 6 (Reset Gateway Timer, Bypass Gateway Control).  dw4
 * packs urb_entry_size at bit 16 with a fixed CURBE allocation size of
 * 480.  The scratch, MBZ and scoreboard DWords are written as zero.
 *
 * NOTE(review): the urb_entry_size and cp parameters, the dw2/dw4
 * declarations and the last two DWord writes implied by cmd_len are not
 * visible in this extract.
 */
474 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info
*dev
,
475 int max_threads
, int num_urb_entries
,
479 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x00);
480 const uint8_t cmd_len
= 8;
483 ILO_GPE_VALID_GEN(dev
, 6, 6);
485 dw2
= (max_threads
- 1) << 16 |
486 num_urb_entries
<< 8 |
487 1 << 7 | /* Reset Gateway Timer */
488 1 << 6; /* Bypass Gateway Control */
490 dw4
= urb_entry_size
<< 16 | /* URB Entry Allocation Size */
491 480; /* CURBE Allocation Size */
493 ilo_cp_begin(cp
, cmd_len
);
494 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
495 ilo_cp_write(cp
, 0); /* scratch */
496 ilo_cp_write(cp
, dw2
);
497 ilo_cp_write(cp
, 0); /* MBZ */
498 ilo_cp_write(cp
, dw4
);
499 ilo_cp_write(cp
, 0); /* scoreboard */
/*
 * Emit MEDIA_CURBE_LOAD (4 DWords), GEN6 only.
 *
 * buf must be a multiple of 32 (asserted).  size is rounded up to a
 * multiple of 32 with align() before being written, then buf is written.
 * NOTE(review): the declaration of the cp parameter is not visible in
 * this extract.
 */
506 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info
*dev
,
507 uint32_t buf
, int size
,
510 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x01);
511 const uint8_t cmd_len
= 4;
513 ILO_GPE_VALID_GEN(dev
, 6, 6);
515 assert(buf
% 32 == 0);
516 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
517 size
= align(size
, 32);
519 ilo_cp_begin(cp
, cmd_len
);
520 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
521 ilo_cp_write(cp
, 0); /* MBZ */
522 ilo_cp_write(cp
, size
);
523 ilo_cp_write(cp
, buf
);
/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (4 DWords), GEN6 only.
 *
 * offset must be a multiple of 32 (asserted).  The length DWord is
 * num_ids * 8 * 4, i.e. eight 4-byte DWords per interface descriptor,
 * followed by offset.
 * NOTE(review): the declaration of the cp parameter is not visible in
 * this extract.
 */
528 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info
*dev
,
529 uint32_t offset
, int num_ids
,
532 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x02);
533 const uint8_t cmd_len
= 4;
535 ILO_GPE_VALID_GEN(dev
, 6, 6);
537 assert(offset
% 32 == 0);
539 ilo_cp_begin(cp
, cmd_len
);
540 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
541 ilo_cp_write(cp
, 0); /* MBZ */
542 /* every ID has 8 DWords */
543 ilo_cp_write(cp
, num_ids
* 8 * 4);
544 ilo_cp_write(cp
, offset
);
/*
 * Emit MEDIA_GATEWAY_STATE (2 DWords), GEN6 only: the header followed by
 * dw1.
 * NOTE(review): how dw1 is built from id, byte and thread_count is not
 * visible in this extract, nor is the declaration of the cp parameter.
 */
549 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info
*dev
,
550 int id
, int byte
, int thread_count
,
553 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x03);
554 const uint8_t cmd_len
= 2;
557 ILO_GPE_VALID_GEN(dev
, 6, 6);
563 ilo_cp_begin(cp
, cmd_len
);
564 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
565 ilo_cp_write(cp
, dw1
);
/*
 * Emit MEDIA_STATE_FLUSH (2 DWords), GEN6 only: the header followed by
 * dw1, which places thread_count_water_mark at bit 16.
 * NOTE(review): the remaining operand ORed into dw1 and the remaining
 * parameters (including cp) are not visible in this extract.
 */
570 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info
*dev
,
571 int thread_count_water_mark
,
575 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x04);
576 const uint8_t cmd_len
= 2;
579 ILO_GPE_VALID_GEN(dev
, 6, 6);
581 dw1
= thread_count_water_mark
<< 16 |
584 ilo_cp_begin(cp
, cmd_len
);
585 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
586 ilo_cp_write(cp
, dw1
);
/*
 * Placeholder for MEDIA_OBJECT_WALKER: the only visible statement is an
 * assert that always fails, flagging the command as unsupported.
 */
591 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info
*dev
,
594 assert(!"MEDIA_OBJECT_WALKER unsupported");
/*
 * Emit 3DSTATE_BINDING_TABLE_POINTERS (4 DWords), GEN6 only.
 *
 * The header always carries the VS, GS and PS modify flags, so all three
 * binding table values are updated together; they are written in VS, GS,
 * PS order.
 * NOTE(review): the declaration of the cp parameter is not visible in
 * this extract.
 */
598 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info
*dev
,
599 uint32_t vs_binding_table
,
600 uint32_t gs_binding_table
,
601 uint32_t ps_binding_table
,
604 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x01);
605 const uint8_t cmd_len
= 4;
607 ILO_GPE_VALID_GEN(dev
, 6, 6);
609 ilo_cp_begin(cp
, cmd_len
);
610 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
611 GEN6_BINDING_TABLE_MODIFY_VS
|
612 GEN6_BINDING_TABLE_MODIFY_GS
|
613 GEN6_BINDING_TABLE_MODIFY_PS
);
614 ilo_cp_write(cp
, vs_binding_table
);
615 ilo_cp_write(cp
, gs_binding_table
);
616 ilo_cp_write(cp
, ps_binding_table
);
/*
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS (4 DWords), GEN6 only.
 *
 * The header always carries the VS, GS and PS change flags, so all three
 * sampler state values are updated together; they are written in VS, GS,
 * PS order.
 * NOTE(review): the declaration of the cp parameter is not visible in
 * this extract.
 */
621 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info
*dev
,
622 uint32_t vs_sampler_state
,
623 uint32_t gs_sampler_state
,
624 uint32_t ps_sampler_state
,
627 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x02);
628 const uint8_t cmd_len
= 4;
630 ILO_GPE_VALID_GEN(dev
, 6, 6);
632 ilo_cp_begin(cp
, cmd_len
);
633 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
634 VS_SAMPLER_STATE_CHANGE
|
635 GS_SAMPLER_STATE_CHANGE
|
636 PS_SAMPLER_STATE_CHANGE
);
637 ilo_cp_write(cp
, vs_sampler_state
);
638 ilo_cp_write(cp
, gs_sampler_state
);
639 ilo_cp_write(cp
, ps_sampler_state
);
/*
 * Emit 3DSTATE_URB (3 DWords), GEN6 only.
 *
 * Entry sizes are converted to allocation sizes in 128-byte (1024-bit)
 * rows, rounding up; both must be at most 5 (asserted).  The entry counts
 * are total_size / row_size / alloc_size rounded down to a multiple of 4
 * and capped at 256; the VS count must be at least 24 (asserted).  The
 * allocation sizes are written minus one.
 *
 * NOTE(review): the statements that enforce the lower bound of the
 * "[1, 5]" range are not visible in this extract.  If an allocation size
 * can still be zero at the divisions below, they divide by zero --
 * confirm.  The cp parameter declaration is not visible either.
 */
644 gen6_emit_3DSTATE_URB(const struct ilo_dev_info
*dev
,
645 int vs_total_size
, int gs_total_size
,
646 int vs_entry_size
, int gs_entry_size
,
649 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x05);
650 const uint8_t cmd_len
= 3;
651 const int row_size
= 128; /* 1024 bits */
652 int vs_alloc_size
, gs_alloc_size
;
653 int vs_num_entries
, gs_num_entries
;
655 ILO_GPE_VALID_GEN(dev
, 6, 6);
657 /* in 1024-bit URB rows */
658 vs_alloc_size
= (vs_entry_size
+ row_size
- 1) / row_size
;
659 gs_alloc_size
= (gs_entry_size
+ row_size
- 1) / row_size
;
661 /* the valid range is [1, 5] */
666 assert(vs_alloc_size
<= 5 && gs_alloc_size
<= 5);
668 /* the valid range is [24, 256] in multiples of 4 */
669 vs_num_entries
= (vs_total_size
/ row_size
/ vs_alloc_size
) & ~3;
670 if (vs_num_entries
> 256)
671 vs_num_entries
= 256;
672 assert(vs_num_entries
>= 24);
674 /* the valid range is [0, 256] in multiples of 4 */
675 gs_num_entries
= (gs_total_size
/ row_size
/ gs_alloc_size
) & ~3;
676 if (gs_num_entries
> 256)
677 gs_num_entries
= 256;
679 ilo_cp_begin(cp
, cmd_len
);
680 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
681 ilo_cp_write(cp
, (vs_alloc_size
- 1) << GEN6_URB_VS_SIZE_SHIFT
|
682 vs_num_entries
<< GEN6_URB_VS_ENTRIES_SHIFT
);
683 ilo_cp_write(cp
, gs_num_entries
<< GEN6_URB_GS_ENTRIES_SHIFT
|
684 (gs_alloc_size
- 1) << GEN6_URB_GS_SIZE_SHIFT
);
/*
 * Emit 3DSTATE_VERTEX_BUFFERS for GEN6/GEN7: one header DWord plus four
 * DWords per hardware vertex buffer (cmd_len = 1 + 4 * ve->vb_count).  At
 * most 33 buffers are allowed (asserted).
 *
 * For each hardware slot the pipe vertex buffer is found through
 * ve->vb_mapping[].  The first DWord holds the slot index, an access mode
 * (GEN6_VB0_ACCESS_INSTANCEDATA when the slot's instance divisor is
 * non-zero; GEN6_VB0_ACCESS_VERTEXDATA is the other visible mode) and, on
 * GEN7+, the address-modify-enable bit.  When the buffer exists and its
 * stride is at most 2048, the stride is added and the start offset, the
 * end offset (intel_bo_get_size() - 1) and the instance divisor follow.
 * The other path (null vertex buffer) writes dw and the instance divisor.
 *
 * NOTE(review): the cp parameter, several local declarations, the else
 * keywords, any early return and the zero writes of the null-buffer path
 * are not visible in this extract.
 */
689 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info
*dev
,
690 const struct ilo_ve_state
*ve
,
691 const struct ilo_vb_state
*vb
,
694 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x08);
698 ILO_GPE_VALID_GEN(dev
, 6, 7);
701 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
703 * "From 1 to 33 VBs can be specified..."
705 assert(ve
->vb_count
<= 33);
710 cmd_len
= 1 + 4 * ve
->vb_count
;
712 ilo_cp_begin(cp
, cmd_len
);
713 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
715 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
716 const unsigned instance_divisor
= ve
->instance_divisors
[hw_idx
];
717 const unsigned pipe_idx
= ve
->vb_mapping
[hw_idx
];
718 const struct pipe_vertex_buffer
*cso
= &vb
->states
[pipe_idx
];
721 dw
= hw_idx
<< GEN6_VB0_INDEX_SHIFT
;
723 if (instance_divisor
)
724 dw
|= GEN6_VB0_ACCESS_INSTANCEDATA
;
726 dw
|= GEN6_VB0_ACCESS_VERTEXDATA
;
728 if (dev
->gen
>= ILO_GEN(7))
729 dw
|= GEN7_VB0_ADDRESS_MODIFYENABLE
;
731 /* use null vb if there is no buffer or the stride is out of range */
732 if (cso
->buffer
&& cso
->stride
<= 2048) {
733 const struct ilo_buffer
*buf
= ilo_buffer(cso
->buffer
);
734 const uint32_t start_offset
= cso
->buffer_offset
;
736 * As noted in ilo_translate_format(), we treat some 3-component
737 * formats as 4-component formats to work around hardware
738 * limitations. Imagine the case where the vertex buffer holds a
739 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
740 * The hardware would not be able to fetch it because the vertex
741 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
742 * and that takes at least 8 bytes.
744 * For the workaround to work, we query the physical size, which is
745 * page aligned, to calculate end_offset so that the last vertex has
746 * a better chance to be fetched.
748 const uint32_t end_offset
= intel_bo_get_size(buf
->bo
) - 1;
750 dw
|= cso
->stride
<< BRW_VB0_PITCH_SHIFT
;
752 ilo_cp_write(cp
, dw
);
753 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
754 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
755 ilo_cp_write(cp
, instance_divisor
);
760 ilo_cp_write(cp
, dw
);
763 ilo_cp_write(cp
, instance_divisor
);
/*
 * Initialize a vertex element CSO from four component controls.
 *
 * payload[0] is set to GEN6_VE0_VALID, and comp0..comp3 are packed with
 * the BRW_VE1_COMPONENT_{0..3}_SHIFT shifts.  Valid for GEN6 and GEN7.
 * NOTE(review): the assignment target of the packed component word is not
 * visible in this extract (presumably cso->payload[1], given the
 * STATIC_ASSERT on the payload size -- confirm).
 */
771 ve_init_cso_with_components(const struct ilo_dev_info
*dev
,
772 int comp0
, int comp1
, int comp2
, int comp3
,
773 struct ilo_ve_cso
*cso
)
775 ILO_GPE_VALID_GEN(dev
, 6, 7);
777 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
778 cso
->payload
[0] = GEN6_VE0_VALID
;
780 comp0
<< BRW_VE1_COMPONENT_0_SHIFT
|
781 comp1
<< BRW_VE1_COMPONENT_1_SHIFT
|
782 comp2
<< BRW_VE1_COMPONENT_2_SHIFT
|
783 comp3
<< BRW_VE1_COMPONENT_3_SHIFT
;
/*
 * Turn a vertex element CSO into an edge-flag element (GEN6/GEN7).
 *
 * Visible behaviour: GEN6_VE0_EDGE_FLAG_ENABLE is ORed into payload[0];
 * a component word with STORE_SRC for component 0 and NOSTORE for
 * components 1-3 is built; the 9-bit source format is then read back
 * from payload[0].  When it is BRW_SURFACEFORMAT_R32_FLOAT, payload[0] is
 * decremented by one format step, which the STATIC_ASSERT shows turns it
 * into BRW_SURFACEFORMAT_R32_UINT.  The other path asserts the format is
 * BRW_SURFACEFORMAT_R8_UINT.
 *
 * NOTE(review): the assignment target of the component word, the local
 * format declaration and the else keyword are not visible in this
 * extract.
 */
787 ve_set_cso_edgeflag(const struct ilo_dev_info
*dev
,
788 struct ilo_ve_cso
*cso
)
792 ILO_GPE_VALID_GEN(dev
, 6, 7);
795 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
797 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
798 * valid VERTEX_ELEMENT structure.
800 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
801 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
803 * - The Source Element Format must be set to the UINT format.
805 * - [DevSNB]: Edge Flags are not supported for QUADLIST
806 * primitives. Software may elect to convert QUADLIST primitives
807 * to some set of corresponding edge-flag-supported primitive
808 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
811 cso
->payload
[0] |= GEN6_VE0_EDGE_FLAG_ENABLE
;
813 BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_0_SHIFT
|
814 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_1_SHIFT
|
815 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_2_SHIFT
|
816 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_3_SHIFT
;
819 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
820 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
821 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
823 * Since all the hardware cares about is whether the flags are zero or not,
824 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
826 format
= (cso
->payload
[0] >> BRW_VE0_FORMAT_SHIFT
) & 0x1ff;
827 if (format
== BRW_SURFACEFORMAT_R32_FLOAT
) {
828 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT
==
829 BRW_SURFACEFORMAT_R32_FLOAT
- 1);
831 cso
->payload
[0] -= (1 << BRW_VE0_FORMAT_SHIFT
);
834 assert(format
== BRW_SURFACEFORMAT_R8_UINT
);
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS for GEN6/GEN7.
 *
 * At most 34 elements are allowed, counting the optional generated-ID
 * element (asserted).  When there are no elements at all, a single dummy
 * element storing (0, 0, 0, 1.0) is emitted instead.  Otherwise the
 * command is 2 DWords per element plus the header: first the optional
 * element storing the vertex ID and instance ID, then the user elements.
 * With last_velement_edgeflag set, the last user element is copied,
 * converted by ve_set_cso_edgeflag() and written in place of the
 * original.
 *
 * NOTE(review): with last_velement_edgeflag set the loop bound is
 * ve->count - 1 and ve->cso[i] is read afterwards; this appears to assume
 * ve->count >= 1 -- confirm callers guarantee it.
 * NOTE(review): the cp parameter, the cmd_len/i declarations, the cmd_len
 * value of the dummy path, the trailing arguments of the
 * ve_init_cso_with_components() calls, the return after the dummy path
 * and the else keyword are not visible in this extract.
 */
839 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info
*dev
,
840 const struct ilo_ve_state
*ve
,
841 bool last_velement_edgeflag
,
842 bool prepend_generated_ids
,
845 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x09);
849 ILO_GPE_VALID_GEN(dev
, 6, 7);
852 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
854 * "Up to 34 (DevSNB+) vertex elements are supported."
856 assert(ve
->count
+ prepend_generated_ids
<= 34);
858 if (!ve
->count
&& !prepend_generated_ids
) {
859 struct ilo_ve_cso dummy
;
861 ve_init_cso_with_components(dev
,
862 BRW_VE1_COMPONENT_STORE_0
,
863 BRW_VE1_COMPONENT_STORE_0
,
864 BRW_VE1_COMPONENT_STORE_0
,
865 BRW_VE1_COMPONENT_STORE_1_FLT
,
869 ilo_cp_begin(cp
, cmd_len
);
870 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
871 ilo_cp_write_multi(cp
, dummy
.payload
, 2);
877 cmd_len
= 2 * (ve
->count
+ prepend_generated_ids
) + 1;
879 ilo_cp_begin(cp
, cmd_len
);
880 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
882 if (prepend_generated_ids
) {
883 struct ilo_ve_cso gen_ids
;
885 ve_init_cso_with_components(dev
,
886 BRW_VE1_COMPONENT_STORE_VID
,
887 BRW_VE1_COMPONENT_STORE_IID
,
888 BRW_VE1_COMPONENT_NOSTORE
,
889 BRW_VE1_COMPONENT_NOSTORE
,
892 ilo_cp_write_multi(cp
, gen_ids
.payload
, 2);
895 if (last_velement_edgeflag
) {
896 struct ilo_ve_cso edgeflag
;
898 for (i
= 0; i
< ve
->count
- 1; i
++)
899 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
901 edgeflag
= ve
->cso
[i
];
902 ve_set_cso_edgeflag(dev
, &edgeflag
);
903 ilo_cp_write_multi(cp
, edgeflag
.payload
, 2);
906 for (i
= 0; i
< ve
->count
; i
++)
907 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
/*
 * Emit 3DSTATE_INDEX_BUFFER (3 DWords) for GEN6/GEN7.
 *
 * The index format is chosen from ib->hw_index_size
 * (BRW_INDEX_DWORD / WORD / BYTE; an unknown size asserts and falls back
 * to BYTE).  end_offset starts from buf->bo_size and is rounded down to a
 * multiple of the index size.  The header carries BRW_CUT_INDEX_ENABLE
 * when enable_cut_index is set, and the start and end offsets are written
 * against the buffer's bo.
 *
 * NOTE(review): the cp parameter, the case labels, the start_offset
 * assignment, the adjustment that makes end_offset inclusive, the format
 * operand of the header and any early return for a missing buffer are not
 * visible in this extract.
 */
914 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info
*dev
,
915 const struct ilo_ib_state
*ib
,
916 bool enable_cut_index
,
919 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0a);
920 const uint8_t cmd_len
= 3;
921 struct ilo_buffer
*buf
= ilo_buffer(ib
->hw_resource
);
922 uint32_t start_offset
, end_offset
;
925 ILO_GPE_VALID_GEN(dev
, 6, 7);
930 switch (ib
->hw_index_size
) {
932 format
= BRW_INDEX_DWORD
;
935 format
= BRW_INDEX_WORD
;
938 format
= BRW_INDEX_BYTE
;
941 assert(!"unknown index size");
942 format
= BRW_INDEX_BYTE
;
947 * set start_offset to 0 here and adjust pipe_draw_info::start with
948 * ib->draw_start_offset in 3DPRIMITIVE
951 end_offset
= buf
->bo_size
;
953 /* end_offset must also be aligned and is inclusive */
954 end_offset
-= (end_offset
% ib
->hw_index_size
);
957 ilo_cp_begin(cp
, cmd_len
);
958 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
959 ((enable_cut_index
) ? BRW_CUT_INDEX_ENABLE
: 0) |
961 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
962 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
/*
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS (4 DWords), GEN6 only.
 *
 * The header always carries the CLIP, SF and CC modify flags, so all three
 * viewport values are updated together; they are written in CLIP, SF, CC
 * order.
 * NOTE(review): the declaration of the cp parameter is not visible in
 * this extract.
 */
967 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info
*dev
,
968 uint32_t clip_viewport
,
969 uint32_t sf_viewport
,
970 uint32_t cc_viewport
,
973 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0d);
974 const uint8_t cmd_len
= 4;
976 ILO_GPE_VALID_GEN(dev
, 6, 6);
978 ilo_cp_begin(cp
, cmd_len
);
979 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
980 GEN6_CLIP_VIEWPORT_MODIFY
|
981 GEN6_SF_VIEWPORT_MODIFY
|
982 GEN6_CC_VIEWPORT_MODIFY
);
983 ilo_cp_write(cp
, clip_viewport
);
984 ilo_cp_write(cp
, sf_viewport
);
985 ilo_cp_write(cp
, cc_viewport
);
/*
 * Emit 3DSTATE_CC_STATE_POINTERS (4 DWords), GEN6 only.
 *
 * Each of the blend, depth/stencil and color-calc values is written with
 * bit 0 set (presumably the per-pointer modify-enable bit -- confirm
 * against the PRM).
 * NOTE(review): the declaration of the cp parameter is not visible in
 * this extract.
 */
990 gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info
*dev
,
991 uint32_t blend_state
,
992 uint32_t depth_stencil_state
,
993 uint32_t color_calc_state
,
996 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0e);
997 const uint8_t cmd_len
= 4;
999 ILO_GPE_VALID_GEN(dev
, 6, 6);
1001 ilo_cp_begin(cp
, cmd_len
);
1002 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1003 ilo_cp_write(cp
, blend_state
| 1);
1004 ilo_cp_write(cp
, depth_stencil_state
| 1);
1005 ilo_cp_write(cp
, color_calc_state
| 1);
/*
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS (2 DWords) for GEN6/GEN7: the header
 * followed by the scissor_rect value.
 * NOTE(review): the declaration of the cp parameter is not visible in
 * this extract.
 */
1010 gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info
*dev
,
1011 uint32_t scissor_rect
,
1014 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0f);
1015 const uint8_t cmd_len
= 2;
1017 ILO_GPE_VALID_GEN(dev
, 6, 7);
1019 ilo_cp_begin(cp
, cmd_len
);
1020 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1021 ilo_cp_write(cp
, scissor_rect
);
/*
 * Emit 3DSTATE_VS (6 DWords) for GEN6/GEN7.
 *
 * Two emission sequences are visible.  The first writes the header
 * followed by five zero DWords.  The second takes DW2, DW4 and DW5 from
 * the kernel CSO payload, adds the sampler count to DW2 in units of four
 * samplers (rounded up), and writes the kernel offset, DW2, a zero
 * scratch DWord, DW4 and DW5.
 *
 * NOTE(review): the num_samplers and cp parameters and the condition
 * selecting the all-zero sequence (presumably a missing VS -- confirm),
 * together with its return, are not visible in this extract.
 */
1026 gen6_emit_3DSTATE_VS(const struct ilo_dev_info
*dev
,
1027 const struct ilo_shader_state
*vs
,
1031 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x10);
1032 const uint8_t cmd_len
= 6;
1033 const struct ilo_shader_cso
*cso
;
1034 uint32_t dw2
, dw4
, dw5
;
1036 ILO_GPE_VALID_GEN(dev
, 6, 7);
1039 ilo_cp_begin(cp
, cmd_len
);
1040 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1041 ilo_cp_write(cp
, 0);
1042 ilo_cp_write(cp
, 0);
1043 ilo_cp_write(cp
, 0);
1044 ilo_cp_write(cp
, 0);
1045 ilo_cp_write(cp
, 0);
1050 cso
= ilo_shader_get_kernel_cso(vs
);
1051 dw2
= cso
->payload
[0];
1052 dw4
= cso
->payload
[1];
1053 dw5
= cso
->payload
[2];
1055 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT
;
1057 ilo_cp_begin(cp
, cmd_len
);
1058 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1059 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(vs
));
1060 ilo_cp_write(cp
, dw2
);
1061 ilo_cp_write(cp
, 0); /* scratch */
1062 ilo_cp_write(cp
, dw4
);
1063 ilo_cp_write(cp
, dw5
);
/*
 * Emit 3DSTATE_GS (7 DWords), GEN6 only.
 *
 * Three ways of filling DW1..DW6 are visible:
 *  - from the GS kernel: kernel offset plus the four payload DWords of its
 *    CSO;
 *  - when vs is set and reports ILO_KERNEL_VS_GEN6_SO: the VS kernel
 *    offset plus a stream-output offset parameter chosen by
 *    verts_per_prim (point, line or triangle), with a GS CSO built by
 *    ilo_gpe_init_gs_cso_gen6() because the VS's own CSO cannot be used;
 *  - a fallback that sets DW4 to a URB read length of 1 and DW5 to
 *    GEN6_GS_STATISTICS_ENABLE.
 * DW3 is always written as zero.
 *
 * NOTE(review): the verts_per_prim and cp parameters, the condition of the
 * first path (presumably a non-NULL gs -- confirm), the switch case
 * labels, and the fallback's assignments to dw1, dw2 and dw6 are not
 * visible in this extract.
 */
1068 gen6_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
1069 const struct ilo_shader_state
*gs
,
1070 const struct ilo_shader_state
*vs
,
1074 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x11);
1075 const uint8_t cmd_len
= 7;
1076 uint32_t dw1
, dw2
, dw4
, dw5
, dw6
;
1078 ILO_GPE_VALID_GEN(dev
, 6, 6);
1081 const struct ilo_shader_cso
*cso
;
1083 dw1
= ilo_shader_get_kernel_offset(gs
);
1085 cso
= ilo_shader_get_kernel_cso(gs
);
1086 dw2
= cso
->payload
[0];
1087 dw4
= cso
->payload
[1];
1088 dw5
= cso
->payload
[2];
1089 dw6
= cso
->payload
[3];
1091 else if (vs
&& ilo_shader_get_kernel_param(vs
, ILO_KERNEL_VS_GEN6_SO
)) {
1092 struct ilo_shader_cso cso
;
1093 enum ilo_kernel_param param
;
1095 switch (verts_per_prim
) {
1097 param
= ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
;
1100 param
= ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
;
1103 param
= ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
;
1107 dw1
= ilo_shader_get_kernel_offset(vs
) +
1108 ilo_shader_get_kernel_param(vs
, param
);
1110 /* cannot use VS's CSO */
1111 ilo_gpe_init_gs_cso_gen6(dev
, vs
, &cso
);
1112 dw2
= cso
.payload
[0];
1113 dw4
= cso
.payload
[1];
1114 dw5
= cso
.payload
[2];
1115 dw6
= cso
.payload
[3];
1120 dw4
= 1 << GEN6_GS_URB_READ_LENGTH_SHIFT
;
1121 dw5
= GEN6_GS_STATISTICS_ENABLE
;
1125 ilo_cp_begin(cp
, cmd_len
);
1126 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1127 ilo_cp_write(cp
, dw1
);
1128 ilo_cp_write(cp
, dw2
);
1129 ilo_cp_write(cp
, 0);
1130 ilo_cp_write(cp
, dw4
);
1131 ilo_cp_write(cp
, dw5
);
1132 ilo_cp_write(cp
, dw6
);
/*
 * Emit 3DSTATE_CLIP (4 DWords) for GEN6/GEN7.
 *
 * DW1..DW3 start from the rasterizer's precomputed clip payload.  The
 * guardband test is enabled only when the caller asks for it and the
 * rasterizer state allows it.  When the fragment shader uses any
 * non-perspective barycentric interpolation mode, the non-perspective
 * barycentric enable bit is set (interps is 0 when fs is NULL).  DW3
 * additionally gets GEN6_CLIP_FORCE_ZERO_RTAINDEX and (num_viewports - 1).
 *
 * NOTE(review): the num_viewports and cp parameters, the interps
 * declaration and any alternative path for a missing rasterizer are not
 * visible in this extract.
 */
1137 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info
*dev
,
1138 const struct ilo_rasterizer_state
*rasterizer
,
1139 const struct ilo_shader_state
*fs
,
1140 bool enable_guardband
,
1144 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x12);
1145 const uint8_t cmd_len
= 4;
1146 uint32_t dw1
, dw2
, dw3
;
1148 ILO_GPE_VALID_GEN(dev
, 6, 7);
1153 dw1
= rasterizer
->clip
.payload
[0];
1154 dw2
= rasterizer
->clip
.payload
[1];
1155 dw3
= rasterizer
->clip
.payload
[2];
1157 if (enable_guardband
&& rasterizer
->clip
.can_enable_guardband
)
1158 dw2
|= GEN6_CLIP_GB_TEST
;
1160 interps
= (fs
) ? ilo_shader_get_kernel_param(fs
,
1161 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) : 0;
1163 if (interps
& (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
|
1164 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
|
1165 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC
))
1166 dw2
|= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE
;
1168 dw3
|= GEN6_CLIP_FORCE_ZERO_RTAINDEX
|
1169 (num_viewports
- 1);
1177 ilo_cp_begin(cp
, cmd_len
);
1178 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1179 ilo_cp_write(cp
, dw1
);
1180 ilo_cp_write(cp
, dw2
);
1181 ilo_cp_write(cp
, dw3
);
/*
 * Emit 3DSTATE_SF (20 DWords), GEN6 only.
 *
 * The payload is assembled from two helpers:
 * ilo_gpe_gen6_fill_3dstate_sf_raster() fills 6 DWords and
 * ilo_gpe_gen6_fill_3dstate_sf_sbe() fills 13 DWords.  The raster helper
 * is called with the literal 1 and PIPE_FORMAT_NONE ahead of the payload
 * arguments.  On the wire the first SBE DWord follows the header, then
 * the six raster DWords, then the remaining twelve SBE DWords.
 * NOTE(review): the declaration of the cp parameter is not visible in
 * this extract.
 */
1186 gen6_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
1187 const struct ilo_rasterizer_state
*rasterizer
,
1188 const struct ilo_shader_state
*fs
,
1191 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x13);
1192 const uint8_t cmd_len
= 20;
1193 uint32_t payload_raster
[6], payload_sbe
[13];
1195 ILO_GPE_VALID_GEN(dev
, 6, 6);
1197 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
, rasterizer
,
1198 1, PIPE_FORMAT_NONE
, payload_raster
, Elements(payload_raster
));
1199 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
,
1200 fs
, payload_sbe
, Elements(payload_sbe
));
1202 ilo_cp_begin(cp
, cmd_len
);
1203 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1204 ilo_cp_write(cp
, payload_sbe
[0]);
1205 ilo_cp_write_multi(cp
, payload_raster
, 6);
1206 ilo_cp_write_multi(cp
, &payload_sbe
[1], 12);
/*
 * Emit 3DSTATE_WM (9 DWords), GEN6 only.  num_samples is fixed at 1 here.
 *
 * Two emission sequences are visible.  The first writes zeros for every
 * DWord except the one holding the maximum thread count
 * ((max_threads - 1) at GEN6_WM_MAX_THREADS_SHIFT, with max_threads 80
 * when dev->gt is 2 and 40 otherwise), so that field keeps a valid value
 * even when dispatching is disabled.  The second takes DW2, DW4, DW5 and
 * DW6 from the fragment shader CSO payload, adds the sampler count to DW2
 * in units of four samplers (rounded up), and merges the rasterizer's WM
 * payload into DW5 and DW6, adding the MSAA rasterization/dispatch bits to
 * DW6 only when num_samples > 1.  It then writes the kernel offset, DW2,
 * a zero scratch DWord, DW4..DW6 and two zero kernel pointers.
 *
 * NOTE(review): the num_samplers and cp parameters and the conditions
 * guarding the all-zero sequence, the statistics enable, the depth
 * clear/resolve bits, the kill/dispatch enables and the dual-source
 * blend bit are not visible in this extract.
 */
1211 gen6_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
1212 const struct ilo_shader_state
*fs
,
1214 const struct ilo_rasterizer_state
*rasterizer
,
1215 bool dual_blend
, bool cc_may_kill
,
1218 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x14);
1219 const uint8_t cmd_len
= 9;
1220 const int num_samples
= 1;
1221 const struct ilo_shader_cso
*fs_cso
;
1222 uint32_t dw2
, dw4
, dw5
, dw6
;
1224 ILO_GPE_VALID_GEN(dev
, 6, 6);
1227 /* see brwCreateContext() */
1228 const int max_threads
= (dev
->gt
== 2) ? 80 : 40;
1230 ilo_cp_begin(cp
, cmd_len
);
1231 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1232 ilo_cp_write(cp
, 0);
1233 ilo_cp_write(cp
, 0);
1234 ilo_cp_write(cp
, 0);
1235 ilo_cp_write(cp
, 0);
1236 /* honor the valid range even if dispatching is disabled */
1237 ilo_cp_write(cp
, (max_threads
- 1) << GEN6_WM_MAX_THREADS_SHIFT
);
1238 ilo_cp_write(cp
, 0);
1239 ilo_cp_write(cp
, 0);
1240 ilo_cp_write(cp
, 0);
1246 fs_cso
= ilo_shader_get_kernel_cso(fs
);
1247 dw2
= fs_cso
->payload
[0];
1248 dw4
= fs_cso
->payload
[1];
1249 dw5
= fs_cso
->payload
[2];
1250 dw6
= fs_cso
->payload
[3];
1252 dw2
|= (num_samplers
+ 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT
;
1255 dw4
|= GEN6_WM_STATISTICS_ENABLE
;
1259 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1261 * "This bit (Statistics Enable) must be disabled if either of these
1262 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1263 * Resolve Enable or Depth Buffer Resolve Enable."
1265 dw4
|= GEN6_WM_DEPTH_CLEAR
;
1266 dw4
|= GEN6_WM_DEPTH_RESOLVE
;
1267 dw4
|= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE
;
1271 dw5
|= GEN6_WM_KILL_ENABLE
|
1272 GEN6_WM_DISPATCH_ENABLE
;
1276 dw5
|= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE
;
1278 dw5
|= rasterizer
->wm
.payload
[0];
1280 dw6
|= rasterizer
->wm
.payload
[1];
1282 if (num_samples
> 1) {
1283 dw6
|= rasterizer
->wm
.dw_msaa_rast
|
1284 rasterizer
->wm
.dw_msaa_disp
;
1287 ilo_cp_begin(cp
, cmd_len
);
1288 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1289 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
1290 ilo_cp_write(cp
, dw2
);
1291 ilo_cp_write(cp
, 0); /* scratch */
1292 ilo_cp_write(cp
, dw4
);
1293 ilo_cp_write(cp
, dw5
);
1294 ilo_cp_write(cp
, dw6
);
1295 ilo_cp_write(cp
, 0); /* kernel 1 */
1296 ilo_cp_write(cp
, 0); /* kernel 2 */
/*
 * Compute the four buffer dwords used by the 3DSTATE_CONSTANT_{VS,GS,PS}
 * emitters in this file.
 *
 * num_dwords must be 4.  For every slot i in [0, 4) with i < num_bufs and a
 * non-zero sizes[i], dw[i] is set to bufs[i] ORed with the read length, where
 * the read length is sizes[i] rounded up to 32-byte (256-bit) units, minus
 * one.  bufs[i] must be a multiple of 32 and the encoded read length must be
 * below 32.  The sum of the actual read lengths (encoded value + 1) is
 * asserted to be at most max_read_length.
 *
 * NOTE(review): the embedded original line numbers skip 1320, 1324-1329 and
 * 1331 onward, so the handling of unused slots, any update of `enabled`, and
 * the return statement are not visible in this view.
 */
1300 static inline unsigned
1301 gen6_fill_3dstate_constant(const struct ilo_dev_info
*dev
,
1302 const uint32_t *bufs
, const int *sizes
,
1303 int num_bufs
, int max_read_length
,
1304 uint32_t *dw
, int num_dwords
)
1306 unsigned enabled
= 0x0;
1307 int total_read_length
, i
;
1309 assert(num_dwords
== 4);
1311 total_read_length
= 0;
1312 for (i
= 0; i
< 4; i
++) {
1313 if (i
< num_bufs
&& sizes
[i
]) {
1314 /* in 256-bit units minus one */
1315 const int read_len
= (sizes
[i
] + 31) / 32 - 1;
1317 assert(bufs
[i
] % 32 == 0);
1318 assert(read_len
< 32);
1321 dw
[i
] = bufs
[i
] | read_len
;
1323 total_read_length
+= read_len
+ 1;
1330 assert(total_read_length
<= max_read_length
);
/*
 * Emit the GEN6 3DSTATE_CONSTANT_VS command (ILO_GPE_CMD(0x3, 0x0, 0x15),
 * 5 dwords).
 *
 * At most 4 buffers are accepted.  The buffer dwords come from
 * gen6_fill_3dstate_constant() with a total read-length limit of 32; its
 * return value is placed at bit 12 of the header dword, followed by the four
 * buffer dwords.
 *
 * NOTE(review): the embedded original line numbers skip 1338-1340, so the
 * remaining parameters (num_bufs and cp are used below) are not visible.
 */
1336 gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info
*dev
,
1337 const uint32_t *bufs
, const int *sizes
,
1341 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x15);
1342 const uint8_t cmd_len
= 5;
1343 uint32_t buf_dw
[4], buf_enabled
;
1345 ILO_GPE_VALID_GEN(dev
, 6, 6);
1346 assert(num_bufs
<= 4);
1349 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
1351 * "The sum of all four read length fields (each incremented to
1352 * represent the actual read length) must be less than or equal to 32"
1354 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1355 bufs
, sizes
, num_bufs
, 32, buf_dw
, Elements(buf_dw
));
1357 ilo_cp_begin(cp
, cmd_len
);
1358 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
1359 ilo_cp_write(cp
, buf_dw
[0]);
1360 ilo_cp_write(cp
, buf_dw
[1]);
1361 ilo_cp_write(cp
, buf_dw
[2]);
1362 ilo_cp_write(cp
, buf_dw
[3]);
/*
 * Emit the GEN6 3DSTATE_CONSTANT_GS command (ILO_GPE_CMD(0x3, 0x0, 0x16),
 * 5 dwords).
 *
 * At most 4 buffers are accepted.  The buffer dwords come from
 * gen6_fill_3dstate_constant() with a total read-length limit of 64; its
 * return value is placed at bit 12 of the header dword, followed by the four
 * buffer dwords.
 *
 * NOTE(review): the embedded original line numbers skip 1369-1371, so the
 * remaining parameters (num_bufs and cp are used below) are not visible.
 */
1367 gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info
*dev
,
1368 const uint32_t *bufs
, const int *sizes
,
1372 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x16);
1373 const uint8_t cmd_len
= 5;
1374 uint32_t buf_dw
[4], buf_enabled
;
1376 ILO_GPE_VALID_GEN(dev
, 6, 6);
1377 assert(num_bufs
<= 4);
1380 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
1382 * "The sum of all four read length fields (each incremented to
1383 * represent the actual read length) must be less than or equal to 64"
1385 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1386 bufs
, sizes
, num_bufs
, 64, buf_dw
, Elements(buf_dw
));
1388 ilo_cp_begin(cp
, cmd_len
);
1389 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
1390 ilo_cp_write(cp
, buf_dw
[0]);
1391 ilo_cp_write(cp
, buf_dw
[1]);
1392 ilo_cp_write(cp
, buf_dw
[2]);
1393 ilo_cp_write(cp
, buf_dw
[3]);
/*
 * Emit the GEN6 3DSTATE_CONSTANT_PS command (ILO_GPE_CMD(0x3, 0x0, 0x17),
 * 5 dwords).
 *
 * At most 4 buffers are accepted.  The buffer dwords come from
 * gen6_fill_3dstate_constant() with a total read-length limit of 64; its
 * return value is placed at bit 12 of the header dword, followed by the four
 * buffer dwords.
 *
 * NOTE(review): the embedded original line numbers skip 1400-1402, so the
 * remaining parameters (num_bufs and cp are used below) are not visible.
 */
1398 gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info
*dev
,
1399 const uint32_t *bufs
, const int *sizes
,
1403 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x17);
1404 const uint8_t cmd_len
= 5;
1405 uint32_t buf_dw
[4], buf_enabled
;
1407 ILO_GPE_VALID_GEN(dev
, 6, 6);
1408 assert(num_bufs
<= 4);
1411 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1413 * "The sum of all four read length fields (each incremented to
1414 * represent the actual read length) must be less than or equal to 64"
1416 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1417 bufs
, sizes
, num_bufs
, 64, buf_dw
, Elements(buf_dw
));
1419 ilo_cp_begin(cp
, cmd_len
);
1420 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
1421 ilo_cp_write(cp
, buf_dw
[0]);
1422 ilo_cp_write(cp
, buf_dw
[1]);
1423 ilo_cp_write(cp
, buf_dw
[2]);
1424 ilo_cp_write(cp
, buf_dw
[3]);
/*
 * Emit the GEN6 3DSTATE_SAMPLE_MASK command (ILO_GPE_CMD(0x3, 0x0, 0x18),
 * 2 dwords).
 *
 * sample_mask is truncated to its low 4 bits (valid_mask = 0xf) before being
 * written as the single payload dword.
 *
 * NOTE(review): the embedded original line numbers skip 1431-1432, so the
 * trailing parameter(s) such as `cp` and the opening brace are not visible.
 */
1429 gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info
*dev
,
1430 unsigned sample_mask
,
1433 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x18);
1434 const uint8_t cmd_len
= 2;
1435 const unsigned valid_mask
= 0xf;
1437 ILO_GPE_VALID_GEN(dev
, 6, 6);
1439 sample_mask
&= valid_mask
;
1441 ilo_cp_begin(cp
, cmd_len
);
1442 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1443 ilo_cp_write(cp
, sample_mask
);
/*
 * Emit 3DSTATE_DRAWING_RECTANGLE (ILO_GPE_CMD(0x3, 0x1, 0x00), 4 dwords) for
 * GEN6 and GEN7.
 *
 * The inclusive maximum corner is derived as (x + width - 1, y + height - 1).
 * x, y, xmax and ymax are each clamped to rect_limit, then written as
 * (y << 16 | x) and (ymax << 16 | xmax); the last dword (the origin) is
 * always 0.
 *
 * NOTE(review): the embedded original line numbers skip 1451-1452, 1457-1458
 * and 1462-1475, so the declaration of rect_limit and the values it takes in
 * the GEN7 and non-GEN7 branches are not visible in this view.
 */
1448 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info
*dev
,
1449 unsigned x
, unsigned y
,
1450 unsigned width
, unsigned height
,
1453 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x00);
1454 const uint8_t cmd_len
= 4;
1455 unsigned xmax
= x
+ width
- 1;
1456 unsigned ymax
= y
+ height
- 1;
1459 ILO_GPE_VALID_GEN(dev
, 6, 7);
1461 if (dev
->gen
>= ILO_GEN(7)) {
1466 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1468 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1469 * must be an even number"
1476 if (x
> rect_limit
) x
= rect_limit
;
1477 if (y
> rect_limit
) y
= rect_limit
;
1478 if (xmax
> rect_limit
) xmax
= rect_limit
;
1479 if (ymax
> rect_limit
) ymax
= rect_limit
;
1481 ilo_cp_begin(cp
, cmd_len
);
1482 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1483 ilo_cp_write(cp
, y
<< 16 | x
);
1484 ilo_cp_write(cp
, ymax
<< 16 | xmax
);
1487 * There is no need to set the origin. It is intended to support front
1490 ilo_cp_write(cp
, 0);
/*
 * Emit 3DSTATE_DEPTH_BUFFER (7 dwords) for GEN6 and GEN7.
 *
 * The opcode differs by generation: ILO_GPE_CMD(0x3, 0x0, 0x05) on GEN7 and
 * later, ILO_GPE_CMD(0x3, 0x1, 0x05) otherwise.  The body is taken from the
 * depth/stencil surface: zs->payload[0], a buffer-object write of
 * zs->payload[1] against zs->bo in the render domain, then zs->payload[2..5].
 *
 * NOTE(review): the embedded original line numbers skip 1498-1499, so the
 * trailing parameter(s) such as `cp` and the opening brace are not visible.
 */
1496 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1497 const struct ilo_zs_surface
*zs
,
1500 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1501 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1502 const uint8_t cmd_len
= 7;
1504 ILO_GPE_VALID_GEN(dev
, 6, 7);
1506 ilo_cp_begin(cp
, cmd_len
);
1507 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1508 ilo_cp_write(cp
, zs
->payload
[0]);
1509 ilo_cp_write_bo(cp
, zs
->payload
[1], zs
->bo
,
1510 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1511 ilo_cp_write(cp
, zs
->payload
[2]);
1512 ilo_cp_write(cp
, zs
->payload
[3]);
1513 ilo_cp_write(cp
, zs
->payload
[4]);
1514 ilo_cp_write(cp
, zs
->payload
[5]);
/*
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (ILO_GPE_CMD(0x3, 0x1, 0x06), 2 dwords)
 * for GEN6 and GEN7.
 *
 * Both offsets must lie in [0, 31].  They are packed into the payload dword
 * as (x_offset << 8 | y_offset).
 *
 * NOTE(review): the embedded original line numbers skip 1521-1522, so the
 * trailing parameter(s) such as `cp` and the opening brace are not visible.
 */
1519 gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info
*dev
,
1520 int x_offset
, int y_offset
,
1523 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x06);
1524 const uint8_t cmd_len
= 2;
1526 ILO_GPE_VALID_GEN(dev
, 6, 7);
1527 assert(x_offset
>= 0 && x_offset
<= 31);
1528 assert(y_offset
>= 0 && y_offset
<= 31);
1530 ilo_cp_begin(cp
, cmd_len
);
1531 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1532 ilo_cp_write(cp
, x_offset
<< 8 | y_offset
);
/*
 * Emit 3DSTATE_POLY_STIPPLE_PATTERN (ILO_GPE_CMD(0x3, 0x1, 0x07), 33 dwords)
 * for GEN6 and GEN7.
 *
 * The header is followed by the 32 entries of pattern->stipple, written in
 * index order; the array is asserted to have exactly 32 elements.
 *
 * NOTE(review): the embedded original line numbers skip 1539-1540 and
 * 1543-1544, so the trailing parameter(s) such as `cp` and the declaration of
 * the loop index `i` are not visible.
 */
1537 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info
*dev
,
1538 const struct pipe_poly_stipple
*pattern
,
1541 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x07);
1542 const uint8_t cmd_len
= 33;
1545 ILO_GPE_VALID_GEN(dev
, 6, 7);
1546 assert(Elements(pattern
->stipple
) == 32);
1548 ilo_cp_begin(cp
, cmd_len
);
1549 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1550 for (i
= 0; i
< 32; i
++)
1551 ilo_cp_write(cp
, pattern
->stipple
[i
]);
/*
 * Emit 3DSTATE_LINE_STIPPLE (ILO_GPE_CMD(0x3, 0x1, 0x08), 3 dwords) for GEN6
 * and GEN7.
 *
 * pattern must fit in 16 bits and factor must be in [1, 256].  The first
 * payload dword is the pattern.  The second packs the repeat factor with its
 * fixed-point inverse; two encodings are visible: (65536.0f / factor) << 15
 * under the GEN7 check, and (8192.0f / factor) << 16 in the other write.
 *
 * NOTE(review): the embedded original line numbers skip 1558-1559, 1562-1563,
 * 1573 and 1576-1578, so the declaration of `inverse` and the `else` that
 * presumably separates the two encodings are not visible in this view.
 */
1556 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info
*dev
,
1557 unsigned pattern
, unsigned factor
,
1560 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x08);
1561 const uint8_t cmd_len
= 3;
1564 ILO_GPE_VALID_GEN(dev
, 6, 7);
1565 assert((pattern
& 0xffff) == pattern
);
1566 assert(factor
>= 1 && factor
<= 256);
1568 ilo_cp_begin(cp
, cmd_len
);
1569 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1570 ilo_cp_write(cp
, pattern
);
1572 if (dev
->gen
>= ILO_GEN(7)) {
1574 inverse
= (unsigned) (65536.0f
/ factor
);
1575 ilo_cp_write(cp
, inverse
<< 15 | factor
);
1579 inverse
= (unsigned) (8192.0f
/ factor
);
1580 ilo_cp_write(cp
, inverse
<< 16 | factor
);
/*
 * Emit 3DSTATE_AA_LINE_PARAMETERS (ILO_GPE_CMD(0x3, 0x1, 0x0a), 3 dwords) for
 * GEN6 and GEN7.
 *
 * Both payload dwords are written as zero (each spelled `0 << 16 | 0`).
 *
 * NOTE(review): the embedded original line numbers skip 1588-1589, so the
 * trailing parameter(s) such as `cp` and the opening brace are not visible.
 */
1587 gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info
*dev
,
1590 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0a);
1591 const uint8_t cmd_len
= 3;
1593 ILO_GPE_VALID_GEN(dev
, 6, 7);
1595 ilo_cp_begin(cp
, cmd_len
);
1596 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1597 ilo_cp_write(cp
, 0 << 16 | 0);
1598 ilo_cp_write(cp
, 0 << 16 | 0);
/*
 * Emit the GEN6 3DSTATE_GS_SVB_INDEX command (ILO_GPE_CMD(0x3, 0x1, 0x0b),
 * 4 dwords).
 *
 * index must be in [0, 3].  DW1 holds the index shifted by SVB_INDEX_SHIFT,
 * plus SVB_LOAD_INTERNAL_VERTEX_COUNT when load_vertex_count is set.  The
 * remaining dwords are svbi and max_svbi.
 *
 * NOTE(review): the embedded original line numbers skip 1605, 1607-1608 and
 * 1611-1612, so the declarations of max_svbi, cp and dw1 are not visible in
 * this view.
 */
1603 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info
*dev
,
1604 int index
, unsigned svbi
,
1606 bool load_vertex_count
,
1609 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0b);
1610 const uint8_t cmd_len
= 4;
1613 ILO_GPE_VALID_GEN(dev
, 6, 6);
1614 assert(index
>= 0 && index
< 4);
1616 dw1
= index
<< SVB_INDEX_SHIFT
;
1617 if (load_vertex_count
)
1618 dw1
|= SVB_LOAD_INTERNAL_VERTEX_COUNT
;
1620 ilo_cp_begin(cp
, cmd_len
);
1621 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1622 ilo_cp_write(cp
, dw1
);
1623 ilo_cp_write(cp
, svbi
);
1624 ilo_cp_write(cp
, max_svbi
);
/*
 * Emit 3DSTATE_MULTISAMPLE (ILO_GPE_CMD(0x3, 0x1, 0x0d)) for GEN6 and GEN7.
 * The command is 4 dwords on GEN7 and later, 3 dwords otherwise.
 *
 * DW1 starts with the pixel location (center or upper-left, selected by
 * pixel_location_center) and gets an MS_NUMSAMPLES_* value from a switch on
 * num_samples.  The visible assignments take dw2 from packed_sample_pos[0]
 * and, in the 8-sample branch (asserted to be GEN7+), dw3 from
 * packed_sample_pos[1].  An unsupported sample count trips an assertion and
 * then selects MS_NUMSAMPLES_1.  dw3 is only written on GEN7 and later.
 *
 * NOTE(review): the embedded original line numbers skip repeatedly inside the
 * switch (e.g. 1645-1646, 1648-1651, 1654-1656), so the `case` labels and any
 * other dw2/dw3 assignments are not visible in this view.
 */
1629 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info
*dev
,
1631 const uint32_t *packed_sample_pos
,
1632 bool pixel_location_center
,
1635 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0d);
1636 const uint8_t cmd_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 3;
1637 uint32_t dw1
, dw2
, dw3
;
1639 ILO_GPE_VALID_GEN(dev
, 6, 7);
1641 dw1
= (pixel_location_center
) ?
1642 MS_PIXEL_LOCATION_CENTER
: MS_PIXEL_LOCATION_UPPER_LEFT
;
1644 switch (num_samples
) {
1647 dw1
|= MS_NUMSAMPLES_1
;
1652 dw1
|= MS_NUMSAMPLES_4
;
1653 dw2
= packed_sample_pos
[0];
1657 assert(dev
->gen
>= ILO_GEN(7));
1658 dw1
|= MS_NUMSAMPLES_8
;
1659 dw2
= packed_sample_pos
[0];
1660 dw3
= packed_sample_pos
[1];
1663 assert(!"unsupported sample count");
1664 dw1
|= MS_NUMSAMPLES_1
;
1670 ilo_cp_begin(cp
, cmd_len
);
1671 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1672 ilo_cp_write(cp
, dw1
);
1673 ilo_cp_write(cp
, dw2
);
1674 if (dev
->gen
>= ILO_GEN(7))
1675 ilo_cp_write(cp
, dw3
);
/*
 * Emit 3DSTATE_STENCIL_BUFFER (3 dwords) for GEN6 and GEN7.
 *
 * The opcode differs by generation: ILO_GPE_CMD(0x3, 0x0, 0x06) on GEN7 and
 * later, ILO_GPE_CMD(0x3, 0x1, 0x0e) otherwise.  The body is zs->payload[6]
 * followed by a buffer-object write of zs->payload[7] against
 * zs->separate_s8_bo in the render domain.
 *
 * NOTE(review): the embedded original line numbers skip 1682-1683, so the
 * trailing parameter(s) such as `cp` and the opening brace are not visible.
 */
1680 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info
*dev
,
1681 const struct ilo_zs_surface
*zs
,
1684 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1685 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1686 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1687 const uint8_t cmd_len
= 3;
1689 ILO_GPE_VALID_GEN(dev
, 6, 7);
1691 ilo_cp_begin(cp
, cmd_len
);
1692 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1693 /* see ilo_gpe_init_zs_surface() */
1694 ilo_cp_write(cp
, zs
->payload
[6]);
1695 ilo_cp_write_bo(cp
, zs
->payload
[7], zs
->separate_s8_bo
,
1696 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
/*
 * Emit 3DSTATE_HIER_DEPTH_BUFFER (3 dwords) for GEN6 and GEN7.
 *
 * The opcode differs by generation: ILO_GPE_CMD(0x3, 0x0, 0x07) on GEN7 and
 * later, ILO_GPE_CMD(0x3, 0x1, 0x0f) otherwise.  The body is zs->payload[8]
 * followed by a buffer-object write of zs->payload[9] against zs->hiz_bo in
 * the render domain.
 *
 * NOTE(review): the embedded original line numbers skip 1703-1704, so the
 * trailing parameter(s) such as `cp` and the opening brace are not visible.
 */
1701 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1702 const struct ilo_zs_surface
*zs
,
1705 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1706 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1707 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1708 const uint8_t cmd_len
= 3;
1710 ILO_GPE_VALID_GEN(dev
, 6, 7);
1712 ilo_cp_begin(cp
, cmd_len
);
1713 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1714 /* see ilo_gpe_init_zs_surface() */
1715 ilo_cp_write(cp
, zs
->payload
[8]);
1716 ilo_cp_write_bo(cp
, zs
->payload
[9], zs
->hiz_bo
,
1717 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
/*
 * Emit the GEN6 3DSTATE_CLEAR_PARAMS command (ILO_GPE_CMD(0x3, 0x1, 0x10),
 * 2 dwords).
 *
 * The header dword additionally carries GEN5_DEPTH_CLEAR_VALID; the payload
 * dword is clear_val.
 *
 * NOTE(review): the embedded original line numbers skip 1723-1725, so the
 * declarations of clear_val and cp are not visible in this view.
 */
1722 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
1726 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x10);
1727 const uint8_t cmd_len
= 2;
1729 ILO_GPE_VALID_GEN(dev
, 6, 6);
1731 ilo_cp_begin(cp
, cmd_len
);
1732 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1733 GEN5_DEPTH_CLEAR_VALID
);
1734 ilo_cp_write(cp
, clear_val
);
/*
 * Emit PIPE_CONTROL (ILO_GPE_CMD(0x3, 0x2, 0x00)) for GEN6 and GEN7.  The
 * command is 5 dwords when write_qword is set and 4 dwords otherwise.
 *
 * Two PRM restrictions on dw1 are checked with assertions before emission:
 *  - when PIPE_CONTROL_CS_STALL is set, at least one of the bits collected
 *    in bit_test must also be set (PIPE_CONTROL_INTERRUPT_ENABLE counts only
 *    on GEN6);
 *  - when PIPE_CONTROL_DEPTH_STALL is set, neither the write flush nor the
 *    depth cache flush bit may be set.
 * The body is dw1, a buffer-object write of bo_offset against bo using
 * INTEL_DOMAIN_INSTRUCTION for both read and write domains, and zero
 * dword(s).
 *
 * NOTE(review): the embedded original line numbers skip 1740, 1742-1744,
 * 1779-1780 and 1809, so the declarations of dw1, write_qword and cp, and
 * the condition on the final zero write (presumably `if (write_qword)`), are
 * not visible in this view.
 */
1739 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info
*dev
,
1741 struct intel_bo
*bo
, uint32_t bo_offset
,
1745 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x2, 0x00);
1746 const uint8_t cmd_len
= (write_qword
) ? 5 : 4;
1747 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
1748 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
1750 ILO_GPE_VALID_GEN(dev
, 6, 7);
1752 if (dw1
& PIPE_CONTROL_CS_STALL
) {
1754 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1756 * "1 of the following must also be set (when CS stall is set):
1758 * * Depth Cache Flush Enable ([0] of DW1)
1759 * * Stall at Pixel Scoreboard ([1] of DW1)
1760 * * Depth Stall ([13] of DW1)
1761 * * Post-Sync Operation ([13] of DW1)
1762 * * Render Target Cache Flush Enable ([12] of DW1)
1763 * * Notify Enable ([8] of DW1)"
1765 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1767 * "One of the following must also be set (when CS stall is set):
1769 * * Render Target Cache Flush Enable ([12] of DW1)
1770 * * Depth Cache Flush Enable ([0] of DW1)
1771 * * Stall at Pixel Scoreboard ([1] of DW1)
1772 * * Depth Stall ([13] of DW1)
1773 * * Post-Sync Operation ([13] of DW1)"
1775 uint32_t bit_test
= PIPE_CONTROL_WRITE_FLUSH
|
1776 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1777 PIPE_CONTROL_STALL_AT_SCOREBOARD
|
1778 PIPE_CONTROL_DEPTH_STALL
;
1781 bit_test
|= PIPE_CONTROL_WRITE_IMMEDIATE
|
1782 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
1783 PIPE_CONTROL_WRITE_TIMESTAMP
;
1785 if (dev
->gen
== ILO_GEN(6))
1786 bit_test
|= PIPE_CONTROL_INTERRUPT_ENABLE
;
1788 assert(dw1
& bit_test
);
1791 if (dw1
& PIPE_CONTROL_DEPTH_STALL
) {
1793 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1795 * "Following bits must be clear (when Depth Stall is set):
1797 * * Render Target Cache Flush Enable ([12] of DW1)
1798 * * Depth Cache Flush Enable ([0] of DW1)"
1800 assert(!(dw1
& (PIPE_CONTROL_WRITE_FLUSH
|
1801 PIPE_CONTROL_DEPTH_CACHE_FLUSH
)));
1804 ilo_cp_begin(cp
, cmd_len
);
1805 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1806 ilo_cp_write(cp
, dw1
);
1807 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
1808 ilo_cp_write(cp
, 0);
1810 ilo_cp_write(cp
, 0);
/*
 * Emit the GEN6 3DPRIMITIVE command (ILO_GPE_CMD(0x3, 0x3, 0x00), 6 dwords).
 *
 * The topology is _3DPRIM_RECTLIST when rectlist is set, otherwise the
 * translation of info->mode.  Vertex buffer access is random for indexed
 * draws and sequential otherwise.  The start vertex is info->start, plus
 * ib->draw_start_offset for indexed draws.  After the header the command
 * carries info->count, the start vertex, info->instance_count,
 * info->start_instance and info->index_bias.
 *
 * NOTE(review): the embedded original line numbers skip 1818-1820 and 1836,
 * so the declarations of rectlist and cp, and the remainder of the header
 * dword expression (presumably ORing in vb_access), are not visible here.
 */
1815 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
1816 const struct pipe_draw_info
*info
,
1817 const struct ilo_ib_state
*ib
,
1821 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x3, 0x00);
1822 const uint8_t cmd_len
= 6;
1823 const int prim
= (rectlist
) ?
1824 _3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
1825 const int vb_access
= (info
->indexed
) ?
1826 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM
:
1827 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL
;
1828 const uint32_t vb_start
= info
->start
+
1829 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
1831 ILO_GPE_VALID_GEN(dev
, 6, 6);
1833 ilo_cp_begin(cp
, cmd_len
);
1834 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1835 prim
<< GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT
|
1837 ilo_cp_write(cp
, info
->count
);
1838 ilo_cp_write(cp
, vb_start
);
1839 ilo_cp_write(cp
, info
->instance_count
);
1840 ilo_cp_write(cp
, info
->start_instance
);
1841 ilo_cp_write(cp
, info
->index_bias
);
/*
 * Write GEN6 INTERFACE_DESCRIPTOR_DATA state and return its offset as
 * reported by ilo_cp_steal_ptr().
 *
 * (32 / 4) * num_ids dwords are requested with an alignment of 32 / 4.  For
 * each descriptor the visible assignments are: dw[0] = kernel offset of
 * cs[i], dw[1] = 1 << 18 (SPF), dw[2] = sampler_state[i] ORed with the
 * sampler count in groups of four (rounded up) at bit 2, dw[3] starting from
 * binding_table_state[i], and dw[4], dw[5] zero.
 *
 * NOTE(review): the embedded original line numbers skip 1849, 1851-1855,
 * 1871-1872, 1884 and 1888-1893, so the declarations of num_samplers,
 * num_ids, cp and i, the rest of the dw[3] expression, any further dwords,
 * and the per-descriptor advance of `dw` are not visible in this view.
 */
1845 static inline uint32_t
1846 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info
*dev
,
1847 const struct ilo_shader_state
**cs
,
1848 uint32_t *sampler_state
,
1850 uint32_t *binding_table_state
,
1856 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
1858 * "(Interface Descriptor Total Length) This field must have the same
1859 * alignment as the Interface Descriptor Data Start Address.
1861 * It must be DQWord (32-byte) aligned..."
1863 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
1865 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
1866 * aligned address of the Interface Descriptor data."
1868 const int state_align
= 32 / 4;
1869 const int state_len
= (32 / 4) * num_ids
;
1870 uint32_t state_offset
, *dw
;
1873 ILO_GPE_VALID_GEN(dev
, 6, 6);
1875 dw
= ilo_cp_steal_ptr(cp
, "INTERFACE_DESCRIPTOR_DATA",
1876 state_len
, state_align
, &state_offset
);
1878 for (i
= 0; i
< num_ids
; i
++) {
1879 dw
[0] = ilo_shader_get_kernel_offset(cs
[i
]);
1880 dw
[1] = 1 << 18; /* SPF */
1881 dw
[2] = sampler_state
[i
] |
1882 (num_samplers
[i
] + 3) / 4 << 2;
1883 dw
[3] = binding_table_state
[i
] |
1885 dw
[4] = 0 << 16 | /* CURBE Read Length */
1886 0; /* CURBE Read Offset */
1887 dw
[5] = 0; /* Barrier ID */
1894 return state_offset
;
/*
 * Write the GEN6 SF_VIEWPORT state array and return its offset as reported
 * by ilo_cp_steal_ptr().
 *
 * num_viewports must be in [1, 16].  8 dwords are reserved per viewport with
 * an alignment of 32 / 4.  The visible assignments store the float bit
 * patterns (via fui()) of m00, m11, m22, m30, m31 and m32 into dw[0..5].
 *
 * NOTE(review): the embedded original line numbers skip 1901-1902, 1906-1907
 * and 1930-1935, so the declarations of cp and i, the contents of the
 * remaining two dwords, and the per-viewport advance of `dw` are not visible
 * in this view.
 */
1897 static inline uint32_t
1898 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info
*dev
,
1899 const struct ilo_viewport_cso
*viewports
,
1900 unsigned num_viewports
,
1903 const int state_align
= 32 / 4;
1904 const int state_len
= 8 * num_viewports
;
1905 uint32_t state_offset
, *dw
;
1908 ILO_GPE_VALID_GEN(dev
, 6, 6);
1911 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1913 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1914 * stored as an array of up to 16 elements..."
1916 assert(num_viewports
&& num_viewports
<= 16);
1918 dw
= ilo_cp_steal_ptr(cp
, "SF_VIEWPORT",
1919 state_len
, state_align
, &state_offset
);
1921 for (i
= 0; i
< num_viewports
; i
++) {
1922 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1924 dw
[0] = fui(vp
->m00
);
1925 dw
[1] = fui(vp
->m11
);
1926 dw
[2] = fui(vp
->m22
);
1927 dw
[3] = fui(vp
->m30
);
1928 dw
[4] = fui(vp
->m31
);
1929 dw
[5] = fui(vp
->m32
);
1936 return state_offset
;
/*
 * Write the GEN6 CLIP_VIEWPORT state array and return its offset as reported
 * by ilo_cp_steal_ptr().
 *
 * num_viewports must be in [1, 16].  4 dwords are reserved per viewport with
 * an alignment of 32 / 4: the float bit patterns (via fui()) of min_gbx,
 * max_gbx, min_gby and max_gby, in that order.
 *
 * NOTE(review): the embedded original line numbers skip 1943-1944, 1948-1949
 * and 1970-1973, so the declarations of cp and i and the per-viewport
 * advance of `dw` are not visible in this view.
 */
1939 static inline uint32_t
1940 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
1941 const struct ilo_viewport_cso
*viewports
,
1942 unsigned num_viewports
,
1945 const int state_align
= 32 / 4;
1946 const int state_len
= 4 * num_viewports
;
1947 uint32_t state_offset
, *dw
;
1950 ILO_GPE_VALID_GEN(dev
, 6, 6);
1953 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1955 * "The viewport-related state is stored as an array of up to 16
1958 assert(num_viewports
&& num_viewports
<= 16);
1960 dw
= ilo_cp_steal_ptr(cp
, "CLIP_VIEWPORT",
1961 state_len
, state_align
, &state_offset
);
1963 for (i
= 0; i
< num_viewports
; i
++) {
1964 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1966 dw
[0] = fui(vp
->min_gbx
);
1967 dw
[1] = fui(vp
->max_gbx
);
1968 dw
[2] = fui(vp
->min_gby
);
1969 dw
[3] = fui(vp
->max_gby
);
1974 return state_offset
;
/*
 * Write the CC_VIEWPORT state array (GEN6 and GEN7) and return its offset as
 * reported by ilo_cp_steal_ptr().
 *
 * num_viewports must be in [1, 16].  2 dwords are reserved per viewport with
 * an alignment of 32 / 4: the float bit patterns (via fui()) of min_z and
 * max_z.
 *
 * NOTE(review): the embedded original line numbers skip 1981-1982, 1986-1987
 * and 2005-2008, so the declarations of cp and i and the per-viewport
 * advance of `dw` are not visible in this view.
 */
1977 static inline uint32_t
1978 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info
*dev
,
1979 const struct ilo_viewport_cso
*viewports
,
1980 unsigned num_viewports
,
1983 const int state_align
= 32 / 4;
1984 const int state_len
= 2 * num_viewports
;
1985 uint32_t state_offset
, *dw
;
1988 ILO_GPE_VALID_GEN(dev
, 6, 7);
1991 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
1993 * "The viewport state is stored as an array of up to 16 elements..."
1995 assert(num_viewports
&& num_viewports
<= 16);
1997 dw
= ilo_cp_steal_ptr(cp
, "CC_VIEWPORT",
1998 state_len
, state_align
, &state_offset
);
2000 for (i
= 0; i
< num_viewports
; i
++) {
2001 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
2003 dw
[0] = fui(vp
->min_z
);
2004 dw
[1] = fui(vp
->max_z
);
2009 return state_offset
;
/*
 * Write COLOR_CALC_STATE (GEN6 and GEN7; 6 dwords, alignment 64 / 4) and
 * return its offset as reported by ilo_cp_steal_ptr().
 *
 * dw[0] packs the two stencil reference values (ref_value[0] at bit 24,
 * ref_value[1] at bit 16) with BRW_ALPHATEST_FORMAT_UNORM8.  dw[2..5] hold
 * the float bit patterns (via fui()) of the four blend color components.
 *
 * NOTE(review): the embedded original line numbers skip 2015, 2017-2018 and
 * 2031, so one more parameter, the declaration of cp, and the assignment of
 * dw[1] are not visible in this view.
 */
2012 static inline uint32_t
2013 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info
*dev
,
2014 const struct pipe_stencil_ref
*stencil_ref
,
2016 const struct pipe_blend_color
*blend_color
,
2019 const int state_align
= 64 / 4;
2020 const int state_len
= 6;
2021 uint32_t state_offset
, *dw
;
2023 ILO_GPE_VALID_GEN(dev
, 6, 7);
2025 dw
= ilo_cp_steal_ptr(cp
, "COLOR_CALC_STATE",
2026 state_len
, state_align
, &state_offset
);
2028 dw
[0] = stencil_ref
->ref_value
[0] << 24 |
2029 stencil_ref
->ref_value
[1] << 16 |
2030 BRW_ALPHATEST_FORMAT_UNORM8
;
2032 dw
[2] = fui(blend_color
->color
[0]);
2033 dw
[3] = fui(blend_color
->color
[1]);
2034 dw
[4] = fui(blend_color
->color
[2]);
2035 dw
[5] = fui(blend_color
->color
[3]);
2037 return state_offset
;
/*
 * Write the BLEND_STATE array (GEN6 and GEN7) and return its offset as
 * reported by ilo_cp_steal_ptr().
 *
 * One 2-dword element is written per color buffer (fb->state.nr_cbufs, at
 * most 8), with an alignment of 64 / 4.  For target i the blend CSO is
 * blend->cso[i] when independent blending is enabled and blend->cso[0]
 * otherwise.  The render target format is inspected to find out whether it
 * is pure integer, whether it is UNORM, and whether destination alpha must
 * be forced to one (PIPE_FORMAT_B8G8R8X8_UNORM).  Starting from the CSO's
 * payload, the visible code ORs in:
 *  - a blend dword (dw_blend or dw_blend_dst_alpha_forced_one), only for
 *    formats that are not pure integer;
 *  - cso->dw_logicop;
 *  - cso->dw_alpha_mod, only when fb->num_samples > 1;
 *  - dsa->dw_alpha, only for formats that are not pure integer.
 *
 * NOTE(review): the embedded original line numbers skip in many places (e.g.
 * 2045-2046, 2048, 2061-2064, 2066-2068, 2086-2089, 2134-2136), so the
 * declarations of cp and state_len, the adjustment hinted at by the "to be
 * able to reference alpha func" comment, the guards around the format
 * inspection and the logicop OR, and the per-target advance of `dw` are not
 * visible in this view.
 */
2040 static inline uint32_t
2041 gen6_emit_BLEND_STATE(const struct ilo_dev_info
*dev
,
2042 const struct ilo_blend_state
*blend
,
2043 const struct ilo_fb_state
*fb
,
2044 const struct ilo_dsa_state
*dsa
,
2047 const int state_align
= 64 / 4;
2049 uint32_t state_offset
, *dw
;
2050 unsigned num_targets
, i
;
2052 ILO_GPE_VALID_GEN(dev
, 6, 7);
2055 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2057 * "The blend state is stored as an array of up to 8 elements..."
2059 num_targets
= fb
->state
.nr_cbufs
;
2060 assert(num_targets
<= 8);
2065 /* to be able to reference alpha func */
2069 state_len
= 2 * num_targets
;
2071 dw
= ilo_cp_steal_ptr(cp
, "BLEND_STATE",
2072 state_len
, state_align
, &state_offset
);
2074 for (i
= 0; i
< num_targets
; i
++) {
2075 const unsigned idx
= (blend
->independent_blend_enable
) ? i
: 0;
2076 const struct ilo_blend_cso
*cso
= &blend
->cso
[idx
];
2077 const int num_samples
= fb
->num_samples
;
2078 const struct util_format_description
*format_desc
=
2079 (idx
< fb
->state
.nr_cbufs
) ?
2080 util_format_description(fb
->state
.cbufs
[idx
]->format
) : NULL
;
2081 bool rt_is_unorm
, rt_is_pure_integer
, rt_dst_alpha_forced_one
;
2084 rt_is_pure_integer
= false;
2085 rt_dst_alpha_forced_one
= false;
2090 switch (format_desc
->format
) {
2091 case PIPE_FORMAT_B8G8R8X8_UNORM
:
2092 /* force alpha to one when the HW format has alpha */
2093 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM
)
2094 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM
);
2095 rt_dst_alpha_forced_one
= true;
2101 for (ch
= 0; ch
< 4; ch
++) {
2102 if (format_desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_VOID
)
2105 if (format_desc
->channel
[ch
].pure_integer
) {
2106 rt_is_unorm
= false;
2107 rt_is_pure_integer
= true;
2111 if (!format_desc
->channel
[ch
].normalized
||
2112 format_desc
->channel
[ch
].type
!= UTIL_FORMAT_TYPE_UNSIGNED
)
2113 rt_is_unorm
= false;
2117 dw
[0] = cso
->payload
[0];
2118 dw
[1] = cso
->payload
[1];
2120 if (!rt_is_pure_integer
) {
2121 if (rt_dst_alpha_forced_one
)
2122 dw
[0] |= cso
->dw_blend_dst_alpha_forced_one
;
2124 dw
[0] |= cso
->dw_blend
;
2128 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2130 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2131 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2133 * Since logicop is ignored for non-UNORM color buffers, no special care
2137 dw
[1] |= cso
->dw_logicop
;
2140 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2142 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2143 * Dither both must be disabled."
2145 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2146 * requires that anyway.
2148 if (num_samples
> 1)
2149 dw
[1] |= cso
->dw_alpha_mod
;
2152 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2154 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2157 if (!rt_is_pure_integer
)
2158 dw
[1] |= dsa
->dw_alpha
;
2163 return state_offset
;
/*
 * Write DEPTH_STENCIL_STATE (GEN6 and GEN7; 3 dwords, alignment 64 / 4) and
 * return its offset as reported by ilo_cp_steal_ptr().
 *
 * The three dwords are copied verbatim from dsa->payload[0..2].
 *
 * NOTE(review): the embedded original line numbers skip 2169-2170 and
 * 2174-2175, so the trailing parameter(s) such as `cp` and the opening brace
 * are not visible in this view.
 */
2166 static inline uint32_t
2167 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info
*dev
,
2168 const struct ilo_dsa_state
*dsa
,
2171 const int state_align
= 64 / 4;
2172 const int state_len
= 3;
2173 uint32_t state_offset
, *dw
;
2176 ILO_GPE_VALID_GEN(dev
, 6, 7);
2178 dw
= ilo_cp_steal_ptr(cp
, "DEPTH_STENCIL_STATE",
2179 state_len
, state_align
, &state_offset
);
2181 dw
[0] = dsa
->payload
[0];
2182 dw
[1] = dsa
->payload
[1];
2183 dw
[2] = dsa
->payload
[2];
2185 return state_offset
;
/*
 * Write the SCISSOR_RECT state array (GEN6 and GEN7) and return its offset
 * as reported by ilo_cp_steal_ptr().
 *
 * num_viewports must be in [1, 16].  2 dwords are reserved per viewport with
 * an alignment of 32 / 4, and state_len * 4 bytes are copied verbatim from
 * scissor->payload.
 *
 * NOTE(review): the embedded original line numbers skip 2192-2193, so the
 * trailing parameter(s) such as `cp` and the opening brace are not visible.
 */
2188 static inline uint32_t
2189 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info
*dev
,
2190 const struct ilo_scissor_state
*scissor
,
2191 unsigned num_viewports
,
2194 const int state_align
= 32 / 4;
2195 const int state_len
= 2 * num_viewports
;
2196 uint32_t state_offset
, *dw
;
2198 ILO_GPE_VALID_GEN(dev
, 6, 7);
2201 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2203 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2204 * stored as an array of up to 16 elements..."
2206 assert(num_viewports
&& num_viewports
<= 16);
2208 dw
= ilo_cp_steal_ptr(cp
, "SCISSOR_RECT",
2209 state_len
, state_align
, &state_offset
);
2211 memcpy(dw
, scissor
->payload
, state_len
* 4);
2213 return state_offset
;
/*
 * Write BINDING_TABLE_STATE (GEN6 and GEN7) and return its offset as
 * reported by ilo_cp_steal_ptr().
 *
 * The table has one dword per surface state (at most 256), reserved with an
 * alignment of 32 / 4, and is filled by copying surface_states verbatim.
 * An empty table (num_surface_states == 0) is special-cased before any space
 * is reserved.
 *
 * NOTE(review): the embedded original line numbers skip 2220-2221 and
 * 2236-2237, so the declaration of cp and the statement executed for the
 * empty table (presumably an early return) are not visible in this view.
 */
2216 static inline uint32_t
2217 gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info
*dev
,
2218 uint32_t *surface_states
,
2219 int num_surface_states
,
2222 const int state_align
= 32 / 4;
2223 const int state_len
= num_surface_states
;
2224 uint32_t state_offset
, *dw
;
2226 ILO_GPE_VALID_GEN(dev
, 6, 7);
2229 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
2231 * "It is stored as an array of up to 256 elements..."
2233 assert(num_surface_states
<= 256);
2235 if (!num_surface_states
)
2238 dw
= ilo_cp_steal_ptr(cp
, "BINDING_TABLE_STATE",
2239 state_len
, state_align
, &state_offset
);
2240 memcpy(dw
, surface_states
,
2241 num_surface_states
* sizeof(surface_states
[0]));
2243 return state_offset
;
/*
 * Write a SURFACE_STATE (GEN6 and GEN7) and return its offset as reported by
 * ilo_cp_steal().
 *
 * The state is 8 dwords on GEN7 and later and 6 dwords otherwise, reserved
 * with an alignment of 32 / 4.  It is written from surf->payload: dword 1 is
 * emitted as a buffer-object write against surf->bo using the chosen
 * read/write domains, and payload[6..7] are only written on GEN7 and later.
 * Two domain choices are visible: render for both read and write, or sampler
 * for read.
 *
 * NOTE(review): the embedded original line numbers skip 2249-2251, 2258-2259,
 * 2262-2263, 2265-2267 and 2283-2286, so the remaining parameters (callers
 * in this file pass a bool and cp), the condition selecting the domains, the
 * write domain used with the sampler read domain, and whatever follows the
 * payload writes are not visible in this view.
 */
2246 static inline uint32_t
2247 gen6_emit_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2248 const struct ilo_view_surface
*surf
,
2252 const int state_align
= 32 / 4;
2253 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 8 : 6;
2254 uint32_t state_offset
;
2255 uint32_t read_domains
, write_domain
;
2257 ILO_GPE_VALID_GEN(dev
, 6, 7);
2260 read_domains
= INTEL_DOMAIN_RENDER
;
2261 write_domain
= INTEL_DOMAIN_RENDER
;
2264 read_domains
= INTEL_DOMAIN_SAMPLER
;
2268 ilo_cp_steal(cp
, "SURFACE_STATE", state_len
, state_align
, &state_offset
);
2270 STATIC_ASSERT(Elements(surf
->payload
) >= 8);
2272 ilo_cp_write(cp
, surf
->payload
[0]);
2273 ilo_cp_write_bo(cp
, surf
->payload
[1],
2274 surf
->bo
, read_domains
, write_domain
);
2275 ilo_cp_write(cp
, surf
->payload
[2]);
2276 ilo_cp_write(cp
, surf
->payload
[3]);
2277 ilo_cp_write(cp
, surf
->payload
[4]);
2278 ilo_cp_write(cp
, surf
->payload
[5]);
2280 if (dev
->gen
>= ILO_GEN(7)) {
2281 ilo_cp_write(cp
, surf
->payload
[6]);
2282 ilo_cp_write(cp
, surf
->payload
[7]);
2287 return state_offset
;
/*
 * Build and write the GEN6 SURFACE_STATE for one stream output, returning
 * the offset reported by gen6_emit_SURFACE_STATE().
 *
 * The surface starts at so->buffer_offset plus the output's dst_offset * 4.
 * The structure size is the stride of the output's buffer * 4.  The element
 * format is a 32-bit float format chosen by the switch on the output's
 * num_components (R32, R32G32, R32G32B32 or R32G32B32A32); an unexpected
 * count trips an assertion and selects PIPE_FORMAT_R32_FLOAT.  The view
 * surface is initialized with
 * ilo_gpe_init_view_surface_for_buffer_gen6() and then emitted.
 *
 * NOTE(review): the embedded original line numbers skip 2294-2296 and the
 * lines between the switch arms, so the declarations of so_index and cp and
 * the `case` labels are not visible in this view.
 */
2290 static inline uint32_t
2291 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2292 const struct pipe_stream_output_target
*so
,
2293 const struct pipe_stream_output_info
*so_info
,
2297 struct ilo_buffer
*buf
= ilo_buffer(so
->buffer
);
2298 unsigned bo_offset
, struct_size
;
2299 enum pipe_format elem_format
;
2300 struct ilo_view_surface surf
;
2302 ILO_GPE_VALID_GEN(dev
, 6, 6);
2304 bo_offset
= so
->buffer_offset
+ so_info
->output
[so_index
].dst_offset
* 4;
2305 struct_size
= so_info
->stride
[so_info
->output
[so_index
].output_buffer
] * 4;
2307 switch (so_info
->output
[so_index
].num_components
) {
2309 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2312 elem_format
= PIPE_FORMAT_R32G32_FLOAT
;
2315 elem_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
2318 elem_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
2321 assert(!"unexpected SO components length");
2322 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2326 ilo_gpe_init_view_surface_for_buffer_gen6(dev
, buf
, bo_offset
, so
->buffer_size
,
2327 struct_size
, elem_format
, false, true, &surf
);
2329 return gen6_emit_SURFACE_STATE(dev
, &surf
, false, cp
);
/*
 * Write the SAMPLER_STATE array (GEN6 and GEN7) and return its offset as
 * reported by ilo_cp_steal_ptr().
 *
 * 4 dwords are reserved per sampler (at most 16 samplers) with an alignment
 * of 32 / 4.  Entries whose sampler or view is NULL are treated as disabled.
 * For the others, a filter dword and a wrap dword are selected from the
 * texture target: 1D and cube textures use their own wrap dwords, 3D
 * textures always use the non-anisotropic filter, and every other visible
 * case uses the anisotropic filter when sampler->anisotropic is set.  The
 * element is then built from sampler->payload[0..2] and the border color
 * offset, which must have its low 5 bits clear.  A generation check follows,
 * and one visible write ORs view->u.tex.first_level into dw[0] at bit 22
 * (the [DevSNB] errata workaround quoted below).
 *
 * NOTE(review): the embedded original line numbers skip in many places (e.g.
 * 2337-2339, 2343-2344, 2353-2356, 2369-2377, 2412-2414, 2432-2438), so the
 * declarations of num_samplers, cp and i, the contents written for disabled
 * samplers, the places where dw_filter and dw_wrap are merged into the
 * element, which generation branch holds the first_level write, and the
 * per-sampler advance of `dw` are not visible in this view.
 */
2332 static inline uint32_t
2333 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info
*dev
,
2334 const struct ilo_sampler_cso
* const *samplers
,
2335 const struct pipe_sampler_view
* const *views
,
2336 const uint32_t *sampler_border_colors
,
2340 const int state_align
= 32 / 4;
2341 const int state_len
= 4 * num_samplers
;
2342 uint32_t state_offset
, *dw
;
2345 ILO_GPE_VALID_GEN(dev
, 6, 7);
2348 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2350 * "The sampler state is stored as an array of up to 16 elements..."
2352 assert(num_samplers
<= 16);
2357 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_STATE",
2358 state_len
, state_align
, &state_offset
);
2360 for (i
= 0; i
< num_samplers
; i
++) {
2361 const struct ilo_sampler_cso
*sampler
= samplers
[i
];
2362 const struct pipe_sampler_view
*view
= views
[i
];
2363 const uint32_t border_color
= sampler_border_colors
[i
];
2364 uint32_t dw_filter
, dw_wrap
;
2366 /* there may be holes */
2367 if (!sampler
|| !view
) {
2368 /* disabled sampler */
2378 /* determine filter and wrap modes */
2379 switch (view
->texture
->target
) {
2380 case PIPE_TEXTURE_1D
:
2381 dw_filter
= (sampler
->anisotropic
) ?
2382 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2383 dw_wrap
= sampler
->dw_wrap_1d
;
2385 case PIPE_TEXTURE_3D
:
2387 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2389 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2390 * surfaces of type SURFTYPE_3D."
2392 dw_filter
= sampler
->dw_filter
;
2393 dw_wrap
= sampler
->dw_wrap
;
2395 case PIPE_TEXTURE_CUBE
:
2396 dw_filter
= (sampler
->anisotropic
) ?
2397 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2398 dw_wrap
= sampler
->dw_wrap_cube
;
2401 dw_filter
= (sampler
->anisotropic
) ?
2402 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2403 dw_wrap
= sampler
->dw_wrap
;
2407 dw
[0] = sampler
->payload
[0];
2408 dw
[1] = sampler
->payload
[1];
2409 assert(!(border_color
& 0x1f));
2410 dw
[2] = border_color
;
2411 dw
[3] = sampler
->payload
[2];
2415 if (dev
->gen
>= ILO_GEN(7)) {
2420 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2422 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2423 * where the min and mag mode filters are different and
2424 * SurfMinLOD is nonzero. The determination of MagMode uses the
2425 * following equation instead of the one in the above
2426 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2428 * As a way to work around that, we set Base to
2429 * view->u.tex.first_level.
2431 dw
[0] |= view
->u
.tex
.first_level
<< 22;
2439 return state_offset
;
/*
 * Write SAMPLER_BORDER_COLOR_STATE (GEN6 and GEN7) and return its offset as
 * reported by ilo_cp_steal_ptr().
 *
 * The state is 4 dwords on GEN7 and later and 12 dwords otherwise, reserved
 * with an alignment of 32 / 4.  Its contents are copied verbatim
 * (state_len * 4 bytes) from the sampler CSO, starting at payload[3].
 *
 * NOTE(review): the embedded original line numbers skip 2445-2446, so the
 * trailing parameter(s) such as `cp` and the opening brace are not visible.
 */
2442 static inline uint32_t
2443 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info
*dev
,
2444 const struct ilo_sampler_cso
*sampler
,
2447 const int state_align
= 32 / 4;
2448 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 12;
2449 uint32_t state_offset
, *dw
;
2451 ILO_GPE_VALID_GEN(dev
, 6, 7);
2453 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_BORDER_COLOR_STATE",
2454 state_len
, state_align
, &state_offset
);
2456 /* see ilo_gpe_init_sampler_cso() */
2457 memcpy(dw
, &sampler
->payload
[3], state_len
* 4);
2459 return state_offset
;
/*
 * Reserve space for a push constant buffer (GEN6 and GEN7) and return its
 * offset as reported by ilo_cp_steal_ptr().
 *
 * The reservation is `size` rounded up to a multiple of 32 bytes, expressed
 * in dwords, with an alignment of 32 / 4.  When `size` is smaller than the
 * reserved byte count, the tail starting at &buf[size] is zeroed.
 *
 * NOTE(review): the embedded original line numbers skip 2465-2467, 2474-2475
 * and 2484-2487, so the declarations of cp and buf and any store through the
 * `pcb` out-parameter are not visible in this view.  `&buf[size]` is paired
 * with a byte count, so buf is presumably a byte pointer -- confirm.
 */
2462 static inline uint32_t
2463 gen6_emit_push_constant_buffer(const struct ilo_dev_info
*dev
,
2464 int size
, void **pcb
,
2468 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
2469 * to 32 bytes, and their sizes are specified in 256-bit units.
2471 const int state_align
= 32 / 4;
2472 const int state_len
= align(size
, 32) / 4;
2473 uint32_t state_offset
;
2476 ILO_GPE_VALID_GEN(dev
, 6, 7);
2478 buf
= ilo_cp_steal_ptr(cp
, "PUSH_CONSTANT_BUFFER",
2479 state_len
, state_align
, &state_offset
);
2481 /* zero out the unused range */
2482 if (size
< state_len
* 4)
2483 memset(&buf
[size
], 0, state_len
* 4 - size
);
2488 return state_offset
;
2491 #endif /* ILO_GPE_GEN6_H */