2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
31 #include "brw_defines.h"
32 #include "intel_reg.h"
33 #include "intel_winsys.h"
35 #include "ilo_common.h"
37 #include "ilo_format.h"
38 #include "ilo_resource.h"
39 #include "ilo_shader.h"
/**
 * Assert that the generation of \p dev lies within [min_gen, max_gen].
 * The bounds are passed through ILO_GEN(), so fractional generations such as
 * 7.5 may be given.
 */
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
   assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
/**
 * Build the header dword of a GPE command.
 *
 * 0x3 is placed at bit 29, \p pipeline at bit 27, \p op at bit 24, and
 * \p subop at bit 16.  The low bits are left clear so that callers can OR in
 * the dword length or other per-command flags.
 */
#define ILO_GPE_CMD(pipeline, op, subop) \
   (0x3 << 29 | (pipeline) << 27 | (op) << 24 | (subop) << 16)
/**
 * Commands that GEN6 GPE could emit.
 *
 * The trailing comment on each enumerator gives the (pipeline, op, subop)
 * triple of the command, in the order ILO_GPE_CMD() takes them.
 */
enum ilo_gpe_gen6_command {
   ILO_GPE_GEN6_STATE_BASE_ADDRESS,                  /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN6_STATE_SIP,                           /* (0x0, 0x1, 0x02) */
   ILO_GPE_GEN6_3DSTATE_VF_STATISTICS,               /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN6_PIPELINE_SELECT,                     /* (0x1, 0x1, 0x04) */
   ILO_GPE_GEN6_MEDIA_VFE_STATE,                     /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN6_MEDIA_CURBE_LOAD,                    /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD,     /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN6_MEDIA_GATEWAY_STATE,                 /* (0x2, 0x0, 0x03) */
   ILO_GPE_GEN6_MEDIA_STATE_FLUSH,                   /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN6_MEDIA_OBJECT_WALKER,                 /* (0x2, 0x1, 0x03) */
   ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS,      /* (0x3, 0x0, 0x01) */
   ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS,      /* (0x3, 0x0, 0x02) */
   ILO_GPE_GEN6_3DSTATE_URB,                         /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS,              /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS,             /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER,                /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS,     /* (0x3, 0x0, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS,           /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS,      /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_VS,                          /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN6_3DSTATE_GS,                          /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN6_3DSTATE_CLIP,                        /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN6_3DSTATE_SF,                          /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN6_3DSTATE_WM,                          /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_VS,                 /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_GS,                 /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_PS,                 /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK,                 /* (0x3, 0x0, 0x18) */
   ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE,           /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER,                /* (0x3, 0x1, 0x05) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET,         /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN,        /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE,                /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS,          /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX,                /* (0x3, 0x1, 0x0b) */
   ILO_GPE_GEN6_3DSTATE_MULTISAMPLE,                 /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER,              /* (0x3, 0x1, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER,           /* (0x3, 0x1, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS,                /* (0x3, 0x1, 0x10) */
   ILO_GPE_GEN6_PIPE_CONTROL,                        /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN6_3DPRIMITIVE,                         /* (0x3, 0x3, 0x00) */

   /* number of commands above; not a command itself */
   ILO_GPE_GEN6_COMMAND_COUNT,
};
/**
 * Indirect states that GEN6 GPE could emit.
 */
enum ilo_gpe_gen6_state {
   ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA,
   ILO_GPE_GEN6_SF_VIEWPORT,
   ILO_GPE_GEN6_CLIP_VIEWPORT,
   ILO_GPE_GEN6_CC_VIEWPORT,
   ILO_GPE_GEN6_COLOR_CALC_STATE,
   ILO_GPE_GEN6_BLEND_STATE,
   ILO_GPE_GEN6_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN6_SCISSOR_RECT,
   ILO_GPE_GEN6_BINDING_TABLE_STATE,
   ILO_GPE_GEN6_SURFACE_STATE,
   ILO_GPE_GEN6_SAMPLER_STATE,
   ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE,
   ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER,

   /* number of states above; not a state itself */
   ILO_GPE_GEN6_STATE_COUNT,
};
/*
 * Estimate the size of command \p cmd on \p dev.  Declared here and defined
 * out of line.
 * NOTE(review): the meaning of \p arg and the unit of the result are set by
 * the definition -- confirm there before relying on either.
 */
int
ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info *dev,
                                   enum ilo_gpe_gen6_command cmd,
                                   int arg);
/*
 * Estimate the size of indirect state \p state on \p dev.  Declared here and
 * defined out of line.
 * NOTE(review): the meaning of \p arg and the unit of the result are set by
 * the definition -- confirm there before relying on either.
 */
int
ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info *dev,
                                 enum ilo_gpe_gen6_state state,
                                 int arg);
129 * Translate winsys tiling to hardware tiling.
132 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling
)
135 case INTEL_TILING_NONE
:
138 return BRW_SURFACE_TILED
;
140 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
142 assert(!"unknown tiling");
148 * Translate a pipe primitive type to the matching hardware primitive type.
151 ilo_gpe_gen6_translate_pipe_prim(unsigned prim
)
153 static const int prim_mapping
[PIPE_PRIM_MAX
] = {
154 [PIPE_PRIM_POINTS
] = _3DPRIM_POINTLIST
,
155 [PIPE_PRIM_LINES
] = _3DPRIM_LINELIST
,
156 [PIPE_PRIM_LINE_LOOP
] = _3DPRIM_LINELOOP
,
157 [PIPE_PRIM_LINE_STRIP
] = _3DPRIM_LINESTRIP
,
158 [PIPE_PRIM_TRIANGLES
] = _3DPRIM_TRILIST
,
159 [PIPE_PRIM_TRIANGLE_STRIP
] = _3DPRIM_TRISTRIP
,
160 [PIPE_PRIM_TRIANGLE_FAN
] = _3DPRIM_TRIFAN
,
161 [PIPE_PRIM_QUADS
] = _3DPRIM_QUADLIST
,
162 [PIPE_PRIM_QUAD_STRIP
] = _3DPRIM_QUADSTRIP
,
163 [PIPE_PRIM_POLYGON
] = _3DPRIM_POLYGON
,
164 [PIPE_PRIM_LINES_ADJACENCY
] = _3DPRIM_LINELIST_ADJ
,
165 [PIPE_PRIM_LINE_STRIP_ADJACENCY
] = _3DPRIM_LINESTRIP_ADJ
,
166 [PIPE_PRIM_TRIANGLES_ADJACENCY
] = _3DPRIM_TRILIST_ADJ
,
167 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
] = _3DPRIM_TRISTRIP_ADJ
,
170 assert(prim_mapping
[prim
]);
172 return prim_mapping
[prim
];
176 * Translate a pipe texture target to the matching hardware surface type.
179 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target
)
183 return BRW_SURFACE_BUFFER
;
184 case PIPE_TEXTURE_1D
:
185 case PIPE_TEXTURE_1D_ARRAY
:
186 return BRW_SURFACE_1D
;
187 case PIPE_TEXTURE_2D
:
188 case PIPE_TEXTURE_RECT
:
189 case PIPE_TEXTURE_2D_ARRAY
:
190 return BRW_SURFACE_2D
;
191 case PIPE_TEXTURE_3D
:
192 return BRW_SURFACE_3D
;
193 case PIPE_TEXTURE_CUBE
:
194 case PIPE_TEXTURE_CUBE_ARRAY
:
195 return BRW_SURFACE_CUBE
;
197 assert(!"unknown texture target");
198 return BRW_SURFACE_BUFFER
;
203 * Fill in DW2 to DW7 of 3DSTATE_SF.
206 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info
*dev
,
207 const struct ilo_rasterizer_state
*rasterizer
,
209 enum pipe_format depth_format
,
210 uint32_t *payload
, unsigned payload_len
)
212 assert(payload_len
== Elements(rasterizer
->sf
.payload
));
215 const struct ilo_rasterizer_sf
*sf
= &rasterizer
->sf
;
217 memcpy(payload
, sf
->payload
, sizeof(sf
->payload
));
219 payload
[1] |= sf
->dw_msaa
;
223 payload
[1] = (num_samples
> 1) ? GEN6_SF_MSRAST_ON_PATTERN
: 0;
230 if (dev
->gen
>= ILO_GEN(7)) {
233 /* separate stencil */
234 switch (depth_format
) {
235 case PIPE_FORMAT_Z16_UNORM
:
236 format
= BRW_DEPTHFORMAT_D16_UNORM
;
238 case PIPE_FORMAT_Z32_FLOAT
:
239 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
240 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
242 case PIPE_FORMAT_Z24X8_UNORM
:
243 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
244 format
= BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
;
247 /* FLOAT surface is assumed when there is no depth buffer */
248 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
252 payload
[0] |= format
<< GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT
;
257 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
260 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info
*dev
,
261 const struct ilo_rasterizer_state
*rasterizer
,
262 const struct ilo_shader_state
*fs
,
263 uint32_t *dw
, int num_dwords
)
265 int output_count
, vue_offset
, vue_len
;
266 const struct ilo_kernel_routing
*routing
;
268 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
269 assert(num_dwords
== 13);
272 memset(dw
, 0, sizeof(dw
[0]) * num_dwords
);
274 if (dev
->gen
>= ILO_GEN(7))
275 dw
[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
;
277 dw
[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
;
282 output_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
283 assert(output_count
<= 32);
285 routing
= ilo_shader_get_kernel_routing(fs
);
287 vue_offset
= routing
->source_skip
;
288 assert(vue_offset
% 2 == 0);
291 vue_len
= (routing
->source_len
+ 1) / 2;
295 if (dev
->gen
>= ILO_GEN(7)) {
296 dw
[0] = output_count
<< GEN7_SBE_NUM_OUTPUTS_SHIFT
|
297 vue_len
<< GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
|
298 vue_offset
<< GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT
;
299 if (routing
->swizzle_enable
)
300 dw
[0] |= GEN7_SBE_SWIZZLE_ENABLE
;
303 dw
[0] = output_count
<< GEN6_SF_NUM_OUTPUTS_SHIFT
|
304 vue_len
<< GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
|
305 vue_offset
<< GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT
;
306 if (routing
->swizzle_enable
)
307 dw
[0] |= GEN6_SF_SWIZZLE_ENABLE
;
310 switch (rasterizer
->state
.sprite_coord_mode
) {
311 case PIPE_SPRITE_COORD_UPPER_LEFT
:
312 dw
[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT
;
314 case PIPE_SPRITE_COORD_LOWER_LEFT
:
315 dw
[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT
;
319 STATIC_ASSERT(Elements(routing
->swizzles
) >= 16);
320 memcpy(&dw
[1], routing
->swizzles
, 2 * 16);
323 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
325 * "This field (Point Sprite Texture Coordinate Enable) must be
326 * programmed to 0 when non-point primitives are rendered."
328 * TODO We do not check that yet.
330 dw
[9] = routing
->point_sprite_enable
;
332 dw
[10] = routing
->const_interp_enable
;
334 /* WrapShortest enables */
340 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info
*dev
,
341 struct intel_bo
*general_state_bo
,
342 struct intel_bo
*surface_state_bo
,
343 struct intel_bo
*dynamic_state_bo
,
344 struct intel_bo
*indirect_object_bo
,
345 struct intel_bo
*instruction_bo
,
346 uint32_t general_state_size
,
347 uint32_t dynamic_state_size
,
348 uint32_t indirect_object_size
,
349 uint32_t instruction_size
,
352 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x01);
353 const uint8_t cmd_len
= 10;
355 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
357 /* 4K-page aligned */
358 assert(((general_state_size
| dynamic_state_size
|
359 indirect_object_size
| instruction_size
) & 0xfff) == 0);
361 ilo_cp_begin(cp
, cmd_len
);
362 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
364 ilo_cp_write_bo(cp
, 1, general_state_bo
,
367 ilo_cp_write_bo(cp
, 1, surface_state_bo
,
368 INTEL_DOMAIN_SAMPLER
,
370 ilo_cp_write_bo(cp
, 1, dynamic_state_bo
,
371 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
373 ilo_cp_write_bo(cp
, 1, indirect_object_bo
,
376 ilo_cp_write_bo(cp
, 1, instruction_bo
,
377 INTEL_DOMAIN_INSTRUCTION
,
380 if (general_state_size
) {
381 ilo_cp_write_bo(cp
, general_state_size
| 1, general_state_bo
,
386 /* skip range check */
390 if (dynamic_state_size
) {
391 ilo_cp_write_bo(cp
, dynamic_state_size
| 1, dynamic_state_bo
,
392 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
396 /* skip range check */
397 ilo_cp_write(cp
, 0xfffff000 + 1);
400 if (indirect_object_size
) {
401 ilo_cp_write_bo(cp
, indirect_object_size
| 1, indirect_object_bo
,
406 /* skip range check */
407 ilo_cp_write(cp
, 0xfffff000 + 1);
410 if (instruction_size
) {
411 ilo_cp_write_bo(cp
, instruction_size
| 1, instruction_bo
,
412 INTEL_DOMAIN_INSTRUCTION
,
416 /* skip range check */
/**
 * Emit STATE_SIP (2 dwords): the command header followed by \p sip.
 */
static inline void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_VF_STATISTICS (1 dword).  \p enable is ORed into bit 0 of the
 * command header; there is no length field.
 */
static inline void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | enable);
   ilo_cp_end(cp);
}
/**
 * Emit PIPELINE_SELECT (1 dword).  \p pipeline must be 0x0 or 0x1 and is
 * ORed into the low bits of the command header; there is no length field.
 */
static inline void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_VFE_STATE (8 dwords, GEN6 only).
 *
 * \param max_threads      thread count; programmed as max_threads - 1
 * \param num_urb_entries  number of URB entries
 * \param urb_entry_size   URB entry allocation size
 *
 * The scratch space and scoreboard dwords are written as zero, and the CURBE
 * allocation size is fixed at 480.
 */
static inline void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
   const uint8_t cmd_len = 8;
   uint32_t dw2, dw4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw2 = (max_threads - 1) << 16 |
         num_urb_entries << 8 |
         1 << 7 | /* Reset Gateway Timer */
         1 << 6;  /* Bypass Gateway Control */

   dw4 = urb_entry_size << 16 |  /* URB Entry Allocation Size */
         480;                    /* CURBE Allocation Size */

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, dw4);
   ilo_cp_write(cp, 0); /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_CURBE_LOAD (4 dwords, GEN6 only).
 *
 * \param buf   offset of the CURBE data; must be a multiple of 32
 * \param size  size of the CURBE data; rounded up to a multiple of 32 before
 *              being written
 */
static inline void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);
   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   size = align(size, 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (4 dwords, GEN6 only).
 *
 * \param offset   offset of the interface descriptors; must be a multiple of
 *                 32
 * \param num_ids  number of interface descriptors; the total length is
 *                 written in bytes, at 8 dwords per descriptor
 */
static inline void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   /* every ID has 8 DWords */
   ilo_cp_write(cp, num_ids * 8 * 4);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_GATEWAY_STATE (2 dwords, GEN6 only).
 *
 * The payload dword packs \p id at bit 16, \p byte at bit 8, and
 * \p thread_count in the low bits.
 */
static inline void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
   const uint8_t cmd_len = 2;
   uint32_t dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw1 = id << 16 |
         byte << 8 |
         thread_count;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
/**
 * Emit MEDIA_STATE_FLUSH (2 dwords, GEN6 only).
 *
 * The payload dword packs \p thread_count_water_mark at bit 16 and
 * \p barrier_mask in the low bits.
 */
static inline void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
   const uint8_t cmd_len = 2;
   uint32_t dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw1 = thread_count_water_mark << 16 |
         barrier_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
/**
 * Placeholder for MEDIA_OBJECT_WALKER.  The command is not supported: the
 * function asserts unconditionally and emits nothing.
 */
static inline void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
597 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info
*dev
,
598 uint32_t vs_binding_table
,
599 uint32_t gs_binding_table
,
600 uint32_t ps_binding_table
,
603 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x01);
604 const uint8_t cmd_len
= 4;
606 ILO_GPE_VALID_GEN(dev
, 6, 6);
608 ilo_cp_begin(cp
, cmd_len
);
609 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
610 GEN6_BINDING_TABLE_MODIFY_VS
|
611 GEN6_BINDING_TABLE_MODIFY_GS
|
612 GEN6_BINDING_TABLE_MODIFY_PS
);
613 ilo_cp_write(cp
, vs_binding_table
);
614 ilo_cp_write(cp
, gs_binding_table
);
615 ilo_cp_write(cp
, ps_binding_table
);
620 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info
*dev
,
621 uint32_t vs_sampler_state
,
622 uint32_t gs_sampler_state
,
623 uint32_t ps_sampler_state
,
626 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x02);
627 const uint8_t cmd_len
= 4;
629 ILO_GPE_VALID_GEN(dev
, 6, 6);
631 ilo_cp_begin(cp
, cmd_len
);
632 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
633 VS_SAMPLER_STATE_CHANGE
|
634 GS_SAMPLER_STATE_CHANGE
|
635 PS_SAMPLER_STATE_CHANGE
);
636 ilo_cp_write(cp
, vs_sampler_state
);
637 ilo_cp_write(cp
, gs_sampler_state
);
638 ilo_cp_write(cp
, ps_sampler_state
);
643 gen6_emit_3DSTATE_URB(const struct ilo_dev_info
*dev
,
644 int vs_total_size
, int gs_total_size
,
645 int vs_entry_size
, int gs_entry_size
,
648 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x05);
649 const uint8_t cmd_len
= 3;
650 const int row_size
= 128; /* 1024 bits */
651 int vs_alloc_size
, gs_alloc_size
;
652 int vs_num_entries
, gs_num_entries
;
654 ILO_GPE_VALID_GEN(dev
, 6, 6);
656 /* in 1024-bit URB rows */
657 vs_alloc_size
= (vs_entry_size
+ row_size
- 1) / row_size
;
658 gs_alloc_size
= (gs_entry_size
+ row_size
- 1) / row_size
;
660 /* the valid range is [1, 5] */
665 assert(vs_alloc_size
<= 5 && gs_alloc_size
<= 5);
667 /* the valid range is [24, 256] in multiples of 4 */
668 vs_num_entries
= (vs_total_size
/ row_size
/ vs_alloc_size
) & ~3;
669 if (vs_num_entries
> 256)
670 vs_num_entries
= 256;
671 assert(vs_num_entries
>= 24);
673 /* the valid range is [0, 256] in multiples of 4 */
674 gs_num_entries
= (gs_total_size
/ row_size
/ gs_alloc_size
) & ~3;
675 if (gs_num_entries
> 256)
676 gs_num_entries
= 256;
678 ilo_cp_begin(cp
, cmd_len
);
679 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
680 ilo_cp_write(cp
, (vs_alloc_size
- 1) << GEN6_URB_VS_SIZE_SHIFT
|
681 vs_num_entries
<< GEN6_URB_VS_ENTRIES_SHIFT
);
682 ilo_cp_write(cp
, gs_num_entries
<< GEN6_URB_GS_ENTRIES_SHIFT
|
683 (gs_alloc_size
- 1) << GEN6_URB_GS_SIZE_SHIFT
);
688 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info
*dev
,
689 const struct ilo_ve_state
*ve
,
690 const struct ilo_vb_state
*vb
,
693 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x08);
697 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
700 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
702 * "From 1 to 33 VBs can be specified..."
704 assert(ve
->vb_count
<= 33);
709 cmd_len
= 1 + 4 * ve
->vb_count
;
711 ilo_cp_begin(cp
, cmd_len
);
712 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
714 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
715 const unsigned instance_divisor
= ve
->instance_divisors
[hw_idx
];
716 const unsigned pipe_idx
= ve
->vb_mapping
[hw_idx
];
717 const struct pipe_vertex_buffer
*cso
= &vb
->states
[pipe_idx
];
720 dw
= hw_idx
<< GEN6_VB0_INDEX_SHIFT
;
722 if (instance_divisor
)
723 dw
|= GEN6_VB0_ACCESS_INSTANCEDATA
;
725 dw
|= GEN6_VB0_ACCESS_VERTEXDATA
;
727 if (dev
->gen
>= ILO_GEN(7))
728 dw
|= GEN7_VB0_ADDRESS_MODIFYENABLE
;
730 /* use null vb if there is no buffer or the stride is out of range */
731 if (cso
->buffer
&& cso
->stride
<= 2048) {
732 const struct ilo_buffer
*buf
= ilo_buffer(cso
->buffer
);
733 const uint32_t start_offset
= cso
->buffer_offset
;
735 * As noted in ilo_translate_format(), we treat some 3-component
736 * formats as 4-component formats to work around hardware
737 * limitations. Imagine the case where the vertex buffer holds a
738 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
739 * The hardware would not be able to fetch it because the vertex
740 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
741 * and that takes at least 8 bytes.
743 * For the workaround to work, we query the physical size, which is
744 * page aligned, to calculate end_offset so that the last vertex has
745 * a better chance to be fetched.
747 const uint32_t end_offset
= intel_bo_get_size(buf
->bo
) - 1;
749 dw
|= cso
->stride
<< BRW_VB0_PITCH_SHIFT
;
751 ilo_cp_write(cp
, dw
);
752 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
753 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
754 ilo_cp_write(cp
, instance_divisor
);
759 ilo_cp_write(cp
, dw
);
762 ilo_cp_write(cp
, instance_divisor
);
770 ve_init_cso_with_components(const struct ilo_dev_info
*dev
,
771 int comp0
, int comp1
, int comp2
, int comp3
,
772 struct ilo_ve_cso
*cso
)
774 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
776 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
777 cso
->payload
[0] = GEN6_VE0_VALID
;
779 comp0
<< BRW_VE1_COMPONENT_0_SHIFT
|
780 comp1
<< BRW_VE1_COMPONENT_1_SHIFT
|
781 comp2
<< BRW_VE1_COMPONENT_2_SHIFT
|
782 comp3
<< BRW_VE1_COMPONENT_3_SHIFT
;
786 ve_set_cso_edgeflag(const struct ilo_dev_info
*dev
,
787 struct ilo_ve_cso
*cso
)
791 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
794 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
796 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
797 * valid VERTEX_ELEMENT structure.
799 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
800 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
802 * - The Source Element Format must be set to the UINT format.
804 * - [DevSNB]: Edge Flags are not supported for QUADLIST
805 * primitives. Software may elect to convert QUADLIST primitives
806 * to some set of corresponding edge-flag-supported primitive
807 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
810 cso
->payload
[0] |= GEN6_VE0_EDGE_FLAG_ENABLE
;
812 BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_0_SHIFT
|
813 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_1_SHIFT
|
814 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_2_SHIFT
|
815 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_3_SHIFT
;
818 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
819 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
820 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
822 * Since all the hardware cares about is whether the flags are zero or not,
823 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
825 format
= (cso
->payload
[0] >> BRW_VE0_FORMAT_SHIFT
) & 0x1ff;
826 if (format
== BRW_SURFACEFORMAT_R32_FLOAT
) {
827 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT
==
828 BRW_SURFACEFORMAT_R32_FLOAT
- 1);
830 cso
->payload
[0] -= (1 << BRW_VE0_FORMAT_SHIFT
);
833 assert(format
== BRW_SURFACEFORMAT_R8_UINT
);
838 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info
*dev
,
839 const struct ilo_ve_state
*ve
,
840 bool last_velement_edgeflag
,
841 bool prepend_generated_ids
,
844 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x09);
848 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
851 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
853 * "Up to 34 (DevSNB+) vertex elements are supported."
855 assert(ve
->count
+ prepend_generated_ids
<= 34);
857 if (!ve
->count
&& !prepend_generated_ids
) {
858 struct ilo_ve_cso dummy
;
860 ve_init_cso_with_components(dev
,
861 BRW_VE1_COMPONENT_STORE_0
,
862 BRW_VE1_COMPONENT_STORE_0
,
863 BRW_VE1_COMPONENT_STORE_0
,
864 BRW_VE1_COMPONENT_STORE_1_FLT
,
868 ilo_cp_begin(cp
, cmd_len
);
869 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
870 ilo_cp_write_multi(cp
, dummy
.payload
, 2);
876 cmd_len
= 2 * (ve
->count
+ prepend_generated_ids
) + 1;
878 ilo_cp_begin(cp
, cmd_len
);
879 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
881 if (prepend_generated_ids
) {
882 struct ilo_ve_cso gen_ids
;
884 ve_init_cso_with_components(dev
,
885 BRW_VE1_COMPONENT_STORE_VID
,
886 BRW_VE1_COMPONENT_STORE_IID
,
887 BRW_VE1_COMPONENT_NOSTORE
,
888 BRW_VE1_COMPONENT_NOSTORE
,
891 ilo_cp_write_multi(cp
, gen_ids
.payload
, 2);
894 if (last_velement_edgeflag
) {
895 struct ilo_ve_cso edgeflag
;
897 for (i
= 0; i
< ve
->count
- 1; i
++)
898 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
900 edgeflag
= ve
->cso
[i
];
901 ve_set_cso_edgeflag(dev
, &edgeflag
);
902 ilo_cp_write_multi(cp
, edgeflag
.payload
, 2);
905 for (i
= 0; i
< ve
->count
; i
++)
906 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
913 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info
*dev
,
914 const struct ilo_ib_state
*ib
,
915 bool enable_cut_index
,
918 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0a);
919 const uint8_t cmd_len
= 3;
920 struct ilo_buffer
*buf
= ilo_buffer(ib
->hw_resource
);
921 uint32_t start_offset
, end_offset
;
924 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
929 /* this is moved to the new 3DSTATE_VF */
930 if (dev
->gen
>= ILO_GEN(7.5))
931 assert(!enable_cut_index
);
933 switch (ib
->hw_index_size
) {
935 format
= BRW_INDEX_DWORD
;
938 format
= BRW_INDEX_WORD
;
941 format
= BRW_INDEX_BYTE
;
944 assert(!"unknown index size");
945 format
= BRW_INDEX_BYTE
;
950 * set start_offset to 0 here and adjust pipe_draw_info::start with
951 * ib->draw_start_offset in 3DPRIMITIVE
954 end_offset
= buf
->bo_size
;
956 /* end_offset must also be aligned and is inclusive */
957 end_offset
-= (end_offset
% ib
->hw_index_size
);
960 ilo_cp_begin(cp
, cmd_len
);
961 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
962 ((enable_cut_index
) ? BRW_CUT_INDEX_ENABLE
: 0) |
964 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
965 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
970 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info
*dev
,
971 uint32_t clip_viewport
,
972 uint32_t sf_viewport
,
973 uint32_t cc_viewport
,
976 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0d);
977 const uint8_t cmd_len
= 4;
979 ILO_GPE_VALID_GEN(dev
, 6, 6);
981 ilo_cp_begin(cp
, cmd_len
);
982 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
983 GEN6_CLIP_VIEWPORT_MODIFY
|
984 GEN6_SF_VIEWPORT_MODIFY
|
985 GEN6_CC_VIEWPORT_MODIFY
);
986 ilo_cp_write(cp
, clip_viewport
);
987 ilo_cp_write(cp
, sf_viewport
);
988 ilo_cp_write(cp
, cc_viewport
);
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (4 dwords, GEN6 only).
 *
 * Bit 0 is set on each of the three pointers before it is written, so the
 * blend, depth/stencil, and color calc pointers are all updated by every
 * call.
 */
static inline void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
   const uint8_t cmd_len = 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, blend_state | 1);
   ilo_cp_write(cp, depth_stencil_state | 1);
   ilo_cp_write(cp, color_calc_state | 1);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS (2 dwords): the command header
 * followed by \p scissor_rect.
 */
static inline void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1029 gen6_emit_3DSTATE_VS(const struct ilo_dev_info
*dev
,
1030 const struct ilo_shader_state
*vs
,
1034 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x10);
1035 const uint8_t cmd_len
= 6;
1036 const struct ilo_shader_cso
*cso
;
1037 uint32_t dw2
, dw4
, dw5
;
1039 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1042 ilo_cp_begin(cp
, cmd_len
);
1043 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1044 ilo_cp_write(cp
, 0);
1045 ilo_cp_write(cp
, 0);
1046 ilo_cp_write(cp
, 0);
1047 ilo_cp_write(cp
, 0);
1048 ilo_cp_write(cp
, 0);
1053 cso
= ilo_shader_get_kernel_cso(vs
);
1054 dw2
= cso
->payload
[0];
1055 dw4
= cso
->payload
[1];
1056 dw5
= cso
->payload
[2];
1058 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT
;
1060 ilo_cp_begin(cp
, cmd_len
);
1061 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1062 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(vs
));
1063 ilo_cp_write(cp
, dw2
);
1064 ilo_cp_write(cp
, 0); /* scratch */
1065 ilo_cp_write(cp
, dw4
);
1066 ilo_cp_write(cp
, dw5
);
1071 gen6_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
1072 const struct ilo_shader_state
*gs
,
1073 const struct ilo_shader_state
*vs
,
1077 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x11);
1078 const uint8_t cmd_len
= 7;
1079 uint32_t dw1
, dw2
, dw4
, dw5
, dw6
;
1081 ILO_GPE_VALID_GEN(dev
, 6, 6);
1084 const struct ilo_shader_cso
*cso
;
1086 dw1
= ilo_shader_get_kernel_offset(gs
);
1088 cso
= ilo_shader_get_kernel_cso(gs
);
1089 dw2
= cso
->payload
[0];
1090 dw4
= cso
->payload
[1];
1091 dw5
= cso
->payload
[2];
1092 dw6
= cso
->payload
[3];
1094 else if (vs
&& ilo_shader_get_kernel_param(vs
, ILO_KERNEL_VS_GEN6_SO
)) {
1095 struct ilo_shader_cso cso
;
1096 enum ilo_kernel_param param
;
1098 switch (verts_per_prim
) {
1100 param
= ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
;
1103 param
= ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
;
1106 param
= ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
;
1110 dw1
= ilo_shader_get_kernel_offset(vs
) +
1111 ilo_shader_get_kernel_param(vs
, param
);
1113 /* cannot use VS's CSO */
1114 ilo_gpe_init_gs_cso_gen6(dev
, vs
, &cso
);
1115 dw2
= cso
.payload
[0];
1116 dw4
= cso
.payload
[1];
1117 dw5
= cso
.payload
[2];
1118 dw6
= cso
.payload
[3];
1123 dw4
= 1 << GEN6_GS_URB_READ_LENGTH_SHIFT
;
1124 dw5
= GEN6_GS_STATISTICS_ENABLE
;
1128 ilo_cp_begin(cp
, cmd_len
);
1129 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1130 ilo_cp_write(cp
, dw1
);
1131 ilo_cp_write(cp
, dw2
);
1132 ilo_cp_write(cp
, 0);
1133 ilo_cp_write(cp
, dw4
);
1134 ilo_cp_write(cp
, dw5
);
1135 ilo_cp_write(cp
, dw6
);
1140 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info
*dev
,
1141 const struct ilo_rasterizer_state
*rasterizer
,
1142 const struct ilo_shader_state
*fs
,
1143 bool enable_guardband
,
1147 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x12);
1148 const uint8_t cmd_len
= 4;
1149 uint32_t dw1
, dw2
, dw3
;
1151 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1156 dw1
= rasterizer
->clip
.payload
[0];
1157 dw2
= rasterizer
->clip
.payload
[1];
1158 dw3
= rasterizer
->clip
.payload
[2];
1160 if (enable_guardband
&& rasterizer
->clip
.can_enable_guardband
)
1161 dw2
|= GEN6_CLIP_GB_TEST
;
1163 interps
= (fs
) ? ilo_shader_get_kernel_param(fs
,
1164 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) : 0;
1166 if (interps
& (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
|
1167 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
|
1168 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC
))
1169 dw2
|= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE
;
1171 dw3
|= GEN6_CLIP_FORCE_ZERO_RTAINDEX
|
1172 (num_viewports
- 1);
1180 ilo_cp_begin(cp
, cmd_len
);
1181 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1182 ilo_cp_write(cp
, dw1
);
1183 ilo_cp_write(cp
, dw2
);
1184 ilo_cp_write(cp
, dw3
);
1189 gen6_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
1190 const struct ilo_rasterizer_state
*rasterizer
,
1191 const struct ilo_shader_state
*fs
,
1194 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x13);
1195 const uint8_t cmd_len
= 20;
1196 uint32_t payload_raster
[6], payload_sbe
[13];
1198 ILO_GPE_VALID_GEN(dev
, 6, 6);
1200 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
, rasterizer
,
1201 1, PIPE_FORMAT_NONE
, payload_raster
, Elements(payload_raster
));
1202 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
,
1203 fs
, payload_sbe
, Elements(payload_sbe
));
1205 ilo_cp_begin(cp
, cmd_len
);
1206 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1207 ilo_cp_write(cp
, payload_sbe
[0]);
1208 ilo_cp_write_multi(cp
, payload_raster
, 6);
1209 ilo_cp_write_multi(cp
, &payload_sbe
[1], 12);
1214 gen6_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
1215 const struct ilo_shader_state
*fs
,
1217 const struct ilo_rasterizer_state
*rasterizer
,
1218 bool dual_blend
, bool cc_may_kill
,
1222 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x14);
1223 const uint8_t cmd_len
= 9;
1224 const int num_samples
= 1;
1225 const struct ilo_shader_cso
*fs_cso
;
1226 uint32_t dw2
, dw4
, dw5
, dw6
;
1228 ILO_GPE_VALID_GEN(dev
, 6, 6);
1231 /* see brwCreateContext() */
1232 const int max_threads
= (dev
->gt
== 2) ? 80 : 40;
1234 ilo_cp_begin(cp
, cmd_len
);
1235 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1236 ilo_cp_write(cp
, 0);
1237 ilo_cp_write(cp
, 0);
1238 ilo_cp_write(cp
, 0);
1239 ilo_cp_write(cp
, hiz_op
);
1240 /* honor the valid range even if dispatching is disabled */
1241 ilo_cp_write(cp
, (max_threads
- 1) << GEN6_WM_MAX_THREADS_SHIFT
);
1242 ilo_cp_write(cp
, 0);
1243 ilo_cp_write(cp
, 0);
1244 ilo_cp_write(cp
, 0);
1250 fs_cso
= ilo_shader_get_kernel_cso(fs
);
1251 dw2
= fs_cso
->payload
[0];
1252 dw4
= fs_cso
->payload
[1];
1253 dw5
= fs_cso
->payload
[2];
1254 dw6
= fs_cso
->payload
[3];
1256 dw2
|= (num_samplers
+ 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT
;
1259 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1261 * "This bit (Statistics Enable) must be disabled if either of these
1262 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
1263 * Enable or Depth Buffer Resolve Enable."
1266 dw4
|= GEN6_WM_STATISTICS_ENABLE
;
1269 dw5
|= GEN6_WM_KILL_ENABLE
|
1270 GEN6_WM_DISPATCH_ENABLE
;
1274 dw5
|= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE
;
1276 dw5
|= rasterizer
->wm
.payload
[0];
1278 dw6
|= rasterizer
->wm
.payload
[1];
1280 if (num_samples
> 1) {
1281 dw6
|= rasterizer
->wm
.dw_msaa_rast
|
1282 rasterizer
->wm
.dw_msaa_disp
;
1285 ilo_cp_begin(cp
, cmd_len
);
1286 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1287 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
1288 ilo_cp_write(cp
, dw2
);
1289 ilo_cp_write(cp
, 0); /* scratch */
1290 ilo_cp_write(cp
, dw4
);
1291 ilo_cp_write(cp
, dw5
);
1292 ilo_cp_write(cp
, dw6
);
1293 ilo_cp_write(cp
, 0); /* kernel 1 */
1294 ilo_cp_write(cp
, 0); /* kernel 2 */
1298 static inline unsigned
1299 gen6_fill_3dstate_constant(const struct ilo_dev_info
*dev
,
1300 const uint32_t *bufs
, const int *sizes
,
1301 int num_bufs
, int max_read_length
,
1302 uint32_t *dw
, int num_dwords
)
1304 unsigned enabled
= 0x0;
1305 int total_read_length
, i
;
1307 assert(num_dwords
== 4);
1309 total_read_length
= 0;
1310 for (i
= 0; i
< 4; i
++) {
1311 if (i
< num_bufs
&& sizes
[i
]) {
1312 /* in 256-bit units minus one */
1313 const int read_len
= (sizes
[i
] + 31) / 32 - 1;
1315 assert(bufs
[i
] % 32 == 0);
1316 assert(read_len
< 32);
1319 dw
[i
] = bufs
[i
] | read_len
;
1321 total_read_length
+= read_len
+ 1;
1328 assert(total_read_length
<= max_read_length
);
1334 gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info
*dev
,
1335 const uint32_t *bufs
, const int *sizes
,
1339 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x15);
1340 const uint8_t cmd_len
= 5;
1341 uint32_t buf_dw
[4], buf_enabled
;
1343 ILO_GPE_VALID_GEN(dev
, 6, 6);
1344 assert(num_bufs
<= 4);
1347 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
1349 * "The sum of all four read length fields (each incremented to
1350 * represent the actual read length) must be less than or equal to 32"
1352 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1353 bufs
, sizes
, num_bufs
, 32, buf_dw
, Elements(buf_dw
));
1355 ilo_cp_begin(cp
, cmd_len
);
1356 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
1357 ilo_cp_write(cp
, buf_dw
[0]);
1358 ilo_cp_write(cp
, buf_dw
[1]);
1359 ilo_cp_write(cp
, buf_dw
[2]);
1360 ilo_cp_write(cp
, buf_dw
[3]);
1365 gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info
*dev
,
1366 const uint32_t *bufs
, const int *sizes
,
1370 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x16);
1371 const uint8_t cmd_len
= 5;
1372 uint32_t buf_dw
[4], buf_enabled
;
1374 ILO_GPE_VALID_GEN(dev
, 6, 6);
1375 assert(num_bufs
<= 4);
1378 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
1380 * "The sum of all four read length fields (each incremented to
1381 * represent the actual read length) must be less than or equal to 64"
1383 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1384 bufs
, sizes
, num_bufs
, 64, buf_dw
, Elements(buf_dw
));
1386 ilo_cp_begin(cp
, cmd_len
);
1387 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
1388 ilo_cp_write(cp
, buf_dw
[0]);
1389 ilo_cp_write(cp
, buf_dw
[1]);
1390 ilo_cp_write(cp
, buf_dw
[2]);
1391 ilo_cp_write(cp
, buf_dw
[3]);
1396 gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info
*dev
,
1397 const uint32_t *bufs
, const int *sizes
,
1401 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x17);
1402 const uint8_t cmd_len
= 5;
1403 uint32_t buf_dw
[4], buf_enabled
;
1405 ILO_GPE_VALID_GEN(dev
, 6, 6);
1406 assert(num_bufs
<= 4);
1409 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1411 * "The sum of all four read length fields (each incremented to
1412 * represent the actual read length) must be less than or equal to 64"
1414 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1415 bufs
, sizes
, num_bufs
, 64, buf_dw
, Elements(buf_dw
));
1417 ilo_cp_begin(cp
, cmd_len
);
1418 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) | buf_enabled
<< 12);
1419 ilo_cp_write(cp
, buf_dw
[0]);
1420 ilo_cp_write(cp
, buf_dw
[1]);
1421 ilo_cp_write(cp
, buf_dw
[2]);
1422 ilo_cp_write(cp
, buf_dw
[3]);
1427 gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info
*dev
,
1428 unsigned sample_mask
,
1431 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x18);
1432 const uint8_t cmd_len
= 2;
1433 const unsigned valid_mask
= 0xf;
1435 ILO_GPE_VALID_GEN(dev
, 6, 6);
1437 sample_mask
&= valid_mask
;
1439 ilo_cp_begin(cp
, cmd_len
);
1440 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1441 ilo_cp_write(cp
, sample_mask
);
1446 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info
*dev
,
1447 unsigned x
, unsigned y
,
1448 unsigned width
, unsigned height
,
1451 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x00);
1452 const uint8_t cmd_len
= 4;
1453 unsigned xmax
= x
+ width
- 1;
1454 unsigned ymax
= y
+ height
- 1;
1457 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1459 if (dev
->gen
>= ILO_GEN(7)) {
1464 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1466 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1467 * must be an even number"
1474 if (x
> rect_limit
) x
= rect_limit
;
1475 if (y
> rect_limit
) y
= rect_limit
;
1476 if (xmax
> rect_limit
) xmax
= rect_limit
;
1477 if (ymax
> rect_limit
) ymax
= rect_limit
;
1479 ilo_cp_begin(cp
, cmd_len
);
1480 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1481 ilo_cp_write(cp
, y
<< 16 | x
);
1482 ilo_cp_write(cp
, ymax
<< 16 | xmax
);
1485 * There is no need to set the origin. It is intended to support front
1488 ilo_cp_write(cp
, 0);
1494 zs_align_surface(const struct ilo_dev_info
*dev
,
1495 unsigned align_w
, unsigned align_h
,
1496 struct ilo_zs_surface
*zs
)
1498 unsigned mask
, shift_w
, shift_h
;
1499 unsigned width
, height
;
1502 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1504 if (dev
->gen
>= ILO_GEN(7)) {
1515 dw3
= zs
->payload
[2];
1517 /* aligned width and height */
1518 width
= align(((dw3
>> shift_w
) & mask
) + 1, align_w
);
1519 height
= align(((dw3
>> shift_h
) & mask
) + 1, align_h
);
1521 dw3
= (dw3
& ~((mask
<< shift_w
) | (mask
<< shift_h
))) |
1522 (width
- 1) << shift_w
|
1523 (height
- 1) << shift_h
;
1525 zs
->payload
[2] = dw3
;
1529 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1530 const struct ilo_zs_surface
*zs
,
1533 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1534 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1535 const uint8_t cmd_len
= 7;
1537 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1539 ilo_cp_begin(cp
, cmd_len
);
1540 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1541 ilo_cp_write(cp
, zs
->payload
[0]);
1542 ilo_cp_write_bo(cp
, zs
->payload
[1], zs
->bo
,
1543 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1544 ilo_cp_write(cp
, zs
->payload
[2]);
1545 ilo_cp_write(cp
, zs
->payload
[3]);
1546 ilo_cp_write(cp
, zs
->payload
[4]);
1547 ilo_cp_write(cp
, zs
->payload
[5]);
1552 gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info
*dev
,
1553 int x_offset
, int y_offset
,
1556 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x06);
1557 const uint8_t cmd_len
= 2;
1559 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1560 assert(x_offset
>= 0 && x_offset
<= 31);
1561 assert(y_offset
>= 0 && y_offset
<= 31);
1563 ilo_cp_begin(cp
, cmd_len
);
1564 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1565 ilo_cp_write(cp
, x_offset
<< 8 | y_offset
);
1570 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info
*dev
,
1571 const struct pipe_poly_stipple
*pattern
,
1574 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x07);
1575 const uint8_t cmd_len
= 33;
1578 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1579 assert(Elements(pattern
->stipple
) == 32);
1581 ilo_cp_begin(cp
, cmd_len
);
1582 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1583 for (i
= 0; i
< 32; i
++)
1584 ilo_cp_write(cp
, pattern
->stipple
[i
]);
1589 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info
*dev
,
1590 unsigned pattern
, unsigned factor
,
1593 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x08);
1594 const uint8_t cmd_len
= 3;
1597 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1598 assert((pattern
& 0xffff) == pattern
);
1599 assert(factor
>= 1 && factor
<= 256);
1601 ilo_cp_begin(cp
, cmd_len
);
1602 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1603 ilo_cp_write(cp
, pattern
);
1605 if (dev
->gen
>= ILO_GEN(7)) {
1607 inverse
= (unsigned) (65536.0f
/ factor
);
1608 ilo_cp_write(cp
, inverse
<< 15 | factor
);
1612 inverse
= (unsigned) (8192.0f
/ factor
);
1613 ilo_cp_write(cp
, inverse
<< 16 | factor
);
1620 gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info
*dev
,
1623 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0a);
1624 const uint8_t cmd_len
= 3;
1626 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1628 ilo_cp_begin(cp
, cmd_len
);
1629 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1630 ilo_cp_write(cp
, 0 << 16 | 0);
1631 ilo_cp_write(cp
, 0 << 16 | 0);
1636 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info
*dev
,
1637 int index
, unsigned svbi
,
1639 bool load_vertex_count
,
1642 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0b);
1643 const uint8_t cmd_len
= 4;
1646 ILO_GPE_VALID_GEN(dev
, 6, 6);
1647 assert(index
>= 0 && index
< 4);
1649 dw1
= index
<< SVB_INDEX_SHIFT
;
1650 if (load_vertex_count
)
1651 dw1
|= SVB_LOAD_INTERNAL_VERTEX_COUNT
;
1653 ilo_cp_begin(cp
, cmd_len
);
1654 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1655 ilo_cp_write(cp
, dw1
);
1656 ilo_cp_write(cp
, svbi
);
1657 ilo_cp_write(cp
, max_svbi
);
1662 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info
*dev
,
1664 const uint32_t *packed_sample_pos
,
1665 bool pixel_location_center
,
1668 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0d);
1669 const uint8_t cmd_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 3;
1670 uint32_t dw1
, dw2
, dw3
;
1672 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1674 dw1
= (pixel_location_center
) ?
1675 MS_PIXEL_LOCATION_CENTER
: MS_PIXEL_LOCATION_UPPER_LEFT
;
1677 switch (num_samples
) {
1680 dw1
|= MS_NUMSAMPLES_1
;
1685 dw1
|= MS_NUMSAMPLES_4
;
1686 dw2
= packed_sample_pos
[0];
1690 assert(dev
->gen
>= ILO_GEN(7));
1691 dw1
|= MS_NUMSAMPLES_8
;
1692 dw2
= packed_sample_pos
[0];
1693 dw3
= packed_sample_pos
[1];
1696 assert(!"unsupported sample count");
1697 dw1
|= MS_NUMSAMPLES_1
;
1703 ilo_cp_begin(cp
, cmd_len
);
1704 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1705 ilo_cp_write(cp
, dw1
);
1706 ilo_cp_write(cp
, dw2
);
1707 if (dev
->gen
>= ILO_GEN(7))
1708 ilo_cp_write(cp
, dw3
);
1713 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info
*dev
,
1714 const struct ilo_zs_surface
*zs
,
1717 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1718 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1719 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1720 const uint8_t cmd_len
= 3;
1722 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1724 ilo_cp_begin(cp
, cmd_len
);
1725 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1726 /* see ilo_gpe_init_zs_surface() */
1727 ilo_cp_write(cp
, zs
->payload
[6]);
1728 ilo_cp_write_bo(cp
, zs
->payload
[7], zs
->separate_s8_bo
,
1729 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1734 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1735 const struct ilo_zs_surface
*zs
,
1738 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1739 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1740 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1741 const uint8_t cmd_len
= 3;
1743 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1745 ilo_cp_begin(cp
, cmd_len
);
1746 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1747 /* see ilo_gpe_init_zs_surface() */
1748 ilo_cp_write(cp
, zs
->payload
[8]);
1749 ilo_cp_write_bo(cp
, zs
->payload
[9], zs
->hiz_bo
,
1750 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1755 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
1759 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x10);
1760 const uint8_t cmd_len
= 2;
1762 ILO_GPE_VALID_GEN(dev
, 6, 6);
1764 ilo_cp_begin(cp
, cmd_len
);
1765 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1766 GEN5_DEPTH_CLEAR_VALID
);
1767 ilo_cp_write(cp
, clear_val
);
1772 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info
*dev
,
1774 struct intel_bo
*bo
, uint32_t bo_offset
,
1778 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x2, 0x00);
1779 const uint8_t cmd_len
= (write_qword
) ? 5 : 4;
1780 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
1781 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
1783 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1785 if (dw1
& PIPE_CONTROL_CS_STALL
) {
1787 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1789 * "1 of the following must also be set (when CS stall is set):
1791 * * Depth Cache Flush Enable ([0] of DW1)
1792 * * Stall at Pixel Scoreboard ([1] of DW1)
1793 * * Depth Stall ([13] of DW1)
1794 * * Post-Sync Operation ([13] of DW1)
1795 * * Render Target Cache Flush Enable ([12] of DW1)
1796 * * Notify Enable ([8] of DW1)"
1798 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1800 * "One of the following must also be set (when CS stall is set):
1802 * * Render Target Cache Flush Enable ([12] of DW1)
1803 * * Depth Cache Flush Enable ([0] of DW1)
1804 * * Stall at Pixel Scoreboard ([1] of DW1)
1805 * * Depth Stall ([13] of DW1)
1806 * * Post-Sync Operation ([13] of DW1)"
1808 uint32_t bit_test
= PIPE_CONTROL_WRITE_FLUSH
|
1809 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1810 PIPE_CONTROL_STALL_AT_SCOREBOARD
|
1811 PIPE_CONTROL_DEPTH_STALL
;
1814 bit_test
|= PIPE_CONTROL_WRITE_IMMEDIATE
|
1815 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
1816 PIPE_CONTROL_WRITE_TIMESTAMP
;
1818 if (dev
->gen
== ILO_GEN(6))
1819 bit_test
|= PIPE_CONTROL_INTERRUPT_ENABLE
;
1821 assert(dw1
& bit_test
);
1824 if (dw1
& PIPE_CONTROL_DEPTH_STALL
) {
1826 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1828 * "Following bits must be clear (when Depth Stall is set):
1830 * * Render Target Cache Flush Enable ([12] of DW1)
1831 * * Depth Cache Flush Enable ([0] of DW1)"
1833 assert(!(dw1
& (PIPE_CONTROL_WRITE_FLUSH
|
1834 PIPE_CONTROL_DEPTH_CACHE_FLUSH
)));
1837 ilo_cp_begin(cp
, cmd_len
);
1838 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1839 ilo_cp_write(cp
, dw1
);
1840 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
1841 ilo_cp_write(cp
, 0);
1843 ilo_cp_write(cp
, 0);
1848 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
1849 const struct pipe_draw_info
*info
,
1850 const struct ilo_ib_state
*ib
,
1854 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x3, 0x00);
1855 const uint8_t cmd_len
= 6;
1856 const int prim
= (rectlist
) ?
1857 _3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
1858 const int vb_access
= (info
->indexed
) ?
1859 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM
:
1860 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL
;
1861 const uint32_t vb_start
= info
->start
+
1862 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
1864 ILO_GPE_VALID_GEN(dev
, 6, 6);
1866 ilo_cp_begin(cp
, cmd_len
);
1867 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1868 prim
<< GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT
|
1870 ilo_cp_write(cp
, info
->count
);
1871 ilo_cp_write(cp
, vb_start
);
1872 ilo_cp_write(cp
, info
->instance_count
);
1873 ilo_cp_write(cp
, info
->start_instance
);
1874 ilo_cp_write(cp
, info
->index_bias
);
1878 static inline uint32_t
1879 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info
*dev
,
1880 const struct ilo_shader_state
**cs
,
1881 uint32_t *sampler_state
,
1883 uint32_t *binding_table_state
,
1889 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
1891 * "(Interface Descriptor Total Length) This field must have the same
1892 * alignment as the Interface Descriptor Data Start Address.
1894 * It must be DQWord (32-byte) aligned..."
1896 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
1898 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
1899 * aligned address of the Interface Descriptor data."
1901 const int state_align
= 32 / 4;
1902 const int state_len
= (32 / 4) * num_ids
;
1903 uint32_t state_offset
, *dw
;
1906 ILO_GPE_VALID_GEN(dev
, 6, 6);
1908 dw
= ilo_cp_steal_ptr(cp
, "INTERFACE_DESCRIPTOR_DATA",
1909 state_len
, state_align
, &state_offset
);
1911 for (i
= 0; i
< num_ids
; i
++) {
1912 dw
[0] = ilo_shader_get_kernel_offset(cs
[i
]);
1913 dw
[1] = 1 << 18; /* SPF */
1914 dw
[2] = sampler_state
[i
] |
1915 (num_samplers
[i
] + 3) / 4 << 2;
1916 dw
[3] = binding_table_state
[i
] |
1918 dw
[4] = 0 << 16 | /* CURBE Read Length */
1919 0; /* CURBE Read Offset */
1920 dw
[5] = 0; /* Barrier ID */
1927 return state_offset
;
1930 static inline uint32_t
1931 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info
*dev
,
1932 const struct ilo_viewport_cso
*viewports
,
1933 unsigned num_viewports
,
1936 const int state_align
= 32 / 4;
1937 const int state_len
= 8 * num_viewports
;
1938 uint32_t state_offset
, *dw
;
1941 ILO_GPE_VALID_GEN(dev
, 6, 6);
1944 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1946 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1947 * stored as an array of up to 16 elements..."
1949 assert(num_viewports
&& num_viewports
<= 16);
1951 dw
= ilo_cp_steal_ptr(cp
, "SF_VIEWPORT",
1952 state_len
, state_align
, &state_offset
);
1954 for (i
= 0; i
< num_viewports
; i
++) {
1955 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1957 dw
[0] = fui(vp
->m00
);
1958 dw
[1] = fui(vp
->m11
);
1959 dw
[2] = fui(vp
->m22
);
1960 dw
[3] = fui(vp
->m30
);
1961 dw
[4] = fui(vp
->m31
);
1962 dw
[5] = fui(vp
->m32
);
1969 return state_offset
;
1972 static inline uint32_t
1973 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
1974 const struct ilo_viewport_cso
*viewports
,
1975 unsigned num_viewports
,
1978 const int state_align
= 32 / 4;
1979 const int state_len
= 4 * num_viewports
;
1980 uint32_t state_offset
, *dw
;
1983 ILO_GPE_VALID_GEN(dev
, 6, 6);
1986 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1988 * "The viewport-related state is stored as an array of up to 16
1991 assert(num_viewports
&& num_viewports
<= 16);
1993 dw
= ilo_cp_steal_ptr(cp
, "CLIP_VIEWPORT",
1994 state_len
, state_align
, &state_offset
);
1996 for (i
= 0; i
< num_viewports
; i
++) {
1997 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1999 dw
[0] = fui(vp
->min_gbx
);
2000 dw
[1] = fui(vp
->max_gbx
);
2001 dw
[2] = fui(vp
->min_gby
);
2002 dw
[3] = fui(vp
->max_gby
);
2007 return state_offset
;
2010 static inline uint32_t
2011 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info
*dev
,
2012 const struct ilo_viewport_cso
*viewports
,
2013 unsigned num_viewports
,
2016 const int state_align
= 32 / 4;
2017 const int state_len
= 2 * num_viewports
;
2018 uint32_t state_offset
, *dw
;
2021 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2024 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
2026 * "The viewport state is stored as an array of up to 16 elements..."
2028 assert(num_viewports
&& num_viewports
<= 16);
2030 dw
= ilo_cp_steal_ptr(cp
, "CC_VIEWPORT",
2031 state_len
, state_align
, &state_offset
);
2033 for (i
= 0; i
< num_viewports
; i
++) {
2034 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
2036 dw
[0] = fui(vp
->min_z
);
2037 dw
[1] = fui(vp
->max_z
);
2042 return state_offset
;
2045 static inline uint32_t
2046 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info
*dev
,
2047 const struct pipe_stencil_ref
*stencil_ref
,
2049 const struct pipe_blend_color
*blend_color
,
2052 const int state_align
= 64 / 4;
2053 const int state_len
= 6;
2054 uint32_t state_offset
, *dw
;
2056 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2058 dw
= ilo_cp_steal_ptr(cp
, "COLOR_CALC_STATE",
2059 state_len
, state_align
, &state_offset
);
2061 dw
[0] = stencil_ref
->ref_value
[0] << 24 |
2062 stencil_ref
->ref_value
[1] << 16 |
2063 BRW_ALPHATEST_FORMAT_UNORM8
;
2065 dw
[2] = fui(blend_color
->color
[0]);
2066 dw
[3] = fui(blend_color
->color
[1]);
2067 dw
[4] = fui(blend_color
->color
[2]);
2068 dw
[5] = fui(blend_color
->color
[3]);
2070 return state_offset
;
2073 static inline uint32_t
2074 gen6_emit_BLEND_STATE(const struct ilo_dev_info
*dev
,
2075 const struct ilo_blend_state
*blend
,
2076 const struct ilo_fb_state
*fb
,
2077 const struct ilo_dsa_state
*dsa
,
2080 const int state_align
= 64 / 4;
2082 uint32_t state_offset
, *dw
;
2083 unsigned num_targets
, i
;
2085 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2088 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2090 * "The blend state is stored as an array of up to 8 elements..."
2092 num_targets
= fb
->state
.nr_cbufs
;
2093 assert(num_targets
<= 8);
2098 /* to be able to reference alpha func */
2102 state_len
= 2 * num_targets
;
2104 dw
= ilo_cp_steal_ptr(cp
, "BLEND_STATE",
2105 state_len
, state_align
, &state_offset
);
2107 for (i
= 0; i
< num_targets
; i
++) {
2108 const unsigned idx
= (blend
->independent_blend_enable
) ? i
: 0;
2109 const struct ilo_blend_cso
*cso
= &blend
->cso
[idx
];
2110 const int num_samples
= fb
->num_samples
;
2111 const struct util_format_description
*format_desc
=
2112 (idx
< fb
->state
.nr_cbufs
&& fb
->state
.cbufs
[idx
]) ?
2113 util_format_description(fb
->state
.cbufs
[idx
]->format
) : NULL
;
2114 bool rt_is_unorm
, rt_is_pure_integer
, rt_dst_alpha_forced_one
;
2117 rt_is_pure_integer
= false;
2118 rt_dst_alpha_forced_one
= false;
2123 switch (format_desc
->format
) {
2124 case PIPE_FORMAT_B8G8R8X8_UNORM
:
2125 /* force alpha to one when the HW format has alpha */
2126 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM
)
2127 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM
);
2128 rt_dst_alpha_forced_one
= true;
2134 for (ch
= 0; ch
< 4; ch
++) {
2135 if (format_desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_VOID
)
2138 if (format_desc
->channel
[ch
].pure_integer
) {
2139 rt_is_unorm
= false;
2140 rt_is_pure_integer
= true;
2144 if (!format_desc
->channel
[ch
].normalized
||
2145 format_desc
->channel
[ch
].type
!= UTIL_FORMAT_TYPE_UNSIGNED
)
2146 rt_is_unorm
= false;
2150 dw
[0] = cso
->payload
[0];
2151 dw
[1] = cso
->payload
[1];
2153 if (!rt_is_pure_integer
) {
2154 if (rt_dst_alpha_forced_one
)
2155 dw
[0] |= cso
->dw_blend_dst_alpha_forced_one
;
2157 dw
[0] |= cso
->dw_blend
;
2161 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2163 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2164 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2166 * Since logicop is ignored for non-UNORM color buffers, no special care
2170 dw
[1] |= cso
->dw_logicop
;
2173 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2175 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2176 * Dither both must be disabled."
2178 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2179 * requires that anyway.
2181 if (num_samples
> 1)
2182 dw
[1] |= cso
->dw_alpha_mod
;
2185 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2187 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2190 if (!rt_is_pure_integer
)
2191 dw
[1] |= dsa
->dw_alpha
;
2196 return state_offset
;
2199 static inline uint32_t
2200 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info
*dev
,
2201 const struct ilo_dsa_state
*dsa
,
2204 const int state_align
= 64 / 4;
2205 const int state_len
= 3;
2206 uint32_t state_offset
, *dw
;
2209 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2211 dw
= ilo_cp_steal_ptr(cp
, "DEPTH_STENCIL_STATE",
2212 state_len
, state_align
, &state_offset
);
2214 dw
[0] = dsa
->payload
[0];
2215 dw
[1] = dsa
->payload
[1];
2216 dw
[2] = dsa
->payload
[2];
2218 return state_offset
;
2221 static inline uint32_t
2222 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info
*dev
,
2223 const struct ilo_scissor_state
*scissor
,
2224 unsigned num_viewports
,
2227 const int state_align
= 32 / 4;
2228 const int state_len
= 2 * num_viewports
;
2229 uint32_t state_offset
, *dw
;
2231 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2234 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2236 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2237 * stored as an array of up to 16 elements..."
2239 assert(num_viewports
&& num_viewports
<= 16);
2241 dw
= ilo_cp_steal_ptr(cp
, "SCISSOR_RECT",
2242 state_len
, state_align
, &state_offset
);
2244 memcpy(dw
, scissor
->payload
, state_len
* 4);
2246 return state_offset
;
2249 static inline uint32_t
2250 gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info
*dev
,
2251 uint32_t *surface_states
,
2252 int num_surface_states
,
2255 const int state_align
= 32 / 4;
2256 const int state_len
= num_surface_states
;
2257 uint32_t state_offset
, *dw
;
2259 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2262 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
2264 * "It is stored as an array of up to 256 elements..."
2266 assert(num_surface_states
<= 256);
2268 if (!num_surface_states
)
2271 dw
= ilo_cp_steal_ptr(cp
, "BINDING_TABLE_STATE",
2272 state_len
, state_align
, &state_offset
);
2273 memcpy(dw
, surface_states
,
2274 num_surface_states
* sizeof(surface_states
[0]));
2276 return state_offset
;
2279 static inline uint32_t
2280 gen6_emit_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2281 const struct ilo_view_surface
*surf
,
2285 const int state_align
= 32 / 4;
2286 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 8 : 6;
2287 uint32_t state_offset
;
2288 uint32_t read_domains
, write_domain
;
2290 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2293 read_domains
= INTEL_DOMAIN_RENDER
;
2294 write_domain
= INTEL_DOMAIN_RENDER
;
2297 read_domains
= INTEL_DOMAIN_SAMPLER
;
2301 ilo_cp_steal(cp
, "SURFACE_STATE", state_len
, state_align
, &state_offset
);
2303 STATIC_ASSERT(Elements(surf
->payload
) >= 8);
2305 ilo_cp_write(cp
, surf
->payload
[0]);
2306 ilo_cp_write_bo(cp
, surf
->payload
[1],
2307 surf
->bo
, read_domains
, write_domain
);
2308 ilo_cp_write(cp
, surf
->payload
[2]);
2309 ilo_cp_write(cp
, surf
->payload
[3]);
2310 ilo_cp_write(cp
, surf
->payload
[4]);
2311 ilo_cp_write(cp
, surf
->payload
[5]);
2313 if (dev
->gen
>= ILO_GEN(7)) {
2314 ilo_cp_write(cp
, surf
->payload
[6]);
2315 ilo_cp_write(cp
, surf
->payload
[7]);
2320 return state_offset
;
2323 static inline uint32_t
2324 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2325 const struct pipe_stream_output_target
*so
,
2326 const struct pipe_stream_output_info
*so_info
,
2330 struct ilo_buffer
*buf
= ilo_buffer(so
->buffer
);
2331 unsigned bo_offset
, struct_size
;
2332 enum pipe_format elem_format
;
2333 struct ilo_view_surface surf
;
2335 ILO_GPE_VALID_GEN(dev
, 6, 6);
2337 bo_offset
= so
->buffer_offset
+ so_info
->output
[so_index
].dst_offset
* 4;
2338 struct_size
= so_info
->stride
[so_info
->output
[so_index
].output_buffer
] * 4;
2340 switch (so_info
->output
[so_index
].num_components
) {
2342 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2345 elem_format
= PIPE_FORMAT_R32G32_FLOAT
;
2348 elem_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
2351 elem_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
2354 assert(!"unexpected SO components length");
2355 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2359 ilo_gpe_init_view_surface_for_buffer_gen6(dev
, buf
, bo_offset
, so
->buffer_size
,
2360 struct_size
, elem_format
, false, true, &surf
);
2362 return gen6_emit_SURFACE_STATE(dev
, &surf
, false, cp
);
2365 static inline uint32_t
2366 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info
*dev
,
2367 const struct ilo_sampler_cso
* const *samplers
,
2368 const struct pipe_sampler_view
* const *views
,
2369 const uint32_t *sampler_border_colors
,
2373 const int state_align
= 32 / 4;
2374 const int state_len
= 4 * num_samplers
;
2375 uint32_t state_offset
, *dw
;
2378 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2381 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2383 * "The sampler state is stored as an array of up to 16 elements..."
2385 assert(num_samplers
<= 16);
2390 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_STATE",
2391 state_len
, state_align
, &state_offset
);
2393 for (i
= 0; i
< num_samplers
; i
++) {
2394 const struct ilo_sampler_cso
*sampler
= samplers
[i
];
2395 const struct pipe_sampler_view
*view
= views
[i
];
2396 const uint32_t border_color
= sampler_border_colors
[i
];
2397 uint32_t dw_filter
, dw_wrap
;
2399 /* there may be holes */
2400 if (!sampler
|| !view
) {
2401 /* disabled sampler */
2411 /* determine filter and wrap modes */
2412 switch (view
->texture
->target
) {
2413 case PIPE_TEXTURE_1D
:
2414 dw_filter
= (sampler
->anisotropic
) ?
2415 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2416 dw_wrap
= sampler
->dw_wrap_1d
;
2418 case PIPE_TEXTURE_3D
:
2420 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2422 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2423 * surfaces of type SURFTYPE_3D."
2425 dw_filter
= sampler
->dw_filter
;
2426 dw_wrap
= sampler
->dw_wrap
;
2428 case PIPE_TEXTURE_CUBE
:
2429 dw_filter
= (sampler
->anisotropic
) ?
2430 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2431 dw_wrap
= sampler
->dw_wrap_cube
;
2434 dw_filter
= (sampler
->anisotropic
) ?
2435 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2436 dw_wrap
= sampler
->dw_wrap
;
2440 dw
[0] = sampler
->payload
[0];
2441 dw
[1] = sampler
->payload
[1];
2442 assert(!(border_color
& 0x1f));
2443 dw
[2] = border_color
;
2444 dw
[3] = sampler
->payload
[2];
2448 if (dev
->gen
>= ILO_GEN(7)) {
2453 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2455 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2456 * where the min and mag mode filters are different and
2457 * SurfMinLOD is nonzero. The determination of MagMode uses the
2458 * following equation instead of the one in the above
2459 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2461 * As a way to work around that, we set Base to
2462 * view->u.tex.first_level.
2464 dw
[0] |= view
->u
.tex
.first_level
<< 22;
2472 return state_offset
;
2475 static inline uint32_t
2476 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info
*dev
,
2477 const struct ilo_sampler_cso
*sampler
,
2480 const int state_align
= 32 / 4;
2481 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 12;
2482 uint32_t state_offset
, *dw
;
2484 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2486 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_BORDER_COLOR_STATE",
2487 state_len
, state_align
, &state_offset
);
2489 /* see ilo_gpe_init_sampler_cso() */
2490 memcpy(dw
, &sampler
->payload
[3], state_len
* 4);
2492 return state_offset
;
2495 static inline uint32_t
2496 gen6_emit_push_constant_buffer(const struct ilo_dev_info
*dev
,
2497 int size
, void **pcb
,
2501 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
2502 * to 32 bytes, and their sizes are specified in 256-bit units.
2504 const int state_align
= 32 / 4;
2505 const int state_len
= align(size
, 32) / 4;
2506 uint32_t state_offset
;
2509 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2511 buf
= ilo_cp_steal_ptr(cp
, "PUSH_CONSTANT_BUFFER",
2512 state_len
, state_align
, &state_offset
);
2514 /* zero out the unused range */
2515 if (size
< state_len
* 4)
2516 memset(&buf
[size
], 0, state_len
* 4 - size
);
2521 return state_offset
;
2524 #endif /* ILO_GPE_GEN6_H */