2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
31 #include "brw_defines.h"
32 #include "intel_reg.h"
33 #include "intel_winsys.h"
35 #include "ilo_common.h"
37 #include "ilo_format.h"
38 #include "ilo_resource.h"
39 #include "ilo_shader.h"
42 #define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
43 assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
45 #define ILO_GPE_CMD(pipeline, op, subop) \
46 (0x3 << 29 | (pipeline) << 27 | (op) << 24 | (subop) << 16)
49 * Commands that GEN6 GPE could emit.
51 enum ilo_gpe_gen6_command
{
52 ILO_GPE_GEN6_STATE_BASE_ADDRESS
, /* (0x0, 0x1, 0x01) */
53 ILO_GPE_GEN6_STATE_SIP
, /* (0x0, 0x1, 0x02) */
54 ILO_GPE_GEN6_3DSTATE_VF_STATISTICS
, /* (0x1, 0x0, 0x0b) */
55 ILO_GPE_GEN6_PIPELINE_SELECT
, /* (0x1, 0x1, 0x04) */
56 ILO_GPE_GEN6_MEDIA_VFE_STATE
, /* (0x2, 0x0, 0x00) */
57 ILO_GPE_GEN6_MEDIA_CURBE_LOAD
, /* (0x2, 0x0, 0x01) */
58 ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD
, /* (0x2, 0x0, 0x02) */
59 ILO_GPE_GEN6_MEDIA_GATEWAY_STATE
, /* (0x2, 0x0, 0x03) */
60 ILO_GPE_GEN6_MEDIA_STATE_FLUSH
, /* (0x2, 0x0, 0x04) */
61 ILO_GPE_GEN6_MEDIA_OBJECT_WALKER
, /* (0x2, 0x1, 0x03) */
62 ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS
, /* (0x3, 0x0, 0x01) */
63 ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS
, /* (0x3, 0x0, 0x02) */
64 ILO_GPE_GEN6_3DSTATE_URB
, /* (0x3, 0x0, 0x05) */
65 ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS
, /* (0x3, 0x0, 0x08) */
66 ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS
, /* (0x3, 0x0, 0x09) */
67 ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER
, /* (0x3, 0x0, 0x0a) */
68 ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS
, /* (0x3, 0x0, 0x0d) */
69 ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS
, /* (0x3, 0x0, 0x0e) */
70 ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS
, /* (0x3, 0x0, 0x0f) */
71 ILO_GPE_GEN6_3DSTATE_VS
, /* (0x3, 0x0, 0x10) */
72 ILO_GPE_GEN6_3DSTATE_GS
, /* (0x3, 0x0, 0x11) */
73 ILO_GPE_GEN6_3DSTATE_CLIP
, /* (0x3, 0x0, 0x12) */
74 ILO_GPE_GEN6_3DSTATE_SF
, /* (0x3, 0x0, 0x13) */
75 ILO_GPE_GEN6_3DSTATE_WM
, /* (0x3, 0x0, 0x14) */
76 ILO_GPE_GEN6_3DSTATE_CONSTANT_VS
, /* (0x3, 0x0, 0x15) */
77 ILO_GPE_GEN6_3DSTATE_CONSTANT_GS
, /* (0x3, 0x0, 0x16) */
78 ILO_GPE_GEN6_3DSTATE_CONSTANT_PS
, /* (0x3, 0x0, 0x17) */
79 ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK
, /* (0x3, 0x0, 0x18) */
80 ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE
, /* (0x3, 0x1, 0x00) */
81 ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER
, /* (0x3, 0x1, 0x05) */
82 ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET
, /* (0x3, 0x1, 0x06) */
83 ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN
, /* (0x3, 0x1, 0x07) */
84 ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE
, /* (0x3, 0x1, 0x08) */
85 ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS
, /* (0x3, 0x1, 0x0a) */
86 ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX
, /* (0x3, 0x1, 0x0b) */
87 ILO_GPE_GEN6_3DSTATE_MULTISAMPLE
, /* (0x3, 0x1, 0x0d) */
88 ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER
, /* (0x3, 0x1, 0x0e) */
89 ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER
, /* (0x3, 0x1, 0x0f) */
90 ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS
, /* (0x3, 0x1, 0x10) */
91 ILO_GPE_GEN6_PIPE_CONTROL
, /* (0x3, 0x2, 0x00) */
92 ILO_GPE_GEN6_3DPRIMITIVE
, /* (0x3, 0x3, 0x00) */
94 ILO_GPE_GEN6_COMMAND_COUNT
,
98 * Indirect states that GEN6 GPE could emit.
100 enum ilo_gpe_gen6_state
{
101 ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA
,
102 ILO_GPE_GEN6_SF_VIEWPORT
,
103 ILO_GPE_GEN6_CLIP_VIEWPORT
,
104 ILO_GPE_GEN6_CC_VIEWPORT
,
105 ILO_GPE_GEN6_COLOR_CALC_STATE
,
106 ILO_GPE_GEN6_BLEND_STATE
,
107 ILO_GPE_GEN6_DEPTH_STENCIL_STATE
,
108 ILO_GPE_GEN6_SCISSOR_RECT
,
109 ILO_GPE_GEN6_BINDING_TABLE_STATE
,
110 ILO_GPE_GEN6_SURFACE_STATE
,
111 ILO_GPE_GEN6_SAMPLER_STATE
,
112 ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE
,
113 ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER
,
115 ILO_GPE_GEN6_STATE_COUNT
,
119 ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info
*dev
,
120 enum ilo_gpe_gen6_command cmd
,
124 ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info
*dev
,
125 enum ilo_gpe_gen6_state state
,
129 * Translate winsys tiling to hardware tiling.
132 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling
)
135 case INTEL_TILING_NONE
:
138 return BRW_SURFACE_TILED
;
140 return BRW_SURFACE_TILED
| BRW_SURFACE_TILED_Y
;
142 assert(!"unknown tiling");
148 * Translate a pipe primitive type to the matching hardware primitive type.
151 ilo_gpe_gen6_translate_pipe_prim(unsigned prim
)
153 static const int prim_mapping
[PIPE_PRIM_MAX
] = {
154 [PIPE_PRIM_POINTS
] = _3DPRIM_POINTLIST
,
155 [PIPE_PRIM_LINES
] = _3DPRIM_LINELIST
,
156 [PIPE_PRIM_LINE_LOOP
] = _3DPRIM_LINELOOP
,
157 [PIPE_PRIM_LINE_STRIP
] = _3DPRIM_LINESTRIP
,
158 [PIPE_PRIM_TRIANGLES
] = _3DPRIM_TRILIST
,
159 [PIPE_PRIM_TRIANGLE_STRIP
] = _3DPRIM_TRISTRIP
,
160 [PIPE_PRIM_TRIANGLE_FAN
] = _3DPRIM_TRIFAN
,
161 [PIPE_PRIM_QUADS
] = _3DPRIM_QUADLIST
,
162 [PIPE_PRIM_QUAD_STRIP
] = _3DPRIM_QUADSTRIP
,
163 [PIPE_PRIM_POLYGON
] = _3DPRIM_POLYGON
,
164 [PIPE_PRIM_LINES_ADJACENCY
] = _3DPRIM_LINELIST_ADJ
,
165 [PIPE_PRIM_LINE_STRIP_ADJACENCY
] = _3DPRIM_LINESTRIP_ADJ
,
166 [PIPE_PRIM_TRIANGLES_ADJACENCY
] = _3DPRIM_TRILIST_ADJ
,
167 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
] = _3DPRIM_TRISTRIP_ADJ
,
170 assert(prim_mapping
[prim
]);
172 return prim_mapping
[prim
];
176 * Translate a pipe texture target to the matching hardware surface type.
179 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target
)
183 return BRW_SURFACE_BUFFER
;
184 case PIPE_TEXTURE_1D
:
185 case PIPE_TEXTURE_1D_ARRAY
:
186 return BRW_SURFACE_1D
;
187 case PIPE_TEXTURE_2D
:
188 case PIPE_TEXTURE_RECT
:
189 case PIPE_TEXTURE_2D_ARRAY
:
190 return BRW_SURFACE_2D
;
191 case PIPE_TEXTURE_3D
:
192 return BRW_SURFACE_3D
;
193 case PIPE_TEXTURE_CUBE
:
194 case PIPE_TEXTURE_CUBE_ARRAY
:
195 return BRW_SURFACE_CUBE
;
197 assert(!"unknown texture target");
198 return BRW_SURFACE_BUFFER
;
203 * Fill in DW2 to DW7 of 3DSTATE_SF.
206 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info
*dev
,
207 const struct ilo_rasterizer_state
*rasterizer
,
209 enum pipe_format depth_format
,
210 uint32_t *payload
, unsigned payload_len
)
212 const struct ilo_rasterizer_sf
*sf
= &rasterizer
->sf
;
214 assert(payload_len
== Elements(sf
->payload
));
217 memcpy(payload
, sf
->payload
, sizeof(sf
->payload
));
220 payload
[1] |= sf
->dw_msaa
;
222 if (dev
->gen
>= ILO_GEN(7)) {
225 /* separate stencil */
226 switch (depth_format
) {
227 case PIPE_FORMAT_Z16_UNORM
:
228 format
= BRW_DEPTHFORMAT_D16_UNORM
;
230 case PIPE_FORMAT_Z32_FLOAT
:
231 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
232 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
234 case PIPE_FORMAT_Z24X8_UNORM
:
235 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
236 format
= BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
;
239 /* FLOAT surface is assumed when there is no depth buffer */
240 format
= BRW_DEPTHFORMAT_D32_FLOAT
;
244 payload
[0] |= format
<< GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT
;
249 payload
[1] = (num_samples
> 1) ? GEN6_SF_MSRAST_ON_PATTERN
: 0;
258 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
261 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info
*dev
,
262 const struct ilo_rasterizer_state
*rasterizer
,
263 const struct ilo_shader_state
*fs
,
264 const struct ilo_shader_state
*last_sh
,
265 uint32_t *dw
, int num_dwords
)
267 int output_count
, vue_offset
, vue_len
;
268 const struct ilo_kernel_routing
*routing
;
270 ILO_GPE_VALID_GEN(dev
, 6, 7);
271 assert(num_dwords
== 13);
274 memset(dw
, 0, sizeof(dw
[0]) * num_dwords
);
276 if (dev
->gen
>= ILO_GEN(7))
277 dw
[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
;
279 dw
[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
;
284 output_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
285 assert(output_count
<= 32);
287 routing
= ilo_shader_get_kernel_routing(fs
);
289 vue_offset
= routing
->source_skip
;
290 assert(vue_offset
% 2 == 0);
293 vue_len
= (routing
->source_len
+ 1) / 2;
297 if (dev
->gen
>= ILO_GEN(7)) {
298 dw
[0] = output_count
<< GEN7_SBE_NUM_OUTPUTS_SHIFT
|
299 vue_len
<< GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
|
300 vue_offset
<< GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT
;
301 if (routing
->swizzle_enable
)
302 dw
[0] |= GEN7_SBE_SWIZZLE_ENABLE
;
305 dw
[0] = output_count
<< GEN6_SF_NUM_OUTPUTS_SHIFT
|
306 vue_len
<< GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT
|
307 vue_offset
<< GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT
;
308 if (routing
->swizzle_enable
)
309 dw
[0] |= GEN6_SF_SWIZZLE_ENABLE
;
312 switch (rasterizer
->state
.sprite_coord_mode
) {
313 case PIPE_SPRITE_COORD_UPPER_LEFT
:
314 dw
[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT
;
316 case PIPE_SPRITE_COORD_LOWER_LEFT
:
317 dw
[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT
;
321 STATIC_ASSERT(Elements(routing
->swizzles
) >= 16);
322 memcpy(&dw
[1], routing
->swizzles
, 2 * 16);
325 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
327 * "This field (Point Sprite Texture Coordinate Enable) must be
328 * programmed to 0 when non-point primitives are rendered."
330 * TODO We do not check that yet.
332 dw
[9] = routing
->point_sprite_enable
;
334 dw
[10] = routing
->const_interp_enable
;
336 /* WrapShortest enables */
342 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info
*dev
,
343 struct intel_bo
*general_state_bo
,
344 struct intel_bo
*surface_state_bo
,
345 struct intel_bo
*dynamic_state_bo
,
346 struct intel_bo
*indirect_object_bo
,
347 struct intel_bo
*instruction_bo
,
348 uint32_t general_state_size
,
349 uint32_t dynamic_state_size
,
350 uint32_t indirect_object_size
,
351 uint32_t instruction_size
,
354 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x01);
355 const uint8_t cmd_len
= 10;
357 ILO_GPE_VALID_GEN(dev
, 6, 7);
359 /* 4K-page aligned */
360 assert(((general_state_size
| dynamic_state_size
|
361 indirect_object_size
| instruction_size
) & 0xfff) == 0);
363 ilo_cp_begin(cp
, cmd_len
);
364 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
366 ilo_cp_write_bo(cp
, 1, general_state_bo
,
369 ilo_cp_write_bo(cp
, 1, surface_state_bo
,
370 INTEL_DOMAIN_SAMPLER
,
372 ilo_cp_write_bo(cp
, 1, dynamic_state_bo
,
373 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
375 ilo_cp_write_bo(cp
, 1, indirect_object_bo
,
378 ilo_cp_write_bo(cp
, 1, instruction_bo
,
379 INTEL_DOMAIN_INSTRUCTION
,
382 if (general_state_size
) {
383 ilo_cp_write_bo(cp
, general_state_size
| 1, general_state_bo
,
388 /* skip range check */
392 if (dynamic_state_size
) {
393 ilo_cp_write_bo(cp
, dynamic_state_size
| 1, dynamic_state_bo
,
394 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
398 /* skip range check */
399 ilo_cp_write(cp
, 0xfffff000 + 1);
402 if (indirect_object_size
) {
403 ilo_cp_write_bo(cp
, indirect_object_size
| 1, indirect_object_bo
,
408 /* skip range check */
409 ilo_cp_write(cp
, 0xfffff000 + 1);
412 if (instruction_size
) {
413 ilo_cp_write_bo(cp
, instruction_size
| 1, instruction_bo
,
414 INTEL_DOMAIN_INSTRUCTION
,
418 /* skip range check */
426 gen6_emit_STATE_SIP(const struct ilo_dev_info
*dev
,
430 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x02);
431 const uint8_t cmd_len
= 2;
433 ILO_GPE_VALID_GEN(dev
, 6, 7);
435 ilo_cp_begin(cp
, cmd_len
| (cmd_len
- 2));
436 ilo_cp_write(cp
, cmd
);
437 ilo_cp_write(cp
, sip
);
442 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info
*dev
,
446 const uint32_t cmd
= ILO_GPE_CMD(0x1, 0x0, 0x0b);
447 const uint8_t cmd_len
= 1;
449 ILO_GPE_VALID_GEN(dev
, 6, 7);
451 ilo_cp_begin(cp
, cmd_len
);
452 ilo_cp_write(cp
, cmd
| enable
);
457 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info
*dev
,
461 const int cmd
= ILO_GPE_CMD(0x1, 0x1, 0x04);
462 const uint8_t cmd_len
= 1;
464 ILO_GPE_VALID_GEN(dev
, 6, 7);
467 assert(pipeline
== 0x0 || pipeline
== 0x1);
469 ilo_cp_begin(cp
, cmd_len
);
470 ilo_cp_write(cp
, cmd
| pipeline
);
475 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info
*dev
,
476 int max_threads
, int num_urb_entries
,
480 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x00);
481 const uint8_t cmd_len
= 8;
484 ILO_GPE_VALID_GEN(dev
, 6, 6);
486 dw2
= (max_threads
- 1) << 16 |
487 num_urb_entries
<< 8 |
488 1 << 7 | /* Reset Gateway Timer */
489 1 << 6; /* Bypass Gateway Control */
491 dw4
= urb_entry_size
<< 16 | /* URB Entry Allocation Size */
492 480; /* CURBE Allocation Size */
494 ilo_cp_begin(cp
, cmd_len
);
495 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
496 ilo_cp_write(cp
, 0); /* scratch */
497 ilo_cp_write(cp
, dw2
);
498 ilo_cp_write(cp
, 0); /* MBZ */
499 ilo_cp_write(cp
, dw4
);
500 ilo_cp_write(cp
, 0); /* scoreboard */
507 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info
*dev
,
508 uint32_t buf
, int size
,
511 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x01);
512 const uint8_t cmd_len
= 4;
514 ILO_GPE_VALID_GEN(dev
, 6, 6);
516 assert(buf
% 32 == 0);
517 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
518 size
= align(size
, 32);
520 ilo_cp_begin(cp
, cmd_len
);
521 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
522 ilo_cp_write(cp
, 0); /* MBZ */
523 ilo_cp_write(cp
, size
);
524 ilo_cp_write(cp
, buf
);
529 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info
*dev
,
530 uint32_t offset
, int num_ids
,
533 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x02);
534 const uint8_t cmd_len
= 4;
536 ILO_GPE_VALID_GEN(dev
, 6, 6);
538 assert(offset
% 32 == 0);
540 ilo_cp_begin(cp
, cmd_len
);
541 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
542 ilo_cp_write(cp
, 0); /* MBZ */
543 /* every ID has 8 DWords */
544 ilo_cp_write(cp
, num_ids
* 8 * 4);
545 ilo_cp_write(cp
, offset
);
550 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info
*dev
,
551 int id
, int byte
, int thread_count
,
554 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x03);
555 const uint8_t cmd_len
= 2;
558 ILO_GPE_VALID_GEN(dev
, 6, 6);
564 ilo_cp_begin(cp
, cmd_len
);
565 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
566 ilo_cp_write(cp
, dw1
);
571 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info
*dev
,
572 int thread_count_water_mark
,
576 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x04);
577 const uint8_t cmd_len
= 2;
580 ILO_GPE_VALID_GEN(dev
, 6, 6);
582 dw1
= thread_count_water_mark
<< 16 |
585 ilo_cp_begin(cp
, cmd_len
);
586 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
587 ilo_cp_write(cp
, dw1
);
592 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info
*dev
,
595 assert(!"MEDIA_OBJECT_WALKER unsupported");
599 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info
*dev
,
600 uint32_t vs_binding_table
,
601 uint32_t gs_binding_table
,
602 uint32_t ps_binding_table
,
605 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x01);
606 const uint8_t cmd_len
= 4;
608 ILO_GPE_VALID_GEN(dev
, 6, 6);
610 ilo_cp_begin(cp
, cmd_len
);
611 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
612 GEN6_BINDING_TABLE_MODIFY_VS
|
613 GEN6_BINDING_TABLE_MODIFY_GS
|
614 GEN6_BINDING_TABLE_MODIFY_PS
);
615 ilo_cp_write(cp
, vs_binding_table
);
616 ilo_cp_write(cp
, gs_binding_table
);
617 ilo_cp_write(cp
, ps_binding_table
);
622 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info
*dev
,
623 uint32_t vs_sampler_state
,
624 uint32_t gs_sampler_state
,
625 uint32_t ps_sampler_state
,
628 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x02);
629 const uint8_t cmd_len
= 4;
631 ILO_GPE_VALID_GEN(dev
, 6, 6);
633 ilo_cp_begin(cp
, cmd_len
);
634 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
635 VS_SAMPLER_STATE_CHANGE
|
636 GS_SAMPLER_STATE_CHANGE
|
637 PS_SAMPLER_STATE_CHANGE
);
638 ilo_cp_write(cp
, vs_sampler_state
);
639 ilo_cp_write(cp
, gs_sampler_state
);
640 ilo_cp_write(cp
, ps_sampler_state
);
645 gen6_emit_3DSTATE_URB(const struct ilo_dev_info
*dev
,
646 int vs_total_size
, int gs_total_size
,
647 int vs_entry_size
, int gs_entry_size
,
650 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x05);
651 const uint8_t cmd_len
= 3;
652 const int row_size
= 128; /* 1024 bits */
653 int vs_alloc_size
, gs_alloc_size
;
654 int vs_num_entries
, gs_num_entries
;
656 ILO_GPE_VALID_GEN(dev
, 6, 6);
658 /* in 1024-bit URB rows */
659 vs_alloc_size
= (vs_entry_size
+ row_size
- 1) / row_size
;
660 gs_alloc_size
= (gs_entry_size
+ row_size
- 1) / row_size
;
662 /* the valid range is [1, 5] */
667 assert(vs_alloc_size
<= 5 && gs_alloc_size
<= 5);
669 /* the valid range is [24, 256] in multiples of 4 */
670 vs_num_entries
= (vs_total_size
/ row_size
/ vs_alloc_size
) & ~3;
671 if (vs_num_entries
> 256)
672 vs_num_entries
= 256;
673 assert(vs_num_entries
>= 24);
675 /* the valid range is [0, 256] in multiples of 4 */
676 gs_num_entries
= (gs_total_size
/ row_size
/ gs_alloc_size
) & ~3;
677 if (gs_num_entries
> 256)
678 gs_num_entries
= 256;
680 ilo_cp_begin(cp
, cmd_len
);
681 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
682 ilo_cp_write(cp
, (vs_alloc_size
- 1) << GEN6_URB_VS_SIZE_SHIFT
|
683 vs_num_entries
<< GEN6_URB_VS_ENTRIES_SHIFT
);
684 ilo_cp_write(cp
, gs_num_entries
<< GEN6_URB_GS_ENTRIES_SHIFT
|
685 (gs_alloc_size
- 1) << GEN6_URB_GS_SIZE_SHIFT
);
690 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info
*dev
,
691 const struct ilo_ve_state
*ve
,
692 const struct ilo_vb_state
*vb
,
695 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x08);
699 ILO_GPE_VALID_GEN(dev
, 6, 7);
702 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
704 * "From 1 to 33 VBs can be specified..."
706 assert(ve
->vb_count
<= 33);
711 cmd_len
= 1 + 4 * ve
->vb_count
;
713 ilo_cp_begin(cp
, cmd_len
);
714 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
716 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
717 const unsigned instance_divisor
= ve
->instance_divisors
[hw_idx
];
718 const unsigned pipe_idx
= ve
->vb_mapping
[hw_idx
];
719 const struct pipe_vertex_buffer
*cso
= &vb
->states
[pipe_idx
];
722 dw
= hw_idx
<< GEN6_VB0_INDEX_SHIFT
;
724 if (instance_divisor
)
725 dw
|= GEN6_VB0_ACCESS_INSTANCEDATA
;
727 dw
|= GEN6_VB0_ACCESS_VERTEXDATA
;
729 if (dev
->gen
>= ILO_GEN(7))
730 dw
|= GEN7_VB0_ADDRESS_MODIFYENABLE
;
732 /* use null vb if there is no buffer or the stride is out of range */
733 if (cso
->buffer
&& cso
->stride
<= 2048) {
734 const struct ilo_buffer
*buf
= ilo_buffer(cso
->buffer
);
735 const uint32_t start_offset
= cso
->buffer_offset
;
737 * As noted in ilo_translate_format(), we treat some 3-component
738 * formats as 4-component formats to work around hardware
739 * limitations. Imagine the case where the vertex buffer holds a
740 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
741 * The hardware would not be able to fetch it because the vertex
742 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
743 * and that takes at least 8 bytes.
745 * For the workaround to work, we query the physical size, which is
746 * page aligned, to calculate end_offset so that the last vertex has
747 * a better chance to be fetched.
749 const uint32_t end_offset
= intel_bo_get_size(buf
->bo
) - 1;
751 dw
|= cso
->stride
<< BRW_VB0_PITCH_SHIFT
;
753 ilo_cp_write(cp
, dw
);
754 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
755 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
756 ilo_cp_write(cp
, instance_divisor
);
761 ilo_cp_write(cp
, dw
);
764 ilo_cp_write(cp
, instance_divisor
);
772 ve_init_cso_with_components(const struct ilo_dev_info
*dev
,
773 int comp0
, int comp1
, int comp2
, int comp3
,
774 struct ilo_ve_cso
*cso
)
776 ILO_GPE_VALID_GEN(dev
, 6, 7);
778 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
779 cso
->payload
[0] = GEN6_VE0_VALID
;
781 comp0
<< BRW_VE1_COMPONENT_0_SHIFT
|
782 comp1
<< BRW_VE1_COMPONENT_1_SHIFT
|
783 comp2
<< BRW_VE1_COMPONENT_2_SHIFT
|
784 comp3
<< BRW_VE1_COMPONENT_3_SHIFT
;
788 ve_set_cso_edgeflag(const struct ilo_dev_info
*dev
,
789 struct ilo_ve_cso
*cso
)
793 ILO_GPE_VALID_GEN(dev
, 6, 7);
796 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
798 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
799 * valid VERTEX_ELEMENT structure.
801 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
802 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
804 * - The Source Element Format must be set to the UINT format.
806 * - [DevSNB]: Edge Flags are not supported for QUADLIST
807 * primitives. Software may elect to convert QUADLIST primitives
808 * to some set of corresponding edge-flag-supported primitive
809 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
812 cso
->payload
[0] |= GEN6_VE0_EDGE_FLAG_ENABLE
;
814 BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_0_SHIFT
|
815 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_1_SHIFT
|
816 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_2_SHIFT
|
817 BRW_VE1_COMPONENT_NOSTORE
<< BRW_VE1_COMPONENT_3_SHIFT
;
820 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
821 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
822 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
824 * Since all the hardware cares about is whether the flags are zero or not,
825 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
827 format
= (cso
->payload
[0] >> BRW_VE0_FORMAT_SHIFT
) & 0x1ff;
828 if (format
== BRW_SURFACEFORMAT_R32_FLOAT
) {
829 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT
==
830 BRW_SURFACEFORMAT_R32_FLOAT
- 1);
832 cso
->payload
[0] -= (1 << BRW_VE0_FORMAT_SHIFT
);
835 assert(format
== BRW_SURFACEFORMAT_R8_UINT
);
840 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info
*dev
,
841 const struct ilo_ve_state
*ve
,
842 bool last_velement_edgeflag
,
843 bool prepend_generated_ids
,
846 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x09);
850 ILO_GPE_VALID_GEN(dev
, 6, 7);
853 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
855 * "Up to 34 (DevSNB+) vertex elements are supported."
857 assert(ve
->count
+ prepend_generated_ids
<= 34);
859 if (!ve
->count
&& !prepend_generated_ids
) {
860 struct ilo_ve_cso dummy
;
862 ve_init_cso_with_components(dev
,
863 BRW_VE1_COMPONENT_STORE_0
,
864 BRW_VE1_COMPONENT_STORE_0
,
865 BRW_VE1_COMPONENT_STORE_0
,
866 BRW_VE1_COMPONENT_STORE_1_FLT
,
870 ilo_cp_begin(cp
, cmd_len
);
871 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
872 ilo_cp_write_multi(cp
, dummy
.payload
, 2);
878 cmd_len
= 2 * (ve
->count
+ prepend_generated_ids
) + 1;
880 ilo_cp_begin(cp
, cmd_len
);
881 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
883 if (prepend_generated_ids
) {
884 struct ilo_ve_cso gen_ids
;
886 ve_init_cso_with_components(dev
,
887 BRW_VE1_COMPONENT_STORE_VID
,
888 BRW_VE1_COMPONENT_STORE_IID
,
889 BRW_VE1_COMPONENT_NOSTORE
,
890 BRW_VE1_COMPONENT_NOSTORE
,
893 ilo_cp_write_multi(cp
, gen_ids
.payload
, 2);
896 if (last_velement_edgeflag
) {
897 struct ilo_ve_cso edgeflag
;
899 for (i
= 0; i
< ve
->count
- 1; i
++)
900 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
902 edgeflag
= ve
->cso
[i
];
903 ve_set_cso_edgeflag(dev
, &edgeflag
);
904 ilo_cp_write_multi(cp
, edgeflag
.payload
, 2);
907 for (i
= 0; i
< ve
->count
; i
++)
908 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
915 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info
*dev
,
916 const struct ilo_ib_state
*ib
,
917 bool enable_cut_index
,
920 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0a);
921 const uint8_t cmd_len
= 3;
922 struct ilo_buffer
*buf
= ilo_buffer(ib
->hw_resource
);
923 uint32_t start_offset
, end_offset
;
926 ILO_GPE_VALID_GEN(dev
, 6, 7);
931 switch (ib
->hw_index_size
) {
933 format
= BRW_INDEX_DWORD
;
936 format
= BRW_INDEX_WORD
;
939 format
= BRW_INDEX_BYTE
;
942 assert(!"unknown index size");
943 format
= BRW_INDEX_BYTE
;
948 * set start_offset to 0 here and adjust pipe_draw_info::start with
949 * ib->draw_start_offset in 3DPRIMITIVE
952 end_offset
= buf
->bo_size
;
954 /* end_offset must also be aligned and is inclusive */
955 end_offset
-= (end_offset
% ib
->hw_index_size
);
958 ilo_cp_begin(cp
, cmd_len
);
959 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
960 ((enable_cut_index
) ? BRW_CUT_INDEX_ENABLE
: 0) |
962 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
963 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
968 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info
*dev
,
969 uint32_t clip_viewport
,
970 uint32_t sf_viewport
,
971 uint32_t cc_viewport
,
974 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0d);
975 const uint8_t cmd_len
= 4;
977 ILO_GPE_VALID_GEN(dev
, 6, 6);
979 ilo_cp_begin(cp
, cmd_len
);
980 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
981 GEN6_CLIP_VIEWPORT_MODIFY
|
982 GEN6_SF_VIEWPORT_MODIFY
|
983 GEN6_CC_VIEWPORT_MODIFY
);
984 ilo_cp_write(cp
, clip_viewport
);
985 ilo_cp_write(cp
, sf_viewport
);
986 ilo_cp_write(cp
, cc_viewport
);
991 gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info
*dev
,
992 uint32_t blend_state
,
993 uint32_t depth_stencil_state
,
994 uint32_t color_calc_state
,
997 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0e);
998 const uint8_t cmd_len
= 4;
1000 ILO_GPE_VALID_GEN(dev
, 6, 6);
1002 ilo_cp_begin(cp
, cmd_len
);
1003 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1004 ilo_cp_write(cp
, blend_state
| 1);
1005 ilo_cp_write(cp
, depth_stencil_state
| 1);
1006 ilo_cp_write(cp
, color_calc_state
| 1);
1011 gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info
*dev
,
1012 uint32_t scissor_rect
,
1015 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0f);
1016 const uint8_t cmd_len
= 2;
1018 ILO_GPE_VALID_GEN(dev
, 6, 7);
1020 ilo_cp_begin(cp
, cmd_len
);
1021 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1022 ilo_cp_write(cp
, scissor_rect
);
1027 gen6_emit_3DSTATE_VS(const struct ilo_dev_info
*dev
,
1028 const struct ilo_shader_state
*vs
,
1032 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x10);
1033 const uint8_t cmd_len
= 6;
1034 const struct ilo_shader_cso
*cso
;
1035 uint32_t dw2
, dw4
, dw5
;
1037 ILO_GPE_VALID_GEN(dev
, 6, 7);
1040 ilo_cp_begin(cp
, cmd_len
);
1041 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1042 ilo_cp_write(cp
, 0);
1043 ilo_cp_write(cp
, 0);
1044 ilo_cp_write(cp
, 0);
1045 ilo_cp_write(cp
, 0);
1046 ilo_cp_write(cp
, 0);
1051 cso
= ilo_shader_get_kernel_cso(vs
);
1052 dw2
= cso
->payload
[0];
1053 dw4
= cso
->payload
[1];
1054 dw5
= cso
->payload
[2];
1056 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT
;
1058 ilo_cp_begin(cp
, cmd_len
);
1059 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1060 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(vs
));
1061 ilo_cp_write(cp
, dw2
);
1062 ilo_cp_write(cp
, 0); /* scratch */
1063 ilo_cp_write(cp
, dw4
);
1064 ilo_cp_write(cp
, dw5
);
1069 gen6_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
1070 const struct ilo_shader_state
*gs
,
1071 const struct ilo_shader_state
*vs
,
1075 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x11);
1076 const uint8_t cmd_len
= 7;
1077 uint32_t dw1
, dw2
, dw4
, dw5
, dw6
;
1079 ILO_GPE_VALID_GEN(dev
, 6, 6);
1082 const struct ilo_shader_cso
*cso
;
1084 dw1
= ilo_shader_get_kernel_offset(gs
);
1086 cso
= ilo_shader_get_kernel_cso(gs
);
1087 dw2
= cso
->payload
[0];
1088 dw4
= cso
->payload
[1];
1089 dw5
= cso
->payload
[2];
1090 dw6
= cso
->payload
[3];
1092 else if (vs
&& ilo_shader_get_kernel_param(vs
, ILO_KERNEL_VS_GEN6_SO
)) {
1093 struct ilo_shader_cso cso
;
1094 enum ilo_kernel_param param
;
1096 switch (verts_per_prim
) {
1098 param
= ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
;
1101 param
= ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
;
1104 param
= ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
;
1108 dw1
= ilo_shader_get_kernel_offset(vs
) +
1109 ilo_shader_get_kernel_param(vs
, param
);
1111 /* cannot use VS's CSO */
1112 ilo_gpe_init_gs_cso_gen6(dev
, vs
, &cso
);
1113 dw2
= cso
.payload
[0];
1114 dw4
= cso
.payload
[1];
1115 dw5
= cso
.payload
[2];
1116 dw6
= cso
.payload
[3];
1121 dw4
= 1 << GEN6_GS_URB_READ_LENGTH_SHIFT
;
1122 dw5
= GEN6_GS_STATISTICS_ENABLE
;
1126 ilo_cp_begin(cp
, cmd_len
);
1127 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1128 ilo_cp_write(cp
, dw1
);
1129 ilo_cp_write(cp
, dw2
);
1130 ilo_cp_write(cp
, 0);
1131 ilo_cp_write(cp
, dw4
);
1132 ilo_cp_write(cp
, dw5
);
1133 ilo_cp_write(cp
, dw6
);
1138 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info
*dev
,
1139 const struct ilo_rasterizer_state
*rasterizer
,
1140 const struct ilo_shader_state
*fs
,
1141 bool enable_guardband
,
1145 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x12);
1146 const uint8_t cmd_len
= 4;
1147 uint32_t dw1
, dw2
, dw3
;
1152 dw1
= rasterizer
->clip
.payload
[0];
1153 dw2
= rasterizer
->clip
.payload
[1];
1154 dw3
= rasterizer
->clip
.payload
[2];
1156 if (enable_guardband
&& rasterizer
->clip
.can_enable_guardband
)
1157 dw2
|= GEN6_CLIP_GB_TEST
;
1159 interps
= (fs
) ? ilo_shader_get_kernel_param(fs
,
1160 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) : 0;
1162 if (interps
& (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC
|
1163 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC
|
1164 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC
))
1165 dw2
|= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE
;
1167 dw3
|= GEN6_CLIP_FORCE_ZERO_RTAINDEX
|
1168 (num_viewports
- 1);
1176 ilo_cp_begin(cp
, cmd_len
);
1177 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1178 ilo_cp_write(cp
, dw1
);
1179 ilo_cp_write(cp
, dw2
);
1180 ilo_cp_write(cp
, dw3
);
1185 gen6_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
1186 const struct ilo_rasterizer_state
*rasterizer
,
1187 const struct ilo_shader_state
*fs
,
1188 const struct ilo_shader_state
*last_sh
,
1191 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x13);
1192 const uint8_t cmd_len
= 20;
1193 uint32_t payload_raster
[6], payload_sbe
[13];
1195 ILO_GPE_VALID_GEN(dev
, 6, 6);
1197 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
, rasterizer
,
1198 1, PIPE_FORMAT_NONE
, payload_raster
, Elements(payload_raster
));
1199 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
,
1200 fs
, last_sh
, payload_sbe
, Elements(payload_sbe
));
1202 ilo_cp_begin(cp
, cmd_len
);
1203 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1204 ilo_cp_write(cp
, payload_sbe
[0]);
1205 ilo_cp_write_multi(cp
, payload_raster
, 6);
1206 ilo_cp_write_multi(cp
, &payload_sbe
[1], 12);
1211 gen6_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
1212 const struct ilo_shader_state
*fs
,
1214 const struct ilo_rasterizer_state
*rasterizer
,
1215 bool dual_blend
, bool cc_may_kill
,
1218 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x14);
1219 const uint8_t cmd_len
= 9;
1220 const int num_samples
= 1;
1221 const struct ilo_shader_cso
*fs_cso
;
1222 uint32_t dw2
, dw4
, dw5
, dw6
;
1224 ILO_GPE_VALID_GEN(dev
, 6, 6);
1227 /* see brwCreateContext() */
1228 const int max_threads
= (dev
->gt
== 2) ? 80 : 40;
1230 ilo_cp_begin(cp
, cmd_len
);
1231 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1232 ilo_cp_write(cp
, 0);
1233 ilo_cp_write(cp
, 0);
1234 ilo_cp_write(cp
, 0);
1235 ilo_cp_write(cp
, 0);
1236 /* honor the valid range even if dispatching is disabled */
1237 ilo_cp_write(cp
, (max_threads
- 1) << GEN6_WM_MAX_THREADS_SHIFT
);
1238 ilo_cp_write(cp
, 0);
1239 ilo_cp_write(cp
, 0);
1240 ilo_cp_write(cp
, 0);
1246 fs_cso
= ilo_shader_get_kernel_cso(fs
);
1247 dw2
= fs_cso
->payload
[0];
1248 dw4
= fs_cso
->payload
[1];
1249 dw5
= fs_cso
->payload
[2];
1250 dw6
= fs_cso
->payload
[3];
1252 dw2
|= (num_samplers
+ 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT
;
1255 dw4
|= GEN6_WM_STATISTICS_ENABLE
;
1259 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1261 * "This bit (Statistics Enable) must be disabled if either of these
1262 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1263 * Resolve Enable or Depth Buffer Resolve Enable."
1265 dw4
|= GEN6_WM_DEPTH_CLEAR
;
1266 dw4
|= GEN6_WM_DEPTH_RESOLVE
;
1267 dw4
|= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE
;
1271 dw5
|= GEN6_WM_KILL_ENABLE
|
1272 GEN6_WM_DISPATCH_ENABLE
;
1276 dw5
|= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE
;
1278 dw5
|= rasterizer
->wm
.payload
[0];
1280 dw6
|= rasterizer
->wm
.payload
[1];
1282 if (num_samples
> 1) {
1283 dw6
|= rasterizer
->wm
.dw_msaa_rast
|
1284 rasterizer
->wm
.dw_msaa_disp
;
1287 ilo_cp_begin(cp
, cmd_len
);
1288 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1289 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
1290 ilo_cp_write(cp
, dw2
);
1291 ilo_cp_write(cp
, 0); /* scratch */
1292 ilo_cp_write(cp
, dw4
);
1293 ilo_cp_write(cp
, dw5
);
1294 ilo_cp_write(cp
, dw6
);
1295 ilo_cp_write(cp
, 0); /* kernel 1 */
1296 ilo_cp_write(cp
, 0); /* kernel 2 */
/**
 * Build the four buffer dwords shared by the 3DSTATE_CONSTANT_* commands.
 *
 * For each of the four slots, a buffer is enabled when its index is below
 * \p num_bufs and its size is non-zero.  An enabled slot stores the buffer
 * address OR'ed with the read length (in 256-bit units, minus one); a
 * disabled slot is zeroed so that all four dwords can always be emitted.
 *
 * \param dev              device info (unused here)
 * \param bufs             buffer addresses, each 32-byte aligned
 * \param sizes            buffer sizes in bytes
 * \param num_bufs         number of valid entries in \p bufs and \p sizes
 * \param max_read_length  upper bound on the summed read lengths
 * \param dw               output array
 * \param num_dwords       length of \p dw; must be 4
 * \return bitmask with bit i set when slot i is enabled
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enabled = 0x0;
   int total_read_length, i;

   assert(num_dwords == 4);

   total_read_length = 0;
   for (i = 0; i < 4; i++) {
      if (i < num_bufs && sizes[i]) {
         /* in 256-bit units minus one */
         const int read_len = (sizes[i] + 31) / 32 - 1;

         assert(bufs[i] % 32 == 0);
         assert(read_len < 32);

         enabled |= 1u << i;
         dw[i] = bufs[i] | read_len;

         total_read_length += read_len + 1;
      }
      else {
         dw[i] = 0;
      }
   }

   assert(total_read_length <= max_read_length);

   return enabled;
}
/**
 * Emit 3DSTATE_CONSTANT_VS (GEN6 only).
 *
 * \param dev       device info
 * \param bufs      addresses of up to four constant buffers
 * \param sizes     sizes of the buffers in bytes
 * \param num_bufs  number of buffers, at most 4
 * \param cp        command parser the dwords are written to
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the per-buffer enable mask lives at bits 15:12 of the header */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_CONSTANT_GS (GEN6 only).
 *
 * \param dev       device info
 * \param bufs      addresses of up to four constant buffers
 * \param sizes     sizes of the buffers in bytes
 * \param num_bufs  number of buffers, at most 4
 * \param cp        command parser the dwords are written to
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the per-buffer enable mask lives at bits 15:12 of the header */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_CONSTANT_PS (GEN6 only).
 *
 * \param dev       device info
 * \param bufs      addresses of up to four constant buffers
 * \param sizes     sizes of the buffers in bytes
 * \param num_bufs  number of buffers, at most 4
 * \param cp        command parser the dwords are written to
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   /* the per-buffer enable mask lives at bits 15:12 of the header */
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_SAMPLE_MASK (GEN6 only).
 *
 * \param dev          device info
 * \param sample_mask  sample mask; only the low four bits are kept
 * \param cp           command parser the dwords are written to
 */
static inline void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = 0xf;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   sample_mask &= valid_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sample_mask);
   ilo_cp_end(cp);
}
1448 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info
*dev
,
1449 unsigned x
, unsigned y
,
1450 unsigned width
, unsigned height
,
1453 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x00);
1454 const uint8_t cmd_len
= 4;
1455 unsigned xmax
= x
+ width
- 1;
1456 unsigned ymax
= y
+ height
- 1;
1459 ILO_GPE_VALID_GEN(dev
, 6, 7);
1461 if (dev
->gen
>= ILO_GEN(7)) {
1466 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1468 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1469 * must be an even number"
1476 if (x
> rect_limit
) x
= rect_limit
;
1477 if (y
> rect_limit
) y
= rect_limit
;
1478 if (xmax
> rect_limit
) xmax
= rect_limit
;
1479 if (ymax
> rect_limit
) ymax
= rect_limit
;
1481 ilo_cp_begin(cp
, cmd_len
);
1482 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1483 ilo_cp_write(cp
, y
<< 16 | x
);
1484 ilo_cp_write(cp
, ymax
<< 16 | xmax
);
1487 * There is no need to set the origin. It is intended to support front
1490 ilo_cp_write(cp
, 0);
1496 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1497 const struct ilo_zs_surface
*zs
,
1500 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1501 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1502 const uint8_t cmd_len
= 7;
1504 ILO_GPE_VALID_GEN(dev
, 6, 7);
1506 ilo_cp_begin(cp
, cmd_len
);
1507 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1508 ilo_cp_write(cp
, zs
->payload
[0]);
1509 ilo_cp_write_bo(cp
, zs
->payload
[1], zs
->bo
,
1510 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1511 ilo_cp_write(cp
, zs
->payload
[2]);
1512 ilo_cp_write(cp
, zs
->payload
[3]);
1513 ilo_cp_write(cp
, zs
->payload
[4]);
1514 ilo_cp_write(cp
, zs
->payload
[5]);
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (GEN6 and GEN7).
 *
 * \param dev       device info
 * \param x_offset  horizontal stipple offset, in [0, 31]
 * \param y_offset  vertical stipple offset, in [0, 31]
 * \param cp        command parser the dwords are written to
 */
static inline void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, x_offset << 8 | y_offset);
   ilo_cp_end(cp);
}
1537 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info
*dev
,
1538 const struct pipe_poly_stipple
*pattern
,
1541 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x07);
1542 const uint8_t cmd_len
= 33;
1545 ILO_GPE_VALID_GEN(dev
, 6, 7);
1546 assert(Elements(pattern
->stipple
) == 32);
1548 ilo_cp_begin(cp
, cmd_len
);
1549 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1550 for (i
= 0; i
< 32; i
++)
1551 ilo_cp_write(cp
, pattern
->stipple
[i
]);
1556 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info
*dev
,
1557 unsigned pattern
, unsigned factor
,
1560 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x08);
1561 const uint8_t cmd_len
= 3;
1564 ILO_GPE_VALID_GEN(dev
, 6, 7);
1565 assert((pattern
& 0xffff) == pattern
);
1566 assert(factor
>= 1 && factor
<= 256);
1568 ilo_cp_begin(cp
, cmd_len
);
1569 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1570 ilo_cp_write(cp
, pattern
);
1572 if (dev
->gen
>= ILO_GEN(7)) {
1574 inverse
= (unsigned) (65536.0f
/ factor
);
1575 ilo_cp_write(cp
, inverse
<< 15 | factor
);
1579 inverse
= (unsigned) (8192.0f
/ factor
);
1580 ilo_cp_write(cp
, inverse
<< 16 | factor
);
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS (GEN6 and GEN7) with both parameter
 * dwords set to zero.
 *
 * \param dev  device info
 * \param cp   command parser the dwords are written to
 */
static inline void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
   const uint8_t cmd_len = 3;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_end(cp);
}
1603 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info
*dev
,
1604 int index
, unsigned svbi
,
1606 bool load_vertex_count
,
1609 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0b);
1610 const uint8_t cmd_len
= 4;
1613 ILO_GPE_VALID_GEN(dev
, 6, 6);
1614 assert(index
>= 0 && index
< 4);
1616 dw1
= index
<< SVB_INDEX_SHIFT
;
1617 if (load_vertex_count
)
1618 dw1
|= SVB_LOAD_INTERNAL_VERTEX_COUNT
;
1620 ilo_cp_begin(cp
, cmd_len
);
1621 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1622 ilo_cp_write(cp
, dw1
);
1623 ilo_cp_write(cp
, svbi
);
1624 ilo_cp_write(cp
, max_svbi
);
1629 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info
*dev
,
1631 const uint32_t *packed_sample_pos
,
1632 bool pixel_location_center
,
1635 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0d);
1636 const uint8_t cmd_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 3;
1637 uint32_t dw1
, dw2
, dw3
;
1639 ILO_GPE_VALID_GEN(dev
, 6, 7);
1641 dw1
= (pixel_location_center
) ?
1642 MS_PIXEL_LOCATION_CENTER
: MS_PIXEL_LOCATION_UPPER_LEFT
;
1644 switch (num_samples
) {
1647 dw1
|= MS_NUMSAMPLES_1
;
1652 dw1
|= MS_NUMSAMPLES_4
;
1653 dw2
= packed_sample_pos
[0];
1657 assert(dev
->gen
>= ILO_GEN(7));
1658 dw1
|= MS_NUMSAMPLES_8
;
1659 dw2
= packed_sample_pos
[0];
1660 dw3
= packed_sample_pos
[1];
1663 assert(!"unsupported sample count");
1664 dw1
|= MS_NUMSAMPLES_1
;
1670 ilo_cp_begin(cp
, cmd_len
);
1671 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1672 ilo_cp_write(cp
, dw1
);
1673 ilo_cp_write(cp
, dw2
);
1674 if (dev
->gen
>= ILO_GEN(7))
1675 ilo_cp_write(cp
, dw3
);
1680 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info
*dev
,
1681 const struct ilo_zs_surface
*zs
,
1684 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1685 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1686 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1687 const uint8_t cmd_len
= 3;
1689 ILO_GPE_VALID_GEN(dev
, 6, 7);
1691 ilo_cp_begin(cp
, cmd_len
);
1692 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1693 /* see ilo_gpe_init_zs_surface() */
1694 ilo_cp_write(cp
, zs
->payload
[6]);
1695 ilo_cp_write_bo(cp
, zs
->payload
[7], zs
->separate_s8_bo
,
1696 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1701 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1702 const struct ilo_zs_surface
*zs
,
1705 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1706 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1707 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1708 const uint8_t cmd_len
= 3;
1710 ILO_GPE_VALID_GEN(dev
, 6, 7);
1712 ilo_cp_begin(cp
, cmd_len
);
1713 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1714 /* see ilo_gpe_init_zs_surface() */
1715 ilo_cp_write(cp
, zs
->payload
[8]);
1716 ilo_cp_write_bo(cp
, zs
->payload
[9], zs
->hiz_bo
,
1717 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1722 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
1726 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x10);
1727 const uint8_t cmd_len
= 2;
1729 ILO_GPE_VALID_GEN(dev
, 6, 6);
1731 ilo_cp_begin(cp
, cmd_len
);
1732 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1733 GEN5_DEPTH_CLEAR_VALID
);
1734 ilo_cp_write(cp
, clear_val
);
1739 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info
*dev
,
1741 struct intel_bo
*bo
, uint32_t bo_offset
,
1745 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x2, 0x00);
1746 const uint8_t cmd_len
= (write_qword
) ? 5 : 4;
1747 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
1748 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
1750 ILO_GPE_VALID_GEN(dev
, 6, 7);
1752 if (dw1
& PIPE_CONTROL_CS_STALL
) {
1754 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1756 * "1 of the following must also be set (when CS stall is set):
1758 * * Depth Cache Flush Enable ([0] of DW1)
1759 * * Stall at Pixel Scoreboard ([1] of DW1)
1760 * * Depth Stall ([13] of DW1)
1761 * * Post-Sync Operation ([13] of DW1)
1762 * * Render Target Cache Flush Enable ([12] of DW1)
1763 * * Notify Enable ([8] of DW1)"
1765 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1767 * "One of the following must also be set (when CS stall is set):
1769 * * Render Target Cache Flush Enable ([12] of DW1)
1770 * * Depth Cache Flush Enable ([0] of DW1)
1771 * * Stall at Pixel Scoreboard ([1] of DW1)
1772 * * Depth Stall ([13] of DW1)
1773 * * Post-Sync Operation ([13] of DW1)"
1775 uint32_t bit_test
= PIPE_CONTROL_WRITE_FLUSH
|
1776 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1777 PIPE_CONTROL_STALL_AT_SCOREBOARD
|
1778 PIPE_CONTROL_DEPTH_STALL
;
1781 bit_test
|= PIPE_CONTROL_WRITE_IMMEDIATE
|
1782 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
1783 PIPE_CONTROL_WRITE_TIMESTAMP
;
1785 if (dev
->gen
== ILO_GEN(6))
1786 bit_test
|= PIPE_CONTROL_INTERRUPT_ENABLE
;
1788 assert(dw1
& bit_test
);
1791 if (dw1
& PIPE_CONTROL_DEPTH_STALL
) {
1793 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1795 * "Following bits must be clear (when Depth Stall is set):
1797 * * Render Target Cache Flush Enable ([12] of DW1)
1798 * * Depth Cache Flush Enable ([0] of DW1)"
1800 assert(!(dw1
& (PIPE_CONTROL_WRITE_FLUSH
|
1801 PIPE_CONTROL_DEPTH_CACHE_FLUSH
)));
1804 ilo_cp_begin(cp
, cmd_len
);
1805 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1806 ilo_cp_write(cp
, dw1
);
1807 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
1808 ilo_cp_write(cp
, 0);
1810 ilo_cp_write(cp
, 0);
1815 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
1816 const struct pipe_draw_info
*info
,
1817 const struct ilo_ib_state
*ib
,
1821 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x3, 0x00);
1822 const uint8_t cmd_len
= 6;
1823 const int prim
= (rectlist
) ?
1824 _3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
1825 const int vb_access
= (info
->indexed
) ?
1826 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM
:
1827 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL
;
1828 const uint32_t vb_start
= info
->start
+
1829 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
1831 ILO_GPE_VALID_GEN(dev
, 6, 6);
1833 ilo_cp_begin(cp
, cmd_len
);
1834 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1835 prim
<< GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT
|
1837 ilo_cp_write(cp
, info
->count
);
1838 ilo_cp_write(cp
, vb_start
);
1839 ilo_cp_write(cp
, info
->instance_count
);
1840 ilo_cp_write(cp
, info
->start_instance
);
1841 ilo_cp_write(cp
, info
->index_bias
);
/**
 * Write an array of INTERFACE_DESCRIPTOR_DATA (GEN6 only), eight dwords per
 * descriptor.
 *
 * \param dev                  device info
 * \param cs                   compute shader states, one per descriptor
 * \param sampler_state        per-descriptor sampler state offsets
 * \param num_samplers         per-descriptor sampler counts
 * \param binding_table_state  per-descriptor binding table offsets
 * \param num_surfaces         per-descriptor binding table entry counts
 * \param num_ids              number of descriptors
 * \param cp                   command parser the state is stolen from
 * \return offset of the state as reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32 / 4;
   const int state_len = (32 / 4) * num_ids;
   uint32_t state_offset, *dw;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
         state_len, state_align, &state_offset);

   for (i = 0; i < num_ids; i++) {
      dw[0] = ilo_shader_get_kernel_offset(cs[i]);
      dw[1] = 1 << 18; /* SPF */
      dw[2] = sampler_state[i] |
              (num_samplers[i] + 3) / 4 << 2;
      dw[3] = binding_table_state[i] |
              num_surfaces[i];
      dw[4] = 0 << 16 |  /* CURBE Read Length */
              0;         /* CURBE Read Offset */
      dw[5] = 0; /* Barrier ID */
      dw[6] = 0;
      dw[7] = 0;

      dw += 8;
   }

   return state_offset;
}
1897 static inline uint32_t
1898 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info
*dev
,
1899 const struct ilo_viewport_cso
*viewports
,
1900 unsigned num_viewports
,
1903 const int state_align
= 32 / 4;
1904 const int state_len
= 8 * num_viewports
;
1905 uint32_t state_offset
, *dw
;
1908 ILO_GPE_VALID_GEN(dev
, 6, 6);
1911 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1913 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1914 * stored as an array of up to 16 elements..."
1916 assert(num_viewports
&& num_viewports
<= 16);
1918 dw
= ilo_cp_steal_ptr(cp
, "SF_VIEWPORT",
1919 state_len
, state_align
, &state_offset
);
1921 for (i
= 0; i
< num_viewports
; i
++) {
1922 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1924 dw
[0] = fui(vp
->m00
);
1925 dw
[1] = fui(vp
->m11
);
1926 dw
[2] = fui(vp
->m22
);
1927 dw
[3] = fui(vp
->m30
);
1928 dw
[4] = fui(vp
->m31
);
1929 dw
[5] = fui(vp
->m32
);
1936 return state_offset
;
1939 static inline uint32_t
1940 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
1941 const struct ilo_viewport_cso
*viewports
,
1942 unsigned num_viewports
,
1945 const int state_align
= 32 / 4;
1946 const int state_len
= 4 * num_viewports
;
1947 uint32_t state_offset
, *dw
;
1950 ILO_GPE_VALID_GEN(dev
, 6, 6);
1953 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1955 * "The viewport-related state is stored as an array of up to 16
1958 assert(num_viewports
&& num_viewports
<= 16);
1960 dw
= ilo_cp_steal_ptr(cp
, "CLIP_VIEWPORT",
1961 state_len
, state_align
, &state_offset
);
1963 for (i
= 0; i
< num_viewports
; i
++) {
1964 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1966 dw
[0] = fui(vp
->min_gbx
);
1967 dw
[1] = fui(vp
->max_gbx
);
1968 dw
[2] = fui(vp
->min_gby
);
1969 dw
[3] = fui(vp
->max_gby
);
1974 return state_offset
;
1977 static inline uint32_t
1978 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info
*dev
,
1979 const struct ilo_viewport_cso
*viewports
,
1980 unsigned num_viewports
,
1983 const int state_align
= 32 / 4;
1984 const int state_len
= 2 * num_viewports
;
1985 uint32_t state_offset
, *dw
;
1988 ILO_GPE_VALID_GEN(dev
, 6, 7);
1991 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
1993 * "The viewport state is stored as an array of up to 16 elements..."
1995 assert(num_viewports
&& num_viewports
<= 16);
1997 dw
= ilo_cp_steal_ptr(cp
, "CC_VIEWPORT",
1998 state_len
, state_align
, &state_offset
);
2000 for (i
= 0; i
< num_viewports
; i
++) {
2001 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
2003 dw
[0] = fui(vp
->min_z
);
2004 dw
[1] = fui(vp
->max_z
);
2009 return state_offset
;
2012 static inline uint32_t
2013 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info
*dev
,
2014 const struct pipe_stencil_ref
*stencil_ref
,
2016 const struct pipe_blend_color
*blend_color
,
2019 const int state_align
= 64 / 4;
2020 const int state_len
= 6;
2021 uint32_t state_offset
, *dw
;
2023 ILO_GPE_VALID_GEN(dev
, 6, 7);
2025 dw
= ilo_cp_steal_ptr(cp
, "COLOR_CALC_STATE",
2026 state_len
, state_align
, &state_offset
);
2028 dw
[0] = stencil_ref
->ref_value
[0] << 24 |
2029 stencil_ref
->ref_value
[1] << 16 |
2030 BRW_ALPHATEST_FORMAT_UNORM8
;
2032 dw
[2] = fui(blend_color
->color
[0]);
2033 dw
[3] = fui(blend_color
->color
[1]);
2034 dw
[4] = fui(blend_color
->color
[2]);
2035 dw
[5] = fui(blend_color
->color
[3]);
2037 return state_offset
;
2040 static inline uint32_t
2041 gen6_emit_BLEND_STATE(const struct ilo_dev_info
*dev
,
2042 const struct ilo_blend_state
*blend
,
2043 const struct ilo_fb_state
*fb
,
2044 const struct ilo_dsa_state
*dsa
,
2047 const int state_align
= 64 / 4;
2049 uint32_t state_offset
, *dw
;
2050 unsigned num_targets
, i
;
2052 ILO_GPE_VALID_GEN(dev
, 6, 7);
2055 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2057 * "The blend state is stored as an array of up to 8 elements..."
2059 num_targets
= fb
->state
.nr_cbufs
;
2060 assert(num_targets
<= 8);
2065 /* to be able to reference alpha func */
2069 state_len
= 2 * num_targets
;
2071 dw
= ilo_cp_steal_ptr(cp
, "BLEND_STATE",
2072 state_len
, state_align
, &state_offset
);
2074 for (i
= 0; i
< num_targets
; i
++) {
2075 const unsigned idx
= (blend
->independent_blend_enable
) ? i
: 0;
2076 const struct ilo_blend_cso
*cso
= &blend
->cso
[idx
];
2077 const int num_samples
= fb
->num_samples
;
2078 const struct util_format_description
*format_desc
=
2079 (idx
< fb
->state
.nr_cbufs
) ?
2080 util_format_description(fb
->state
.cbufs
[idx
]->format
) : NULL
;
2081 bool rt_is_unorm
, rt_is_pure_integer
, rt_dst_alpha_forced_one
;
2084 rt_is_pure_integer
= false;
2085 rt_dst_alpha_forced_one
= false;
2090 switch (format_desc
->format
) {
2091 case PIPE_FORMAT_B8G8R8X8_UNORM
:
2092 /* force alpha to one when the HW format has alpha */
2093 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM
)
2094 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM
);
2095 rt_dst_alpha_forced_one
= true;
2101 for (ch
= 0; ch
< 4; ch
++) {
2102 if (format_desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_VOID
)
2105 if (format_desc
->channel
[ch
].pure_integer
) {
2106 rt_is_unorm
= false;
2107 rt_is_pure_integer
= true;
2111 if (!format_desc
->channel
[ch
].normalized
||
2112 format_desc
->channel
[ch
].type
!= UTIL_FORMAT_TYPE_UNSIGNED
)
2113 rt_is_unorm
= false;
2117 dw
[0] = cso
->payload
[0];
2118 dw
[1] = cso
->payload
[1];
2120 if (!rt_is_pure_integer
) {
2121 if (rt_dst_alpha_forced_one
)
2122 dw
[0] |= cso
->dw_blend_dst_alpha_forced_one
;
2124 dw
[0] |= cso
->dw_blend
;
2128 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2130 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2131 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2133 * Since logicop is ignored for non-UNORM color buffers, no special care
2137 dw
[1] |= cso
->dw_logicop
;
2140 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2142 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2143 * Dither both must be disabled."
2145 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2146 * requires that anyway.
2148 if (num_samples
> 1)
2149 dw
[1] |= cso
->dw_alpha_mod
;
2152 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2154 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2157 if (!rt_is_pure_integer
)
2158 dw
[1] |= dsa
->dw_alpha
;
2163 return state_offset
;
2166 static inline uint32_t
2167 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info
*dev
,
2168 const struct ilo_dsa_state
*dsa
,
2171 const int state_align
= 64 / 4;
2172 const int state_len
= 3;
2173 uint32_t state_offset
, *dw
;
2176 ILO_GPE_VALID_GEN(dev
, 6, 7);
2178 dw
= ilo_cp_steal_ptr(cp
, "DEPTH_STENCIL_STATE",
2179 state_len
, state_align
, &state_offset
);
2181 dw
[0] = dsa
->payload
[0];
2182 dw
[1] = dsa
->payload
[1];
2183 dw
[2] = dsa
->payload
[2];
2185 return state_offset
;
2188 static inline uint32_t
2189 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info
*dev
,
2190 const struct ilo_scissor_state
*scissor
,
2191 unsigned num_viewports
,
2194 const int state_align
= 32 / 4;
2195 const int state_len
= 2 * num_viewports
;
2196 uint32_t state_offset
, *dw
;
2198 ILO_GPE_VALID_GEN(dev
, 6, 7);
2201 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2203 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2204 * stored as an array of up to 16 elements..."
2206 assert(num_viewports
&& num_viewports
<= 16);
2208 dw
= ilo_cp_steal_ptr(cp
, "SCISSOR_RECT",
2209 state_len
, state_align
, &state_offset
);
2211 memcpy(dw
, scissor
->payload
, state_len
* 4);
2213 return state_offset
;
/**
 * Write BINDING_TABLE_STATE (GEN6 and GEN7), one dword per surface state.
 *
 * \param dev                 device info
 * \param surface_states      surface state offsets to copy into the table
 * \param num_surface_states  number of entries, at most 256
 * \param cp                  command parser the state is stolen from
 * \return offset of the table, or 0 when it is empty
 */
static inline uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = num_surface_states;
   uint32_t state_offset, *dw;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   if (!num_surface_states)
      return 0;

   dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         state_len, state_align, &state_offset);
   memcpy(dw, surface_states,
         num_surface_states * sizeof(surface_states[0]));

   return state_offset;
}
2246 static inline uint32_t
2247 gen6_emit_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2248 const struct ilo_view_surface
*surf
,
2252 const int state_align
= 32 / 4;
2253 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 8 : 6;
2254 uint32_t state_offset
;
2255 uint32_t read_domains
, write_domain
;
2257 ILO_GPE_VALID_GEN(dev
, 6, 7);
2260 read_domains
= INTEL_DOMAIN_RENDER
;
2261 write_domain
= INTEL_DOMAIN_RENDER
;
2264 read_domains
= INTEL_DOMAIN_SAMPLER
;
2268 ilo_cp_steal(cp
, "SURFACE_STATE", state_len
, state_align
, &state_offset
);
2270 STATIC_ASSERT(Elements(surf
->payload
) >= 8);
2272 ilo_cp_write(cp
, surf
->payload
[0]);
2273 ilo_cp_write_bo(cp
, surf
->payload
[1],
2274 surf
->bo
, read_domains
, write_domain
);
2275 ilo_cp_write(cp
, surf
->payload
[2]);
2276 ilo_cp_write(cp
, surf
->payload
[3]);
2277 ilo_cp_write(cp
, surf
->payload
[4]);
2278 ilo_cp_write(cp
, surf
->payload
[5]);
2280 if (dev
->gen
>= ILO_GEN(7)) {
2281 ilo_cp_write(cp
, surf
->payload
[6]);
2282 ilo_cp_write(cp
, surf
->payload
[7]);
2287 return state_offset
;
2290 static inline uint32_t
2291 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2292 const struct pipe_stream_output_target
*so
,
2293 const struct pipe_stream_output_info
*so_info
,
2297 struct ilo_buffer
*buf
= ilo_buffer(so
->buffer
);
2298 unsigned bo_offset
, struct_size
;
2299 enum pipe_format elem_format
;
2300 struct ilo_view_surface surf
;
2302 ILO_GPE_VALID_GEN(dev
, 6, 6);
2304 bo_offset
= so
->buffer_offset
+ so_info
->output
[so_index
].dst_offset
* 4;
2305 struct_size
= so_info
->stride
[so_info
->output
[so_index
].output_buffer
] * 4;
2307 switch (so_info
->output
[so_index
].num_components
) {
2309 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2312 elem_format
= PIPE_FORMAT_R32G32_FLOAT
;
2315 elem_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
2318 elem_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
2321 assert(!"unexpected SO components length");
2322 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2326 ilo_gpe_init_view_surface_for_buffer_gen6(dev
, buf
, bo_offset
, so
->buffer_size
,
2327 struct_size
, elem_format
, false, true, &surf
);
2329 return gen6_emit_SURFACE_STATE(dev
, &surf
, false, cp
);
2332 static inline uint32_t
2333 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info
*dev
,
2334 const struct ilo_sampler_cso
* const *samplers
,
2335 const struct pipe_sampler_view
* const *views
,
2336 const uint32_t *sampler_border_colors
,
2340 const int state_align
= 32 / 4;
2341 const int state_len
= 4 * num_samplers
;
2342 uint32_t state_offset
, *dw
;
2345 ILO_GPE_VALID_GEN(dev
, 6, 7);
2348 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2350 * "The sampler state is stored as an array of up to 16 elements..."
2352 assert(num_samplers
<= 16);
2357 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_STATE",
2358 state_len
, state_align
, &state_offset
);
2360 for (i
= 0; i
< num_samplers
; i
++) {
2361 const struct ilo_sampler_cso
*sampler
= samplers
[i
];
2362 const struct pipe_sampler_view
*view
= views
[i
];
2363 const uint32_t border_color
= sampler_border_colors
[i
];
2364 uint32_t dw_filter
, dw_wrap
;
2366 /* there may be holes */
2367 if (!sampler
|| !view
) {
2368 /* disabled sampler */
2378 /* determine filter and wrap modes */
2379 switch (view
->texture
->target
) {
2380 case PIPE_TEXTURE_1D
:
2381 dw_filter
= (sampler
->anisotropic
) ?
2382 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2383 dw_wrap
= sampler
->dw_wrap_1d
;
2385 case PIPE_TEXTURE_3D
:
2387 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2389 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2390 * surfaces of type SURFTYPE_3D."
2392 dw_filter
= sampler
->dw_filter
;
2393 dw_wrap
= sampler
->dw_wrap
;
2395 case PIPE_TEXTURE_CUBE
:
2396 dw_filter
= (sampler
->anisotropic
) ?
2397 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2398 dw_wrap
= sampler
->dw_wrap_cube
;
2401 dw_filter
= (sampler
->anisotropic
) ?
2402 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2403 dw_wrap
= sampler
->dw_wrap
;
2407 dw
[0] = sampler
->payload
[0];
2408 dw
[1] = sampler
->payload
[1];
2409 assert(!(border_color
& 0x1f));
2410 dw
[2] = border_color
;
2411 dw
[3] = sampler
->payload
[2];
2415 if (dev
->gen
>= ILO_GEN(7)) {
2420 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2422 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2423 * where the min and mag mode filters are different and
2424 * SurfMinLOD is nonzero. The determination of MagMode uses the
2425 * following equation instead of the one in the above
2426 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2428 * As a way to work around that, we set Base to
2429 * view->u.tex.first_level.
2431 dw
[0] |= view
->u
.tex
.first_level
<< 22;
2439 return state_offset
;
2442 static inline uint32_t
2443 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info
*dev
,
2444 const struct ilo_sampler_cso
*sampler
,
2447 const int state_align
= 32 / 4;
2448 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 12;
2449 uint32_t state_offset
, *dw
;
2451 ILO_GPE_VALID_GEN(dev
, 6, 7);
2453 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_BORDER_COLOR_STATE",
2454 state_len
, state_align
, &state_offset
);
2456 /* see ilo_gpe_init_sampler_cso() */
2457 memcpy(dw
, &sampler
->payload
[3], state_len
* 4);
2459 return state_offset
;
/**
 * Reserve a push constant buffer (GEN6 and GEN7) and zero its padding.
 *
 * The contents are not written here; the caller fills the first \p size
 * bytes through the pointer returned in \p pcb.
 *
 * \param dev   device info
 * \param size  size of the constants in bytes
 * \param pcb   if non-NULL, receives the CPU pointer to the buffer
 * \param cp    command parser the state is stolen from
 * \return offset of the buffer as reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int state_align = 32 / 4;
   const int state_len = align(size, 32) / 4;
   uint32_t state_offset;
   char *buf;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   buf = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         state_len, state_align, &state_offset);

   /* zero out the unused range */
   if (size < state_len * 4)
      memset(&buf[size], 0, state_len * 4 - size);

   if (pcb)
      *pcb = buf;

   return state_offset;
}
2491 #endif /* ILO_GPE_GEN6_H */