2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
31 #include "genhw/genhw.h"
32 #include "intel_winsys.h"
34 #include "ilo_common.h"
36 #include "ilo_format.h"
37 #include "ilo_resource.h"
38 #include "ilo_shader.h"
/*
 * Assert that (dev)->gen lies in the inclusive range
 * [ILO_GEN(min_gen), ILO_GEN(max_gen)].  The dev argument is expanded twice,
 * so it should be free of side effects.
 */
41 #define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
42 assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
45 * Translate winsys tiling to hardware tiling.
/*
 * Map a winsys tiling mode to a GEN6_TILING_* value.
 *
 * INTEL_TILING_NONE yields GEN6_TILING_NONE.  The trailing path asserts with
 * "unknown tiling" and also returns GEN6_TILING_NONE.
 * NOTE(review): the return type and any other tiling cases are not visible in
 * this listing - confirm against the full source.
 */
48 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling
)
51 case INTEL_TILING_NONE
:
52 return GEN6_TILING_NONE
;
58 assert(!"unknown tiling");
59 return GEN6_TILING_NONE
;
64 * Translate a pipe primitive type to the matching hardware primitive type.
/*
 * Look up the GEN6_3DPRIM_* value for a PIPE_PRIM_* primitive type.
 *
 * The mapping is a static table of PIPE_PRIM_MAX entries filled with
 * designated initializers.  The function asserts that the selected entry is
 * non-zero and returns it.
 * NOTE(review): no range check of prim against PIPE_PRIM_MAX is visible before
 * the table is indexed - confirm callers only pass valid primitive types.
 */
67 ilo_gpe_gen6_translate_pipe_prim(unsigned prim
)
69 static const int prim_mapping
[PIPE_PRIM_MAX
] = {
70 [PIPE_PRIM_POINTS
] = GEN6_3DPRIM_POINTLIST
,
71 [PIPE_PRIM_LINES
] = GEN6_3DPRIM_LINELIST
,
72 [PIPE_PRIM_LINE_LOOP
] = GEN6_3DPRIM_LINELOOP
,
73 [PIPE_PRIM_LINE_STRIP
] = GEN6_3DPRIM_LINESTRIP
,
74 [PIPE_PRIM_TRIANGLES
] = GEN6_3DPRIM_TRILIST
,
75 [PIPE_PRIM_TRIANGLE_STRIP
] = GEN6_3DPRIM_TRISTRIP
,
76 [PIPE_PRIM_TRIANGLE_FAN
] = GEN6_3DPRIM_TRIFAN
,
77 [PIPE_PRIM_QUADS
] = GEN6_3DPRIM_QUADLIST
,
78 [PIPE_PRIM_QUAD_STRIP
] = GEN6_3DPRIM_QUADSTRIP
,
79 [PIPE_PRIM_POLYGON
] = GEN6_3DPRIM_POLYGON
,
80 [PIPE_PRIM_LINES_ADJACENCY
] = GEN6_3DPRIM_LINELIST_ADJ
,
81 [PIPE_PRIM_LINE_STRIP_ADJACENCY
] = GEN6_3DPRIM_LINESTRIP_ADJ
,
82 [PIPE_PRIM_TRIANGLES_ADJACENCY
] = GEN6_3DPRIM_TRILIST_ADJ
,
83 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
] = GEN6_3DPRIM_TRISTRIP_ADJ
,
86 assert(prim_mapping
[prim
]);
88 return prim_mapping
[prim
];
92 * Translate a pipe texture target to the matching hardware surface type.
/*
 * Map a pipe texture target to a GEN6_SURFTYPE_* value.
 *
 *   1D, 1D_ARRAY          -> GEN6_SURFTYPE_1D
 *   2D, RECT, 2D_ARRAY    -> GEN6_SURFTYPE_2D
 *   3D                    -> GEN6_SURFTYPE_3D
 *   CUBE, CUBE_ARRAY      -> GEN6_SURFTYPE_CUBE
 *
 * The trailing path asserts with "unknown texture target" and returns
 * GEN6_SURFTYPE_BUFFER.
 * NOTE(review): the case label guarding the first GEN6_SURFTYPE_BUFFER return
 * is not visible in this listing - presumably the buffer target; confirm.
 */
95 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target
)
99 return GEN6_SURFTYPE_BUFFER
;
100 case PIPE_TEXTURE_1D
:
101 case PIPE_TEXTURE_1D_ARRAY
:
102 return GEN6_SURFTYPE_1D
;
103 case PIPE_TEXTURE_2D
:
104 case PIPE_TEXTURE_RECT
:
105 case PIPE_TEXTURE_2D_ARRAY
:
106 return GEN6_SURFTYPE_2D
;
107 case PIPE_TEXTURE_3D
:
108 return GEN6_SURFTYPE_3D
;
109 case PIPE_TEXTURE_CUBE
:
110 case PIPE_TEXTURE_CUBE_ARRAY
:
111 return GEN6_SURFTYPE_CUBE
;
113 assert(!"unknown texture target");
114 return GEN6_SURFTYPE_BUFFER
;
119 * Fill in DW2 to DW7 of 3DSTATE_SF.
/*
 * Fill the rasterizer-derived dwords of 3DSTATE_SF into payload.
 *
 * payload_len must equal the element count of rasterizer->sf.payload (the
 * assert only takes the array size, it does not dereference rasterizer).
 * One visible path copies sf->payload into payload and ORs sf->dw_msaa into
 * payload[1]; another assigns payload[1] from num_samples directly.
 * When dev->gen >= ILO_GEN(7), a depth format chosen from depth_format is
 * ORed into payload[0] at GEN7_SF_DW1_DEPTH_FORMAT__SHIFT.
 * NOTE(review): the declaration of num_samples and the conditions selecting
 * between the two payload[1] paths are not visible here - presumably
 * num_samples is a parameter and the second path handles a NULL rasterizer;
 * confirm against the full source.
 */
122 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info
*dev
,
123 const struct ilo_rasterizer_state
*rasterizer
,
125 enum pipe_format depth_format
,
126 uint32_t *payload
, unsigned payload_len
)
128 assert(payload_len
== Elements(rasterizer
->sf
.payload
));
131 const struct ilo_rasterizer_sf
*sf
= &rasterizer
->sf
;
133 memcpy(payload
, sf
->payload
, sizeof(sf
->payload
));
135 payload
[1] |= sf
->dw_msaa
;
139 payload
[1] = (num_samples
> 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN
: 0;
146 if (dev
->gen
>= ILO_GEN(7)) {
149 /* separate stencil */
150 switch (depth_format
) {
151 case PIPE_FORMAT_Z16_UNORM
:
152 format
= GEN6_ZFORMAT_D16_UNORM
;
154 case PIPE_FORMAT_Z32_FLOAT
:
155 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
156 format
= GEN6_ZFORMAT_D32_FLOAT
;
158 case PIPE_FORMAT_Z24X8_UNORM
:
159 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
160 format
= GEN6_ZFORMAT_D24_UNORM_X8_UINT
;
163 /* FLOAT surface is assumed when there is no depth buffer */
164 format
= GEN6_ZFORMAT_D32_FLOAT
;
168 payload
[0] |= format
<< GEN7_SF_DW1_DEPTH_FORMAT__SHIFT
;
173 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
/*
 * Fill the setup-backend (SBE) dwords of 3DSTATE_SF into dw.
 *
 * Requires a GEN6..GEN7.5 device and num_dwords == 13.
 *  - One visible path zeroes dw and sets only a URB read length of 1 in dw[0]
 *    (presumably used when fs is NULL - confirm).
 *  - Otherwise the attribute count comes from the ILO_KERNEL_INPUT_COUNT
 *    kernel parameter (at most 32), and the URB read offset and length come
 *    from the kernel routing: source_skip must be even, and the read length is
 *    (source_len + 1) / 2.
 *  - dw[0] also gets the swizzle-enable bit and the point sprite texcoord
 *    origin selected by rasterizer->state.sprite_coord_mode.
 *  - 2 * 16 bytes of routing->swizzles are copied starting at dw[1].
 *  - dw[9] and dw[10] take the point-sprite and constant-interpolation
 *    enables from the routing.
 */
176 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info
*dev
,
177 const struct ilo_rasterizer_state
*rasterizer
,
178 const struct ilo_shader_state
*fs
,
179 uint32_t *dw
, int num_dwords
)
181 int output_count
, vue_offset
, vue_len
;
182 const struct ilo_kernel_routing
*routing
;
184 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
185 assert(num_dwords
== 13);
188 memset(dw
, 0, sizeof(dw
[0]) * num_dwords
);
189 dw
[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT
;
193 output_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
194 assert(output_count
<= 32);
196 routing
= ilo_shader_get_kernel_routing(fs
);
198 vue_offset
= routing
->source_skip
;
199 assert(vue_offset
% 2 == 0);
202 vue_len
= (routing
->source_len
+ 1) / 2;
206 dw
[0] = output_count
<< GEN7_SBE_DW1_ATTR_COUNT__SHIFT
|
207 vue_len
<< GEN7_SBE_DW1_URB_READ_LEN__SHIFT
|
208 vue_offset
<< GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT
;
209 if (routing
->swizzle_enable
)
210 dw
[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE
;
212 switch (rasterizer
->state
.sprite_coord_mode
) {
213 case PIPE_SPRITE_COORD_UPPER_LEFT
:
214 dw
[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT
;
216 case PIPE_SPRITE_COORD_LOWER_LEFT
:
217 dw
[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT
;
221 STATIC_ASSERT(Elements(routing
->swizzles
) >= 16);
222 memcpy(&dw
[1], routing
->swizzles
, 2 * 16);
225 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
227 * "This field (Point Sprite Texture Coordinate Enable) must be
228 * programmed to 0 when non-point primitives are rendered."
230 * TODO We do not check that yet.
232 dw
[9] = routing
->point_sprite_enable
;
234 dw
[10] = routing
->const_interp_enable
;
236 /* WrapShortest enables */
/*
 * Emit MI_STORE_DATA_IMM to store val at bo_offset within bo.
 *
 * The command is 5 dwords when store_qword is set and 4 otherwise; dw0
 * carries (cmd_len - 2).  bo_offset must be 8-byte aligned for a qword
 * store and 4-byte aligned otherwise.  On GEN6 the USE_GGTT bit and the
 * INTEL_RELOC_GGTT relocation flag are added.  The visible writes are dw0, a
 * write relocation to bo, the low 32 bits of val and the high 32 bits of val;
 * a final assert checks that val fits in 32 bits.
 * NOTE(review): the branch structure around the high-dword write and that
 * assert is not visible - presumably the assert guards the non-qword path.
 */
242 gen6_emit_MI_STORE_DATA_IMM(const struct ilo_dev_info
*dev
,
243 struct intel_bo
*bo
, uint32_t bo_offset
,
244 uint64_t val
, bool store_qword
,
247 const uint8_t cmd_len
= (store_qword
) ? 5 : 4;
248 uint32_t dw0
= GEN6_MI_CMD(MI_STORE_DATA_IMM
) | (cmd_len
- 2);
249 uint32_t reloc_flags
= INTEL_RELOC_WRITE
;
251 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
253 assert(bo_offset
% ((store_qword
) ? 8 : 4) == 0);
255 /* must use GGTT on GEN6 as in PIPE_CONTROL */
256 if (dev
->gen
== ILO_GEN(6)) {
257 dw0
|= GEN6_MI_STORE_DATA_IMM_DW0_USE_GGTT
;
258 reloc_flags
|= INTEL_RELOC_GGTT
;
261 ilo_cp_begin(cp
, cmd_len
);
262 ilo_cp_write(cp
, dw0
);
264 ilo_cp_write_bo(cp
, bo_offset
, bo
, reloc_flags
);
265 ilo_cp_write(cp
, (uint32_t) val
);
268 ilo_cp_write(cp
, (uint32_t) (val
>> 32));
270 assert(val
== (uint64_t) ((uint32_t) val
));
/*
 * Emit MI_LOAD_REGISTER_IMM: a 3-dword command consisting of the header,
 * the register offset reg and the immediate val.
 *
 * Valid on GEN6..GEN7.5; reg must be a multiple of 4.
 */
276 gen6_emit_MI_LOAD_REGISTER_IMM(const struct ilo_dev_info
*dev
,
277 uint32_t reg
, uint32_t val
,
280 const uint8_t cmd_len
= 3;
281 const uint32_t dw0
= GEN6_MI_CMD(MI_LOAD_REGISTER_IMM
) | (cmd_len
- 2);
283 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
285 assert(reg
% 4 == 0);
287 ilo_cp_begin(cp
, cmd_len
);
288 ilo_cp_write(cp
, dw0
);
289 ilo_cp_write(cp
, reg
);
290 ilo_cp_write(cp
, val
);
/*
 * Emit MI_STORE_REGISTER_MEM: a 3-dword command consisting of the header,
 * the register offset reg and a write relocation to bo at bo_offset.
 *
 * Valid on GEN6..GEN7.5; both reg and bo_offset must be multiples of 4.
 * On GEN6 the USE_GGTT bit is set in dw0 and INTEL_RELOC_GGTT is added to
 * the relocation flags.
 */
295 gen6_emit_MI_STORE_REGISTER_MEM(const struct ilo_dev_info
*dev
,
296 struct intel_bo
*bo
, uint32_t bo_offset
,
297 uint32_t reg
, struct ilo_cp
*cp
)
299 const uint8_t cmd_len
= 3;
300 uint32_t dw0
= GEN6_MI_CMD(MI_STORE_REGISTER_MEM
) | (cmd_len
- 2);
301 uint32_t reloc_flags
= INTEL_RELOC_WRITE
;
303 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
305 assert(reg
% 4 == 0 && bo_offset
% 4 == 0);
307 /* must use GGTT on GEN6 as in PIPE_CONTROL */
308 if (dev
->gen
== ILO_GEN(6)) {
309 dw0
|= GEN6_MI_STORE_REGISTER_MEM_DW0_USE_GGTT
;
310 reloc_flags
|= INTEL_RELOC_GGTT
;
313 ilo_cp_begin(cp
, cmd_len
);
314 ilo_cp_write(cp
, dw0
);
315 ilo_cp_write(cp
, reg
);
316 ilo_cp_write_bo(cp
, bo_offset
, bo
, reloc_flags
);
/*
 * Emit MI_REPORT_PERF_COUNT: a 3-dword command consisting of the header, a
 * write relocation to bo at bo_offset, and report_id.
 *
 * Valid on GEN6..GEN7.5; bo_offset must be a multiple of 64.  On GEN6 the
 * USE_GGTT bit is ORed into bo_offset itself (it lives in DW1, not DW0) and
 * INTEL_RELOC_GGTT is added to the relocation flags.
 */
321 gen6_emit_MI_REPORT_PERF_COUNT(const struct ilo_dev_info
*dev
,
322 struct intel_bo
*bo
, uint32_t bo_offset
,
323 uint32_t report_id
, struct ilo_cp
*cp
)
325 const uint8_t cmd_len
= 3;
326 const uint32_t dw0
= GEN6_MI_CMD(MI_REPORT_PERF_COUNT
) | (cmd_len
- 2);
327 uint32_t reloc_flags
= INTEL_RELOC_WRITE
;
329 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
331 assert(bo_offset
% 64 == 0);
333 /* must use GGTT on GEN6 as in PIPE_CONTROL */
334 if (dev
->gen
== ILO_GEN(6)) {
335 bo_offset
|= GEN6_MI_REPORT_PERF_COUNT_DW1_USE_GGTT
;
336 reloc_flags
|= INTEL_RELOC_GGTT
;
339 ilo_cp_begin(cp
, cmd_len
);
340 ilo_cp_write(cp
, dw0
);
341 ilo_cp_write_bo(cp
, bo_offset
, bo
, reloc_flags
);
342 ilo_cp_write(cp
, report_id
);
/*
 * Emit the 10-dword STATE_BASE_ADDRESS command.
 *
 * Valid on GEN6..GEN7.5.  All four size arguments must be multiples of 4096.
 * After the header, five base addresses are written as relocations to the
 * general state, surface state, dynamic state, indirect object and
 * instruction bos, each with the value 1 (presumably the modify-enable bit -
 * confirm).  Upper bounds follow: a non-zero size is written as a relocation
 * of (size | 1) against the matching bo, otherwise the range check is
 * skipped.
 * NOTE(review): the skip-range-check writes for the general state and
 * instruction bounds are not visible in this listing; the two visible ones
 * write 0xfffff000 + 1.
 */
347 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info
*dev
,
348 struct intel_bo
*general_state_bo
,
349 struct intel_bo
*surface_state_bo
,
350 struct intel_bo
*dynamic_state_bo
,
351 struct intel_bo
*indirect_object_bo
,
352 struct intel_bo
*instruction_bo
,
353 uint32_t general_state_size
,
354 uint32_t dynamic_state_size
,
355 uint32_t indirect_object_size
,
356 uint32_t instruction_size
,
359 const uint8_t cmd_len
= 10;
360 const uint32_t dw0
= GEN6_RENDER_CMD(COMMON
, STATE_BASE_ADDRESS
) |
363 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
365 /* 4K-page aligned */
366 assert(((general_state_size
| dynamic_state_size
|
367 indirect_object_size
| instruction_size
) & 0xfff) == 0);
369 ilo_cp_begin(cp
, cmd_len
);
370 ilo_cp_write(cp
, dw0
);
372 ilo_cp_write_bo(cp
, 1, general_state_bo
, 0);
373 ilo_cp_write_bo(cp
, 1, surface_state_bo
, 0);
374 ilo_cp_write_bo(cp
, 1, dynamic_state_bo
, 0);
375 ilo_cp_write_bo(cp
, 1, indirect_object_bo
, 0);
376 ilo_cp_write_bo(cp
, 1, instruction_bo
, 0);
378 if (general_state_size
) {
379 ilo_cp_write_bo(cp
, general_state_size
| 1, general_state_bo
, 0);
382 /* skip range check */
386 if (dynamic_state_size
) {
387 ilo_cp_write_bo(cp
, dynamic_state_size
| 1, dynamic_state_bo
, 0);
390 /* skip range check */
391 ilo_cp_write(cp
, 0xfffff000 + 1);
394 if (indirect_object_size
) {
395 ilo_cp_write_bo(cp
, indirect_object_size
| 1, indirect_object_bo
, 0);
398 /* skip range check */
399 ilo_cp_write(cp
, 0xfffff000 + 1);
402 if (instruction_size
) {
403 ilo_cp_write_bo(cp
, instruction_size
| 1, instruction_bo
, 0);
406 /* skip range check */
/*
 * Emit STATE_SIP: a 2-dword command consisting of the header and sip.
 *
 * Valid on GEN6..GEN7.5.
 * NOTE(review): the declarations of sip and cp are not visible in this
 * listing - presumably the remaining parameters; confirm.
 */
414 gen6_emit_STATE_SIP(const struct ilo_dev_info
*dev
,
418 const uint8_t cmd_len
= 2;
419 const uint32_t dw0
= GEN6_RENDER_CMD(COMMON
, STATE_SIP
) | (cmd_len
- 2);
421 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
423 ilo_cp_begin(cp
, cmd_len
);
424 ilo_cp_write(cp
, dw0
);
425 ilo_cp_write(cp
, sip
);
/*
 * Emit the single-dword 3DSTATE_VF_STATISTICS command.
 *
 * Valid on GEN6..GEN7.5.
 * NOTE(review): the value ORed into the header after the command opcode and
 * the remaining parameters are not visible in this listing.
 */
430 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info
*dev
,
434 const uint8_t cmd_len
= 1;
435 const uint32_t dw0
= GEN6_RENDER_CMD(SINGLE_DW
, 3DSTATE_VF_STATISTICS
) |
438 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
440 ilo_cp_begin(cp
, cmd_len
);
441 ilo_cp_write(cp
, dw0
);
/*
 * Emit the single-dword PIPELINE_SELECT command.
 *
 * Valid on GEN6..GEN7.5; pipeline must be 0x0 or 0x1.
 * NOTE(review): the value ORed into the header after the command opcode is
 * not visible in this listing - presumably pipeline; confirm.
 */
446 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info
*dev
,
450 const uint8_t cmd_len
= 1;
451 const uint32_t dw0
= GEN6_RENDER_CMD(SINGLE_DW
, PIPELINE_SELECT
) |
454 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
457 assert(pipeline
== 0x0 || pipeline
== 0x1);
459 ilo_cp_begin(cp
, cmd_len
);
460 ilo_cp_write(cp
, dw0
);
/*
 * Emit the 8-dword MEDIA_VFE_STATE command (GEN6 only).
 *
 * dw2 packs (max_threads - 1) at bit 16, num_urb_entries at bit 8, and sets
 * bits 7 and 6 (reset gateway timer, bypass gateway control).  dw4 packs
 * urb_entry_size at bit 16 with a fixed CURBE allocation size of 480.
 * The scratch, MBZ and scoreboard dwords that are visible are written as 0.
 */
465 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info
*dev
,
466 int max_threads
, int num_urb_entries
,
470 const uint8_t cmd_len
= 8;
471 const uint32_t dw0
= GEN6_RENDER_CMD(MEDIA
, MEDIA_VFE_STATE
) |
475 ILO_GPE_VALID_GEN(dev
, 6, 6);
477 dw2
= (max_threads
- 1) << 16 |
478 num_urb_entries
<< 8 |
479 1 << 7 | /* Reset Gateway Timer */
480 1 << 6; /* Bypass Gateway Control */
482 dw4
= urb_entry_size
<< 16 | /* URB Entry Allocation Size */
483 480; /* CURBE Allocation Size */
485 ilo_cp_begin(cp
, cmd_len
);
486 ilo_cp_write(cp
, dw0
);
487 ilo_cp_write(cp
, 0); /* scratch */
488 ilo_cp_write(cp
, dw2
);
489 ilo_cp_write(cp
, 0); /* MBZ */
490 ilo_cp_write(cp
, dw4
);
491 ilo_cp_write(cp
, 0); /* scoreboard */
/*
 * Emit the 4-dword MEDIA_CURBE_LOAD command (GEN6 only).
 *
 * buf must be a multiple of 32.  size is rounded up to a multiple of 32
 * before being written.  The dwords are: header, 0 (MBZ), size, buf.
 */
498 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info
*dev
,
499 uint32_t buf
, int size
,
502 const uint8_t cmd_len
= 4;
503 const uint32_t dw0
= GEN6_RENDER_CMD(MEDIA
, MEDIA_CURBE_LOAD
) |
506 ILO_GPE_VALID_GEN(dev
, 6, 6);
508 assert(buf
% 32 == 0);
509 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
510 size
= align(size
, 32);
512 ilo_cp_begin(cp
, cmd_len
);
513 ilo_cp_write(cp
, dw0
);
514 ilo_cp_write(cp
, 0); /* MBZ */
515 ilo_cp_write(cp
, size
);
516 ilo_cp_write(cp
, buf
);
/*
 * Emit the 4-dword MEDIA_INTERFACE_DESCRIPTOR_LOAD command (GEN6 only).
 *
 * offset must be a multiple of 32.  The dwords are: header, 0 (MBZ), the
 * total descriptor size in bytes (num_ids * 8 dwords * 4 bytes), offset.
 */
521 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info
*dev
,
522 uint32_t offset
, int num_ids
,
525 const uint8_t cmd_len
= 4;
527 GEN6_RENDER_CMD(MEDIA
, MEDIA_INTERFACE_DESCRIPTOR_LOAD
) | (cmd_len
- 2);
529 ILO_GPE_VALID_GEN(dev
, 6, 6);
531 assert(offset
% 32 == 0);
533 ilo_cp_begin(cp
, cmd_len
);
534 ilo_cp_write(cp
, dw0
);
535 ilo_cp_write(cp
, 0); /* MBZ */
536 /* every ID has 8 DWords */
537 ilo_cp_write(cp
, num_ids
* 8 * 4);
538 ilo_cp_write(cp
, offset
);
/*
 * Emit the 2-dword MEDIA_GATEWAY_STATE command (GEN6 only): header, dw1.
 *
 * NOTE(review): the computation of dw1 is not visible in this listing -
 * presumably it packs id, byte and thread_count; confirm.
 */
543 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info
*dev
,
544 int id
, int byte
, int thread_count
,
547 const uint8_t cmd_len
= 2;
548 const uint32_t dw0
= GEN6_RENDER_CMD(MEDIA
, MEDIA_GATEWAY_STATE
) |
552 ILO_GPE_VALID_GEN(dev
, 6, 6);
558 ilo_cp_begin(cp
, cmd_len
);
559 ilo_cp_write(cp
, dw0
);
560 ilo_cp_write(cp
, dw1
);
/*
 * Emit the 2-dword MEDIA_STATE_FLUSH command (GEN6 only): header, dw1.
 *
 * dw1 carries thread_count_water_mark at bit 16.
 * NOTE(review): the value ORed into the low bits of dw1 is not visible in
 * this listing.
 */
565 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info
*dev
,
566 int thread_count_water_mark
,
570 const uint8_t cmd_len
= 2;
571 const uint32_t dw0
= GEN6_RENDER_CMD(MEDIA
, MEDIA_STATE_FLUSH
) |
575 ILO_GPE_VALID_GEN(dev
, 6, 6);
577 dw1
= thread_count_water_mark
<< 16 |
580 ilo_cp_begin(cp
, cmd_len
);
581 ilo_cp_write(cp
, dw0
);
582 ilo_cp_write(cp
, dw1
);
/*
 * Placeholder for MEDIA_OBJECT_WALKER: the only visible statement is an
 * assert that always fails with "MEDIA_OBJECT_WALKER unsupported".
 */
587 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info
*dev
,
590 assert(!"MEDIA_OBJECT_WALKER unsupported");
/*
 * Emit the 4-dword 3DSTATE_BINDING_TABLE_POINTERS command (GEN6 only).
 *
 * The header always has the VS, GS and PS CHANGED flags set, so all three
 * binding table pointers are updated: vs, gs, then ps.
 */
594 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info
*dev
,
595 uint32_t vs_binding_table
,
596 uint32_t gs_binding_table
,
597 uint32_t ps_binding_table
,
600 const uint8_t cmd_len
= 4;
601 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_BINDING_TABLE_POINTERS
) |
602 GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED
|
603 GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED
|
604 GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED
|
607 ILO_GPE_VALID_GEN(dev
, 6, 6);
609 ilo_cp_begin(cp
, cmd_len
);
610 ilo_cp_write(cp
, dw0
);
611 ilo_cp_write(cp
, vs_binding_table
);
612 ilo_cp_write(cp
, gs_binding_table
);
613 ilo_cp_write(cp
, ps_binding_table
);
/*
 * Emit the 4-dword 3DSTATE_SAMPLER_STATE_POINTERS command (GEN6 only).
 *
 * The header always has the VS, GS and PS CHANGED flags set, so all three
 * sampler state pointers are updated: vs, gs, then ps.
 */
618 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info
*dev
,
619 uint32_t vs_sampler_state
,
620 uint32_t gs_sampler_state
,
621 uint32_t ps_sampler_state
,
624 const uint8_t cmd_len
= 4;
625 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_SAMPLER_STATE_POINTERS
) |
626 GEN6_PTR_SAMPLER_DW0_VS_CHANGED
|
627 GEN6_PTR_SAMPLER_DW0_GS_CHANGED
|
628 GEN6_PTR_SAMPLER_DW0_PS_CHANGED
|
631 ILO_GPE_VALID_GEN(dev
, 6, 6);
633 ilo_cp_begin(cp
, cmd_len
);
634 ilo_cp_write(cp
, dw0
);
635 ilo_cp_write(cp
, vs_sampler_state
);
636 ilo_cp_write(cp
, gs_sampler_state
);
637 ilo_cp_write(cp
, ps_sampler_state
);
/*
 * Emit the 3-dword 3DSTATE_URB command (GEN6 only).
 *
 * Entry sizes are converted to 128-byte (1024-bit) URB rows, rounding up,
 * and must not exceed 5 rows.  The entry counts are total_size / row_size /
 * alloc_size, rounded down to a multiple of 4 and clamped to 256; the VS
 * count must be at least 24.  The hardware fields take (alloc_size - 1).
 * NOTE(review): the code that enforces the lower bound of 1 on the
 * allocation sizes is not visible in this listing; without it a zero
 * entry size would divide by zero - confirm against the full source.
 */
642 gen6_emit_3DSTATE_URB(const struct ilo_dev_info
*dev
,
643 int vs_total_size
, int gs_total_size
,
644 int vs_entry_size
, int gs_entry_size
,
647 const uint8_t cmd_len
= 3;
648 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_URB
) | (cmd_len
- 2);
649 const int row_size
= 128; /* 1024 bits */
650 int vs_alloc_size
, gs_alloc_size
;
651 int vs_num_entries
, gs_num_entries
;
653 ILO_GPE_VALID_GEN(dev
, 6, 6);
655 /* in 1024-bit URB rows */
656 vs_alloc_size
= (vs_entry_size
+ row_size
- 1) / row_size
;
657 gs_alloc_size
= (gs_entry_size
+ row_size
- 1) / row_size
;
659 /* the valid range is [1, 5] */
664 assert(vs_alloc_size
<= 5 && gs_alloc_size
<= 5);
666 /* the valid range is [24, 256] in multiples of 4 */
667 vs_num_entries
= (vs_total_size
/ row_size
/ vs_alloc_size
) & ~3;
668 if (vs_num_entries
> 256)
669 vs_num_entries
= 256;
670 assert(vs_num_entries
>= 24);
672 /* the valid range is [0, 256] in multiples of 4 */
673 gs_num_entries
= (gs_total_size
/ row_size
/ gs_alloc_size
) & ~3;
674 if (gs_num_entries
> 256)
675 gs_num_entries
= 256;
677 ilo_cp_begin(cp
, cmd_len
);
678 ilo_cp_write(cp
, dw0
);
679 ilo_cp_write(cp
, (vs_alloc_size
- 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT
|
680 vs_num_entries
<< GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT
);
681 ilo_cp_write(cp
, gs_num_entries
<< GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT
|
682 (gs_alloc_size
- 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT
);
/*
 * Emit 3DSTATE_VERTEX_BUFFERS for the vertex buffers referenced by ve.
 *
 * Valid on GEN6..GEN7.5; at most 33 buffers.  The command is one header
 * dword plus four dwords per buffer.  For each hardware slot hw_idx the
 * pipe-level buffer is found through ve->vb_mapping, and the first dword
 * holds the slot index, the instance-data or vertex-data access mode, and on
 * GEN7+ the ADDR_MODIFIED bit.  A buffer with a resource and a stride of at
 * most 2048 is emitted with its pitch, a start relocation at buffer_offset,
 * an end relocation at bo_size - 1, and the instance divisor.  Otherwise a
 * null vertex buffer is emitted.
 * NOTE(review): the early-out for vb_count == 0, the else keywords and the
 * middle dwords of the null-buffer path are not visible in this listing.
 */
687 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info
*dev
,
688 const struct ilo_ve_state
*ve
,
689 const struct ilo_vb_state
*vb
,
696 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
699 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
701 * "From 1 to 33 VBs can be specified..."
703 assert(ve
->vb_count
<= 33);
708 cmd_len
= 1 + 4 * ve
->vb_count
;
709 dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_VERTEX_BUFFERS
) |
712 ilo_cp_begin(cp
, cmd_len
);
713 ilo_cp_write(cp
, dw0
);
715 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
716 const unsigned instance_divisor
= ve
->instance_divisors
[hw_idx
];
717 const unsigned pipe_idx
= ve
->vb_mapping
[hw_idx
];
718 const struct pipe_vertex_buffer
*cso
= &vb
->states
[pipe_idx
];
721 dw
= hw_idx
<< GEN6_VB_STATE_DW0_INDEX__SHIFT
;
723 if (instance_divisor
)
724 dw
|= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA
;
726 dw
|= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA
;
728 if (dev
->gen
>= ILO_GEN(7))
729 dw
|= GEN7_VB_STATE_DW0_ADDR_MODIFIED
;
731 /* use null vb if there is no buffer or the stride is out of range */
732 if (cso
->buffer
&& cso
->stride
<= 2048) {
733 const struct ilo_buffer
*buf
= ilo_buffer(cso
->buffer
);
734 const uint32_t start_offset
= cso
->buffer_offset
;
735 const uint32_t end_offset
= buf
->bo_size
- 1;
737 dw
|= cso
->stride
<< GEN6_VB_STATE_DW0_PITCH__SHIFT
;
739 ilo_cp_write(cp
, dw
);
740 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, 0);
741 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, 0);
742 ilo_cp_write(cp
, instance_divisor
);
747 ilo_cp_write(cp
, dw
);
750 ilo_cp_write(cp
, instance_divisor
);
/*
 * Initialize a vertex element CSO from four component controls.
 *
 * payload[0] is set to the VALID bit only.  The four comp values are packed
 * at their GEN6_VE_STATE_DW1_COMPn shifts.
 * NOTE(review): the assignment target of the packed component word is not
 * visible in this listing - presumably cso->payload[1]; confirm.
 */
758 ve_init_cso_with_components(const struct ilo_dev_info
*dev
,
759 int comp0
, int comp1
, int comp2
, int comp3
,
760 struct ilo_ve_cso
*cso
)
762 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
764 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
765 cso
->payload
[0] = GEN6_VE_STATE_DW0_VALID
;
767 comp0
<< GEN6_VE_STATE_DW1_COMP0__SHIFT
|
768 comp1
<< GEN6_VE_STATE_DW1_COMP1__SHIFT
|
769 comp2
<< GEN6_VE_STATE_DW1_COMP2__SHIFT
|
770 comp3
<< GEN6_VE_STATE_DW1_COMP3__SHIFT
;
/*
 * Turn a vertex element CSO into an edge flag element.
 *
 * Sets EDGE_FLAG_ENABLE in payload[0] and builds a component word that stores
 * the source in component 0 and nothing in components 1-3.  The 9-bit source
 * format is then read back from payload[0]: R32_FLOAT is rewritten to
 * R32_UINT by subtracting one from the format field, which relies on the
 * static assertion that R32_UINT == R32_FLOAT - 1.  The other accepted
 * format is R8_UINT.
 * NOTE(review): the assignment target of the component word and the branch
 * around the final assert are not visible in this listing.
 */
774 ve_set_cso_edgeflag(const struct ilo_dev_info
*dev
,
775 struct ilo_ve_cso
*cso
)
779 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
782 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
784 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
785 * valid VERTEX_ELEMENT structure.
787 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
788 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
790 * - The Source Element Format must be set to the UINT format.
792 * - [DevSNB]: Edge Flags are not supported for QUADLIST
793 * primitives. Software may elect to convert QUADLIST primitives
794 * to some set of corresponding edge-flag-supported primitive
795 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
798 cso
->payload
[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE
;
800 GEN6_VFCOMP_STORE_SRC
<< GEN6_VE_STATE_DW1_COMP0__SHIFT
|
801 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_STATE_DW1_COMP1__SHIFT
|
802 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_STATE_DW1_COMP2__SHIFT
|
803 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_STATE_DW1_COMP3__SHIFT
;
806 * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
807 * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
808 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
810 * Since all the hardware cares about is whether the flags are zero or not,
811 * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
813 format
= (cso
->payload
[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT
) & 0x1ff;
814 if (format
== GEN6_FORMAT_R32_FLOAT
) {
815 STATIC_ASSERT(GEN6_FORMAT_R32_UINT
== GEN6_FORMAT_R32_FLOAT
- 1);
816 cso
->payload
[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT
);
819 assert(format
== GEN6_FORMAT_R8_UINT
);
/*
 * Emit 3DSTATE_VERTEX_ELEMENTS for the elements in ve.
 *
 * Valid on GEN6..GEN7.5; ve->count plus the optional generated-ids element
 * must not exceed 34.
 *  - With no elements and no generated ids, a single dummy element built by
 *    ve_init_cso_with_components() is emitted.
 *  - Otherwise the command is 2 dwords per element plus the header.  When
 *    prepend_generated_ids is set, an element storing the vertex id and
 *    instance id comes first.
 *  - When last_velement_edgeflag is set, all but the last element are written
 *    unchanged and a copy of the last one is passed through
 *    ve_set_cso_edgeflag() before being written.  This path indexes
 *    ve->cso[ve->count - 1], so it needs ve->count >= 1 (NOTE(review): no
 *    guard for that is visible here).  Without the flag, every element is
 *    written unchanged.
 */
824 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info
*dev
,
825 const struct ilo_ve_state
*ve
,
826 bool last_velement_edgeflag
,
827 bool prepend_generated_ids
,
834 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
837 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
839 * "Up to 34 (DevSNB+) vertex elements are supported."
841 assert(ve
->count
+ prepend_generated_ids
<= 34);
843 if (!ve
->count
&& !prepend_generated_ids
) {
844 struct ilo_ve_cso dummy
;
846 ve_init_cso_with_components(dev
,
850 GEN6_VFCOMP_STORE_1_FP
,
854 dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_VERTEX_ELEMENTS
) |
857 ilo_cp_begin(cp
, cmd_len
);
858 ilo_cp_write(cp
, dw0
);
859 ilo_cp_write_multi(cp
, dummy
.payload
, 2);
865 cmd_len
= 2 * (ve
->count
+ prepend_generated_ids
) + 1;
866 dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_VERTEX_ELEMENTS
) |
869 ilo_cp_begin(cp
, cmd_len
);
870 ilo_cp_write(cp
, dw0
);
872 if (prepend_generated_ids
) {
873 struct ilo_ve_cso gen_ids
;
875 ve_init_cso_with_components(dev
,
876 GEN6_VFCOMP_STORE_VID
,
877 GEN6_VFCOMP_STORE_IID
,
882 ilo_cp_write_multi(cp
, gen_ids
.payload
, 2);
885 if (last_velement_edgeflag
) {
886 struct ilo_ve_cso edgeflag
;
888 for (i
= 0; i
< ve
->count
- 1; i
++)
889 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
891 edgeflag
= ve
->cso
[i
];
892 ve_set_cso_edgeflag(dev
, &edgeflag
);
893 ilo_cp_write_multi(cp
, edgeflag
.payload
, 2);
896 for (i
= 0; i
< ve
->count
; i
++)
897 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
/*
 * Emit the 3-dword 3DSTATE_INDEX_BUFFER command.
 *
 * Valid on GEN6..GEN7.5.  On GEN7.5 and later enable_cut_index must be
 * false.  The index format (DWORD, WORD or BYTE) is chosen from
 * ib->hw_index_size; an unknown size asserts and falls back to BYTE.
 * end_offset starts at the bo size and is rounded down to a multiple of the
 * index size.  The header is followed by relocations to the start and end
 * offsets within the index buffer bo.
 * NOTE(review): the case labels, the assignment of start_offset and any
 * adjustment that makes end_offset inclusive are not visible in this listing.
 */
904 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info
*dev
,
905 const struct ilo_ib_state
*ib
,
906 bool enable_cut_index
,
909 const uint8_t cmd_len
= 3;
910 struct ilo_buffer
*buf
= ilo_buffer(ib
->hw_resource
);
911 uint32_t start_offset
, end_offset
;
915 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
920 /* this is moved to the new 3DSTATE_VF */
921 if (dev
->gen
>= ILO_GEN(7.5))
922 assert(!enable_cut_index
);
924 switch (ib
->hw_index_size
) {
926 format
= GEN6_IB_DW0_FORMAT_DWORD
;
929 format
= GEN6_IB_DW0_FORMAT_WORD
;
932 format
= GEN6_IB_DW0_FORMAT_BYTE
;
935 assert(!"unknown index size");
936 format
= GEN6_IB_DW0_FORMAT_BYTE
;
941 * set start_offset to 0 here and adjust pipe_draw_info::start with
942 * ib->draw_start_offset in 3DPRIMITIVE
945 end_offset
= buf
->bo_size
;
947 /* end_offset must also be aligned and is inclusive */
948 end_offset
-= (end_offset
% ib
->hw_index_size
);
951 dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_INDEX_BUFFER
) |
954 if (enable_cut_index
)
955 dw0
|= GEN6_IB_DW0_CUT_INDEX_ENABLE
;
957 ilo_cp_begin(cp
, cmd_len
);
958 ilo_cp_write(cp
, dw0
);
959 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, 0);
960 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, 0);
/*
 * Emit the 4-dword 3DSTATE_VIEWPORT_STATE_POINTERS command (GEN6 only).
 *
 * The header always has the CLIP, SF and CC CHANGED flags set, so all three
 * viewport pointers are updated: clip, sf, then cc.
 */
965 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info
*dev
,
966 uint32_t clip_viewport
,
967 uint32_t sf_viewport
,
968 uint32_t cc_viewport
,
971 const uint8_t cmd_len
= 4;
972 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_VIEWPORT_STATE_POINTERS
) |
973 GEN6_PTR_VP_DW0_CLIP_CHANGED
|
974 GEN6_PTR_VP_DW0_SF_CHANGED
|
975 GEN6_PTR_VP_DW0_CC_CHANGED
|
978 ILO_GPE_VALID_GEN(dev
, 6, 6);
980 ilo_cp_begin(cp
, cmd_len
);
981 ilo_cp_write(cp
, dw0
);
982 ilo_cp_write(cp
, clip_viewport
);
983 ilo_cp_write(cp
, sf_viewport
);
984 ilo_cp_write(cp
, cc_viewport
);
/*
 * Emit the 4-dword 3DSTATE_CC_STATE_POINTERS command (GEN6 only).
 *
 * Each of the blend, depth-stencil and color-calc state offsets is written
 * with bit 0 set (presumably the per-pointer modify-enable bit - confirm).
 */
989 gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info
*dev
,
990 uint32_t blend_state
,
991 uint32_t depth_stencil_state
,
992 uint32_t color_calc_state
,
995 const uint8_t cmd_len
= 4;
996 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_CC_STATE_POINTERS
) |
999 ILO_GPE_VALID_GEN(dev
, 6, 6);
1001 ilo_cp_begin(cp
, cmd_len
);
1002 ilo_cp_write(cp
, dw0
);
1003 ilo_cp_write(cp
, blend_state
| 1);
1004 ilo_cp_write(cp
, depth_stencil_state
| 1);
1005 ilo_cp_write(cp
, color_calc_state
| 1);
/*
 * Emit the 2-dword 3DSTATE_SCISSOR_STATE_POINTERS command: header followed by
 * the scissor_rect state offset.
 *
 * Valid on GEN6..GEN7.5.
 */
1010 gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info
*dev
,
1011 uint32_t scissor_rect
,
1014 const uint8_t cmd_len
= 2;
1015 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_SCISSOR_STATE_POINTERS
) |
1018 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1020 ilo_cp_begin(cp
, cmd_len
);
1021 ilo_cp_write(cp
, dw0
);
1022 ilo_cp_write(cp
, scissor_rect
);
/*
 * Emit the 6-dword 3DSTATE_VS command.
 *
 * Valid on GEN6..GEN7.5.  One visible path writes the header followed by
 * five zero dwords (presumably when vs is NULL - confirm).  The other path
 * takes dw2, dw4 and dw5 from the kernel CSO payload, adds the sampler count
 * to dw2 in units of four samplers (rounded up), and writes: header, kernel
 * offset, dw2, 0 (scratch), dw4, dw5.
 * NOTE(review): the declaration of num_samplers and the condition selecting
 * between the two paths are not visible in this listing.
 */
1027 gen6_emit_3DSTATE_VS(const struct ilo_dev_info
*dev
,
1028 const struct ilo_shader_state
*vs
,
1032 const uint8_t cmd_len
= 6;
1033 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_VS
) | (cmd_len
- 2);
1034 const struct ilo_shader_cso
*cso
;
1035 uint32_t dw2
, dw4
, dw5
;
1037 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1040 ilo_cp_begin(cp
, cmd_len
);
1041 ilo_cp_write(cp
, dw0
);
1042 ilo_cp_write(cp
, 0);
1043 ilo_cp_write(cp
, 0);
1044 ilo_cp_write(cp
, 0);
1045 ilo_cp_write(cp
, 0);
1046 ilo_cp_write(cp
, 0);
1051 cso
= ilo_shader_get_kernel_cso(vs
);
1052 dw2
= cso
->payload
[0];
1053 dw4
= cso
->payload
[1];
1054 dw5
= cso
->payload
[2];
1056 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
1058 ilo_cp_begin(cp
, cmd_len
);
1059 ilo_cp_write(cp
, dw0
);
1060 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(vs
));
1061 ilo_cp_write(cp
, dw2
);
1062 ilo_cp_write(cp
, 0); /* scratch */
1063 ilo_cp_write(cp
, dw4
);
1064 ilo_cp_write(cp
, dw5
);
/*
 * Emit the 7-dword 3DSTATE_GS command (GEN6 only).
 *
 * Three sources for the payload are visible:
 *  - a GS kernel: dw1 is its kernel offset and dw2/dw4/dw5/dw6 come from its
 *    CSO payload;
 *  - a VS with the ILO_KERNEL_VS_GEN6_SO parameter set: dw1 is the VS kernel
 *    offset plus a point, line or triangle stream-output offset chosen by
 *    verts_per_prim, and the payload comes from a CSO built by
 *    ilo_gpe_init_gs_cso_gen6() because the VS CSO cannot be reused;
 *  - otherwise a fallback with a URB read length of 1 in dw4 and statistics
 *    enabled in dw5.
 * The dwords written are: header, dw1, dw2, 0, dw4, dw5, dw6.
 * NOTE(review): the condition of the first branch, the case labels of the
 * verts_per_prim switch and the fallback values of dw1, dw2 and dw6 are not
 * visible in this listing.
 */
1069 gen6_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
1070 const struct ilo_shader_state
*gs
,
1071 const struct ilo_shader_state
*vs
,
1075 const uint8_t cmd_len
= 7;
1076 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_GS
) | (cmd_len
- 2);
1077 uint32_t dw1
, dw2
, dw4
, dw5
, dw6
;
1079 ILO_GPE_VALID_GEN(dev
, 6, 6);
1082 const struct ilo_shader_cso
*cso
;
1084 dw1
= ilo_shader_get_kernel_offset(gs
);
1086 cso
= ilo_shader_get_kernel_cso(gs
);
1087 dw2
= cso
->payload
[0];
1088 dw4
= cso
->payload
[1];
1089 dw5
= cso
->payload
[2];
1090 dw6
= cso
->payload
[3];
1092 else if (vs
&& ilo_shader_get_kernel_param(vs
, ILO_KERNEL_VS_GEN6_SO
)) {
1093 struct ilo_shader_cso cso
;
1094 enum ilo_kernel_param param
;
1096 switch (verts_per_prim
) {
1098 param
= ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
;
1101 param
= ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
;
1104 param
= ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
;
1108 dw1
= ilo_shader_get_kernel_offset(vs
) +
1109 ilo_shader_get_kernel_param(vs
, param
);
1111 /* cannot use VS's CSO */
1112 ilo_gpe_init_gs_cso_gen6(dev
, vs
, &cso
);
1113 dw2
= cso
.payload
[0];
1114 dw4
= cso
.payload
[1];
1115 dw5
= cso
.payload
[2];
1116 dw6
= cso
.payload
[3];
1121 dw4
= 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT
;
1122 dw5
= GEN6_GS_DW5_STATISTICS
;
1126 ilo_cp_begin(cp
, cmd_len
);
1127 ilo_cp_write(cp
, dw0
);
1128 ilo_cp_write(cp
, dw1
);
1129 ilo_cp_write(cp
, dw2
);
1130 ilo_cp_write(cp
, 0);
1131 ilo_cp_write(cp
, dw4
);
1132 ilo_cp_write(cp
, dw5
);
1133 ilo_cp_write(cp
, dw6
);
/*
 * Emit the 4-dword 3DSTATE_CLIP command.
 *
 * Valid on GEN6..GEN7.5.  dw1..dw3 start from rasterizer->clip.payload.
 * The guardband test is enabled in dw2 only when both enable_guardband and
 * rasterizer->clip.can_enable_guardband are set.  Non-perspective barycentric
 * support is enabled in dw2 when the fragment shader (if any) uses one of the
 * non-perspective interpolation modes.  dw3 additionally forces the render
 * target array index to zero and carries (num_viewports - 1).
 * NOTE(review): the declarations of interps and num_viewports, and any
 * alternative path for a NULL rasterizer, are not visible in this listing.
 */
1138 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info
*dev
,
1139 const struct ilo_rasterizer_state
*rasterizer
,
1140 const struct ilo_shader_state
*fs
,
1141 bool enable_guardband
,
1145 const uint8_t cmd_len
= 4;
1146 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_CLIP
) | (cmd_len
- 2);
1147 uint32_t dw1
, dw2
, dw3
;
1149 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1154 dw1
= rasterizer
->clip
.payload
[0];
1155 dw2
= rasterizer
->clip
.payload
[1];
1156 dw3
= rasterizer
->clip
.payload
[2];
1158 if (enable_guardband
&& rasterizer
->clip
.can_enable_guardband
)
1159 dw2
|= GEN6_CLIP_DW2_GB_TEST_ENABLE
;
1161 interps
= (fs
) ? ilo_shader_get_kernel_param(fs
,
1162 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) : 0;
1164 if (interps
& (GEN6_INTERP_NONPERSPECTIVE_PIXEL
|
1165 GEN6_INTERP_NONPERSPECTIVE_CENTROID
|
1166 GEN6_INTERP_NONPERSPECTIVE_SAMPLE
))
1167 dw2
|= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE
;
1169 dw3
|= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO
|
1170 (num_viewports
- 1);
1178 ilo_cp_begin(cp
, cmd_len
);
1179 ilo_cp_write(cp
, dw0
);
1180 ilo_cp_write(cp
, dw1
);
1181 ilo_cp_write(cp
, dw2
);
1182 ilo_cp_write(cp
, dw3
);
/*
 * Emit the 20-dword 3DSTATE_SF command (GEN6 only).
 *
 * The payload is assembled from two helpers:
 * ilo_gpe_gen6_fill_3dstate_sf_raster() fills 6 rasterizer dwords (called
 * with the literal 1 and PIPE_FORMAT_NONE) and
 * ilo_gpe_gen6_fill_3dstate_sf_sbe() fills 13 setup-backend dwords.
 * They are interleaved as: header, payload_sbe[0], the 6 raster dwords, then
 * the remaining 12 SBE dwords (1 + 1 + 6 + 12 = 20).
 */
1187 gen6_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
1188 const struct ilo_rasterizer_state
*rasterizer
,
1189 const struct ilo_shader_state
*fs
,
1192 const uint8_t cmd_len
= 20;
1193 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_SF
) | (cmd_len
- 2);
1194 uint32_t payload_raster
[6], payload_sbe
[13];
1196 ILO_GPE_VALID_GEN(dev
, 6, 6);
1198 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
, rasterizer
,
1199 1, PIPE_FORMAT_NONE
, payload_raster
, Elements(payload_raster
));
1200 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
,
1201 fs
, payload_sbe
, Elements(payload_sbe
));
1203 ilo_cp_begin(cp
, cmd_len
);
1204 ilo_cp_write(cp
, dw0
);
1205 ilo_cp_write(cp
, payload_sbe
[0]);
1206 ilo_cp_write_multi(cp
, payload_raster
, 6);
1207 ilo_cp_write_multi(cp
, &payload_sbe
[1], 12);
/*
 * Emit the 9-dword 3DSTATE_WM command (GEN6 only).
 *
 * One visible path emits a command with no kernel: zeros everywhere except
 * hiz_op in DW4 and a maximum thread count in DW5 (80 when dev->gt == 2,
 * otherwise 40), which is kept in range even when dispatch is disabled.
 * The other path starts from the fragment shader CSO payload
 * (dw2/dw4/dw5/dw6), adds the sampler count to dw2 in units of four samplers
 * (rounded up), and ORs in statistics, PS kill/enable and dual-source blend
 * bits plus rasterizer->wm.payload[0] and [1].  It then writes: header,
 * kernel offset, dw2, 0 (scratch), dw4, dw5, dw6, and zeros for kernels 1
 * and 2.
 * num_samples is a local constant equal to 1, so the visible
 * "num_samples > 1" MSAA branch is never taken.
 * NOTE(review): the declarations of num_samplers and hiz_op and the
 * conditions guarding the individual bits are not visible in this listing.
 */
1212 gen6_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
1213 const struct ilo_shader_state
*fs
,
1215 const struct ilo_rasterizer_state
*rasterizer
,
1216 bool dual_blend
, bool cc_may_kill
,
1220 const uint8_t cmd_len
= 9;
1221 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_WM
) | (cmd_len
- 2);
1222 const int num_samples
= 1;
1223 const struct ilo_shader_cso
*fs_cso
;
1224 uint32_t dw2
, dw4
, dw5
, dw6
;
1226 ILO_GPE_VALID_GEN(dev
, 6, 6);
1229 /* see brwCreateContext() */
1230 const int max_threads
= (dev
->gt
== 2) ? 80 : 40;
1232 ilo_cp_begin(cp
, cmd_len
);
1233 ilo_cp_write(cp
, dw0
);
1234 ilo_cp_write(cp
, 0);
1235 ilo_cp_write(cp
, 0);
1236 ilo_cp_write(cp
, 0);
1237 ilo_cp_write(cp
, hiz_op
);
1238 /* honor the valid range even if dispatching is disabled */
1239 ilo_cp_write(cp
, (max_threads
- 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT
);
1240 ilo_cp_write(cp
, 0);
1241 ilo_cp_write(cp
, 0);
1242 ilo_cp_write(cp
, 0);
1248 fs_cso
= ilo_shader_get_kernel_cso(fs
);
1249 dw2
= fs_cso
->payload
[0];
1250 dw4
= fs_cso
->payload
[1];
1251 dw5
= fs_cso
->payload
[2];
1252 dw6
= fs_cso
->payload
[3];
1254 dw2
|= (num_samplers
+ 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
1257 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1259 * "This bit (Statistics Enable) must be disabled if either of these
1260 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
1261 * Enable or Depth Buffer Resolve Enable."
1264 dw4
|= GEN6_WM_DW4_STATISTICS
;
1267 dw5
|= GEN6_WM_DW5_PS_KILL
| GEN6_WM_DW5_PS_ENABLE
;
1270 dw5
|= GEN6_WM_DW5_DUAL_SOURCE_BLEND
;
1272 dw5
|= rasterizer
->wm
.payload
[0];
1274 dw6
|= rasterizer
->wm
.payload
[1];
1276 if (num_samples
> 1) {
1277 dw6
|= rasterizer
->wm
.dw_msaa_rast
|
1278 rasterizer
->wm
.dw_msaa_disp
;
1281 ilo_cp_begin(cp
, cmd_len
);
1282 ilo_cp_write(cp
, dw0
);
1283 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
1284 ilo_cp_write(cp
, dw2
);
1285 ilo_cp_write(cp
, 0); /* scratch */
1286 ilo_cp_write(cp
, dw4
);
1287 ilo_cp_write(cp
, dw5
);
1288 ilo_cp_write(cp
, dw6
);
1289 ilo_cp_write(cp
, 0); /* kernel 1 */
1290 ilo_cp_write(cp
, 0); /* kernel 2 */
/*
 * Fill the four buffer dwords shared by the 3DSTATE_CONSTANT_* commands.
 *
 * num_dwords must be 4.  For each of the four slots, a buffer that is within
 * num_bufs and has a non-zero size gets dw[i] = bufs[i] | read_len, where
 * read_len is the size in 32-byte (256-bit) units, rounded up, minus one.
 * The buffer offset must be 32-byte aligned and read_len must be below 32.
 * The sum of the actual read lengths must not exceed max_read_length.
 * Returns unsigned (declared return type).
 * NOTE(review): the updates to the enabled mask, the handling of unused
 * slots and the return statement are not visible in this listing -
 * presumably the mask of enabled buffers is returned; confirm.
 */
1294 static inline unsigned
1295 gen6_fill_3dstate_constant(const struct ilo_dev_info
*dev
,
1296 const uint32_t *bufs
, const int *sizes
,
1297 int num_bufs
, int max_read_length
,
1298 uint32_t *dw
, int num_dwords
)
1300 unsigned enabled
= 0x0;
1301 int total_read_length
, i
;
1303 assert(num_dwords
== 4);
1305 total_read_length
= 0;
1306 for (i
= 0; i
< 4; i
++) {
1307 if (i
< num_bufs
&& sizes
[i
]) {
1308 /* in 256-bit units minus one */
1309 const int read_len
= (sizes
[i
] + 31) / 32 - 1;
1311 assert(bufs
[i
] % 32 == 0);
1312 assert(read_len
< 32);
1315 dw
[i
] = bufs
[i
] | read_len
;
1317 total_read_length
+= read_len
+ 1;
1324 assert(total_read_length
<= max_read_length
);
/*
 * Emit the 5-dword 3DSTATE_CONSTANT_VS command (GEN6 only).
 *
 * At most 4 buffers are accepted.  The buffer dwords are produced by
 * gen6_fill_3dstate_constant() with a maximum total read length of 32, and
 * the command is the header followed by those four dwords.
 * NOTE(review): how buf_enabled is folded into the header is not visible in
 * this listing.
 */
1330 gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info
*dev
,
1331 const uint32_t *bufs
, const int *sizes
,
1335 const uint8_t cmd_len
= 5;
1336 uint32_t buf_dw
[4], buf_enabled
;
1339 ILO_GPE_VALID_GEN(dev
, 6, 6);
1340 assert(num_bufs
<= 4);
1343 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
1345 * "The sum of all four read length fields (each incremented to
1346 * represent the actual read length) must be less than or equal to 32"
1348 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1349 bufs
, sizes
, num_bufs
, 32, buf_dw
, Elements(buf_dw
));
1351 dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_CONSTANT_VS
) |
1355 ilo_cp_begin(cp
, cmd_len
);
1356 ilo_cp_write(cp
, dw0
);
1357 ilo_cp_write(cp
, buf_dw
[0]);
1358 ilo_cp_write(cp
, buf_dw
[1]);
1359 ilo_cp_write(cp
, buf_dw
[2]);
1360 ilo_cp_write(cp
, buf_dw
[3]);
1365 gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info
*dev
,
1366 const uint32_t *bufs
, const int *sizes
,
1370 const uint8_t cmd_len
= 5;
1371 uint32_t buf_dw
[4], buf_enabled
;
1374 ILO_GPE_VALID_GEN(dev
, 6, 6);
1375 assert(num_bufs
<= 4);
1378 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
1380 * "The sum of all four read length fields (each incremented to
1381 * represent the actual read length) must be less than or equal to 64"
1383 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1384 bufs
, sizes
, num_bufs
, 64, buf_dw
, Elements(buf_dw
));
1386 dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_CONSTANT_GS
) |
1390 ilo_cp_begin(cp
, cmd_len
);
1391 ilo_cp_write(cp
, dw0
);
1392 ilo_cp_write(cp
, buf_dw
[0]);
1393 ilo_cp_write(cp
, buf_dw
[1]);
1394 ilo_cp_write(cp
, buf_dw
[2]);
1395 ilo_cp_write(cp
, buf_dw
[3]);
1400 gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info
*dev
,
1401 const uint32_t *bufs
, const int *sizes
,
1405 const uint8_t cmd_len
= 5;
1406 uint32_t buf_dw
[4], buf_enabled
;
1409 ILO_GPE_VALID_GEN(dev
, 6, 6);
1410 assert(num_bufs
<= 4);
1413 * From the Sandy Bridge PRM, volume 2 part 1, page 287:
1415 * "The sum of all four read length fields (each incremented to
1416 * represent the actual read length) must be less than or equal to 64"
1418 buf_enabled
= gen6_fill_3dstate_constant(dev
,
1419 bufs
, sizes
, num_bufs
, 64, buf_dw
, Elements(buf_dw
));
1421 dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_CONSTANT_PS
) |
1425 ilo_cp_begin(cp
, cmd_len
);
1426 ilo_cp_write(cp
, dw0
);
1427 ilo_cp_write(cp
, buf_dw
[0]);
1428 ilo_cp_write(cp
, buf_dw
[1]);
1429 ilo_cp_write(cp
, buf_dw
[2]);
1430 ilo_cp_write(cp
, buf_dw
[3]);
1435 gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info
*dev
,
1436 unsigned sample_mask
,
1439 const uint8_t cmd_len
= 2;
1440 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_SAMPLE_MASK
) |
1442 const unsigned valid_mask
= 0xf;
1444 ILO_GPE_VALID_GEN(dev
, 6, 6);
1446 sample_mask
&= valid_mask
;
1448 ilo_cp_begin(cp
, cmd_len
);
1449 ilo_cp_write(cp
, dw0
);
1450 ilo_cp_write(cp
, sample_mask
);
1455 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info
*dev
,
1456 unsigned x
, unsigned y
,
1457 unsigned width
, unsigned height
,
1460 const uint8_t cmd_len
= 4;
1461 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_DRAWING_RECTANGLE
) |
1463 unsigned xmax
= x
+ width
- 1;
1464 unsigned ymax
= y
+ height
- 1;
1467 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1469 if (dev
->gen
>= ILO_GEN(7)) {
1474 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1476 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1477 * must be an even number"
1484 if (x
> rect_limit
) x
= rect_limit
;
1485 if (y
> rect_limit
) y
= rect_limit
;
1486 if (xmax
> rect_limit
) xmax
= rect_limit
;
1487 if (ymax
> rect_limit
) ymax
= rect_limit
;
1489 ilo_cp_begin(cp
, cmd_len
);
1490 ilo_cp_write(cp
, dw0
);
1491 ilo_cp_write(cp
, y
<< 16 | x
);
1492 ilo_cp_write(cp
, ymax
<< 16 | xmax
);
1495 * There is no need to set the origin. It is intended to support front
1498 ilo_cp_write(cp
, 0);
1504 zs_align_surface(const struct ilo_dev_info
*dev
,
1505 unsigned align_w
, unsigned align_h
,
1506 struct ilo_zs_surface
*zs
)
1508 unsigned mask
, shift_w
, shift_h
;
1509 unsigned width
, height
;
1512 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1514 if (dev
->gen
>= ILO_GEN(7)) {
1525 dw3
= zs
->payload
[2];
1527 /* aligned width and height */
1528 width
= align(((dw3
>> shift_w
) & mask
) + 1, align_w
);
1529 height
= align(((dw3
>> shift_h
) & mask
) + 1, align_h
);
1531 dw3
= (dw3
& ~((mask
<< shift_w
) | (mask
<< shift_h
))) |
1532 (width
- 1) << shift_w
|
1533 (height
- 1) << shift_h
;
1535 zs
->payload
[2] = dw3
;
1539 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1540 const struct ilo_zs_surface
*zs
,
1543 const uint8_t cmd_len
= 7;
1546 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1548 dw0
= (dev
->gen
>= ILO_GEN(7)) ?
1549 GEN7_RENDER_CMD(3D
, 3DSTATE_DEPTH_BUFFER
) :
1550 GEN6_RENDER_CMD(3D
, 3DSTATE_DEPTH_BUFFER
);
1551 dw0
|= (cmd_len
- 2);
1553 ilo_cp_begin(cp
, cmd_len
);
1554 ilo_cp_write(cp
, dw0
);
1555 ilo_cp_write(cp
, zs
->payload
[0]);
1556 ilo_cp_write_bo(cp
, zs
->payload
[1], zs
->bo
, INTEL_RELOC_WRITE
);
1557 ilo_cp_write(cp
, zs
->payload
[2]);
1558 ilo_cp_write(cp
, zs
->payload
[3]);
1559 ilo_cp_write(cp
, zs
->payload
[4]);
1560 ilo_cp_write(cp
, zs
->payload
[5]);
1565 gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info
*dev
,
1566 int x_offset
, int y_offset
,
1569 const uint8_t cmd_len
= 2;
1570 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_POLY_STIPPLE_OFFSET
) |
1573 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1574 assert(x_offset
>= 0 && x_offset
<= 31);
1575 assert(y_offset
>= 0 && y_offset
<= 31);
1577 ilo_cp_begin(cp
, cmd_len
);
1578 ilo_cp_write(cp
, dw0
);
1579 ilo_cp_write(cp
, x_offset
<< 8 | y_offset
);
1584 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info
*dev
,
1585 const struct pipe_poly_stipple
*pattern
,
1588 const uint8_t cmd_len
= 33;
1589 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_POLY_STIPPLE_PATTERN
) |
1593 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1594 assert(Elements(pattern
->stipple
) == 32);
1596 ilo_cp_begin(cp
, cmd_len
);
1597 ilo_cp_write(cp
, dw0
);
1598 for (i
= 0; i
< 32; i
++)
1599 ilo_cp_write(cp
, pattern
->stipple
[i
]);
1604 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info
*dev
,
1605 unsigned pattern
, unsigned factor
,
1608 const uint8_t cmd_len
= 3;
1609 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_LINE_STIPPLE
) |
1613 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1614 assert((pattern
& 0xffff) == pattern
);
1615 assert(factor
>= 1 && factor
<= 256);
1617 ilo_cp_begin(cp
, cmd_len
);
1618 ilo_cp_write(cp
, dw0
);
1619 ilo_cp_write(cp
, pattern
);
1621 if (dev
->gen
>= ILO_GEN(7)) {
1623 inverse
= (unsigned) (65536.0f
/ factor
);
1624 ilo_cp_write(cp
, inverse
<< 15 | factor
);
1628 inverse
= (unsigned) (8192.0f
/ factor
);
1629 ilo_cp_write(cp
, inverse
<< 16 | factor
);
1636 gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info
*dev
,
1639 const uint8_t cmd_len
= 3;
1640 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_AA_LINE_PARAMETERS
) |
1643 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1645 ilo_cp_begin(cp
, cmd_len
);
1646 ilo_cp_write(cp
, dw0
);
1647 ilo_cp_write(cp
, 0 << 16 | 0);
1648 ilo_cp_write(cp
, 0 << 16 | 0);
1653 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info
*dev
,
1654 int index
, unsigned svbi
,
1656 bool load_vertex_count
,
1659 const uint8_t cmd_len
= 4;
1660 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_GS_SVB_INDEX
) |
1664 ILO_GPE_VALID_GEN(dev
, 6, 6);
1665 assert(index
>= 0 && index
< 4);
1667 dw1
= index
<< GEN6_SVBI_DW1_INDEX__SHIFT
;
1668 if (load_vertex_count
)
1669 dw1
|= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT
;
1671 ilo_cp_begin(cp
, cmd_len
);
1672 ilo_cp_write(cp
, dw0
);
1673 ilo_cp_write(cp
, dw1
);
1674 ilo_cp_write(cp
, svbi
);
1675 ilo_cp_write(cp
, max_svbi
);
1680 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info
*dev
,
1682 const uint32_t *packed_sample_pos
,
1683 bool pixel_location_center
,
1686 const uint8_t cmd_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 3;
1687 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_MULTISAMPLE
) |
1689 uint32_t dw1
, dw2
, dw3
;
1691 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1693 dw1
= (pixel_location_center
) ?
1694 GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER
: GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER
;
1696 switch (num_samples
) {
1699 dw1
|= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1
;
1704 dw1
|= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4
;
1705 dw2
= packed_sample_pos
[0];
1709 assert(dev
->gen
>= ILO_GEN(7));
1710 dw1
|= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8
;
1711 dw2
= packed_sample_pos
[0];
1712 dw3
= packed_sample_pos
[1];
1715 assert(!"unsupported sample count");
1716 dw1
|= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1
;
1722 ilo_cp_begin(cp
, cmd_len
);
1723 ilo_cp_write(cp
, dw0
);
1724 ilo_cp_write(cp
, dw1
);
1725 ilo_cp_write(cp
, dw2
);
1726 if (dev
->gen
>= ILO_GEN(7))
1727 ilo_cp_write(cp
, dw3
);
1732 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info
*dev
,
1733 const struct ilo_zs_surface
*zs
,
1736 const uint8_t cmd_len
= 3;
1739 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1741 dw0
= (dev
->gen
>= ILO_GEN(7)) ?
1742 GEN7_RENDER_CMD(3D
, 3DSTATE_STENCIL_BUFFER
) :
1743 GEN6_RENDER_CMD(3D
, 3DSTATE_STENCIL_BUFFER
);
1744 dw0
|= (cmd_len
- 2);
1746 ilo_cp_begin(cp
, cmd_len
);
1747 ilo_cp_write(cp
, dw0
);
1748 /* see ilo_gpe_init_zs_surface() */
1749 ilo_cp_write(cp
, zs
->payload
[6]);
1750 ilo_cp_write_bo(cp
, zs
->payload
[7], zs
->separate_s8_bo
, INTEL_RELOC_WRITE
);
1755 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1756 const struct ilo_zs_surface
*zs
,
1759 const uint8_t cmd_len
= 3;
1762 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1764 dw0
= (dev
->gen
>= ILO_GEN(7)) ?
1765 GEN7_RENDER_CMD(3D
, 3DSTATE_HIER_DEPTH_BUFFER
) :
1766 GEN6_RENDER_CMD(3D
, 3DSTATE_HIER_DEPTH_BUFFER
);
1767 dw0
|= (cmd_len
- 2);
1769 ilo_cp_begin(cp
, cmd_len
);
1770 ilo_cp_write(cp
, dw0
);
1771 /* see ilo_gpe_init_zs_surface() */
1772 ilo_cp_write(cp
, zs
->payload
[8]);
1773 ilo_cp_write_bo(cp
, zs
->payload
[9], zs
->hiz_bo
, INTEL_RELOC_WRITE
);
1778 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
1782 const uint8_t cmd_len
= 2;
1783 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_CLEAR_PARAMS
) |
1784 GEN6_CLEAR_PARAMS_DW0_VALID
|
1787 ILO_GPE_VALID_GEN(dev
, 6, 6);
1789 ilo_cp_begin(cp
, cmd_len
);
1790 ilo_cp_write(cp
, dw0
);
1791 ilo_cp_write(cp
, clear_val
);
1796 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info
*dev
,
1798 struct intel_bo
*bo
, uint32_t bo_offset
,
1802 const uint8_t cmd_len
= (write_qword
) ? 5 : 4;
1803 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, PIPE_CONTROL
) | (cmd_len
- 2);
1804 uint32_t reloc_flags
= INTEL_RELOC_WRITE
;
1806 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1808 assert(bo_offset
% ((write_qword
) ? 8 : 4) == 0);
1810 if (dw1
& GEN6_PIPE_CONTROL_CS_STALL
) {
1812 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1814 * "1 of the following must also be set (when CS stall is set):
1816 * * Depth Cache Flush Enable ([0] of DW1)
1817 * * Stall at Pixel Scoreboard ([1] of DW1)
1818 * * Depth Stall ([13] of DW1)
1819 * * Post-Sync Operation ([13] of DW1)
1820 * * Render Target Cache Flush Enable ([12] of DW1)
1821 * * Notify Enable ([8] of DW1)"
1823 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1825 * "One of the following must also be set (when CS stall is set):
1827 * * Render Target Cache Flush Enable ([12] of DW1)
1828 * * Depth Cache Flush Enable ([0] of DW1)
1829 * * Stall at Pixel Scoreboard ([1] of DW1)
1830 * * Depth Stall ([13] of DW1)
1831 * * Post-Sync Operation ([13] of DW1)"
1833 uint32_t bit_test
= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH
|
1834 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1835 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL
|
1836 GEN6_PIPE_CONTROL_DEPTH_STALL
;
1839 bit_test
|= GEN6_PIPE_CONTROL_WRITE_IMM
|
1840 GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT
|
1841 GEN6_PIPE_CONTROL_WRITE_TIMESTAMP
;
1843 if (dev
->gen
== ILO_GEN(6))
1844 bit_test
|= GEN6_PIPE_CONTROL_NOTIFY_ENABLE
;
1846 assert(dw1
& bit_test
);
1849 if (dw1
& GEN6_PIPE_CONTROL_DEPTH_STALL
) {
1851 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1853 * "Following bits must be clear (when Depth Stall is set):
1855 * * Render Target Cache Flush Enable ([12] of DW1)
1856 * * Depth Cache Flush Enable ([0] of DW1)"
1858 assert(!(dw1
& (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH
|
1859 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH
)));
1863 * From the Sandy Bridge PRM, volume 1 part 3, page 19:
1865 * "[DevSNB] PPGTT memory writes by MI_* (such as MI_STORE_DATA_IMM)
1866 * and PIPE_CONTROL are not supported."
1868 * The kernel will add the mapping automatically (when write domain is
1869 * INTEL_DOMAIN_INSTRUCTION).
1871 if (dev
->gen
== ILO_GEN(6) && bo
) {
1872 bo_offset
|= GEN6_PIPE_CONTROL_DW2_USE_GGTT
;
1873 reloc_flags
|= INTEL_RELOC_GGTT
;
1876 ilo_cp_begin(cp
, cmd_len
);
1877 ilo_cp_write(cp
, dw0
);
1878 ilo_cp_write(cp
, dw1
);
1879 ilo_cp_write_bo(cp
, bo_offset
, bo
, reloc_flags
);
1880 ilo_cp_write(cp
, 0);
1882 ilo_cp_write(cp
, 0);
1887 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
1888 const struct pipe_draw_info
*info
,
1889 const struct ilo_ib_state
*ib
,
1893 const uint8_t cmd_len
= 6;
1894 const int prim
= (rectlist
) ?
1895 GEN6_3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
1896 const int vb_access
= (info
->indexed
) ?
1897 GEN6_3DPRIM_DW0_ACCESS_RANDOM
: GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL
;
1898 const uint32_t vb_start
= info
->start
+
1899 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
1902 ILO_GPE_VALID_GEN(dev
, 6, 6);
1904 dw0
= GEN6_RENDER_CMD(3D
, 3DPRIMITIVE
) |
1906 prim
<< GEN6_3DPRIM_DW0_TYPE__SHIFT
|
1909 ilo_cp_begin(cp
, cmd_len
);
1910 ilo_cp_write(cp
, dw0
);
1911 ilo_cp_write(cp
, info
->count
);
1912 ilo_cp_write(cp
, vb_start
);
1913 ilo_cp_write(cp
, info
->instance_count
);
1914 ilo_cp_write(cp
, info
->start_instance
);
1915 ilo_cp_write(cp
, info
->index_bias
);
1919 static inline uint32_t
1920 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info
*dev
,
1921 const struct ilo_shader_state
**cs
,
1922 uint32_t *sampler_state
,
1924 uint32_t *binding_table_state
,
1930 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
1932 * "(Interface Descriptor Total Length) This field must have the same
1933 * alignment as the Interface Descriptor Data Start Address.
1935 * It must be DQWord (32-byte) aligned..."
1937 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
1939 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
1940 * aligned address of the Interface Descriptor data."
1942 const int state_align
= 32 / 4;
1943 const int state_len
= (32 / 4) * num_ids
;
1944 uint32_t state_offset
, *dw
;
1947 ILO_GPE_VALID_GEN(dev
, 6, 6);
1949 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_BLOB
,
1950 state_len
, state_align
, &state_offset
);
1952 for (i
= 0; i
< num_ids
; i
++) {
1953 dw
[0] = ilo_shader_get_kernel_offset(cs
[i
]);
1954 dw
[1] = 1 << 18; /* SPF */
1955 dw
[2] = sampler_state
[i
] |
1956 (num_samplers
[i
] + 3) / 4 << 2;
1957 dw
[3] = binding_table_state
[i
] |
1959 dw
[4] = 0 << 16 | /* CURBE Read Length */
1960 0; /* CURBE Read Offset */
1961 dw
[5] = 0; /* Barrier ID */
1968 return state_offset
;
1971 static inline uint32_t
1972 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info
*dev
,
1973 const struct ilo_viewport_cso
*viewports
,
1974 unsigned num_viewports
,
1977 const int state_align
= 32 / 4;
1978 const int state_len
= 8 * num_viewports
;
1979 uint32_t state_offset
, *dw
;
1982 ILO_GPE_VALID_GEN(dev
, 6, 6);
1985 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1987 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1988 * stored as an array of up to 16 elements..."
1990 assert(num_viewports
&& num_viewports
<= 16);
1992 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_SF_VIEWPORT
,
1993 state_len
, state_align
, &state_offset
);
1995 for (i
= 0; i
< num_viewports
; i
++) {
1996 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1998 dw
[0] = fui(vp
->m00
);
1999 dw
[1] = fui(vp
->m11
);
2000 dw
[2] = fui(vp
->m22
);
2001 dw
[3] = fui(vp
->m30
);
2002 dw
[4] = fui(vp
->m31
);
2003 dw
[5] = fui(vp
->m32
);
2010 return state_offset
;
2013 static inline uint32_t
2014 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
2015 const struct ilo_viewport_cso
*viewports
,
2016 unsigned num_viewports
,
2019 const int state_align
= 32 / 4;
2020 const int state_len
= 4 * num_viewports
;
2021 uint32_t state_offset
, *dw
;
2024 ILO_GPE_VALID_GEN(dev
, 6, 6);
2027 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
2029 * "The viewport-related state is stored as an array of up to 16
2032 assert(num_viewports
&& num_viewports
<= 16);
2034 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_CLIP_VIEWPORT
,
2035 state_len
, state_align
, &state_offset
);
2037 for (i
= 0; i
< num_viewports
; i
++) {
2038 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
2040 dw
[0] = fui(vp
->min_gbx
);
2041 dw
[1] = fui(vp
->max_gbx
);
2042 dw
[2] = fui(vp
->min_gby
);
2043 dw
[3] = fui(vp
->max_gby
);
2048 return state_offset
;
2051 static inline uint32_t
2052 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info
*dev
,
2053 const struct ilo_viewport_cso
*viewports
,
2054 unsigned num_viewports
,
2057 const int state_align
= 32 / 4;
2058 const int state_len
= 2 * num_viewports
;
2059 uint32_t state_offset
, *dw
;
2062 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2065 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
2067 * "The viewport state is stored as an array of up to 16 elements..."
2069 assert(num_viewports
&& num_viewports
<= 16);
2071 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_CC_VIEWPORT
,
2072 state_len
, state_align
, &state_offset
);
2074 for (i
= 0; i
< num_viewports
; i
++) {
2075 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
2077 dw
[0] = fui(vp
->min_z
);
2078 dw
[1] = fui(vp
->max_z
);
2083 return state_offset
;
2086 static inline uint32_t
2087 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info
*dev
,
2088 const struct pipe_stencil_ref
*stencil_ref
,
2090 const struct pipe_blend_color
*blend_color
,
2093 const int state_align
= 64 / 4;
2094 const int state_len
= 6;
2095 uint32_t state_offset
, *dw
;
2097 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2099 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_COLOR_CALC
,
2100 state_len
, state_align
, &state_offset
);
2102 dw
[0] = stencil_ref
->ref_value
[0] << 24 |
2103 stencil_ref
->ref_value
[1] << 16 |
2104 GEN6_CC_DW0_ALPHATEST_UNORM8
;
2106 dw
[2] = fui(blend_color
->color
[0]);
2107 dw
[3] = fui(blend_color
->color
[1]);
2108 dw
[4] = fui(blend_color
->color
[2]);
2109 dw
[5] = fui(blend_color
->color
[3]);
2111 return state_offset
;
2114 static inline uint32_t
2115 gen6_emit_BLEND_STATE(const struct ilo_dev_info
*dev
,
2116 const struct ilo_blend_state
*blend
,
2117 const struct ilo_fb_state
*fb
,
2118 const struct ilo_dsa_state
*dsa
,
2121 const int state_align
= 64 / 4;
2123 uint32_t state_offset
, *dw
;
2124 unsigned num_targets
, i
;
2126 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2129 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2131 * "The blend state is stored as an array of up to 8 elements..."
2133 num_targets
= fb
->state
.nr_cbufs
;
2134 assert(num_targets
<= 8);
2139 /* to be able to reference alpha func */
2143 state_len
= 2 * num_targets
;
2145 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_BLEND
,
2146 state_len
, state_align
, &state_offset
);
2148 for (i
= 0; i
< num_targets
; i
++) {
2149 const unsigned idx
= (blend
->independent_blend_enable
) ? i
: 0;
2150 const struct ilo_blend_cso
*cso
= &blend
->cso
[idx
];
2151 const int num_samples
= fb
->num_samples
;
2152 const struct util_format_description
*format_desc
=
2153 (idx
< fb
->state
.nr_cbufs
&& fb
->state
.cbufs
[idx
]) ?
2154 util_format_description(fb
->state
.cbufs
[idx
]->format
) : NULL
;
2155 bool rt_is_unorm
, rt_is_pure_integer
, rt_dst_alpha_forced_one
;
2158 rt_is_pure_integer
= false;
2159 rt_dst_alpha_forced_one
= false;
2164 switch (format_desc
->format
) {
2165 case PIPE_FORMAT_B8G8R8X8_UNORM
:
2166 /* force alpha to one when the HW format has alpha */
2167 assert(ilo_translate_render_format(dev
, PIPE_FORMAT_B8G8R8X8_UNORM
)
2168 == GEN6_FORMAT_B8G8R8A8_UNORM
);
2169 rt_dst_alpha_forced_one
= true;
2175 for (ch
= 0; ch
< 4; ch
++) {
2176 if (format_desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_VOID
)
2179 if (format_desc
->channel
[ch
].pure_integer
) {
2180 rt_is_unorm
= false;
2181 rt_is_pure_integer
= true;
2185 if (!format_desc
->channel
[ch
].normalized
||
2186 format_desc
->channel
[ch
].type
!= UTIL_FORMAT_TYPE_UNSIGNED
)
2187 rt_is_unorm
= false;
2191 dw
[0] = cso
->payload
[0];
2192 dw
[1] = cso
->payload
[1];
2194 if (!rt_is_pure_integer
) {
2195 if (rt_dst_alpha_forced_one
)
2196 dw
[0] |= cso
->dw_blend_dst_alpha_forced_one
;
2198 dw
[0] |= cso
->dw_blend
;
2202 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2204 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2205 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2207 * Since logicop is ignored for non-UNORM color buffers, no special care
2211 dw
[1] |= cso
->dw_logicop
;
2214 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2216 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2217 * Dither both must be disabled."
2219 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2220 * requires that anyway.
2222 if (num_samples
> 1)
2223 dw
[1] |= cso
->dw_alpha_mod
;
2226 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2228 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2231 if (!rt_is_pure_integer
)
2232 dw
[1] |= dsa
->dw_alpha
;
2237 return state_offset
;
2240 static inline uint32_t
2241 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info
*dev
,
2242 const struct ilo_dsa_state
*dsa
,
2245 const int state_align
= 64 / 4;
2246 const int state_len
= 3;
2247 uint32_t state_offset
, *dw
;
2250 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2252 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_DEPTH_STENCIL
,
2253 state_len
, state_align
, &state_offset
);
2255 dw
[0] = dsa
->payload
[0];
2256 dw
[1] = dsa
->payload
[1];
2257 dw
[2] = dsa
->payload
[2];
2259 return state_offset
;
2262 static inline uint32_t
2263 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info
*dev
,
2264 const struct ilo_scissor_state
*scissor
,
2265 unsigned num_viewports
,
2268 const int state_align
= 32 / 4;
2269 const int state_len
= 2 * num_viewports
;
2270 uint32_t state_offset
, *dw
;
2272 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2275 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2277 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2278 * stored as an array of up to 16 elements..."
2280 assert(num_viewports
&& num_viewports
<= 16);
2282 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_SCISSOR_RECT
,
2283 state_len
, state_align
, &state_offset
);
2285 memcpy(dw
, scissor
->payload
, state_len
* 4);
2287 return state_offset
;
2290 static inline uint32_t
2291 gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info
*dev
,
2292 uint32_t *surface_states
,
2293 int num_surface_states
,
2296 const int state_align
= 32 / 4;
2297 const int state_len
= num_surface_states
;
2298 uint32_t state_offset
, *dw
;
2300 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2303 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
2305 * "It is stored as an array of up to 256 elements..."
2307 assert(num_surface_states
<= 256);
2309 if (!num_surface_states
)
2312 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_BINDING_TABLE
,
2313 state_len
, state_align
, &state_offset
);
2314 memcpy(dw
, surface_states
,
2315 num_surface_states
* sizeof(surface_states
[0]));
2317 return state_offset
;
2320 static inline uint32_t
2321 gen6_emit_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2322 const struct ilo_view_surface
*surf
,
2326 const int state_align
= 32 / 4;
2327 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 8 : 6;
2328 uint32_t state_offset
;
2330 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2332 ilo_cp_steal(cp
, ILO_BUILDER_ITEM_SURFACE
,
2333 state_len
, state_align
, &state_offset
);
2335 STATIC_ASSERT(Elements(surf
->payload
) >= 8);
2337 ilo_cp_write(cp
, surf
->payload
[0]);
2338 ilo_cp_write_bo(cp
, surf
->payload
[1], surf
->bo
,
2339 (for_render
) ? INTEL_RELOC_WRITE
: 0);
2340 ilo_cp_write(cp
, surf
->payload
[2]);
2341 ilo_cp_write(cp
, surf
->payload
[3]);
2342 ilo_cp_write(cp
, surf
->payload
[4]);
2343 ilo_cp_write(cp
, surf
->payload
[5]);
2345 if (dev
->gen
>= ILO_GEN(7)) {
2346 ilo_cp_write(cp
, surf
->payload
[6]);
2347 ilo_cp_write(cp
, surf
->payload
[7]);
2352 return state_offset
;
2355 static inline uint32_t
2356 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2357 const struct pipe_stream_output_target
*so
,
2358 const struct pipe_stream_output_info
*so_info
,
2362 struct ilo_buffer
*buf
= ilo_buffer(so
->buffer
);
2363 unsigned bo_offset
, struct_size
;
2364 enum pipe_format elem_format
;
2365 struct ilo_view_surface surf
;
2367 ILO_GPE_VALID_GEN(dev
, 6, 6);
2369 bo_offset
= so
->buffer_offset
+ so_info
->output
[so_index
].dst_offset
* 4;
2370 struct_size
= so_info
->stride
[so_info
->output
[so_index
].output_buffer
] * 4;
2372 switch (so_info
->output
[so_index
].num_components
) {
2374 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2377 elem_format
= PIPE_FORMAT_R32G32_FLOAT
;
2380 elem_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
2383 elem_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
2386 assert(!"unexpected SO components length");
2387 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2391 ilo_gpe_init_view_surface_for_buffer_gen6(dev
, buf
, bo_offset
, so
->buffer_size
,
2392 struct_size
, elem_format
, false, true, &surf
);
2394 return gen6_emit_SURFACE_STATE(dev
, &surf
, false, cp
);
2397 static inline uint32_t
2398 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info
*dev
,
2399 const struct ilo_sampler_cso
* const *samplers
,
2400 const struct pipe_sampler_view
* const *views
,
2401 const uint32_t *sampler_border_colors
,
2405 const int state_align
= 32 / 4;
2406 const int state_len
= 4 * num_samplers
;
2407 uint32_t state_offset
, *dw
;
2410 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2413 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2415 * "The sampler state is stored as an array of up to 16 elements..."
2417 assert(num_samplers
<= 16);
2422 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_SAMPLER
,
2423 state_len
, state_align
, &state_offset
);
2425 for (i
= 0; i
< num_samplers
; i
++) {
2426 const struct ilo_sampler_cso
*sampler
= samplers
[i
];
2427 const struct pipe_sampler_view
*view
= views
[i
];
2428 const uint32_t border_color
= sampler_border_colors
[i
];
2429 uint32_t dw_filter
, dw_wrap
;
2431 /* there may be holes */
2432 if (!sampler
|| !view
) {
2433 /* disabled sampler */
2443 /* determine filter and wrap modes */
2444 switch (view
->texture
->target
) {
2445 case PIPE_TEXTURE_1D
:
2446 dw_filter
= (sampler
->anisotropic
) ?
2447 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2448 dw_wrap
= sampler
->dw_wrap_1d
;
2450 case PIPE_TEXTURE_3D
:
2452 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2454 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2455 * surfaces of type SURFTYPE_3D."
2457 dw_filter
= sampler
->dw_filter
;
2458 dw_wrap
= sampler
->dw_wrap
;
2460 case PIPE_TEXTURE_CUBE
:
2461 dw_filter
= (sampler
->anisotropic
) ?
2462 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2463 dw_wrap
= sampler
->dw_wrap_cube
;
2466 dw_filter
= (sampler
->anisotropic
) ?
2467 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2468 dw_wrap
= sampler
->dw_wrap
;
2472 dw
[0] = sampler
->payload
[0];
2473 dw
[1] = sampler
->payload
[1];
2474 assert(!(border_color
& 0x1f));
2475 dw
[2] = border_color
;
2476 dw
[3] = sampler
->payload
[2];
2480 if (dev
->gen
>= ILO_GEN(7)) {
2485 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2487 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2488 * where the min and mag mode filters are different and
2489 * SurfMinLOD is nonzero. The determination of MagMode uses the
2490 * following equation instead of the one in the above
2491 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2493 * As a way to work around that, we set Base to
2494 * view->u.tex.first_level.
2496 dw
[0] |= view
->u
.tex
.first_level
<< 22;
2504 return state_offset
;
2507 static inline uint32_t
2508 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info
*dev
,
2509 const struct ilo_sampler_cso
*sampler
,
2512 const int state_align
= 32 / 4;
2513 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 12;
2514 uint32_t state_offset
, *dw
;
2516 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2518 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_BLOB
,
2519 state_len
, state_align
, &state_offset
);
2521 /* see ilo_gpe_init_sampler_cso() */
2522 memcpy(dw
, &sampler
->payload
[3], state_len
* 4);
2524 return state_offset
;
2527 static inline uint32_t
2528 gen6_emit_push_constant_buffer(const struct ilo_dev_info
*dev
,
2529 int size
, void **pcb
,
2533 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
2534 * to 32 bytes, and their sizes are specified in 256-bit units.
2536 const int state_align
= 32 / 4;
2537 const int state_len
= align(size
, 32) / 4;
2538 uint32_t state_offset
;
2541 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2543 buf
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_BLOB
,
2544 state_len
, state_align
, &state_offset
);
2546 /* zero out the unused range */
2547 if (size
< state_len
* 4)
2548 memset(&buf
[size
], 0, state_len
* 4 - size
);
2553 return state_offset
;
2556 #endif /* ILO_GPE_GEN6_H */