2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
31 #include "genhw/genhw.h"
32 #include "intel_winsys.h"
34 #include "ilo_common.h"
36 #include "ilo_format.h"
37 #include "ilo_resource.h"
38 #include "ilo_shader.h"
41 #define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
42 assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
44 #define ILO_GPE_MI(op) (0x0 << 29 | (op) << 23)
46 #define ILO_GPE_CMD(pipeline, op, subop) \
47 (0x3 << 29 | (pipeline) << 27 | (op) << 24 | (subop) << 16)
50 * Translate winsys tiling to hardware tiling.
53 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling
)
56 case INTEL_TILING_NONE
:
57 return GEN6_TILING_NONE
;
63 assert(!"unknown tiling");
64 return GEN6_TILING_NONE
;
69 * Translate a pipe primitive type to the matching hardware primitive type.
72 ilo_gpe_gen6_translate_pipe_prim(unsigned prim
)
74 static const int prim_mapping
[PIPE_PRIM_MAX
] = {
75 [PIPE_PRIM_POINTS
] = GEN6_3DPRIM_POINTLIST
,
76 [PIPE_PRIM_LINES
] = GEN6_3DPRIM_LINELIST
,
77 [PIPE_PRIM_LINE_LOOP
] = GEN6_3DPRIM_LINELOOP
,
78 [PIPE_PRIM_LINE_STRIP
] = GEN6_3DPRIM_LINESTRIP
,
79 [PIPE_PRIM_TRIANGLES
] = GEN6_3DPRIM_TRILIST
,
80 [PIPE_PRIM_TRIANGLE_STRIP
] = GEN6_3DPRIM_TRISTRIP
,
81 [PIPE_PRIM_TRIANGLE_FAN
] = GEN6_3DPRIM_TRIFAN
,
82 [PIPE_PRIM_QUADS
] = GEN6_3DPRIM_QUADLIST
,
83 [PIPE_PRIM_QUAD_STRIP
] = GEN6_3DPRIM_QUADSTRIP
,
84 [PIPE_PRIM_POLYGON
] = GEN6_3DPRIM_POLYGON
,
85 [PIPE_PRIM_LINES_ADJACENCY
] = GEN6_3DPRIM_LINELIST_ADJ
,
86 [PIPE_PRIM_LINE_STRIP_ADJACENCY
] = GEN6_3DPRIM_LINESTRIP_ADJ
,
87 [PIPE_PRIM_TRIANGLES_ADJACENCY
] = GEN6_3DPRIM_TRILIST_ADJ
,
88 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY
] = GEN6_3DPRIM_TRISTRIP_ADJ
,
91 assert(prim_mapping
[prim
]);
93 return prim_mapping
[prim
];
97 * Translate a pipe texture target to the matching hardware surface type.
100 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target
)
104 return GEN6_SURFTYPE_BUFFER
;
105 case PIPE_TEXTURE_1D
:
106 case PIPE_TEXTURE_1D_ARRAY
:
107 return GEN6_SURFTYPE_1D
;
108 case PIPE_TEXTURE_2D
:
109 case PIPE_TEXTURE_RECT
:
110 case PIPE_TEXTURE_2D_ARRAY
:
111 return GEN6_SURFTYPE_2D
;
112 case PIPE_TEXTURE_3D
:
113 return GEN6_SURFTYPE_3D
;
114 case PIPE_TEXTURE_CUBE
:
115 case PIPE_TEXTURE_CUBE_ARRAY
:
116 return GEN6_SURFTYPE_CUBE
;
118 assert(!"unknown texture target");
119 return GEN6_SURFTYPE_BUFFER
;
124 * Fill in DW2 to DW7 of 3DSTATE_SF.
127 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info
*dev
,
128 const struct ilo_rasterizer_state
*rasterizer
,
130 enum pipe_format depth_format
,
131 uint32_t *payload
, unsigned payload_len
)
133 assert(payload_len
== Elements(rasterizer
->sf
.payload
));
136 const struct ilo_rasterizer_sf
*sf
= &rasterizer
->sf
;
138 memcpy(payload
, sf
->payload
, sizeof(sf
->payload
));
140 payload
[1] |= sf
->dw_msaa
;
144 payload
[1] = (num_samples
> 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN
: 0;
151 if (dev
->gen
>= ILO_GEN(7)) {
154 /* separate stencil */
155 switch (depth_format
) {
156 case PIPE_FORMAT_Z16_UNORM
:
157 format
= GEN6_ZFORMAT_D16_UNORM
;
159 case PIPE_FORMAT_Z32_FLOAT
:
160 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
161 format
= GEN6_ZFORMAT_D32_FLOAT
;
163 case PIPE_FORMAT_Z24X8_UNORM
:
164 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
165 format
= GEN6_ZFORMAT_D24_UNORM_X8_UINT
;
168 /* FLOAT surface is assumed when there is no depth buffer */
169 format
= GEN6_ZFORMAT_D32_FLOAT
;
173 payload
[0] |= format
<< GEN7_SF_DW1_DEPTH_FORMAT__SHIFT
;
178 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
181 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info
*dev
,
182 const struct ilo_rasterizer_state
*rasterizer
,
183 const struct ilo_shader_state
*fs
,
184 uint32_t *dw
, int num_dwords
)
186 int output_count
, vue_offset
, vue_len
;
187 const struct ilo_kernel_routing
*routing
;
189 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
190 assert(num_dwords
== 13);
193 memset(dw
, 0, sizeof(dw
[0]) * num_dwords
);
194 dw
[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT
;
198 output_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
199 assert(output_count
<= 32);
201 routing
= ilo_shader_get_kernel_routing(fs
);
203 vue_offset
= routing
->source_skip
;
204 assert(vue_offset
% 2 == 0);
207 vue_len
= (routing
->source_len
+ 1) / 2;
211 dw
[0] = output_count
<< GEN7_SBE_DW1_ATTR_COUNT__SHIFT
|
212 vue_len
<< GEN7_SBE_DW1_URB_READ_LEN__SHIFT
|
213 vue_offset
<< GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT
;
214 if (routing
->swizzle_enable
)
215 dw
[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE
;
217 switch (rasterizer
->state
.sprite_coord_mode
) {
218 case PIPE_SPRITE_COORD_UPPER_LEFT
:
219 dw
[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT
;
221 case PIPE_SPRITE_COORD_LOWER_LEFT
:
222 dw
[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT
;
226 STATIC_ASSERT(Elements(routing
->swizzles
) >= 16);
227 memcpy(&dw
[1], routing
->swizzles
, 2 * 16);
230 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
232 * "This field (Point Sprite Texture Coordinate Enable) must be
233 * programmed to 0 when non-point primitives are rendered."
235 * TODO We do not check that yet.
237 dw
[9] = routing
->point_sprite_enable
;
239 dw
[10] = routing
->const_interp_enable
;
241 /* WrapShortest enables */
247 gen6_emit_MI_STORE_DATA_IMM(const struct ilo_dev_info
*dev
,
248 struct intel_bo
*bo
, uint32_t bo_offset
,
249 uint64_t val
, bool store_qword
,
252 const uint32_t cmd
= ILO_GPE_MI(0x20);
253 const uint8_t cmd_len
= (store_qword
) ? 5 : 4;
254 /* must use GGTT on GEN6 as in PIPE_CONTROL */
255 const uint32_t cmd_flags
= (dev
->gen
== ILO_GEN(6)) ? (1 << 22) : 0;
256 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
257 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
259 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
261 assert(bo_offset
% ((store_qword
) ? 8 : 4) == 0);
263 ilo_cp_begin(cp
, cmd_len
);
264 ilo_cp_write(cp
, cmd
| cmd_flags
| (cmd_len
- 2));
266 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
267 ilo_cp_write(cp
, (uint32_t) val
);
270 ilo_cp_write(cp
, (uint32_t) (val
>> 32));
272 assert(val
== (uint64_t) ((uint32_t) val
));
278 gen6_emit_MI_LOAD_REGISTER_IMM(const struct ilo_dev_info
*dev
,
279 uint32_t reg
, uint32_t val
,
282 const uint32_t cmd
= ILO_GPE_MI(0x22);
283 const uint8_t cmd_len
= 3;
285 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
287 assert(reg
% 4 == 0);
289 ilo_cp_begin(cp
, cmd_len
);
290 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
291 ilo_cp_write(cp
, reg
);
292 ilo_cp_write(cp
, val
);
297 gen6_emit_MI_STORE_REGISTER_MEM(const struct ilo_dev_info
*dev
,
298 struct intel_bo
*bo
, uint32_t bo_offset
,
299 uint32_t reg
, struct ilo_cp
*cp
)
301 const uint32_t cmd
= ILO_GPE_MI(0x24);
302 const uint8_t cmd_len
= 3;
303 /* must use GGTT on GEN6 as in PIPE_CONTROL */
304 const uint32_t cmd_flags
= (dev
->gen
== ILO_GEN(6)) ? (1 << 22) : 0;
305 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
306 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
308 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
310 assert(reg
% 4 == 0 && bo_offset
% 4 == 0);
312 ilo_cp_begin(cp
, cmd_len
);
313 ilo_cp_write(cp
, cmd
| cmd_flags
| (cmd_len
- 2));
314 ilo_cp_write(cp
, reg
);
315 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
320 gen6_emit_MI_REPORT_PERF_COUNT(const struct ilo_dev_info
*dev
,
321 struct intel_bo
*bo
, uint32_t bo_offset
,
322 uint32_t report_id
, struct ilo_cp
*cp
)
324 const uint32_t cmd
= ILO_GPE_MI(0x28);
325 const uint8_t cmd_len
= 3;
326 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
327 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
329 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
331 assert(bo_offset
% 64 == 0);
333 /* must use GGTT on GEN6 as in PIPE_CONTROL */
334 if (dev
->gen
== ILO_GEN(6))
337 ilo_cp_begin(cp
, cmd_len
);
338 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
339 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
340 ilo_cp_write(cp
, report_id
);
345 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info
*dev
,
346 struct intel_bo
*general_state_bo
,
347 struct intel_bo
*surface_state_bo
,
348 struct intel_bo
*dynamic_state_bo
,
349 struct intel_bo
*indirect_object_bo
,
350 struct intel_bo
*instruction_bo
,
351 uint32_t general_state_size
,
352 uint32_t dynamic_state_size
,
353 uint32_t indirect_object_size
,
354 uint32_t instruction_size
,
357 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x01);
358 const uint8_t cmd_len
= 10;
360 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
362 /* 4K-page aligned */
363 assert(((general_state_size
| dynamic_state_size
|
364 indirect_object_size
| instruction_size
) & 0xfff) == 0);
366 ilo_cp_begin(cp
, cmd_len
);
367 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
369 ilo_cp_write_bo(cp
, 1, general_state_bo
,
372 ilo_cp_write_bo(cp
, 1, surface_state_bo
,
373 INTEL_DOMAIN_SAMPLER
,
375 ilo_cp_write_bo(cp
, 1, dynamic_state_bo
,
376 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
378 ilo_cp_write_bo(cp
, 1, indirect_object_bo
,
381 ilo_cp_write_bo(cp
, 1, instruction_bo
,
382 INTEL_DOMAIN_INSTRUCTION
,
385 if (general_state_size
) {
386 ilo_cp_write_bo(cp
, general_state_size
| 1, general_state_bo
,
391 /* skip range check */
395 if (dynamic_state_size
) {
396 ilo_cp_write_bo(cp
, dynamic_state_size
| 1, dynamic_state_bo
,
397 INTEL_DOMAIN_RENDER
| INTEL_DOMAIN_INSTRUCTION
,
401 /* skip range check */
402 ilo_cp_write(cp
, 0xfffff000 + 1);
405 if (indirect_object_size
) {
406 ilo_cp_write_bo(cp
, indirect_object_size
| 1, indirect_object_bo
,
411 /* skip range check */
412 ilo_cp_write(cp
, 0xfffff000 + 1);
415 if (instruction_size
) {
416 ilo_cp_write_bo(cp
, instruction_size
| 1, instruction_bo
,
417 INTEL_DOMAIN_INSTRUCTION
,
421 /* skip range check */
429 gen6_emit_STATE_SIP(const struct ilo_dev_info
*dev
,
433 const uint32_t cmd
= ILO_GPE_CMD(0x0, 0x1, 0x02);
434 const uint8_t cmd_len
= 2;
436 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
438 ilo_cp_begin(cp
, cmd_len
);
439 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
440 ilo_cp_write(cp
, sip
);
445 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info
*dev
,
449 const uint32_t cmd
= ILO_GPE_CMD(0x1, 0x0, 0x0b);
450 const uint8_t cmd_len
= 1;
452 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
454 ilo_cp_begin(cp
, cmd_len
);
455 ilo_cp_write(cp
, cmd
| enable
);
460 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info
*dev
,
464 const int cmd
= ILO_GPE_CMD(0x1, 0x1, 0x04);
465 const uint8_t cmd_len
= 1;
467 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
470 assert(pipeline
== 0x0 || pipeline
== 0x1);
472 ilo_cp_begin(cp
, cmd_len
);
473 ilo_cp_write(cp
, cmd
| pipeline
);
478 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info
*dev
,
479 int max_threads
, int num_urb_entries
,
483 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x00);
484 const uint8_t cmd_len
= 8;
487 ILO_GPE_VALID_GEN(dev
, 6, 6);
489 dw2
= (max_threads
- 1) << 16 |
490 num_urb_entries
<< 8 |
491 1 << 7 | /* Reset Gateway Timer */
492 1 << 6; /* Bypass Gateway Control */
494 dw4
= urb_entry_size
<< 16 | /* URB Entry Allocation Size */
495 480; /* CURBE Allocation Size */
497 ilo_cp_begin(cp
, cmd_len
);
498 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
499 ilo_cp_write(cp
, 0); /* scratch */
500 ilo_cp_write(cp
, dw2
);
501 ilo_cp_write(cp
, 0); /* MBZ */
502 ilo_cp_write(cp
, dw4
);
503 ilo_cp_write(cp
, 0); /* scoreboard */
510 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info
*dev
,
511 uint32_t buf
, int size
,
514 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x01);
515 const uint8_t cmd_len
= 4;
517 ILO_GPE_VALID_GEN(dev
, 6, 6);
519 assert(buf
% 32 == 0);
520 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
521 size
= align(size
, 32);
523 ilo_cp_begin(cp
, cmd_len
);
524 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
525 ilo_cp_write(cp
, 0); /* MBZ */
526 ilo_cp_write(cp
, size
);
527 ilo_cp_write(cp
, buf
);
532 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info
*dev
,
533 uint32_t offset
, int num_ids
,
536 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x02);
537 const uint8_t cmd_len
= 4;
539 ILO_GPE_VALID_GEN(dev
, 6, 6);
541 assert(offset
% 32 == 0);
543 ilo_cp_begin(cp
, cmd_len
);
544 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
545 ilo_cp_write(cp
, 0); /* MBZ */
546 /* every ID has 8 DWords */
547 ilo_cp_write(cp
, num_ids
* 8 * 4);
548 ilo_cp_write(cp
, offset
);
553 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info
*dev
,
554 int id
, int byte
, int thread_count
,
557 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x03);
558 const uint8_t cmd_len
= 2;
561 ILO_GPE_VALID_GEN(dev
, 6, 6);
567 ilo_cp_begin(cp
, cmd_len
);
568 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
569 ilo_cp_write(cp
, dw1
);
574 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info
*dev
,
575 int thread_count_water_mark
,
579 const uint32_t cmd
= ILO_GPE_CMD(0x2, 0x0, 0x04);
580 const uint8_t cmd_len
= 2;
583 ILO_GPE_VALID_GEN(dev
, 6, 6);
585 dw1
= thread_count_water_mark
<< 16 |
588 ilo_cp_begin(cp
, cmd_len
);
589 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
590 ilo_cp_write(cp
, dw1
);
595 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info
*dev
,
598 assert(!"MEDIA_OBJECT_WALKER unsupported");
602 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info
*dev
,
603 uint32_t vs_binding_table
,
604 uint32_t gs_binding_table
,
605 uint32_t ps_binding_table
,
608 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x01);
609 const uint8_t cmd_len
= 4;
611 ILO_GPE_VALID_GEN(dev
, 6, 6);
613 ilo_cp_begin(cp
, cmd_len
);
614 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
615 GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED
|
616 GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED
|
617 GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED
);
618 ilo_cp_write(cp
, vs_binding_table
);
619 ilo_cp_write(cp
, gs_binding_table
);
620 ilo_cp_write(cp
, ps_binding_table
);
625 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info
*dev
,
626 uint32_t vs_sampler_state
,
627 uint32_t gs_sampler_state
,
628 uint32_t ps_sampler_state
,
631 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x02);
632 const uint8_t cmd_len
= 4;
634 ILO_GPE_VALID_GEN(dev
, 6, 6);
636 ilo_cp_begin(cp
, cmd_len
);
637 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
638 GEN6_PTR_SAMPLER_DW0_VS_CHANGED
|
639 GEN6_PTR_SAMPLER_DW0_GS_CHANGED
|
640 GEN6_PTR_SAMPLER_DW0_PS_CHANGED
);
641 ilo_cp_write(cp
, vs_sampler_state
);
642 ilo_cp_write(cp
, gs_sampler_state
);
643 ilo_cp_write(cp
, ps_sampler_state
);
648 gen6_emit_3DSTATE_URB(const struct ilo_dev_info
*dev
,
649 int vs_total_size
, int gs_total_size
,
650 int vs_entry_size
, int gs_entry_size
,
653 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x05);
654 const uint8_t cmd_len
= 3;
655 const int row_size
= 128; /* 1024 bits */
656 int vs_alloc_size
, gs_alloc_size
;
657 int vs_num_entries
, gs_num_entries
;
659 ILO_GPE_VALID_GEN(dev
, 6, 6);
661 /* in 1024-bit URB rows */
662 vs_alloc_size
= (vs_entry_size
+ row_size
- 1) / row_size
;
663 gs_alloc_size
= (gs_entry_size
+ row_size
- 1) / row_size
;
665 /* the valid range is [1, 5] */
670 assert(vs_alloc_size
<= 5 && gs_alloc_size
<= 5);
672 /* the valid range is [24, 256] in multiples of 4 */
673 vs_num_entries
= (vs_total_size
/ row_size
/ vs_alloc_size
) & ~3;
674 if (vs_num_entries
> 256)
675 vs_num_entries
= 256;
676 assert(vs_num_entries
>= 24);
678 /* the valid range is [0, 256] in multiples of 4 */
679 gs_num_entries
= (gs_total_size
/ row_size
/ gs_alloc_size
) & ~3;
680 if (gs_num_entries
> 256)
681 gs_num_entries
= 256;
683 ilo_cp_begin(cp
, cmd_len
);
684 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
685 ilo_cp_write(cp
, (vs_alloc_size
- 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT
|
686 vs_num_entries
<< GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT
);
687 ilo_cp_write(cp
, gs_num_entries
<< GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT
|
688 (gs_alloc_size
- 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT
);
693 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info
*dev
,
694 const struct ilo_ve_state
*ve
,
695 const struct ilo_vb_state
*vb
,
698 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x08);
702 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
705 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
707 * "From 1 to 33 VBs can be specified..."
709 assert(ve
->vb_count
<= 33);
714 cmd_len
= 1 + 4 * ve
->vb_count
;
716 ilo_cp_begin(cp
, cmd_len
);
717 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
719 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
720 const unsigned instance_divisor
= ve
->instance_divisors
[hw_idx
];
721 const unsigned pipe_idx
= ve
->vb_mapping
[hw_idx
];
722 const struct pipe_vertex_buffer
*cso
= &vb
->states
[pipe_idx
];
725 dw
= hw_idx
<< GEN6_VB_STATE_DW0_INDEX__SHIFT
;
727 if (instance_divisor
)
728 dw
|= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA
;
730 dw
|= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA
;
732 if (dev
->gen
>= ILO_GEN(7))
733 dw
|= GEN7_VB_STATE_DW0_ADDR_MODIFIED
;
735 /* use null vb if there is no buffer or the stride is out of range */
736 if (cso
->buffer
&& cso
->stride
<= 2048) {
737 const struct ilo_buffer
*buf
= ilo_buffer(cso
->buffer
);
738 const uint32_t start_offset
= cso
->buffer_offset
;
739 const uint32_t end_offset
= buf
->bo_size
- 1;
741 dw
|= cso
->stride
<< GEN6_VB_STATE_DW0_PITCH__SHIFT
;
743 ilo_cp_write(cp
, dw
);
744 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
745 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
746 ilo_cp_write(cp
, instance_divisor
);
751 ilo_cp_write(cp
, dw
);
754 ilo_cp_write(cp
, instance_divisor
);
762 ve_init_cso_with_components(const struct ilo_dev_info
*dev
,
763 int comp0
, int comp1
, int comp2
, int comp3
,
764 struct ilo_ve_cso
*cso
)
766 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
768 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
769 cso
->payload
[0] = GEN6_VE_STATE_DW0_VALID
;
771 comp0
<< GEN6_VE_STATE_DW1_COMP0__SHIFT
|
772 comp1
<< GEN6_VE_STATE_DW1_COMP1__SHIFT
|
773 comp2
<< GEN6_VE_STATE_DW1_COMP2__SHIFT
|
774 comp3
<< GEN6_VE_STATE_DW1_COMP3__SHIFT
;
778 ve_set_cso_edgeflag(const struct ilo_dev_info
*dev
,
779 struct ilo_ve_cso
*cso
)
783 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
786 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
788 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
789 * valid VERTEX_ELEMENT structure.
791 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
792 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
794 * - The Source Element Format must be set to the UINT format.
796 * - [DevSNB]: Edge Flags are not supported for QUADLIST
797 * primitives. Software may elect to convert QUADLIST primitives
798 * to some set of corresponding edge-flag-supported primitive
799 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
802 cso
->payload
[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE
;
804 GEN6_VFCOMP_STORE_SRC
<< GEN6_VE_STATE_DW1_COMP0__SHIFT
|
805 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_STATE_DW1_COMP1__SHIFT
|
806 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_STATE_DW1_COMP2__SHIFT
|
807 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_STATE_DW1_COMP3__SHIFT
;
810 * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
811 * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
812 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
814 * Since all the hardware cares about is whether the flags are zero or not,
815 * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
817 format
= (cso
->payload
[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT
) & 0x1ff;
818 if (format
== GEN6_FORMAT_R32_FLOAT
) {
819 STATIC_ASSERT(GEN6_FORMAT_R32_UINT
== GEN6_FORMAT_R32_FLOAT
- 1);
820 cso
->payload
[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT
);
823 assert(format
== GEN6_FORMAT_R8_UINT
);
828 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info
*dev
,
829 const struct ilo_ve_state
*ve
,
830 bool last_velement_edgeflag
,
831 bool prepend_generated_ids
,
834 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x09);
838 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
841 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
843 * "Up to 34 (DevSNB+) vertex elements are supported."
845 assert(ve
->count
+ prepend_generated_ids
<= 34);
847 if (!ve
->count
&& !prepend_generated_ids
) {
848 struct ilo_ve_cso dummy
;
850 ve_init_cso_with_components(dev
,
854 GEN6_VFCOMP_STORE_1_FP
,
858 ilo_cp_begin(cp
, cmd_len
);
859 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
860 ilo_cp_write_multi(cp
, dummy
.payload
, 2);
866 cmd_len
= 2 * (ve
->count
+ prepend_generated_ids
) + 1;
868 ilo_cp_begin(cp
, cmd_len
);
869 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
871 if (prepend_generated_ids
) {
872 struct ilo_ve_cso gen_ids
;
874 ve_init_cso_with_components(dev
,
875 GEN6_VFCOMP_STORE_VID
,
876 GEN6_VFCOMP_STORE_IID
,
881 ilo_cp_write_multi(cp
, gen_ids
.payload
, 2);
884 if (last_velement_edgeflag
) {
885 struct ilo_ve_cso edgeflag
;
887 for (i
= 0; i
< ve
->count
- 1; i
++)
888 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
890 edgeflag
= ve
->cso
[i
];
891 ve_set_cso_edgeflag(dev
, &edgeflag
);
892 ilo_cp_write_multi(cp
, edgeflag
.payload
, 2);
895 for (i
= 0; i
< ve
->count
; i
++)
896 ilo_cp_write_multi(cp
, ve
->cso
[i
].payload
, 2);
903 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info
*dev
,
904 const struct ilo_ib_state
*ib
,
905 bool enable_cut_index
,
908 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0a);
909 const uint8_t cmd_len
= 3;
910 struct ilo_buffer
*buf
= ilo_buffer(ib
->hw_resource
);
911 uint32_t start_offset
, end_offset
;
914 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
919 /* this is moved to the new 3DSTATE_VF */
920 if (dev
->gen
>= ILO_GEN(7.5))
921 assert(!enable_cut_index
);
923 switch (ib
->hw_index_size
) {
925 format
= GEN6_IB_DW0_FORMAT_DWORD
;
928 format
= GEN6_IB_DW0_FORMAT_WORD
;
931 format
= GEN6_IB_DW0_FORMAT_BYTE
;
934 assert(!"unknown index size");
935 format
= GEN6_IB_DW0_FORMAT_BYTE
;
940 * set start_offset to 0 here and adjust pipe_draw_info::start with
941 * ib->draw_start_offset in 3DPRIMITIVE
944 end_offset
= buf
->bo_size
;
946 /* end_offset must also be aligned and is inclusive */
947 end_offset
-= (end_offset
% ib
->hw_index_size
);
950 ilo_cp_begin(cp
, cmd_len
);
951 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
952 ((enable_cut_index
) ? GEN6_IB_DW0_CUT_INDEX_ENABLE
: 0) |
954 ilo_cp_write_bo(cp
, start_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
955 ilo_cp_write_bo(cp
, end_offset
, buf
->bo
, INTEL_DOMAIN_VERTEX
, 0);
960 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info
*dev
,
961 uint32_t clip_viewport
,
962 uint32_t sf_viewport
,
963 uint32_t cc_viewport
,
966 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0d);
967 const uint8_t cmd_len
= 4;
969 ILO_GPE_VALID_GEN(dev
, 6, 6);
971 ilo_cp_begin(cp
, cmd_len
);
972 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
973 GEN6_PTR_VP_DW0_CLIP_CHANGED
|
974 GEN6_PTR_VP_DW0_SF_CHANGED
|
975 GEN6_PTR_VP_DW0_CC_CHANGED
);
976 ilo_cp_write(cp
, clip_viewport
);
977 ilo_cp_write(cp
, sf_viewport
);
978 ilo_cp_write(cp
, cc_viewport
);
983 gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info
*dev
,
984 uint32_t blend_state
,
985 uint32_t depth_stencil_state
,
986 uint32_t color_calc_state
,
989 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0e);
990 const uint8_t cmd_len
= 4;
992 ILO_GPE_VALID_GEN(dev
, 6, 6);
994 ilo_cp_begin(cp
, cmd_len
);
995 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
996 ilo_cp_write(cp
, blend_state
| 1);
997 ilo_cp_write(cp
, depth_stencil_state
| 1);
998 ilo_cp_write(cp
, color_calc_state
| 1);
1003 gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info
*dev
,
1004 uint32_t scissor_rect
,
1007 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0f);
1008 const uint8_t cmd_len
= 2;
1010 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1012 ilo_cp_begin(cp
, cmd_len
);
1013 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1014 ilo_cp_write(cp
, scissor_rect
);
1019 gen6_emit_3DSTATE_VS(const struct ilo_dev_info
*dev
,
1020 const struct ilo_shader_state
*vs
,
1024 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x10);
1025 const uint8_t cmd_len
= 6;
1026 const struct ilo_shader_cso
*cso
;
1027 uint32_t dw2
, dw4
, dw5
;
1029 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1032 ilo_cp_begin(cp
, cmd_len
);
1033 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1034 ilo_cp_write(cp
, 0);
1035 ilo_cp_write(cp
, 0);
1036 ilo_cp_write(cp
, 0);
1037 ilo_cp_write(cp
, 0);
1038 ilo_cp_write(cp
, 0);
1043 cso
= ilo_shader_get_kernel_cso(vs
);
1044 dw2
= cso
->payload
[0];
1045 dw4
= cso
->payload
[1];
1046 dw5
= cso
->payload
[2];
1048 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
1050 ilo_cp_begin(cp
, cmd_len
);
1051 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1052 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(vs
));
1053 ilo_cp_write(cp
, dw2
);
1054 ilo_cp_write(cp
, 0); /* scratch */
1055 ilo_cp_write(cp
, dw4
);
1056 ilo_cp_write(cp
, dw5
);
1061 gen6_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
1062 const struct ilo_shader_state
*gs
,
1063 const struct ilo_shader_state
*vs
,
1067 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x11);
1068 const uint8_t cmd_len
= 7;
1069 uint32_t dw1
, dw2
, dw4
, dw5
, dw6
;
1071 ILO_GPE_VALID_GEN(dev
, 6, 6);
1074 const struct ilo_shader_cso
*cso
;
1076 dw1
= ilo_shader_get_kernel_offset(gs
);
1078 cso
= ilo_shader_get_kernel_cso(gs
);
1079 dw2
= cso
->payload
[0];
1080 dw4
= cso
->payload
[1];
1081 dw5
= cso
->payload
[2];
1082 dw6
= cso
->payload
[3];
1084 else if (vs
&& ilo_shader_get_kernel_param(vs
, ILO_KERNEL_VS_GEN6_SO
)) {
1085 struct ilo_shader_cso cso
;
1086 enum ilo_kernel_param param
;
1088 switch (verts_per_prim
) {
1090 param
= ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET
;
1093 param
= ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET
;
1096 param
= ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET
;
1100 dw1
= ilo_shader_get_kernel_offset(vs
) +
1101 ilo_shader_get_kernel_param(vs
, param
);
1103 /* cannot use VS's CSO */
1104 ilo_gpe_init_gs_cso_gen6(dev
, vs
, &cso
);
1105 dw2
= cso
.payload
[0];
1106 dw4
= cso
.payload
[1];
1107 dw5
= cso
.payload
[2];
1108 dw6
= cso
.payload
[3];
1113 dw4
= 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT
;
1114 dw5
= GEN6_GS_DW5_STATISTICS
;
1118 ilo_cp_begin(cp
, cmd_len
);
1119 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1120 ilo_cp_write(cp
, dw1
);
1121 ilo_cp_write(cp
, dw2
);
1122 ilo_cp_write(cp
, 0);
1123 ilo_cp_write(cp
, dw4
);
1124 ilo_cp_write(cp
, dw5
);
1125 ilo_cp_write(cp
, dw6
);
1130 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info
*dev
,
1131 const struct ilo_rasterizer_state
*rasterizer
,
1132 const struct ilo_shader_state
*fs
,
1133 bool enable_guardband
,
1137 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x12);
1138 const uint8_t cmd_len
= 4;
1139 uint32_t dw1
, dw2
, dw3
;
1141 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1146 dw1
= rasterizer
->clip
.payload
[0];
1147 dw2
= rasterizer
->clip
.payload
[1];
1148 dw3
= rasterizer
->clip
.payload
[2];
1150 if (enable_guardband
&& rasterizer
->clip
.can_enable_guardband
)
1151 dw2
|= GEN6_CLIP_DW2_GB_TEST_ENABLE
;
1153 interps
= (fs
) ? ilo_shader_get_kernel_param(fs
,
1154 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
) : 0;
1156 if (interps
& (GEN6_INTERP_NONPERSPECTIVE_PIXEL
|
1157 GEN6_INTERP_NONPERSPECTIVE_CENTROID
|
1158 GEN6_INTERP_NONPERSPECTIVE_SAMPLE
))
1159 dw2
|= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE
;
1161 dw3
|= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO
|
1162 (num_viewports
- 1);
1170 ilo_cp_begin(cp
, cmd_len
);
1171 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1172 ilo_cp_write(cp
, dw1
);
1173 ilo_cp_write(cp
, dw2
);
1174 ilo_cp_write(cp
, dw3
);
1179 gen6_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
1180 const struct ilo_rasterizer_state
*rasterizer
,
1181 const struct ilo_shader_state
*fs
,
1184 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x13);
1185 const uint8_t cmd_len
= 20;
1186 uint32_t payload_raster
[6], payload_sbe
[13];
1188 ILO_GPE_VALID_GEN(dev
, 6, 6);
1190 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
, rasterizer
,
1191 1, PIPE_FORMAT_NONE
, payload_raster
, Elements(payload_raster
));
1192 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
,
1193 fs
, payload_sbe
, Elements(payload_sbe
));
1195 ilo_cp_begin(cp
, cmd_len
);
1196 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1197 ilo_cp_write(cp
, payload_sbe
[0]);
1198 ilo_cp_write_multi(cp
, payload_raster
, 6);
1199 ilo_cp_write_multi(cp
, &payload_sbe
[1], 12);
1204 gen6_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
1205 const struct ilo_shader_state
*fs
,
1207 const struct ilo_rasterizer_state
*rasterizer
,
1208 bool dual_blend
, bool cc_may_kill
,
1212 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x14);
1213 const uint8_t cmd_len
= 9;
1214 const int num_samples
= 1;
1215 const struct ilo_shader_cso
*fs_cso
;
1216 uint32_t dw2
, dw4
, dw5
, dw6
;
1218 ILO_GPE_VALID_GEN(dev
, 6, 6);
1221 /* see brwCreateContext() */
1222 const int max_threads
= (dev
->gt
== 2) ? 80 : 40;
1224 ilo_cp_begin(cp
, cmd_len
);
1225 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1226 ilo_cp_write(cp
, 0);
1227 ilo_cp_write(cp
, 0);
1228 ilo_cp_write(cp
, 0);
1229 ilo_cp_write(cp
, hiz_op
);
1230 /* honor the valid range even if dispatching is disabled */
1231 ilo_cp_write(cp
, (max_threads
- 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT
);
1232 ilo_cp_write(cp
, 0);
1233 ilo_cp_write(cp
, 0);
1234 ilo_cp_write(cp
, 0);
1240 fs_cso
= ilo_shader_get_kernel_cso(fs
);
1241 dw2
= fs_cso
->payload
[0];
1242 dw4
= fs_cso
->payload
[1];
1243 dw5
= fs_cso
->payload
[2];
1244 dw6
= fs_cso
->payload
[3];
1246 dw2
|= (num_samplers
+ 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
1249 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1251 * "This bit (Statistics Enable) must be disabled if either of these
1252 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
1253 * Enable or Depth Buffer Resolve Enable."
1256 dw4
|= GEN6_WM_DW4_STATISTICS
;
1259 dw5
|= GEN6_WM_DW5_PS_KILL
| GEN6_WM_DW5_PS_ENABLE
;
1262 dw5
|= GEN6_WM_DW5_DUAL_SOURCE_BLEND
;
1264 dw5
|= rasterizer
->wm
.payload
[0];
1266 dw6
|= rasterizer
->wm
.payload
[1];
1268 if (num_samples
> 1) {
1269 dw6
|= rasterizer
->wm
.dw_msaa_rast
|
1270 rasterizer
->wm
.dw_msaa_disp
;
1273 ilo_cp_begin(cp
, cmd_len
);
1274 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1275 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
1276 ilo_cp_write(cp
, dw2
);
1277 ilo_cp_write(cp
, 0); /* scratch */
1278 ilo_cp_write(cp
, dw4
);
1279 ilo_cp_write(cp
, dw5
);
1280 ilo_cp_write(cp
, dw6
);
1281 ilo_cp_write(cp
, 0); /* kernel 1 */
1282 ilo_cp_write(cp
, 0); /* kernel 2 */
/**
 * Fill the four buffer dwords shared by the 3DSTATE_CONSTANT_{VS,GS,PS}
 * commands.
 *
 * For every slot i < num_bufs with a non-zero size, dw[i] receives the buffer
 * address OR'ed with the read length (in 256-bit units, minus one).
 *
 * NOTE(review): the slot-enable update, the clearing of unused slots and the
 * final return were reconstructed from how the callers use the result (the
 * mask is shifted to bit 12 of the header and all four dwords are emitted);
 * confirm against the upstream file.
 *
 * \param dev              device info (not read here)
 * \param bufs             buffer addresses, each a multiple of 32
 * \param sizes            buffer sizes in bytes
 * \param num_bufs         number of valid entries in bufs and sizes
 * \param max_read_length  upper bound on the sum of the actual read lengths
 * \param dw               output array of num_dwords dwords
 * \param num_dwords       must be 4
 * \return bitmask of the enabled slots
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enabled = 0x0;
   int total_read_length, i;

   assert(num_dwords == 4);

   total_read_length = 0;
   for (i = 0; i < 4; i++) {
      if (i < num_bufs && sizes[i]) {
         /* in 256-bit units minus one */
         const int read_len = (sizes[i] + 31) / 32 - 1;

         assert(bufs[i] % 32 == 0);
         assert(read_len < 32);

         enabled |= 1 << i;
         dw[i] = bufs[i] | read_len;

         total_read_length += read_len + 1;
      }
      else {
         dw[i] = 0;
      }
   }

   assert(total_read_length <= max_read_length);

   return enabled;
}
/**
 * Emit 3DSTATE_CONSTANT_VS (GEN6 only).
 *
 * The buffer dwords and the enable mask come from
 * gen6_fill_3dstate_constant(); the mask is placed at bit 12 of the header.
 *
 * NOTE(review): the return type, the trailing "num_bufs" and "cp" parameters
 * and the closing ilo_cp_end() were reconstructed by inference from the body;
 * confirm against the upstream file.
 *
 * \param bufs      buffer addresses, one per slot
 * \param sizes     buffer sizes in bytes, one per slot
 * \param num_bufs  number of valid slots, at most 4
 * \param cp        command parser the command is written to
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_CONSTANT_GS (GEN6 only).
 *
 * The buffer dwords and the enable mask come from
 * gen6_fill_3dstate_constant(); the mask is placed at bit 12 of the header.
 *
 * NOTE(review): the return type, the trailing "num_bufs" and "cp" parameters
 * and the closing ilo_cp_end() were reconstructed by inference from the body;
 * confirm against the upstream file.
 *
 * \param bufs      buffer addresses, one per slot
 * \param sizes     buffer sizes in bytes, one per slot
 * \param num_bufs  number of valid slots, at most 4
 * \param cp        command parser the command is written to
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_CONSTANT_PS (GEN6 only).
 *
 * The buffer dwords and the enable mask come from
 * gen6_fill_3dstate_constant(); the mask is placed at bit 12 of the header.
 *
 * NOTE(review): the return type, the trailing "num_bufs" and "cp" parameters
 * and the closing ilo_cp_end() were reconstructed by inference from the body;
 * confirm against the upstream file.
 *
 * \param bufs      buffer addresses, one per slot
 * \param sizes     buffer sizes in bytes, one per slot
 * \param num_bufs  number of valid slots, at most 4
 * \param cp        command parser the command is written to
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t cmd_len = 5;
   uint32_t buf_dw[4], buf_enabled;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   buf_enabled = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | buf_enabled << 12);
   ilo_cp_write(cp, buf_dw[0]);
   ilo_cp_write(cp, buf_dw[1]);
   ilo_cp_write(cp, buf_dw[2]);
   ilo_cp_write(cp, buf_dw[3]);
   ilo_cp_end(cp);
}
/**
 * Emit 3DSTATE_SAMPLE_MASK (GEN6 only).
 *
 * Only the low four bits of sample_mask are kept before it is written as the
 * single payload dword.
 *
 * NOTE(review): the return type, the trailing "cp" parameter and the closing
 * ilo_cp_end() were reconstructed by inference; confirm against the upstream
 * file.
 *
 * \param sample_mask  requested sample mask
 * \param cp           command parser the command is written to
 */
static inline void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = 0xf;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   sample_mask &= valid_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sample_mask);
   ilo_cp_end(cp);
}
/*
 * Emit 3DSTATE_DRAWING_RECTANGLE, a 4-dword command valid on GEN6 to GEN7.5.
 *
 * xmax and ymax are computed as x + width - 1 and y + height - 1.  Each of
 * x, y, xmax and ymax is then clamped to rect_limit, and the command is
 * written as: header, (y << 16 | x), (ymax << 16 | xmax), and a zero dword
 * for the origin.
 *
 * NOTE(review): the declaration of rect_limit, the values it takes on each
 * side of the GEN7 check, and whatever enforces the quoted "Y Min must be
 * even" erratum are not visible in this listing -- confirm against the
 * upstream file before relying on them.
 */
1434 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info
*dev
,
1435 unsigned x
, unsigned y
,
1436 unsigned width
, unsigned height
,
1439 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x00);
1440 const uint8_t cmd_len
= 4;
1441 unsigned xmax
= x
+ width
- 1;
1442 unsigned ymax
= y
+ height
- 1;
1445 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1447 if (dev
->gen
>= ILO_GEN(7)) {
1452 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1454 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1455 * must be an even number"
1462 if (x
> rect_limit
) x
= rect_limit
;
1463 if (y
> rect_limit
) y
= rect_limit
;
1464 if (xmax
> rect_limit
) xmax
= rect_limit
;
1465 if (ymax
> rect_limit
) ymax
= rect_limit
;
1467 ilo_cp_begin(cp
, cmd_len
);
1468 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1469 ilo_cp_write(cp
, y
<< 16 | x
);
1470 ilo_cp_write(cp
, ymax
<< 16 | xmax
);
1473 * There is no need to set the origin. It is intended to support front
1476 ilo_cp_write(cp
, 0);
/*
 * Round the width and height stored in zs->payload[2] up to multiples of
 * align_w and align_h, in place.
 *
 * Both fields are stored minus one: each is extracted with
 * ((dw3 >> shift) & mask) + 1, aligned with align(), and written back as
 * (value - 1) << shift after the two old fields have been cleared.
 *
 * NOTE(review): the declaration of dw3 and the per-generation values of
 * mask, shift_w and shift_h chosen around the GEN7 check are not visible in
 * this listing -- confirm against the upstream file.
 */
1482 zs_align_surface(const struct ilo_dev_info
*dev
,
1483 unsigned align_w
, unsigned align_h
,
1484 struct ilo_zs_surface
*zs
)
1486 unsigned mask
, shift_w
, shift_h
;
1487 unsigned width
, height
;
1490 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1492 if (dev
->gen
>= ILO_GEN(7)) {
1503 dw3
= zs
->payload
[2];
1505 /* aligned width and height */
1506 width
= align(((dw3
>> shift_w
) & mask
) + 1, align_w
);
1507 height
= align(((dw3
>> shift_h
) & mask
) + 1, align_h
);
1509 dw3
= (dw3
& ~((mask
<< shift_w
) | (mask
<< shift_h
))) |
1510 (width
- 1) << shift_w
|
1511 (height
- 1) << shift_h
;
1513 zs
->payload
[2] = dw3
;
1517 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1518 const struct ilo_zs_surface
*zs
,
1521 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1522 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1523 const uint8_t cmd_len
= 7;
1525 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1527 ilo_cp_begin(cp
, cmd_len
);
1528 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1529 ilo_cp_write(cp
, zs
->payload
[0]);
1530 ilo_cp_write_bo(cp
, zs
->payload
[1], zs
->bo
,
1531 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1532 ilo_cp_write(cp
, zs
->payload
[2]);
1533 ilo_cp_write(cp
, zs
->payload
[3]);
1534 ilo_cp_write(cp
, zs
->payload
[4]);
1535 ilo_cp_write(cp
, zs
->payload
[5]);
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (GEN6 to GEN7.5).
 *
 * NOTE(review): the return type, the trailing "cp" parameter and the closing
 * ilo_cp_end() were reconstructed by inference; confirm against the upstream
 * file.
 *
 * \param x_offset  horizontal stipple offset, 0 to 31
 * \param y_offset  vertical stipple offset, 0 to 31
 * \param cp        command parser the command is written to
 */
static inline void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, x_offset << 8 | y_offset);
   ilo_cp_end(cp);
}
1558 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info
*dev
,
1559 const struct pipe_poly_stipple
*pattern
,
1562 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x07);
1563 const uint8_t cmd_len
= 33;
1566 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1567 assert(Elements(pattern
->stipple
) == 32);
1569 ilo_cp_begin(cp
, cmd_len
);
1570 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1571 for (i
= 0; i
< 32; i
++)
1572 ilo_cp_write(cp
, pattern
->stipple
[i
]);
1577 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info
*dev
,
1578 unsigned pattern
, unsigned factor
,
1581 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x08);
1582 const uint8_t cmd_len
= 3;
1585 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1586 assert((pattern
& 0xffff) == pattern
);
1587 assert(factor
>= 1 && factor
<= 256);
1589 ilo_cp_begin(cp
, cmd_len
);
1590 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1591 ilo_cp_write(cp
, pattern
);
1593 if (dev
->gen
>= ILO_GEN(7)) {
1595 inverse
= (unsigned) (65536.0f
/ factor
);
1596 ilo_cp_write(cp
, inverse
<< 15 | factor
);
1600 inverse
= (unsigned) (8192.0f
/ factor
);
1601 ilo_cp_write(cp
, inverse
<< 16 | factor
);
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS (GEN6 to GEN7.5) with both parameter
 * dwords set to zero.
 *
 * NOTE(review): the return type, the "cp" parameter and the closing
 * ilo_cp_end() were reconstructed by inference; confirm against the upstream
 * file.
 *
 * \param cp  command parser the command is written to
 */
static inline void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
   const uint8_t cmd_len = 3;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_end(cp);
}
1624 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info
*dev
,
1625 int index
, unsigned svbi
,
1627 bool load_vertex_count
,
1630 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0b);
1631 const uint8_t cmd_len
= 4;
1634 ILO_GPE_VALID_GEN(dev
, 6, 6);
1635 assert(index
>= 0 && index
< 4);
1637 dw1
= index
<< GEN6_SVBI_DW1_INDEX__SHIFT
;
1638 if (load_vertex_count
)
1639 dw1
|= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT
;
1641 ilo_cp_begin(cp
, cmd_len
);
1642 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1643 ilo_cp_write(cp
, dw1
);
1644 ilo_cp_write(cp
, svbi
);
1645 ilo_cp_write(cp
, max_svbi
);
1650 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info
*dev
,
1652 const uint32_t *packed_sample_pos
,
1653 bool pixel_location_center
,
1656 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x0d);
1657 const uint8_t cmd_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 3;
1658 uint32_t dw1
, dw2
, dw3
;
1660 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1662 dw1
= (pixel_location_center
) ?
1663 GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER
: GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER
;
1665 switch (num_samples
) {
1668 dw1
|= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1
;
1673 dw1
|= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4
;
1674 dw2
= packed_sample_pos
[0];
1678 assert(dev
->gen
>= ILO_GEN(7));
1679 dw1
|= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8
;
1680 dw2
= packed_sample_pos
[0];
1681 dw3
= packed_sample_pos
[1];
1684 assert(!"unsupported sample count");
1685 dw1
|= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1
;
1691 ilo_cp_begin(cp
, cmd_len
);
1692 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1693 ilo_cp_write(cp
, dw1
);
1694 ilo_cp_write(cp
, dw2
);
1695 if (dev
->gen
>= ILO_GEN(7))
1696 ilo_cp_write(cp
, dw3
);
1701 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info
*dev
,
1702 const struct ilo_zs_surface
*zs
,
1705 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1706 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1707 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1708 const uint8_t cmd_len
= 3;
1710 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1712 ilo_cp_begin(cp
, cmd_len
);
1713 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1714 /* see ilo_gpe_init_zs_surface() */
1715 ilo_cp_write(cp
, zs
->payload
[6]);
1716 ilo_cp_write_bo(cp
, zs
->payload
[7], zs
->separate_s8_bo
,
1717 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1722 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info
*dev
,
1723 const struct ilo_zs_surface
*zs
,
1726 const uint32_t cmd
= (dev
->gen
>= ILO_GEN(7)) ?
1727 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1728 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1729 const uint8_t cmd_len
= 3;
1731 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1733 ilo_cp_begin(cp
, cmd_len
);
1734 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1735 /* see ilo_gpe_init_zs_surface() */
1736 ilo_cp_write(cp
, zs
->payload
[8]);
1737 ilo_cp_write_bo(cp
, zs
->payload
[9], zs
->hiz_bo
,
1738 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1743 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
1747 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x10);
1748 const uint8_t cmd_len
= 2;
1750 ILO_GPE_VALID_GEN(dev
, 6, 6);
1752 ilo_cp_begin(cp
, cmd_len
);
1753 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1754 GEN6_CLEAR_PARAMS_DW0_VALID
);
1755 ilo_cp_write(cp
, clear_val
);
1760 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info
*dev
,
1762 struct intel_bo
*bo
, uint32_t bo_offset
,
1766 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x2, 0x00);
1767 const uint8_t cmd_len
= (write_qword
) ? 5 : 4;
1768 const uint32_t read_domains
= INTEL_DOMAIN_INSTRUCTION
;
1769 const uint32_t write_domain
= INTEL_DOMAIN_INSTRUCTION
;
1771 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1773 assert(bo_offset
% ((write_qword
) ? 8 : 4) == 0);
1775 if (dw1
& GEN6_PIPE_CONTROL_CS_STALL
) {
1777 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1779 * "1 of the following must also be set (when CS stall is set):
1781 * * Depth Cache Flush Enable ([0] of DW1)
1782 * * Stall at Pixel Scoreboard ([1] of DW1)
1783 * * Depth Stall ([13] of DW1)
1784 * * Post-Sync Operation ([13] of DW1)
1785 * * Render Target Cache Flush Enable ([12] of DW1)
1786 * * Notify Enable ([8] of DW1)"
1788 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1790 * "One of the following must also be set (when CS stall is set):
1792 * * Render Target Cache Flush Enable ([12] of DW1)
1793 * * Depth Cache Flush Enable ([0] of DW1)
1794 * * Stall at Pixel Scoreboard ([1] of DW1)
1795 * * Depth Stall ([13] of DW1)
1796 * * Post-Sync Operation ([13] of DW1)"
1798 uint32_t bit_test
= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH
|
1799 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1800 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL
|
1801 GEN6_PIPE_CONTROL_DEPTH_STALL
;
1804 bit_test
|= GEN6_PIPE_CONTROL_WRITE_IMM
|
1805 GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT
|
1806 GEN6_PIPE_CONTROL_WRITE_TIMESTAMP
;
1808 if (dev
->gen
== ILO_GEN(6))
1809 bit_test
|= GEN6_PIPE_CONTROL_NOTIFY_ENABLE
;
1811 assert(dw1
& bit_test
);
1814 if (dw1
& GEN6_PIPE_CONTROL_DEPTH_STALL
) {
1816 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1818 * "Following bits must be clear (when Depth Stall is set):
1820 * * Render Target Cache Flush Enable ([12] of DW1)
1821 * * Depth Cache Flush Enable ([0] of DW1)"
1823 assert(!(dw1
& (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH
|
1824 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH
)));
1828 * From the Sandy Bridge PRM, volume 1 part 3, page 19:
1830 * "[DevSNB] PPGTT memory writes by MI_* (such as MI_STORE_DATA_IMM)
1831 * and PIPE_CONTROL are not supported."
1833 * The kernel will add the mapping automatically (when write domain is
1834 * INTEL_DOMAIN_INSTRUCTION).
1836 if (dev
->gen
== ILO_GEN(6) && bo
)
1837 bo_offset
|= GEN6_PIPE_CONTROL_DW2_USE_GGTT
;
1839 ilo_cp_begin(cp
, cmd_len
);
1840 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1841 ilo_cp_write(cp
, dw1
);
1842 ilo_cp_write_bo(cp
, bo_offset
, bo
, read_domains
, write_domain
);
1843 ilo_cp_write(cp
, 0);
1845 ilo_cp_write(cp
, 0);
1850 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
1851 const struct pipe_draw_info
*info
,
1852 const struct ilo_ib_state
*ib
,
1856 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x3, 0x00);
1857 const uint8_t cmd_len
= 6;
1858 const int prim
= (rectlist
) ?
1859 GEN6_3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
1860 const int vb_access
= (info
->indexed
) ?
1861 GEN6_3DPRIM_DW0_ACCESS_RANDOM
: GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL
;
1862 const uint32_t vb_start
= info
->start
+
1863 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
1865 ILO_GPE_VALID_GEN(dev
, 6, 6);
1867 ilo_cp_begin(cp
, cmd_len
);
1868 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
1869 prim
<< GEN6_3DPRIM_DW0_TYPE__SHIFT
|
1871 ilo_cp_write(cp
, info
->count
);
1872 ilo_cp_write(cp
, vb_start
);
1873 ilo_cp_write(cp
, info
->instance_count
);
1874 ilo_cp_write(cp
, info
->start_instance
);
1875 ilo_cp_write(cp
, info
->index_bias
);
/*
 * Write num_ids interface descriptors (GEN6 only) into space obtained with
 * ilo_cp_steal_ptr() and return its state_offset.
 *
 * The space is (32 / 4) * num_ids dwords long and aligned to 32 / 4 dwords.
 * The descriptor words visible here are:
 *   dw[0]  kernel offset of cs[i]
 *   dw[1]  1 << 18 (SPF)
 *   dw[2]  sampler_state[i] | ((num_samplers[i] + 3) / 4) << 2
 *   dw[3]  binding_table_state[i] OR'ed with a term that is not visible
 *   dw[4]  zero (CURBE read length and offset)
 *   dw[5]  zero (barrier id)
 *
 * NOTE(review): several parameters (including num_samplers and num_ids, which
 * the body reads), the remaining descriptor words and the per-iteration
 * advance of dw are missing from this listing -- confirm against the
 * upstream file.
 */
1879 static inline uint32_t
1880 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info
*dev
,
1881 const struct ilo_shader_state
**cs
,
1882 uint32_t *sampler_state
,
1884 uint32_t *binding_table_state
,
1890 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
1892 * "(Interface Descriptor Total Length) This field must have the same
1893 * alignment as the Interface Descriptor Data Start Address.
1895 * It must be DQWord (32-byte) aligned..."
1897 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
1899 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
1900 * aligned address of the Interface Descriptor data."
1902 const int state_align
= 32 / 4;
1903 const int state_len
= (32 / 4) * num_ids
;
1904 uint32_t state_offset
, *dw
;
1907 ILO_GPE_VALID_GEN(dev
, 6, 6);
1909 dw
= ilo_cp_steal_ptr(cp
, "INTERFACE_DESCRIPTOR_DATA",
1910 state_len
, state_align
, &state_offset
);
1912 for (i
= 0; i
< num_ids
; i
++) {
1913 dw
[0] = ilo_shader_get_kernel_offset(cs
[i
]);
1914 dw
[1] = 1 << 18; /* SPF */
1915 dw
[2] = sampler_state
[i
] |
1916 (num_samplers
[i
] + 3) / 4 << 2;
1917 dw
[3] = binding_table_state
[i
] |
1919 dw
[4] = 0 << 16 | /* CURBE Read Length */
1920 0; /* CURBE Read Offset */
1921 dw
[5] = 0; /* Barrier ID */
1928 return state_offset
;
1931 static inline uint32_t
1932 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info
*dev
,
1933 const struct ilo_viewport_cso
*viewports
,
1934 unsigned num_viewports
,
1937 const int state_align
= 32 / 4;
1938 const int state_len
= 8 * num_viewports
;
1939 uint32_t state_offset
, *dw
;
1942 ILO_GPE_VALID_GEN(dev
, 6, 6);
1945 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1947 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1948 * stored as an array of up to 16 elements..."
1950 assert(num_viewports
&& num_viewports
<= 16);
1952 dw
= ilo_cp_steal_ptr(cp
, "SF_VIEWPORT",
1953 state_len
, state_align
, &state_offset
);
1955 for (i
= 0; i
< num_viewports
; i
++) {
1956 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1958 dw
[0] = fui(vp
->m00
);
1959 dw
[1] = fui(vp
->m11
);
1960 dw
[2] = fui(vp
->m22
);
1961 dw
[3] = fui(vp
->m30
);
1962 dw
[4] = fui(vp
->m31
);
1963 dw
[5] = fui(vp
->m32
);
1970 return state_offset
;
1973 static inline uint32_t
1974 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
1975 const struct ilo_viewport_cso
*viewports
,
1976 unsigned num_viewports
,
1979 const int state_align
= 32 / 4;
1980 const int state_len
= 4 * num_viewports
;
1981 uint32_t state_offset
, *dw
;
1984 ILO_GPE_VALID_GEN(dev
, 6, 6);
1987 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1989 * "The viewport-related state is stored as an array of up to 16
1992 assert(num_viewports
&& num_viewports
<= 16);
1994 dw
= ilo_cp_steal_ptr(cp
, "CLIP_VIEWPORT",
1995 state_len
, state_align
, &state_offset
);
1997 for (i
= 0; i
< num_viewports
; i
++) {
1998 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
2000 dw
[0] = fui(vp
->min_gbx
);
2001 dw
[1] = fui(vp
->max_gbx
);
2002 dw
[2] = fui(vp
->min_gby
);
2003 dw
[3] = fui(vp
->max_gby
);
2008 return state_offset
;
2011 static inline uint32_t
2012 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info
*dev
,
2013 const struct ilo_viewport_cso
*viewports
,
2014 unsigned num_viewports
,
2017 const int state_align
= 32 / 4;
2018 const int state_len
= 2 * num_viewports
;
2019 uint32_t state_offset
, *dw
;
2022 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2025 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
2027 * "The viewport state is stored as an array of up to 16 elements..."
2029 assert(num_viewports
&& num_viewports
<= 16);
2031 dw
= ilo_cp_steal_ptr(cp
, "CC_VIEWPORT",
2032 state_len
, state_align
, &state_offset
);
2034 for (i
= 0; i
< num_viewports
; i
++) {
2035 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
2037 dw
[0] = fui(vp
->min_z
);
2038 dw
[1] = fui(vp
->max_z
);
2043 return state_offset
;
/*
 * Write COLOR_CALC_STATE (GEN6 to GEN7.5) into space obtained with
 * ilo_cp_steal_ptr() and return its state_offset.
 *
 * The state is 6 dwords long and aligned to 64 / 4 dwords.  The words
 * visible here are:
 *   dw[0]     stencil_ref->ref_value[0] << 24 | ref_value[1] << 16 |
 *             GEN6_CC_DW0_ALPHATEST_UNORM8
 *   dw[2..5]  blend_color->color[0..3] as raw float bits (fui)
 *
 * NOTE(review): one parameter between stencil_ref and blend_color, the "cp"
 * parameter and the assignment of dw[1] are missing from this listing --
 * confirm against the upstream file.
 */
2046 static inline uint32_t
2047 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info
*dev
,
2048 const struct pipe_stencil_ref
*stencil_ref
,
2050 const struct pipe_blend_color
*blend_color
,
2053 const int state_align
= 64 / 4;
2054 const int state_len
= 6;
2055 uint32_t state_offset
, *dw
;
2057 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2059 dw
= ilo_cp_steal_ptr(cp
, "COLOR_CALC_STATE",
2060 state_len
, state_align
, &state_offset
);
2062 dw
[0] = stencil_ref
->ref_value
[0] << 24 |
2063 stencil_ref
->ref_value
[1] << 16 |
2064 GEN6_CC_DW0_ALPHATEST_UNORM8
;
2066 dw
[2] = fui(blend_color
->color
[0]);
2067 dw
[3] = fui(blend_color
->color
[1]);
2068 dw
[4] = fui(blend_color
->color
[2]);
2069 dw
[5] = fui(blend_color
->color
[3]);
2071 return state_offset
;
/*
 * Write the BLEND_STATE array (GEN6 to GEN7.5) into space obtained with
 * ilo_cp_steal_ptr() and return its state_offset.
 *
 * One two-dword element is produced per colour buffer
 * (num_targets = fb->state.nr_cbufs, at most 8).  For target i the blend CSO
 * at index i is used when blend->independent_blend_enable is set, otherwise
 * the CSO at index 0.  Each element starts from cso->payload[0..1] and the
 * visible code then ORs in:
 *   - cso->dw_blend_dst_alpha_forced_one and cso->dw_blend under the
 *     "not pure integer" test (how the two are selected between is not fully
 *     visible),
 *   - cso->dw_logicop,
 *   - cso->dw_alpha_mod when fb->num_samples > 1,
 *   - dsa->dw_alpha when the target is not pure integer.
 * The flags rt_is_unorm, rt_is_pure_integer and rt_dst_alpha_forced_one are
 * derived from the colour buffer's util_format_description.
 *
 * NOTE(review): many lines of this function (the "cp" parameter, several
 * declarations, branch and loop structure, the handling of zero targets and
 * of a NULL format_desc, and the advance of dw) are missing from this
 * listing -- confirm against the upstream file.
 */
2074 static inline uint32_t
2075 gen6_emit_BLEND_STATE(const struct ilo_dev_info
*dev
,
2076 const struct ilo_blend_state
*blend
,
2077 const struct ilo_fb_state
*fb
,
2078 const struct ilo_dsa_state
*dsa
,
2081 const int state_align
= 64 / 4;
2083 uint32_t state_offset
, *dw
;
2084 unsigned num_targets
, i
;
2086 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2089 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2091 * "The blend state is stored as an array of up to 8 elements..."
2093 num_targets
= fb
->state
.nr_cbufs
;
2094 assert(num_targets
<= 8);
2099 /* to be able to reference alpha func */
2103 state_len
= 2 * num_targets
;
2105 dw
= ilo_cp_steal_ptr(cp
, "BLEND_STATE",
2106 state_len
, state_align
, &state_offset
);
2108 for (i
= 0; i
< num_targets
; i
++) {
2109 const unsigned idx
= (blend
->independent_blend_enable
) ? i
: 0;
2110 const struct ilo_blend_cso
*cso
= &blend
->cso
[idx
];
2111 const int num_samples
= fb
->num_samples
;
2112 const struct util_format_description
*format_desc
=
2113 (idx
< fb
->state
.nr_cbufs
&& fb
->state
.cbufs
[idx
]) ?
2114 util_format_description(fb
->state
.cbufs
[idx
]->format
) : NULL
;
2115 bool rt_is_unorm
, rt_is_pure_integer
, rt_dst_alpha_forced_one
;
2118 rt_is_pure_integer
= false;
2119 rt_dst_alpha_forced_one
= false;
2124 switch (format_desc
->format
) {
2125 case PIPE_FORMAT_B8G8R8X8_UNORM
:
2126 /* force alpha to one when the HW format has alpha */
2127 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM
)
2128 == GEN6_FORMAT_B8G8R8A8_UNORM
);
2129 rt_dst_alpha_forced_one
= true;
2135 for (ch
= 0; ch
< 4; ch
++) {
2136 if (format_desc
->channel
[ch
].type
== UTIL_FORMAT_TYPE_VOID
)
2139 if (format_desc
->channel
[ch
].pure_integer
) {
2140 rt_is_unorm
= false;
2141 rt_is_pure_integer
= true;
2145 if (!format_desc
->channel
[ch
].normalized
||
2146 format_desc
->channel
[ch
].type
!= UTIL_FORMAT_TYPE_UNSIGNED
)
2147 rt_is_unorm
= false;
2151 dw
[0] = cso
->payload
[0];
2152 dw
[1] = cso
->payload
[1];
2154 if (!rt_is_pure_integer
) {
2155 if (rt_dst_alpha_forced_one
)
2156 dw
[0] |= cso
->dw_blend_dst_alpha_forced_one
;
2158 dw
[0] |= cso
->dw_blend
;
2162 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2164 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2165 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2167 * Since logicop is ignored for non-UNORM color buffers, no special care
2171 dw
[1] |= cso
->dw_logicop
;
2174 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2176 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2177 * Dither both must be disabled."
2179 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2180 * requires that anyway.
2182 if (num_samples
> 1)
2183 dw
[1] |= cso
->dw_alpha_mod
;
2186 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2188 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2191 if (!rt_is_pure_integer
)
2192 dw
[1] |= dsa
->dw_alpha
;
2197 return state_offset
;
2200 static inline uint32_t
2201 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info
*dev
,
2202 const struct ilo_dsa_state
*dsa
,
2205 const int state_align
= 64 / 4;
2206 const int state_len
= 3;
2207 uint32_t state_offset
, *dw
;
2210 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2212 dw
= ilo_cp_steal_ptr(cp
, "DEPTH_STENCIL_STATE",
2213 state_len
, state_align
, &state_offset
);
2215 dw
[0] = dsa
->payload
[0];
2216 dw
[1] = dsa
->payload
[1];
2217 dw
[2] = dsa
->payload
[2];
2219 return state_offset
;
2222 static inline uint32_t
2223 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info
*dev
,
2224 const struct ilo_scissor_state
*scissor
,
2225 unsigned num_viewports
,
2228 const int state_align
= 32 / 4;
2229 const int state_len
= 2 * num_viewports
;
2230 uint32_t state_offset
, *dw
;
2232 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2235 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2237 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2238 * stored as an array of up to 16 elements..."
2240 assert(num_viewports
&& num_viewports
<= 16);
2242 dw
= ilo_cp_steal_ptr(cp
, "SCISSOR_RECT",
2243 state_len
, state_align
, &state_offset
);
2245 memcpy(dw
, scissor
->payload
, state_len
* 4);
2247 return state_offset
;
/**
 * Write BINDING_TABLE_STATE (GEN6 to GEN7.5) and return its state offset.
 *
 * The table is a verbatim copy of the surface_states array, one dword per
 * entry.
 *
 * NOTE(review): the trailing "cp" parameter and the value returned for an
 * empty table (0 here) were reconstructed by inference; confirm against the
 * upstream file.
 *
 * \param surface_states      entries to copy into the table
 * \param num_surface_states  number of entries, at most 256
 * \param cp                  command parser the state is written to
 * \return offset of the state, as reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = num_surface_states;
   uint32_t state_offset, *dw;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   if (!num_surface_states)
      return 0;

   dw = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         state_len, state_align, &state_offset);
   memcpy(dw, surface_states,
         num_surface_states * sizeof(surface_states[0]));

   return state_offset;
}
/*
 * Write a SURFACE_STATE (GEN6 to GEN7.5) from surf->payload and return its
 * state_offset.
 *
 * The state is 8 dwords on GEN7+ and 6 dwords on GEN6, aligned to 32 / 4
 * dwords, and is reserved with ilo_cp_steal().  payload[0] and payload[2..5]
 * are written with ilo_cp_write(); payload[1] is written with
 * ilo_cp_write_bo() against surf->bo using the selected read/write domains;
 * payload[6] and payload[7] are written only on GEN7+.
 *
 * NOTE(review): the parameter that selects between the render domains and
 * the sampler read domain, the "cp" parameter, the branch structure around
 * the domain assignments, the write_domain value of the sampler case and
 * whatever follows the last write are missing from this listing -- confirm
 * against the upstream file.
 */
2280 static inline uint32_t
2281 gen6_emit_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2282 const struct ilo_view_surface
*surf
,
2286 const int state_align
= 32 / 4;
2287 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 8 : 6;
2288 uint32_t state_offset
;
2289 uint32_t read_domains
, write_domain
;
2291 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2294 read_domains
= INTEL_DOMAIN_RENDER
;
2295 write_domain
= INTEL_DOMAIN_RENDER
;
2298 read_domains
= INTEL_DOMAIN_SAMPLER
;
2302 ilo_cp_steal(cp
, "SURFACE_STATE", state_len
, state_align
, &state_offset
);
2304 STATIC_ASSERT(Elements(surf
->payload
) >= 8);
2306 ilo_cp_write(cp
, surf
->payload
[0]);
2307 ilo_cp_write_bo(cp
, surf
->payload
[1],
2308 surf
->bo
, read_domains
, write_domain
);
2309 ilo_cp_write(cp
, surf
->payload
[2]);
2310 ilo_cp_write(cp
, surf
->payload
[3]);
2311 ilo_cp_write(cp
, surf
->payload
[4]);
2312 ilo_cp_write(cp
, surf
->payload
[5]);
2314 if (dev
->gen
>= ILO_GEN(7)) {
2315 ilo_cp_write(cp
, surf
->payload
[6]);
2316 ilo_cp_write(cp
, surf
->payload
[7]);
2321 return state_offset
;
2324 static inline uint32_t
2325 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info
*dev
,
2326 const struct pipe_stream_output_target
*so
,
2327 const struct pipe_stream_output_info
*so_info
,
2331 struct ilo_buffer
*buf
= ilo_buffer(so
->buffer
);
2332 unsigned bo_offset
, struct_size
;
2333 enum pipe_format elem_format
;
2334 struct ilo_view_surface surf
;
2336 ILO_GPE_VALID_GEN(dev
, 6, 6);
2338 bo_offset
= so
->buffer_offset
+ so_info
->output
[so_index
].dst_offset
* 4;
2339 struct_size
= so_info
->stride
[so_info
->output
[so_index
].output_buffer
] * 4;
2341 switch (so_info
->output
[so_index
].num_components
) {
2343 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2346 elem_format
= PIPE_FORMAT_R32G32_FLOAT
;
2349 elem_format
= PIPE_FORMAT_R32G32B32_FLOAT
;
2352 elem_format
= PIPE_FORMAT_R32G32B32A32_FLOAT
;
2355 assert(!"unexpected SO components length");
2356 elem_format
= PIPE_FORMAT_R32_FLOAT
;
2360 ilo_gpe_init_view_surface_for_buffer_gen6(dev
, buf
, bo_offset
, so
->buffer_size
,
2361 struct_size
, elem_format
, false, true, &surf
);
2363 return gen6_emit_SURFACE_STATE(dev
, &surf
, false, cp
);
/*
 * Write the SAMPLER_STATE array (GEN6 to GEN7.5) into space obtained with
 * ilo_cp_steal_ptr() and return its state_offset.
 *
 * Four dwords are reserved per sampler (at most 16 samplers).  For slot i
 * the sampler CSO, the sampler view and the border colour offset are taken
 * from samplers[i], views[i] and sampler_border_colors[i]; a slot whose
 * sampler or view is NULL is treated as a disabled sampler.
 *
 * The filter and wrap words are chosen by the view's texture target: 3D
 * textures always use sampler->dw_filter, while the other visible cases use
 * sampler->dw_filter_aniso when sampler->anisotropic is set; the wrap word
 * is dw_wrap_1d for 1D, dw_wrap_cube for cube maps and dw_wrap otherwise.
 * The visible dword assembly is payload[0], payload[1], the border colour
 * offset (whose low five bits must be zero) and payload[2], with
 * view->u.tex.first_level shifted to bit 22 of dw[0] as the workaround for
 * the quoted erratum.
 *
 * NOTE(review): large parts of this function (the "num_samplers" and "cp"
 * parameters, the disabled-sampler writes, how dw_filter and dw_wrap are
 * merged, which side of the GEN7 check each visible statement belongs to,
 * and the advance of dw) are missing from this listing -- confirm against
 * the upstream file.
 */
2366 static inline uint32_t
2367 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info
*dev
,
2368 const struct ilo_sampler_cso
* const *samplers
,
2369 const struct pipe_sampler_view
* const *views
,
2370 const uint32_t *sampler_border_colors
,
2374 const int state_align
= 32 / 4;
2375 const int state_len
= 4 * num_samplers
;
2376 uint32_t state_offset
, *dw
;
2379 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2382 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2384 * "The sampler state is stored as an array of up to 16 elements..."
2386 assert(num_samplers
<= 16);
2391 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_STATE",
2392 state_len
, state_align
, &state_offset
);
2394 for (i
= 0; i
< num_samplers
; i
++) {
2395 const struct ilo_sampler_cso
*sampler
= samplers
[i
];
2396 const struct pipe_sampler_view
*view
= views
[i
];
2397 const uint32_t border_color
= sampler_border_colors
[i
];
2398 uint32_t dw_filter
, dw_wrap
;
2400 /* there may be holes */
2401 if (!sampler
|| !view
) {
2402 /* disabled sampler */
2412 /* determine filter and wrap modes */
2413 switch (view
->texture
->target
) {
2414 case PIPE_TEXTURE_1D
:
2415 dw_filter
= (sampler
->anisotropic
) ?
2416 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2417 dw_wrap
= sampler
->dw_wrap_1d
;
2419 case PIPE_TEXTURE_3D
:
2421 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2423 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2424 * surfaces of type SURFTYPE_3D."
2426 dw_filter
= sampler
->dw_filter
;
2427 dw_wrap
= sampler
->dw_wrap
;
2429 case PIPE_TEXTURE_CUBE
:
2430 dw_filter
= (sampler
->anisotropic
) ?
2431 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2432 dw_wrap
= sampler
->dw_wrap_cube
;
2435 dw_filter
= (sampler
->anisotropic
) ?
2436 sampler
->dw_filter_aniso
: sampler
->dw_filter
;
2437 dw_wrap
= sampler
->dw_wrap
;
2441 dw
[0] = sampler
->payload
[0];
2442 dw
[1] = sampler
->payload
[1];
2443 assert(!(border_color
& 0x1f));
2444 dw
[2] = border_color
;
2445 dw
[3] = sampler
->payload
[2];
2449 if (dev
->gen
>= ILO_GEN(7)) {
2454 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2456 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2457 * where the min and mag mode filters are different and
2458 * SurfMinLOD is nonzero. The determination of MagMode uses the
2459 * following equation instead of the one in the above
2460 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2462 * As a way to work around that, we set Base to
2463 * view->u.tex.first_level.
2465 dw
[0] |= view
->u
.tex
.first_level
<< 22;
2473 return state_offset
;
2476 static inline uint32_t
2477 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info
*dev
,
2478 const struct ilo_sampler_cso
*sampler
,
2481 const int state_align
= 32 / 4;
2482 const int state_len
= (dev
->gen
>= ILO_GEN(7)) ? 4 : 12;
2483 uint32_t state_offset
, *dw
;
2485 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2487 dw
= ilo_cp_steal_ptr(cp
, "SAMPLER_BORDER_COLOR_STATE",
2488 state_len
, state_align
, &state_offset
);
2490 /* see ilo_gpe_init_sampler_cso() */
2491 memcpy(dw
, &sampler
->payload
[3], state_len
* 4);
2493 return state_offset
;
/*
 * Reserve space for a push constant buffer (GEN6 to GEN7.5) with
 * ilo_cp_steal_ptr() and return its state_offset.
 *
 * The reservation is align(size, 32) / 4 dwords long and aligned to 32 / 4
 * dwords.  When size is smaller than the reserved length in bytes, the bytes
 * from buf[size] to the end of the reservation are zeroed.
 *
 * NOTE(review): the "cp" parameter, the declaration of buf (indexed in bytes
 * by the memset, so presumably a byte pointer) and the use made of the "pcb"
 * out-parameter are missing from this listing -- confirm against the
 * upstream file.
 */
2496 static inline uint32_t
2497 gen6_emit_push_constant_buffer(const struct ilo_dev_info
*dev
,
2498 int size
, void **pcb
,
2502 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
2503 * to 32 bytes, and their sizes are specified in 256-bit units.
2505 const int state_align
= 32 / 4;
2506 const int state_len
= align(size
, 32) / 4;
2507 uint32_t state_offset
;
2510 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2512 buf
= ilo_cp_steal_ptr(cp
, "PUSH_CONSTANT_BUFFER",
2513 state_len
, state_align
, &state_offset
);
2515 /* zero out the unused range */
2516 if (size
< state_len
* 4)
2517 memset(&buf
[size
], 0, state_len
* 4 - size
);
2522 return state_offset
;
2525 #endif /* ILO_GPE_GEN6_H */