2 * Mesa 3-D graphics library
4 * Copyright (C) 2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #ifndef ILO_GPE_GEN7_H
29 #define ILO_GPE_GEN7_H
31 #include "intel_winsys.h"
33 #include "ilo_common.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_gpe_gen6.h"
40 gen7_emit_GPGPU_WALKER(const struct ilo_dev_info
*dev
,
43 assert(!"GPGPU_WALKER unsupported");
47 gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
51 const uint8_t cmd_len
= 3;
52 const uint32_t dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_CLEAR_PARAMS
) |
55 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
57 ilo_cp_begin(cp
, cmd_len
);
58 ilo_cp_write(cp
, dw0
);
59 ilo_cp_write(cp
, clear_val
);
65 gen7_emit_3DSTATE_VF(const struct ilo_dev_info
*dev
,
66 bool enable_cut_index
,
70 const uint8_t cmd_len
= 2;
71 uint32_t dw0
= GEN75_RENDER_CMD(3D
, 3DSTATE_VF
) | (cmd_len
- 2);
73 ILO_GPE_VALID_GEN(dev
, 7.5, 7.5);
76 dw0
|= GEN75_VF_DW0_CUT_INDEX_ENABLE
;
78 ilo_cp_begin(cp
, cmd_len
);
79 ilo_cp_write(cp
, dw0
);
80 ilo_cp_write(cp
, cut_index
);
85 gen7_emit_3dstate_pointer(const struct ilo_dev_info
*dev
,
86 int subop
, uint32_t pointer
,
89 const uint8_t cmd_len
= 2;
90 const uint32_t dw0
= GEN6_RENDER_TYPE_RENDER
|
91 GEN6_RENDER_SUBTYPE_3D
|
92 subop
| (cmd_len
- 2);
94 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
96 ilo_cp_begin(cp
, cmd_len
);
97 ilo_cp_write(cp
, dw0
);
98 ilo_cp_write(cp
, pointer
);
103 gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info
*dev
,
104 uint32_t color_calc_state
,
107 gen7_emit_3dstate_pointer(dev
,
108 GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS
, color_calc_state
, cp
);
112 gen7_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
113 const struct ilo_shader_state
*gs
,
117 const uint8_t cmd_len
= 7;
118 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_GS
) | (cmd_len
- 2);
119 const struct ilo_shader_cso
*cso
;
120 uint32_t dw2
, dw4
, dw5
;
122 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
125 ilo_cp_begin(cp
, cmd_len
);
126 ilo_cp_write(cp
, dw0
);
131 ilo_cp_write(cp
, GEN7_GS_DW5_STATISTICS
);
137 cso
= ilo_shader_get_kernel_cso(gs
);
138 dw2
= cso
->payload
[0];
139 dw4
= cso
->payload
[1];
140 dw5
= cso
->payload
[2];
142 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
144 ilo_cp_begin(cp
, cmd_len
);
145 ilo_cp_write(cp
, dw0
);
146 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(gs
));
147 ilo_cp_write(cp
, dw2
);
148 ilo_cp_write(cp
, 0); /* scratch */
149 ilo_cp_write(cp
, dw4
);
150 ilo_cp_write(cp
, dw5
);
156 gen7_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
157 const struct ilo_rasterizer_state
*rasterizer
,
158 enum pipe_format zs_format
,
161 const uint8_t cmd_len
= 7;
162 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_SF
) | (cmd_len
- 2);
163 const int num_samples
= 1;
166 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
168 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
,
169 rasterizer
, num_samples
, zs_format
,
170 payload
, Elements(payload
));
172 ilo_cp_begin(cp
, cmd_len
);
173 ilo_cp_write(cp
, dw0
);
174 ilo_cp_write_multi(cp
, payload
, 6);
179 gen7_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
180 const struct ilo_shader_state
*fs
,
181 const struct ilo_rasterizer_state
*rasterizer
,
182 bool cc_may_kill
, uint32_t hiz_op
,
185 const uint8_t cmd_len
= 3;
186 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_WM
) | (cmd_len
- 2);
187 const int num_samples
= 1;
190 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
192 /* see ilo_gpe_init_rasterizer_wm() */
194 dw1
= rasterizer
->wm
.payload
[0];
195 dw2
= rasterizer
->wm
.payload
[1];
198 dw1
|= GEN7_WM_DW1_STATISTICS
;
206 const struct ilo_shader_cso
*fs_cso
= ilo_shader_get_kernel_cso(fs
);
208 dw1
|= fs_cso
->payload
[3];
212 dw1
|= GEN7_WM_DW1_PS_ENABLE
| GEN7_WM_DW1_PS_KILL
;
214 if (num_samples
> 1) {
215 dw1
|= rasterizer
->wm
.dw_msaa_rast
;
216 dw2
|= rasterizer
->wm
.dw_msaa_disp
;
219 ilo_cp_begin(cp
, cmd_len
);
220 ilo_cp_write(cp
, dw0
);
221 ilo_cp_write(cp
, dw1
);
222 ilo_cp_write(cp
, dw2
);
227 gen7_emit_3dstate_constant(const struct ilo_dev_info
*dev
,
229 const uint32_t *bufs
, const int *sizes
,
233 const uint8_t cmd_len
= 7;
234 const uint32_t dw0
= GEN6_RENDER_TYPE_RENDER
|
235 GEN6_RENDER_SUBTYPE_3D
|
236 subop
| (cmd_len
- 2);
238 int total_read_length
, i
;
240 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
242 /* VS, HS, DS, GS, and PS variants */
243 assert(subop
>= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS
&&
244 subop
<= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS
&&
245 subop
!= GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK
);
247 assert(num_bufs
<= 4);
252 total_read_length
= 0;
253 for (i
= 0; i
< 4; i
++) {
257 * From the Ivy Bridge PRM, volume 2 part 1, page 112:
259 * "Constant buffers must be enabled in order from Constant Buffer 0
260 * to Constant Buffer 3 within this command. For example, it is
261 * not allowed to enable Constant Buffer 1 by programming a
262 * non-zero value in the VS Constant Buffer 1 Read Length without a
263 * non-zero value in VS Constant Buffer 0 Read Length."
265 if (i
>= num_bufs
|| !sizes
[i
]) {
267 assert(i
>= num_bufs
|| !sizes
[i
]);
273 /* read lengths are in 256-bit units */
274 read_len
= (sizes
[i
] + 31) / 32;
275 /* the lower 5 bits are used for memory object control state */
276 assert(bufs
[i
] % 32 == 0);
278 dw
[i
/ 2] |= read_len
<< ((i
% 2) ? 16 : 0);
281 total_read_length
+= read_len
;
285 * From the Ivy Bridge PRM, volume 2 part 1, page 113:
287 * "The sum of all four read length fields must be less than or equal
290 assert(total_read_length
<= 64);
292 ilo_cp_begin(cp
, cmd_len
);
293 ilo_cp_write(cp
, dw0
);
294 ilo_cp_write_multi(cp
, dw
, 6);
299 gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info
*dev
,
300 const uint32_t *bufs
, const int *sizes
,
304 gen7_emit_3dstate_constant(dev
, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS
,
305 bufs
, sizes
, num_bufs
, cp
);
309 gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info
*dev
,
310 const uint32_t *bufs
, const int *sizes
,
314 gen7_emit_3dstate_constant(dev
, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS
,
315 bufs
, sizes
, num_bufs
, cp
);
319 gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info
*dev
,
320 const uint32_t *bufs
, const int *sizes
,
324 gen7_emit_3dstate_constant(dev
, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS
,
325 bufs
, sizes
, num_bufs
, cp
);
329 gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info
*dev
,
330 unsigned sample_mask
,
334 const uint8_t cmd_len
= 2;
335 const unsigned valid_mask
= ((1 << num_samples
) - 1) | 0x1;
336 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DSTATE_SAMPLE_MASK
) |
339 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
342 * From the Ivy Bridge PRM, volume 2 part 1, page 294:
344 * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
345 * (Sample Mask) must be zero.
347 * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
350 sample_mask
&= valid_mask
;
352 ilo_cp_begin(cp
, cmd_len
);
353 ilo_cp_write(cp
, dw0
);
354 ilo_cp_write(cp
, sample_mask
);
359 gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info
*dev
,
360 const uint32_t *bufs
, const int *sizes
,
364 gen7_emit_3dstate_constant(dev
, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS
,
365 bufs
, sizes
, num_bufs
, cp
);
369 gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info
*dev
,
370 const uint32_t *bufs
, const int *sizes
,
374 gen7_emit_3dstate_constant(dev
, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS
,
375 bufs
, sizes
, num_bufs
, cp
);
379 gen7_emit_3DSTATE_HS(const struct ilo_dev_info
*dev
,
380 const struct ilo_shader_state
*hs
,
384 const uint8_t cmd_len
= 7;
385 const uint32_t dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_HS
) | (cmd_len
- 2);
387 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
391 ilo_cp_begin(cp
, cmd_len
);
392 ilo_cp_write(cp
, dw0
);
403 gen7_emit_3DSTATE_TE(const struct ilo_dev_info
*dev
,
406 const uint8_t cmd_len
= 4;
407 const uint32_t dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_TE
) | (cmd_len
- 2);
409 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
411 ilo_cp_begin(cp
, cmd_len
);
412 ilo_cp_write(cp
, dw0
);
420 gen7_emit_3DSTATE_DS(const struct ilo_dev_info
*dev
,
421 const struct ilo_shader_state
*ds
,
425 const uint8_t cmd_len
= 6;
426 const uint32_t dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_DS
) | (cmd_len
- 2);
428 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
432 ilo_cp_begin(cp
, cmd_len
);
433 ilo_cp_write(cp
, dw0
);
444 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info
*dev
,
445 unsigned buffer_mask
,
446 int vertex_attrib_count
,
447 bool rasterizer_discard
,
450 const uint8_t cmd_len
= 3;
451 const bool enable
= (buffer_mask
!= 0);
452 const uint32_t dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_STREAMOUT
) |
457 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
460 dw1
= 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT
;
461 if (rasterizer_discard
)
462 dw1
|= GEN7_SO_DW1_RENDER_DISABLE
;
466 ilo_cp_begin(cp
, cmd_len
);
467 ilo_cp_write(cp
, dw0
);
468 ilo_cp_write(cp
, dw1
);
469 ilo_cp_write(cp
, dw2
);
474 read_len
= (vertex_attrib_count
+ 1) / 2;
478 dw1
= GEN7_SO_DW1_SO_ENABLE
|
479 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT
|
480 GEN7_SO_DW1_STATISTICS
|
483 if (rasterizer_discard
)
484 dw1
|= GEN7_SO_DW1_RENDER_DISABLE
;
488 dw1
|= GEN7_SO_DW1_REORDER_TRAILING
;
490 dw2
= 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT
|
491 0 << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT
|
492 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT
|
493 0 << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT
|
494 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT
|
495 0 << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT
|
496 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT
|
497 (read_len
- 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT
;
499 ilo_cp_begin(cp
, cmd_len
);
500 ilo_cp_write(cp
, dw0
);
501 ilo_cp_write(cp
, dw1
);
502 ilo_cp_write(cp
, dw2
);
507 gen7_emit_3DSTATE_SBE(const struct ilo_dev_info
*dev
,
508 const struct ilo_rasterizer_state
*rasterizer
,
509 const struct ilo_shader_state
*fs
,
512 const uint8_t cmd_len
= 14;
513 const uint32_t dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_SBE
) | (cmd_len
- 2);
516 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
518 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
, fs
, dw
, Elements(dw
));
520 ilo_cp_begin(cp
, cmd_len
);
521 ilo_cp_write(cp
, dw0
);
522 ilo_cp_write_multi(cp
, dw
, 13);
527 gen7_emit_3DSTATE_PS(const struct ilo_dev_info
*dev
,
528 const struct ilo_shader_state
*fs
,
529 int num_samplers
, bool dual_blend
,
532 const uint8_t cmd_len
= 8;
533 const uint32_t dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_PS
) | (cmd_len
- 2);
534 const struct ilo_shader_cso
*cso
;
535 uint32_t dw2
, dw4
, dw5
;
537 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
542 /* GPU hangs if none of the dispatch enable bits is set */
543 dw4
= GEN7_PS_DW4_8_PIXEL_DISPATCH
;
545 /* see brwCreateContext() */
548 max_threads
= (dev
->gt
== 3) ? 408 : (dev
->gt
== 2) ? 204 : 102;
549 dw4
|= (max_threads
- 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT
;
553 max_threads
= (dev
->gt
== 2) ? 172 : 48;
554 dw4
|= (max_threads
- 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT
;
558 ilo_cp_begin(cp
, cmd_len
);
559 ilo_cp_write(cp
, dw0
);
563 ilo_cp_write(cp
, dw4
);
572 cso
= ilo_shader_get_kernel_cso(fs
);
573 dw2
= cso
->payload
[0];
574 dw4
= cso
->payload
[1];
575 dw5
= cso
->payload
[2];
577 dw2
|= (num_samplers
+ 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
580 dw4
|= GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
582 ilo_cp_begin(cp
, cmd_len
);
583 ilo_cp_write(cp
, dw0
);
584 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
585 ilo_cp_write(cp
, dw2
);
586 ilo_cp_write(cp
, 0); /* scratch */
587 ilo_cp_write(cp
, dw4
);
588 ilo_cp_write(cp
, dw5
);
589 ilo_cp_write(cp
, 0); /* kernel 1 */
590 ilo_cp_write(cp
, 0); /* kernel 2 */
595 gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info
*dev
,
596 uint32_t sf_clip_viewport
,
599 gen7_emit_3dstate_pointer(dev
,
600 GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP
,
601 sf_clip_viewport
, cp
);
605 gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info
*dev
,
606 uint32_t cc_viewport
,
609 gen7_emit_3dstate_pointer(dev
,
610 GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC
,
615 gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info
*dev
,
616 uint32_t blend_state
,
619 gen7_emit_3dstate_pointer(dev
,
620 GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS
,
625 gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info
*dev
,
626 uint32_t depth_stencil_state
,
629 gen7_emit_3dstate_pointer(dev
,
630 GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS
,
631 depth_stencil_state
, cp
);
635 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info
*dev
,
636 uint32_t binding_table
,
639 gen7_emit_3dstate_pointer(dev
,
640 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS
,
645 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info
*dev
,
646 uint32_t binding_table
,
649 gen7_emit_3dstate_pointer(dev
,
650 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_HS
,
655 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info
*dev
,
656 uint32_t binding_table
,
659 gen7_emit_3dstate_pointer(dev
,
660 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_DS
,
665 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info
*dev
,
666 uint32_t binding_table
,
669 gen7_emit_3dstate_pointer(dev
,
670 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_GS
,
675 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info
*dev
,
676 uint32_t binding_table
,
679 gen7_emit_3dstate_pointer(dev
,
680 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS
,
685 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info
*dev
,
686 uint32_t sampler_state
,
689 gen7_emit_3dstate_pointer(dev
,
690 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_VS
,
695 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info
*dev
,
696 uint32_t sampler_state
,
699 gen7_emit_3dstate_pointer(dev
,
700 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_HS
,
705 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info
*dev
,
706 uint32_t sampler_state
,
709 gen7_emit_3dstate_pointer(dev
,
710 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_DS
,
715 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info
*dev
,
716 uint32_t sampler_state
,
719 gen7_emit_3dstate_pointer(dev
,
720 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_GS
,
725 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info
*dev
,
726 uint32_t sampler_state
,
729 gen7_emit_3dstate_pointer(dev
,
730 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS
,
735 gen7_emit_3dstate_urb(const struct ilo_dev_info
*dev
,
736 int subop
, int offset
, int size
,
740 const uint8_t cmd_len
= 2;
741 const uint32_t dw0
= GEN6_RENDER_TYPE_RENDER
|
742 GEN6_RENDER_SUBTYPE_3D
|
743 subop
| (cmd_len
- 2);
744 const int row_size
= 64; /* 512 bits */
745 int alloc_size
, num_entries
, min_entries
, max_entries
;
747 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
749 /* VS, HS, DS, and GS variants */
750 assert(subop
>= GEN7_RENDER_OPCODE_3DSTATE_URB_VS
&&
751 subop
<= GEN7_RENDER_OPCODE_3DSTATE_URB_GS
);
753 /* in multiples of 8KB */
754 assert(offset
% 8192 == 0);
757 /* in multiple of 512-bit rows */
758 alloc_size
= (entry_size
+ row_size
- 1) / row_size
;
763 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
765 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
766 * cause performance to decrease due to banking in the URB. Element
767 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
769 if (subop
== GEN7_RENDER_OPCODE_3DSTATE_URB_VS
&& alloc_size
== 5)
772 /* in multiples of 8 */
773 num_entries
= (size
/ row_size
/ alloc_size
) & ~7;
776 case GEN7_RENDER_OPCODE_3DSTATE_URB_VS
:
779 max_entries
= (dev
->gt
>= 2) ? 1664 : 640;
780 min_entries
= (dev
->gt
>= 2) ? 64 : 32;
784 max_entries
= (dev
->gt
== 2) ? 704 : 512;
789 assert(num_entries
>= min_entries
);
790 if (num_entries
> max_entries
)
791 num_entries
= max_entries
;
793 case GEN7_RENDER_OPCODE_3DSTATE_URB_HS
:
794 max_entries
= (dev
->gt
== 2) ? 64 : 32;
795 if (num_entries
> max_entries
)
796 num_entries
= max_entries
;
798 case GEN7_RENDER_OPCODE_3DSTATE_URB_DS
:
800 assert(num_entries
>= 138);
802 case GEN7_RENDER_OPCODE_3DSTATE_URB_GS
:
805 max_entries
= (dev
->gt
>= 2) ? 640 : 256;
809 max_entries
= (dev
->gt
== 2) ? 320 : 192;
813 if (num_entries
> max_entries
)
814 num_entries
= max_entries
;
820 ilo_cp_begin(cp
, cmd_len
);
821 ilo_cp_write(cp
, dw0
);
822 ilo_cp_write(cp
, offset
<< GEN7_URB_ANY_DW1_OFFSET__SHIFT
|
823 (alloc_size
- 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT
|
829 gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info
*dev
,
830 int offset
, int size
, int entry_size
,
833 gen7_emit_3dstate_urb(dev
, GEN7_RENDER_OPCODE_3DSTATE_URB_VS
,
834 offset
, size
, entry_size
, cp
);
838 gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info
*dev
,
839 int offset
, int size
, int entry_size
,
842 gen7_emit_3dstate_urb(dev
, GEN7_RENDER_OPCODE_3DSTATE_URB_HS
,
843 offset
, size
, entry_size
, cp
);
847 gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info
*dev
,
848 int offset
, int size
, int entry_size
,
851 gen7_emit_3dstate_urb(dev
, GEN7_RENDER_OPCODE_3DSTATE_URB_DS
,
852 offset
, size
, entry_size
, cp
);
856 gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info
*dev
,
857 int offset
, int size
, int entry_size
,
860 gen7_emit_3dstate_urb(dev
, GEN7_RENDER_OPCODE_3DSTATE_URB_GS
,
861 offset
, size
, entry_size
, cp
);
865 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info
*dev
,
866 int subop
, int offset
, int size
,
869 const uint8_t cmd_len
= 2;
870 const uint32_t dw0
= GEN6_RENDER_TYPE_RENDER
|
871 GEN6_RENDER_SUBTYPE_3D
|
872 subop
| (cmd_len
- 2);
875 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
877 /* VS, HS, DS, GS, and PS variants */
878 assert(subop
>= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS
&&
879 subop
<= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS
);
882 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
884 * "(A table that says the maximum size of each constant buffer is
887 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
889 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
890 * may not exceed the maximum value of the Constant Buffer Size."
892 * Thus, the valid range of buffer end is [0KB, 16KB].
894 end
= (offset
+ size
) / 1024;
896 assert(!"invalid constant buffer end");
900 /* the valid range of buffer offset is [0KB, 15KB] */
901 offset
= (offset
+ 1023) / 1024;
903 assert(!"invalid constant buffer offset");
912 /* the valid range of buffer size is [0KB, 15KB] */
915 assert(!"invalid constant buffer size");
919 ilo_cp_begin(cp
, cmd_len
);
920 ilo_cp_write(cp
, dw0
);
921 ilo_cp_write(cp
, offset
<< GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT
|
927 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info
*dev
,
928 int offset
, int size
,
931 gen7_emit_3dstate_push_constant_alloc(dev
,
932 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS
, offset
, size
, cp
);
936 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info
*dev
,
937 int offset
, int size
,
940 gen7_emit_3dstate_push_constant_alloc(dev
,
941 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS
, offset
, size
, cp
);
945 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info
*dev
,
946 int offset
, int size
,
949 gen7_emit_3dstate_push_constant_alloc(dev
,
950 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS
, offset
, size
, cp
);
954 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info
*dev
,
955 int offset
, int size
,
958 gen7_emit_3dstate_push_constant_alloc(dev
,
959 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS
, offset
, size
, cp
);
963 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info
*dev
,
964 int offset
, int size
,
967 gen7_emit_3dstate_push_constant_alloc(dev
,
968 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS
, offset
, size
, cp
);
972 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info
*dev
,
973 const struct pipe_stream_output_info
*so_info
,
978 int buffer_selects
, num_entries
, i
;
979 uint16_t so_decls
[128];
981 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
987 int buffer_offsets
[PIPE_MAX_SO_BUFFERS
];
989 memset(buffer_offsets
, 0, sizeof(buffer_offsets
));
991 for (i
= 0; i
< so_info
->num_outputs
; i
++) {
992 unsigned decl
, buf
, reg
, mask
;
994 buf
= so_info
->output
[i
].output_buffer
;
997 assert(buffer_offsets
[buf
] <= so_info
->output
[i
].dst_offset
);
998 while (buffer_offsets
[buf
] < so_info
->output
[i
].dst_offset
) {
1001 num_dwords
= so_info
->output
[i
].dst_offset
- buffer_offsets
[buf
];
1005 decl
= buf
<< GEN7_SO_DECL_OUTPUT_SLOT__SHIFT
|
1006 GEN7_SO_DECL_HOLE_FLAG
|
1007 ((1 << num_dwords
) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT
;
1009 so_decls
[num_entries
++] = decl
;
1010 buffer_offsets
[buf
] += num_dwords
;
1013 reg
= so_info
->output
[i
].register_index
;
1014 mask
= ((1 << so_info
->output
[i
].num_components
) - 1) <<
1015 so_info
->output
[i
].start_component
;
1017 decl
= buf
<< GEN7_SO_DECL_OUTPUT_SLOT__SHIFT
|
1018 reg
<< GEN7_SO_DECL_REG_INDEX__SHIFT
|
1019 mask
<< GEN7_SO_DECL_COMPONENT_MASK__SHIFT
;
1021 so_decls
[num_entries
++] = decl
;
1022 buffer_selects
|= 1 << buf
;
1023 buffer_offsets
[buf
] += so_info
->output
[i
].num_components
;
1028 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
1030 * "Errata: All 128 decls for all four streams must be included
1031 * whenever this command is issued. The "Num Entries [n]" fields still
1032 * contain the actual numbers of valid decls."
1034 * Also note that "DWord Length" has 9 bits for this command, and the type
1035 * of cmd_len is thus uint16_t.
1037 cmd_len
= 2 * 128 + 3;
1038 dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_SO_DECL_LIST
) | (cmd_len
- 2);
1040 ilo_cp_begin(cp
, cmd_len
);
1041 ilo_cp_write(cp
, dw0
);
1042 ilo_cp_write(cp
, 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT
|
1043 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT
|
1044 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT
|
1045 buffer_selects
<< GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT
);
1046 ilo_cp_write(cp
, 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT
|
1047 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT
|
1048 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT
|
1049 num_entries
<< GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT
);
1051 for (i
= 0; i
< num_entries
; i
++) {
1052 ilo_cp_write(cp
, so_decls
[i
]);
1053 ilo_cp_write(cp
, 0);
1055 for (; i
< 128; i
++) {
1056 ilo_cp_write(cp
, 0);
1057 ilo_cp_write(cp
, 0);
1064 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info
*dev
,
1065 int index
, int base
, int stride
,
1066 const struct pipe_stream_output_target
*so_target
,
1069 const uint8_t cmd_len
= 4;
1070 const uint32_t dw0
= GEN7_RENDER_CMD(3D
, 3DSTATE_SO_BUFFER
) |
1072 struct ilo_buffer
*buf
;
1075 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1077 if (!so_target
|| !so_target
->buffer
) {
1078 ilo_cp_begin(cp
, cmd_len
);
1079 ilo_cp_write(cp
, dw0
);
1080 ilo_cp_write(cp
, index
<< GEN7_SO_BUF_DW1_INDEX__SHIFT
);
1081 ilo_cp_write(cp
, 0);
1082 ilo_cp_write(cp
, 0);
1087 buf
= ilo_buffer(so_target
->buffer
);
1090 assert(stride
% 4 == 0 && base
% 4 == 0);
1091 assert(so_target
->buffer_offset
% 4 == 0);
1094 base
= (base
+ so_target
->buffer_offset
) & ~3;
1095 end
= (base
+ so_target
->buffer_size
) & ~3;
1097 ilo_cp_begin(cp
, cmd_len
);
1098 ilo_cp_write(cp
, dw0
);
1099 ilo_cp_write(cp
, index
<< GEN7_SO_BUF_DW1_INDEX__SHIFT
|
1101 ilo_cp_write_bo(cp
, base
, buf
->bo
, INTEL_RELOC_WRITE
);
1102 ilo_cp_write_bo(cp
, end
, buf
->bo
, INTEL_RELOC_WRITE
);
1107 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
1108 const struct pipe_draw_info
*info
,
1109 const struct ilo_ib_state
*ib
,
1113 const uint8_t cmd_len
= 7;
1114 const uint32_t dw0
= GEN6_RENDER_CMD(3D
, 3DPRIMITIVE
) | (cmd_len
- 2);
1115 const int prim
= (rectlist
) ?
1116 GEN6_3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
1117 const int vb_access
= (info
->indexed
) ?
1118 GEN7_3DPRIM_DW1_ACCESS_RANDOM
:
1119 GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL
;
1120 const uint32_t vb_start
= info
->start
+
1121 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
1123 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1125 ilo_cp_begin(cp
, cmd_len
);
1126 ilo_cp_write(cp
, dw0
);
1127 ilo_cp_write(cp
, vb_access
| prim
);
1128 ilo_cp_write(cp
, info
->count
);
1129 ilo_cp_write(cp
, vb_start
);
1130 ilo_cp_write(cp
, info
->instance_count
);
1131 ilo_cp_write(cp
, info
->start_instance
);
1132 ilo_cp_write(cp
, info
->index_bias
);
1136 static inline uint32_t
1137 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
1138 const struct ilo_viewport_cso
*viewports
,
1139 unsigned num_viewports
,
1142 const int state_align
= 64 / 4;
1143 const int state_len
= 16 * num_viewports
;
1144 uint32_t state_offset
, *dw
;
1147 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1150 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1152 * "The viewport-specific state used by both the SF and CL units
1153 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1154 * of which contains the DWords described below. The start of each
1155 * element is spaced 16 DWords apart. The location of first element of
1156 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1157 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1159 assert(num_viewports
&& num_viewports
<= 16);
1161 dw
= ilo_cp_steal_ptr(cp
, ILO_BUILDER_ITEM_SF_VIEWPORT
,
1162 state_len
, state_align
, &state_offset
);
1164 for (i
= 0; i
< num_viewports
; i
++) {
1165 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1167 dw
[0] = fui(vp
->m00
);
1168 dw
[1] = fui(vp
->m11
);
1169 dw
[2] = fui(vp
->m22
);
1170 dw
[3] = fui(vp
->m30
);
1171 dw
[4] = fui(vp
->m31
);
1172 dw
[5] = fui(vp
->m32
);
1175 dw
[8] = fui(vp
->min_gbx
);
1176 dw
[9] = fui(vp
->max_gbx
);
1177 dw
[10] = fui(vp
->min_gby
);
1178 dw
[11] = fui(vp
->max_gby
);
1187 return state_offset
;
1190 #endif /* ILO_GPE_GEN7_H */