/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 *    Chia-I Wu <olv@lunarg.com>
 */
28 #ifndef ILO_GPE_GEN7_H
29 #define ILO_GPE_GEN7_H
31 #include "intel_winsys.h"
33 #include "ilo_common.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_gpe_gen6.h"
/**
 * Commands that GEN7 GPE could emit.
 *
 * Enumerators are numbered consecutively from 0, so
 * ILO_GPE_GEN7_COMMAND_COUNT equals the number of commands listed and can be
 * used to size per-command tables.  The trailing comment on each entry is the
 * opcode recorded by the original author: either an ILO_GPE_MI() argument or
 * a three-value tuple (the emit helpers in this file pass the same tuples to
 * ILO_GPE_CMD(), e.g. (0x3, 0x0, 0x04) for 3DSTATE_CLEAR_PARAMS).
 */
enum ilo_gpe_gen7_command {
   ILO_GPE_GEN7_MI_STORE_DATA_IMM,                   /* ILO_GPE_MI(0x20) */
   ILO_GPE_GEN7_MI_LOAD_REGISTER_IMM,                /* ILO_GPE_MI(0x22) */
   ILO_GPE_GEN7_MI_STORE_REGISTER_MEM,               /* ILO_GPE_MI(0x24) */
   ILO_GPE_GEN7_MI_REPORT_PERF_COUNT,                /* ILO_GPE_MI(0x28) */
   ILO_GPE_GEN7_STATE_BASE_ADDRESS,                  /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN7_STATE_SIP,                           /* (0x0, 0x1, 0x02) */
   ILO_GPE_GEN7_3DSTATE_VF_STATISTICS,               /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN7_PIPELINE_SELECT,                     /* (0x1, 0x1, 0x04) */
   ILO_GPE_GEN7_MEDIA_VFE_STATE,                     /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN7_MEDIA_CURBE_LOAD,                    /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD,     /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN7_MEDIA_STATE_FLUSH,                   /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN7_GPGPU_WALKER,                        /* (0x2, 0x1, 0x05) */
   ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS,                /* (0x3, 0x0, 0x04) */
   ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER,                /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER,              /* (0x3, 0x0, 0x06) */
   ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER,           /* (0x3, 0x0, 0x07) */
   ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS,              /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS,             /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER,                /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN7_3DSTATE_VF,                          /* (0x3, 0x0, 0x0c) */
   ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS,           /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS,      /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN7_3DSTATE_VS,                          /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN7_3DSTATE_GS,                          /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN7_3DSTATE_CLIP,                        /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN7_3DSTATE_SF,                          /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN7_3DSTATE_WM,                          /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_VS,                 /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_GS,                 /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_PS,                 /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK,                 /* (0x3, 0x0, 0x18) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_HS,                 /* (0x3, 0x0, 0x19) */
   ILO_GPE_GEN7_3DSTATE_CONSTANT_DS,                 /* (0x3, 0x0, 0x1a) */
   ILO_GPE_GEN7_3DSTATE_HS,                          /* (0x3, 0x0, 0x1b) */
   ILO_GPE_GEN7_3DSTATE_TE,                          /* (0x3, 0x0, 0x1c) */
   ILO_GPE_GEN7_3DSTATE_DS,                          /* (0x3, 0x0, 0x1d) */
   ILO_GPE_GEN7_3DSTATE_STREAMOUT,                   /* (0x3, 0x0, 0x1e) */
   ILO_GPE_GEN7_3DSTATE_SBE,                         /* (0x3, 0x0, 0x1f) */
   ILO_GPE_GEN7_3DSTATE_PS,                          /* (0x3, 0x0, 0x20) */
   ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, /* (0x3, 0x0, 0x21) */
   ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC,  /* (0x3, 0x0, 0x23) */
   ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS,        /* (0x3, 0x0, 0x24) */
   ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, /* (0x3, 0x0, 0x25) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS,   /* (0x3, 0x0, 0x26) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS,   /* (0x3, 0x0, 0x27) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS,   /* (0x3, 0x0, 0x28) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS,   /* (0x3, 0x0, 0x29) */
   ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS,   /* (0x3, 0x0, 0x2a) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS,   /* (0x3, 0x0, 0x2b) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS,   /* (0x3, 0x0, 0x2c) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS,   /* (0x3, 0x0, 0x2d) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS,   /* (0x3, 0x0, 0x2e) */
   ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS,   /* (0x3, 0x0, 0x2f) */
   ILO_GPE_GEN7_3DSTATE_URB_VS,                      /* (0x3, 0x0, 0x30) */
   ILO_GPE_GEN7_3DSTATE_URB_HS,                      /* (0x3, 0x0, 0x31) */
   ILO_GPE_GEN7_3DSTATE_URB_DS,                      /* (0x3, 0x0, 0x32) */
   ILO_GPE_GEN7_3DSTATE_URB_GS,                      /* (0x3, 0x0, 0x33) */
   ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE,           /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET,         /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN,        /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE,                /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS,          /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN7_3DSTATE_MULTISAMPLE,                 /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS,      /* (0x3, 0x1, 0x12) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS,      /* (0x3, 0x1, 0x13) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS,      /* (0x3, 0x1, 0x14) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS,      /* (0x3, 0x1, 0x15) */
   ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS,      /* (0x3, 0x1, 0x16) */
   ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST,                /* (0x3, 0x1, 0x17) */
   ILO_GPE_GEN7_3DSTATE_SO_BUFFER,                   /* (0x3, 0x1, 0x18) */
   ILO_GPE_GEN7_PIPE_CONTROL,                        /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN7_3DPRIMITIVE,                         /* (0x3, 0x3, 0x00) */

   /* Number of commands above; must remain the last enumerator. */
   ILO_GPE_GEN7_COMMAND_COUNT,
};
/**
 * Indirect states that GEN7 GPE could emit.
 *
 * Enumerators are numbered consecutively from 0, so
 * ILO_GPE_GEN7_STATE_COUNT equals the number of states listed and can be
 * used to size per-state tables.
 */
enum ilo_gpe_gen7_state {
   ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA,
   ILO_GPE_GEN7_SF_CLIP_VIEWPORT,
   ILO_GPE_GEN7_CC_VIEWPORT,
   ILO_GPE_GEN7_COLOR_CALC_STATE,
   ILO_GPE_GEN7_BLEND_STATE,
   ILO_GPE_GEN7_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN7_SCISSOR_RECT,
   ILO_GPE_GEN7_BINDING_TABLE_STATE,
   ILO_GPE_GEN7_SURFACE_STATE,
   ILO_GPE_GEN7_SAMPLER_STATE,
   ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE,
   ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER,

   /* Number of states above; must remain the last enumerator. */
   ILO_GPE_GEN7_STATE_COUNT,
};
/*
 * Declaration fragment for ilo_gpe_gen7_estimate_command_size().
 * Visible parameters: the device info pointer (dev) and the command
 * enumerator (cmd).
 * NOTE(review): the return type and the end of the parameter list are not
 * present in this extract - confirm against the full header.
 */
141 ilo_gpe_gen7_estimate_command_size(const struct ilo_dev_info
*dev
,
142 enum ilo_gpe_gen7_command cmd
,
/*
 * Declaration fragment for ilo_gpe_gen7_estimate_state_size().
 * Visible parameters: the device info pointer (dev) and the indirect state
 * enumerator (state).
 * NOTE(review): the return type and the end of the parameter list are not
 * present in this extract - confirm against the full header.
 */
146 ilo_gpe_gen7_estimate_state_size(const struct ilo_dev_info
*dev
,
147 enum ilo_gpe_gen7_state state
,
/*
 * gen7_emit_GPGPU_WALKER: the only statement visible here is an assertion on
 * the negation of a string literal, which fails whenever assertions are
 * enabled; its message marks GPGPU_WALKER as unsupported.
 * NOTE(review): the extract omits lines of this definition (return type,
 * remaining parameters, braces); only the visible code is described.
 */
151 gen7_emit_GPGPU_WALKER(const struct ilo_dev_info
*dev
,
154 assert(!"GPGPU_WALKER unsupported");
/*
 * gen7_emit_3DSTATE_CLEAR_PARAMS: builds the opcode with
 * ILO_GPE_CMD(0x3, 0x0, 0x04) and a command length of 3, invokes
 * ILO_GPE_VALID_GEN(dev, 7, 7.5), calls ilo_cp_begin(cp, cmd_len), then
 * writes the header dword (cmd | (cmd_len - 2)) followed by clear_val.
 * NOTE(review): cmd_len is 3 but only two writes are visible, and the
 * declarations of cp and clear_val are absent; the extract omits lines of
 * this definition, so only the visible statements are described.
 */
158 gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
162 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x04);
163 const uint8_t cmd_len
= 3;
165 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
167 ilo_cp_begin(cp
, cmd_len
);
168 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
169 ilo_cp_write(cp
, clear_val
);
/*
 * gen7_emit_3DSTATE_VF: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x0c)
 * and a command length of 2, and invokes ILO_GPE_VALID_GEN(dev, 7.5, 7.5)
 * (unlike the other helpers here, which pass 7 as the lower bound).
 * The header dword is cmd | (cmd_len - 2), with
 * GEN75_VF_DW0_CUT_INDEX_ENABLE ORed in when enable_cut_index is true; the
 * second dword written is cut_index.
 * NOTE(review): the extract omits lines of this definition (return type,
 * remaining parameters, braces, possibly statements); only the visible
 * statements are described.
 */
175 gen7_emit_3DSTATE_VF(const struct ilo_dev_info
*dev
,
176 bool enable_cut_index
,
180 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0c);
181 const uint8_t cmd_len
= 2;
183 ILO_GPE_VALID_GEN(dev
, 7.5, 7.5);
185 ilo_cp_begin(cp
, cmd_len
);
186 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
187 ((enable_cut_index
) ? GEN75_VF_DW0_CUT_INDEX_ENABLE
: 0));
188 ilo_cp_write(cp
, cut_index
);
/*
 * gen7_emit_3dstate_pointer: shared helper used by the
 * *_STATE_POINTERS_* and BINDING_TABLE_POINTERS_* wrappers in this file.
 * Builds the opcode with ILO_GPE_CMD(0x3, 0x0, subop) and a command length
 * of 2, invokes ILO_GPE_VALID_GEN(dev, 7, 7.5), then writes the header
 * dword (cmd | (cmd_len - 2)) followed by the caller-supplied pointer value.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces, possibly statements); only the visible
 * statements are described.
 */
193 gen7_emit_3dstate_pointer(const struct ilo_dev_info
*dev
,
194 int subop
, uint32_t pointer
,
197 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, subop
);
198 const uint8_t cmd_len
= 2;
200 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
202 ilo_cp_begin(cp
, cmd_len
);
203 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
204 ilo_cp_write(cp
, pointer
);
/*
 * gen7_emit_3DSTATE_CC_STATE_POINTERS: forwards color_calc_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x0e.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
209 gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info
*dev
,
210 uint32_t color_calc_state
,
213 gen7_emit_3dstate_pointer(dev
, 0x0e, color_calc_state
, cp
);
/*
 * gen7_emit_3DSTATE_GS: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x11)
 * and a command length of 7.
 *
 * Two emission sequences are visible:
 *  - a first one that writes the header dword and GEN7_GS_DW5_STATISTICS
 *    (presumably the path for a missing gs - the guarding condition and the
 *    other dwords are not visible, confirm);
 *  - a second one that loads dw2/dw4/dw5 from the kernel cso payload
 *    entries 0..2, ORs ((num_samplers + 3) / 4) shifted by
 *    GEN6_THREADDISP_SAMPLER_COUNT__SHIFT into dw2, and writes the header,
 *    the kernel offset, dw2, 0 (scratch), dw4 and dw5.
 *
 * NOTE(review): the extract omits lines of this definition (return type,
 * remaining parameters, braces, branch conditions, some writes); only the
 * visible statements are described.
 */
217 gen7_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
218 const struct ilo_shader_state
*gs
,
222 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x11);
223 const uint8_t cmd_len
= 7;
224 const struct ilo_shader_cso
*cso
;
225 uint32_t dw2
, dw4
, dw5
;
227 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
230 ilo_cp_begin(cp
, cmd_len
);
231 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
236 ilo_cp_write(cp
, GEN7_GS_DW5_STATISTICS
);
242 cso
= ilo_shader_get_kernel_cso(gs
);
243 dw2
= cso
->payload
[0];
244 dw4
= cso
->payload
[1];
245 dw5
= cso
->payload
[2];
247 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
249 ilo_cp_begin(cp
, cmd_len
);
250 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
251 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(gs
));
252 ilo_cp_write(cp
, dw2
);
253 ilo_cp_write(cp
, 0); /* scratch */
254 ilo_cp_write(cp
, dw4
);
255 ilo_cp_write(cp
, dw5
);
/*
 * gen7_emit_3DSTATE_SF: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x13)
 * and a command length of 7; num_samples is fixed at 1.
 * The dword payload is produced by ilo_gpe_gen6_fill_3dstate_sf_raster()
 * from rasterizer, num_samples and zs_format; the header dword and then 6
 * payload dwords are written (1 + 6 matches cmd_len).
 * NOTE(review): the declaration of payload and other lines of this
 * definition are missing from the extract; only the visible statements are
 * described.
 */
261 gen7_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
262 const struct ilo_rasterizer_state
*rasterizer
,
263 enum pipe_format zs_format
,
266 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x13);
267 const uint8_t cmd_len
= 7;
268 const int num_samples
= 1;
271 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
273 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
,
274 rasterizer
, num_samples
, zs_format
,
275 payload
, Elements(payload
));
277 ilo_cp_begin(cp
, cmd_len
);
278 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
279 ilo_cp_write_multi(cp
, payload
, 6);
/*
 * gen7_emit_3DSTATE_WM: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x14)
 * and a command length of 3; num_samples is fixed at 1.
 *
 * dw1 and dw2 start from rasterizer->wm.payload[0] and [1].  Visible
 * additions to dw1: GEN7_WM_DW1_STATISTICS, the fs kernel cso payload[3],
 * and GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL.  When num_samples > 1
 * (never true with the constant above) the rasterizer MSAA dwords are ORed
 * into dw1 and dw2.  The header, dw1 and dw2 are then written.
 *
 * NOTE(review): the conditions guarding most of the OR statements (likely
 * involving fs, cc_may_kill and hiz_op) are not visible; the extract omits
 * lines of this definition, so only the visible statements are described.
 */
284 gen7_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
285 const struct ilo_shader_state
*fs
,
286 const struct ilo_rasterizer_state
*rasterizer
,
287 bool cc_may_kill
, uint32_t hiz_op
,
290 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x14);
291 const uint8_t cmd_len
= 3;
292 const int num_samples
= 1;
295 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
297 /* see ilo_gpe_init_rasterizer_wm() */
299 dw1
= rasterizer
->wm
.payload
[0];
300 dw2
= rasterizer
->wm
.payload
[1];
303 dw1
|= GEN7_WM_DW1_STATISTICS
;
311 const struct ilo_shader_cso
*fs_cso
= ilo_shader_get_kernel_cso(fs
);
313 dw1
|= fs_cso
->payload
[3];
317 dw1
|= GEN7_WM_DW1_PS_ENABLE
| GEN7_WM_DW1_PS_KILL
;
319 if (num_samples
> 1) {
320 dw1
|= rasterizer
->wm
.dw_msaa_rast
;
321 dw2
|= rasterizer
->wm
.dw_msaa_disp
;
324 ilo_cp_begin(cp
, cmd_len
);
325 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
326 ilo_cp_write(cp
, dw1
);
327 ilo_cp_write(cp
, dw2
);
/*
 * gen7_emit_3dstate_constant: shared helper behind the
 * 3DSTATE_CONSTANT_{VS,GS,PS,HS,DS} wrappers in this file.
 *
 * Builds the opcode with ILO_GPE_CMD(0x3, 0x0, subop) and a command length
 * of 7.  Asserts that subop is in 0x15..0x1a excluding 0x18 and that
 * num_bufs <= 4.  For each of the 4 buffer slots with a non-zero size it
 * computes read_len = (sizes[i] + 31) / 32, asserts bufs[i] is a multiple
 * of 32, packs read_len into dw[i / 2] (bits 16 and up for odd i, bit 0
 * and up for even i) and adds it to total_read_length, which is asserted
 * to be <= 64.  Finally writes the header dword and 6 dwords of dw.
 *
 * NOTE(review): the declarations of dw and read_len, the body of the
 * skipped-slot branch and the store of bufs[i] into dw are not visible;
 * the extract omits lines of this definition, so only the visible
 * statements are described.
 */
332 gen7_emit_3dstate_constant(const struct ilo_dev_info
*dev
,
334 const uint32_t *bufs
, const int *sizes
,
338 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, subop
);
339 const uint8_t cmd_len
= 7;
341 int total_read_length
, i
;
343 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
345 /* VS, HS, DS, GS, and PS variants */
346 assert(subop
>= 0x15 && subop
<= 0x1a && subop
!= 0x18);
348 assert(num_bufs
<= 4);
353 total_read_length
= 0;
354 for (i
= 0; i
< 4; i
++) {
358 * From the Ivy Bridge PRM, volume 2 part 1, page 112:
360 * "Constant buffers must be enabled in order from Constant Buffer 0
361 * to Constant Buffer 3 within this command. For example, it is
362 * not allowed to enable Constant Buffer 1 by programming a
363 * non-zero value in the VS Constant Buffer 1 Read Length without a
364 * non-zero value in VS Constant Buffer 0 Read Length."
366 if (i
>= num_bufs
|| !sizes
[i
]) {
368 assert(i
>= num_bufs
|| !sizes
[i
]);
374 /* read lengths are in 256-bit units */
375 read_len
= (sizes
[i
] + 31) / 32;
376 /* the lower 5 bits are used for memory object control state */
377 assert(bufs
[i
] % 32 == 0);
379 dw
[i
/ 2] |= read_len
<< ((i
% 2) ? 16 : 0);
382 total_read_length
+= read_len
;
386 * From the Ivy Bridge PRM, volume 2 part 1, page 113:
388 * "The sum of all four read length fields must be less than or equal
391 assert(total_read_length
<= 64);
393 ilo_cp_begin(cp
, cmd_len
);
394 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
395 ilo_cp_write_multi(cp
, dw
, 6);
/*
 * gen7_emit_3DSTATE_CONSTANT_VS: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with sub-opcode 0x15.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the num_bufs and cp parameters, braces); only the visible call is
 * described.
 */
400 gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info
*dev
,
401 const uint32_t *bufs
, const int *sizes
,
405 gen7_emit_3dstate_constant(dev
, 0x15, bufs
, sizes
, num_bufs
, cp
);
/*
 * gen7_emit_3DSTATE_CONSTANT_GS: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with sub-opcode 0x16.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the num_bufs and cp parameters, braces); only the visible call is
 * described.
 */
409 gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info
*dev
,
410 const uint32_t *bufs
, const int *sizes
,
414 gen7_emit_3dstate_constant(dev
, 0x16, bufs
, sizes
, num_bufs
, cp
);
/*
 * gen7_emit_3DSTATE_CONSTANT_PS: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with sub-opcode 0x17.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the num_bufs and cp parameters, braces); only the visible call is
 * described.
 */
418 gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info
*dev
,
419 const uint32_t *bufs
, const int *sizes
,
423 gen7_emit_3dstate_constant(dev
, 0x17, bufs
, sizes
, num_bufs
, cp
);
/*
 * gen7_emit_3DSTATE_SAMPLE_MASK: builds the opcode with
 * ILO_GPE_CMD(0x3, 0x0, 0x18) and a command length of 2.
 * valid_mask is ((1 << num_samples) - 1) | 0x1, i.e. the low num_samples
 * bits with bit 0 always set; sample_mask is ANDed with it before the
 * header dword and the masked sample_mask are written.
 * NOTE(review): the declaration of num_samples is not visible; the extract
 * omits lines of this definition, so only the visible statements are
 * described.
 */
427 gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info
*dev
,
428 unsigned sample_mask
,
432 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x18);
433 const uint8_t cmd_len
= 2;
434 const unsigned valid_mask
= ((1 << num_samples
) - 1) | 0x1;
436 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
439 * From the Ivy Bridge PRM, volume 2 part 1, page 294:
441 * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
442 * (Sample Mask) must be zero.
444 * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
447 sample_mask
&= valid_mask
;
449 ilo_cp_begin(cp
, cmd_len
);
450 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
451 ilo_cp_write(cp
, sample_mask
);
/*
 * gen7_emit_3DSTATE_CONSTANT_HS: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with sub-opcode 0x19.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the num_bufs and cp parameters, braces); only the visible call is
 * described.
 */
456 gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info
*dev
,
457 const uint32_t *bufs
, const int *sizes
,
461 gen7_emit_3dstate_constant(dev
, 0x19, bufs
, sizes
, num_bufs
, cp
);
/*
 * gen7_emit_3DSTATE_CONSTANT_DS: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with sub-opcode 0x1a.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the num_bufs and cp parameters, braces); only the visible call is
 * described.
 */
465 gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info
*dev
,
466 const uint32_t *bufs
, const int *sizes
,
470 gen7_emit_3dstate_constant(dev
, 0x1a, bufs
, sizes
, num_bufs
, cp
);
/*
 * gen7_emit_3DSTATE_HS: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x1b)
 * and a command length of 7, invokes ILO_GPE_VALID_GEN(dev, 7, 7.5), and
 * writes the header dword (cmd | (cmd_len - 2)).
 * NOTE(review): the remaining six dwords and any use of hs are not visible;
 * the extract omits lines of this definition, so only the visible
 * statements are described.
 */
474 gen7_emit_3DSTATE_HS(const struct ilo_dev_info
*dev
,
475 const struct ilo_shader_state
*hs
,
479 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1b);
480 const uint8_t cmd_len
= 7;
482 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
486 ilo_cp_begin(cp
, cmd_len
);
487 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
/*
 * gen7_emit_3DSTATE_TE: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x1c)
 * and a command length of 4, invokes ILO_GPE_VALID_GEN(dev, 7, 7.5), and
 * writes the header dword (cmd | (cmd_len - 2)).
 * NOTE(review): the remaining three dwords are not visible; the extract
 * omits lines of this definition, so only the visible statements are
 * described.
 */
498 gen7_emit_3DSTATE_TE(const struct ilo_dev_info
*dev
,
501 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1c);
502 const uint8_t cmd_len
= 4;
504 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
506 ilo_cp_begin(cp
, cmd_len
);
507 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
/*
 * gen7_emit_3DSTATE_DS: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x1d)
 * and a command length of 6, invokes ILO_GPE_VALID_GEN(dev, 7, 7.5), and
 * writes the header dword (cmd | (cmd_len - 2)).
 * NOTE(review): the remaining five dwords and any use of ds are not
 * visible; the extract omits lines of this definition, so only the visible
 * statements are described.
 */
515 gen7_emit_3DSTATE_DS(const struct ilo_dev_info
*dev
,
516 const struct ilo_shader_state
*ds
,
520 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1d);
521 const uint8_t cmd_len
= 6;
523 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
527 ilo_cp_begin(cp
, cmd_len
);
528 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
/*
 * gen7_emit_3DSTATE_STREAMOUT: builds the opcode with
 * ILO_GPE_CMD(0x3, 0x0, 0x1e) and a command length of 3; enable is true
 * when buffer_mask is non-zero.
 *
 * Two emission sequences are visible:
 *  - a first one where dw1 selects render stream 0 and gets
 *    GEN7_SO_DW1_RENDER_DISABLE when rasterizer_discard is set, then the
 *    header, dw1 and dw2 are written (presumably the path taken when enable
 *    is false - the condition is not visible, confirm);
 *  - a second one where read_len = (vertex_attrib_count + 1) / 2, dw1
 *    combines GEN7_SO_DW1_SO_ENABLE, stream 0 and GEN7_SO_DW1_STATISTICS
 *    (the expression continues on a missing line), optionally
 *    GEN7_SO_DW1_RENDER_DISABLE, and GEN7_SO_DW1_REORDER_TRAILING; dw2
 *    carries (read_len - 1) in the stream 0 read length field with every
 *    other field zero; the header, dw1 and dw2 are written.
 *
 * NOTE(review): declarations of dw1, dw2 and read_len, the branch
 * conditions and part of the dw1 expression are missing; the extract omits
 * lines of this definition, so only the visible statements are described.
 */
539 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info
*dev
,
540 unsigned buffer_mask
,
541 int vertex_attrib_count
,
542 bool rasterizer_discard
,
545 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1e);
546 const uint8_t cmd_len
= 3;
547 const bool enable
= (buffer_mask
!= 0);
551 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
554 dw1
= 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT
;
555 if (rasterizer_discard
)
556 dw1
|= GEN7_SO_DW1_RENDER_DISABLE
;
560 ilo_cp_begin(cp
, cmd_len
);
561 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
562 ilo_cp_write(cp
, dw1
);
563 ilo_cp_write(cp
, dw2
);
568 read_len
= (vertex_attrib_count
+ 1) / 2;
572 dw1
= GEN7_SO_DW1_SO_ENABLE
|
573 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT
|
574 GEN7_SO_DW1_STATISTICS
|
577 if (rasterizer_discard
)
578 dw1
|= GEN7_SO_DW1_RENDER_DISABLE
;
582 dw1
|= GEN7_SO_DW1_REORDER_TRAILING
;
584 dw2
= 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT
|
585 0 << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT
|
586 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT
|
587 0 << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT
|
588 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT
|
589 0 << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT
|
590 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT
|
591 (read_len
- 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT
;
593 ilo_cp_begin(cp
, cmd_len
);
594 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
595 ilo_cp_write(cp
, dw1
);
596 ilo_cp_write(cp
, dw2
);
/*
 * gen7_emit_3DSTATE_SBE: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x1f)
 * and a command length of 14.  The dword array dw is filled by
 * ilo_gpe_gen6_fill_3dstate_sf_sbe() from rasterizer and fs; the header
 * dword and then 13 dwords of dw are written (1 + 13 matches cmd_len).
 * NOTE(review): the declaration of dw and other lines of this definition
 * are missing from the extract; only the visible statements are described.
 */
601 gen7_emit_3DSTATE_SBE(const struct ilo_dev_info
*dev
,
602 const struct ilo_rasterizer_state
*rasterizer
,
603 const struct ilo_shader_state
*fs
,
606 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1f);
607 const uint8_t cmd_len
= 14;
610 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
612 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
, fs
, dw
, Elements(dw
));
614 ilo_cp_begin(cp
, cmd_len
);
615 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
616 ilo_cp_write_multi(cp
, dw
, 13);
/*
 * gen7_emit_3DSTATE_PS: builds the opcode with ILO_GPE_CMD(0x3, 0x0, 0x20)
 * and a command length of 8.
 *
 * Two emission sequences are visible:
 *  - a first one where dw4 starts as GEN7_PS_DW4_8_PIXEL_DISPATCH and gets
 *    (max_threads - 1) shifted in; max_threads is 408/204/102 for
 *    dev->gt 3/2/other with the GEN75 shift, or 172/48 for dev->gt 2/other
 *    with the GEN7 shift (the condition choosing between the two is not
 *    visible).  The header and dw4 writes are visible (presumably the path
 *    for a missing fs - confirm);
 *  - a second one that loads dw2/dw4/dw5 from the kernel cso payload
 *    entries 0..2, ORs (num_samplers + 3) / 4 shifted by
 *    GEN6_THREADDISP_SAMPLER_COUNT__SHIFT into dw2, ORs
 *    GEN7_PS_DW4_DUAL_SOURCE_BLEND into dw4 (guard not visible, presumably
 *    dual_blend), and writes the header, kernel offset, dw2, 0 (scratch),
 *    dw4, dw5, 0 (kernel 1) and 0 (kernel 2).
 *
 * NOTE(review): the declaration of max_threads, branch conditions and some
 * writes are missing; the extract omits lines of this definition, so only
 * the visible statements are described.
 */
621 gen7_emit_3DSTATE_PS(const struct ilo_dev_info
*dev
,
622 const struct ilo_shader_state
*fs
,
623 int num_samplers
, bool dual_blend
,
626 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x20);
627 const uint8_t cmd_len
= 8;
628 const struct ilo_shader_cso
*cso
;
629 uint32_t dw2
, dw4
, dw5
;
631 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
636 /* GPU hangs if none of the dispatch enable bits is set */
637 dw4
= GEN7_PS_DW4_8_PIXEL_DISPATCH
;
639 /* see brwCreateContext() */
642 max_threads
= (dev
->gt
== 3) ? 408 : (dev
->gt
== 2) ? 204 : 102;
643 dw4
|= (max_threads
- 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT
;
647 max_threads
= (dev
->gt
== 2) ? 172 : 48;
648 dw4
|= (max_threads
- 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT
;
652 ilo_cp_begin(cp
, cmd_len
);
653 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
657 ilo_cp_write(cp
, dw4
);
666 cso
= ilo_shader_get_kernel_cso(fs
);
667 dw2
= cso
->payload
[0];
668 dw4
= cso
->payload
[1];
669 dw5
= cso
->payload
[2];
671 dw2
|= (num_samplers
+ 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
674 dw4
|= GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
676 ilo_cp_begin(cp
, cmd_len
);
677 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
678 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
679 ilo_cp_write(cp
, dw2
);
680 ilo_cp_write(cp
, 0); /* scratch */
681 ilo_cp_write(cp
, dw4
);
682 ilo_cp_write(cp
, dw5
);
683 ilo_cp_write(cp
, 0); /* kernel 1 */
684 ilo_cp_write(cp
, 0); /* kernel 2 */
/*
 * gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: forwards
 * sf_clip_viewport to gen7_emit_3dstate_pointer() with sub-opcode 0x21.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
689 gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info
*dev
,
690 uint32_t sf_clip_viewport
,
693 gen7_emit_3dstate_pointer(dev
, 0x21, sf_clip_viewport
, cp
);
/*
 * gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC: forwards cc_viewport to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x23.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
697 gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info
*dev
,
698 uint32_t cc_viewport
,
701 gen7_emit_3dstate_pointer(dev
, 0x23, cc_viewport
, cp
);
/*
 * gen7_emit_3DSTATE_BLEND_STATE_POINTERS: forwards blend_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x24.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
705 gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info
*dev
,
706 uint32_t blend_state
,
709 gen7_emit_3dstate_pointer(dev
, 0x24, blend_state
, cp
);
/*
 * gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS: forwards
 * depth_stencil_state to gen7_emit_3dstate_pointer() with sub-opcode 0x25.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
713 gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info
*dev
,
714 uint32_t depth_stencil_state
,
717 gen7_emit_3dstate_pointer(dev
, 0x25, depth_stencil_state
, cp
);
/*
 * gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x26.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
721 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info
*dev
,
722 uint32_t binding_table
,
725 gen7_emit_3dstate_pointer(dev
, 0x26, binding_table
, cp
);
/*
 * gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x27.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
729 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info
*dev
,
730 uint32_t binding_table
,
733 gen7_emit_3dstate_pointer(dev
, 0x27, binding_table
, cp
);
/*
 * gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x28.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
737 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info
*dev
,
738 uint32_t binding_table
,
741 gen7_emit_3dstate_pointer(dev
, 0x28, binding_table
, cp
);
/*
 * gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x29.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
745 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info
*dev
,
746 uint32_t binding_table
,
749 gen7_emit_3dstate_pointer(dev
, 0x29, binding_table
, cp
);
/*
 * gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2a.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
753 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info
*dev
,
754 uint32_t binding_table
,
757 gen7_emit_3dstate_pointer(dev
, 0x2a, binding_table
, cp
);
/*
 * gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2b.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
761 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info
*dev
,
762 uint32_t sampler_state
,
765 gen7_emit_3dstate_pointer(dev
, 0x2b, sampler_state
, cp
);
/*
 * gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2c.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
769 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info
*dev
,
770 uint32_t sampler_state
,
773 gen7_emit_3dstate_pointer(dev
, 0x2c, sampler_state
, cp
);
/*
 * gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2d.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
777 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info
*dev
,
778 uint32_t sampler_state
,
781 gen7_emit_3dstate_pointer(dev
, 0x2d, sampler_state
, cp
);
/*
 * gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2e.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
785 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info
*dev
,
786 uint32_t sampler_state
,
789 gen7_emit_3dstate_pointer(dev
, 0x2e, sampler_state
, cp
);
/*
 * gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2f.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
793 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info
*dev
,
794 uint32_t sampler_state
,
797 gen7_emit_3dstate_pointer(dev
, 0x2f, sampler_state
, cp
);
/*
 * gen7_emit_3dstate_urb: shared helper behind the 3DSTATE_URB_{VS,HS,DS,GS}
 * wrappers in this file.
 *
 * Builds the opcode with ILO_GPE_CMD(0x3, 0x0, subop) and a command length
 * of 2; row_size is 64.  Asserts subop is in 0x30..0x33 and offset is a
 * multiple of 8192.  alloc_size is entry_size rounded up to whole rows;
 * a special case exists for subop 0x30 with alloc_size == 5 (its body is
 * not visible).  num_entries is size / row_size / alloc_size rounded down
 * to a multiple of 8, then clamped per sub-opcode to a max_entries value
 * that depends on dev->gt; for 0x30 it is also asserted to be at least
 * min_entries, and for 0x32 at least 138.  The header dword and a dword
 * combining offset and (alloc_size - 1) are written.
 *
 * NOTE(review): the switch statement, the conditions choosing between the
 * two max_entries tables, the min_entries assignment, any rescaling of
 * offset and the tail of the final dword are missing; the extract omits
 * lines of this definition, so only the visible statements are described.
 */
801 gen7_emit_3dstate_urb(const struct ilo_dev_info
*dev
,
802 int subop
, int offset
, int size
,
806 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, subop
);
807 const uint8_t cmd_len
= 2;
808 const int row_size
= 64; /* 512 bits */
809 int alloc_size
, num_entries
, min_entries
, max_entries
;
811 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
813 /* VS, HS, DS, and GS variants */
814 assert(subop
>= 0x30 && subop
<= 0x33);
816 /* in multiples of 8KB */
817 assert(offset
% 8192 == 0);
820 /* in multiple of 512-bit rows */
821 alloc_size
= (entry_size
+ row_size
- 1) / row_size
;
826 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
828 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
829 * cause performance to decrease due to banking in the URB. Element
830 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
832 if (subop
== 0x30 && alloc_size
== 5)
835 /* in multiples of 8 */
836 num_entries
= (size
/ row_size
/ alloc_size
) & ~7;
839 case 0x30: /* 3DSTATE_URB_VS */
844 max_entries
= (dev
->gt
>= 2) ? 1644 : 640;
848 max_entries
= (dev
->gt
== 2) ? 704 : 512;
852 assert(num_entries
>= min_entries
);
853 if (num_entries
> max_entries
)
854 num_entries
= max_entries
;
856 case 0x31: /* 3DSTATE_URB_HS */
857 max_entries
= (dev
->gt
== 2) ? 64 : 32;
858 if (num_entries
> max_entries
)
859 num_entries
= max_entries
;
861 case 0x32: /* 3DSTATE_URB_DS */
863 assert(num_entries
>= 138);
865 case 0x33: /* 3DSTATE_URB_GS */
868 max_entries
= (dev
->gt
>= 2) ? 640 : 256;
872 max_entries
= (dev
->gt
== 2) ? 320 : 192;
876 if (num_entries
> max_entries
)
877 num_entries
= max_entries
;
883 ilo_cp_begin(cp
, cmd_len
);
884 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
885 ilo_cp_write(cp
, offset
<< GEN7_URB_ANY_DW1_OFFSET__SHIFT
|
886 (alloc_size
- 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT
|
/*
 * gen7_emit_3DSTATE_URB_VS: forwards offset, size, entry_size and cp to
 * gen7_emit_3dstate_urb() with sub-opcode 0x30.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
892 gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info
*dev
,
893 int offset
, int size
, int entry_size
,
896 gen7_emit_3dstate_urb(dev
, 0x30, offset
, size
, entry_size
, cp
);
/*
 * gen7_emit_3DSTATE_URB_HS: forwards offset, size, entry_size and cp to
 * gen7_emit_3dstate_urb() with sub-opcode 0x31.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
900 gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info
*dev
,
901 int offset
, int size
, int entry_size
,
904 gen7_emit_3dstate_urb(dev
, 0x31, offset
, size
, entry_size
, cp
);
/*
 * gen7_emit_3DSTATE_URB_DS: forwards offset, size, entry_size and cp to
 * gen7_emit_3dstate_urb() with sub-opcode 0x32.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
908 gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info
*dev
,
909 int offset
, int size
, int entry_size
,
912 gen7_emit_3dstate_urb(dev
, 0x32, offset
, size
, entry_size
, cp
);
/*
 * gen7_emit_3DSTATE_URB_GS: forwards offset, size, entry_size and cp to
 * gen7_emit_3dstate_urb() with sub-opcode 0x33.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
916 gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info
*dev
,
917 int offset
, int size
, int entry_size
,
920 gen7_emit_3dstate_urb(dev
, 0x33, offset
, size
, entry_size
, cp
);
/*
 * gen7_emit_3dstate_push_constant_alloc: shared helper behind the
 * 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,HS,DS,GS,PS} wrappers in this file.
 *
 * Builds the opcode with ILO_GPE_CMD(0x3, 0x1, subop) and a command length
 * of 2, and asserts subop is in 0x12..0x16.  end is (offset + size) / 1024
 * and offset is rounded up to whole KB via (offset + 1023) / 1024; three
 * always-failing assertions report an invalid buffer end, offset or size
 * (their guarding range checks are not visible).  The header dword and a
 * dword starting with offset shifted by
 * GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT are written.
 *
 * NOTE(review): the declaration of end, the range checks, the size
 * computation and the tail of the final dword are missing; the extract
 * omits lines of this definition, so only the visible statements are
 * described.
 */
924 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info
*dev
,
925 int subop
, int offset
, int size
,
928 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, subop
);
929 const uint8_t cmd_len
= 2;
932 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
934 /* VS, HS, DS, GS, and PS variants */
935 assert(subop
>= 0x12 && subop
<= 0x16);
938 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
940 * "(A table that says the maximum size of each constant buffer is
943 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
945 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
946 * may not exceed the maximum value of the Constant Buffer Size."
948 * Thus, the valid range of buffer end is [0KB, 16KB].
950 end
= (offset
+ size
) / 1024;
952 assert(!"invalid constant buffer end");
956 /* the valid range of buffer offset is [0KB, 15KB] */
957 offset
= (offset
+ 1023) / 1024;
959 assert(!"invalid constant buffer offset");
968 /* the valid range of buffer size is [0KB, 15KB] */
971 assert(!"invalid constant buffer size");
975 ilo_cp_begin(cp
, cmd_len
);
976 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
977 ilo_cp_write(cp
, offset
<< GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT
|
/*
 * gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x12.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
983 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info
*dev
,
984 int offset
, int size
,
987 gen7_emit_3dstate_push_constant_alloc(dev
, 0x12, offset
, size
, cp
);
/*
 * gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x13.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
991 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info
*dev
,
992 int offset
, int size
,
995 gen7_emit_3dstate_push_constant_alloc(dev
, 0x13, offset
, size
, cp
);
/*
 * gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x14.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
999 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info
*dev
,
1000 int offset
, int size
,
1003 gen7_emit_3dstate_push_constant_alloc(dev
, 0x14, offset
, size
, cp
);
/*
 * gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x15.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
1007 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info
*dev
,
1008 int offset
, int size
,
1011 gen7_emit_3dstate_push_constant_alloc(dev
, 0x15, offset
, size
, cp
);
/*
 * gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x16.
 * NOTE(review): the extract omits lines of this definition (return type,
 * the cp parameter, braces); only the visible call is described.
 */
1015 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info
*dev
,
1016 int offset
, int size
,
1019 gen7_emit_3dstate_push_constant_alloc(dev
, 0x16, offset
, size
, cp
);
/*
 * gen7_emit_3DSTATE_SO_DECL_LIST: builds the opcode with
 * ILO_GPE_CMD(0x3, 0x1, 0x17) and collects stream-output declarations in
 * so_decls (128 entries of uint16_t).
 *
 * For every output in so_info it first pads the target buffer with hole
 * declarations (GEN7_SO_DECL_HOLE_FLAG with a component mask of num_dwords
 * low bits) until buffer_offsets[buf] reaches dst_offset, then appends a
 * declaration holding the buffer slot, register index and component mask
 * (num_components bits shifted by start_component), sets the buffer bit in
 * buffer_selects and advances buffer_offsets[buf] by num_components.
 *
 * The command length is fixed at 2 * 128 + 3.  After the header it writes
 * one dword with buffer_selects in the stream 0 field, one dword with
 * num_entries in the stream 0 field, then for each collected declaration
 * the value followed by a zero dword, and zero pairs for the remaining
 * slots up to 128.
 *
 * NOTE(review): the declarations of cmd_len and num_dwords, the
 * initialisation of num_entries and buffer_selects, any clamp applied to
 * num_dwords and any guard around the collection loop are missing; no bound
 * check on num_entries against the 128-entry array is visible.  The extract
 * omits lines of this definition, so only the visible statements are
 * described.
 */
1023 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info
*dev
,
1024 const struct pipe_stream_output_info
*so_info
,
1027 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x17);
1029 int buffer_selects
, num_entries
, i
;
1030 uint16_t so_decls
[128];
1032 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1038 int buffer_offsets
[PIPE_MAX_SO_BUFFERS
];
1040 memset(buffer_offsets
, 0, sizeof(buffer_offsets
));
1042 for (i
= 0; i
< so_info
->num_outputs
; i
++) {
1043 unsigned decl
, buf
, reg
, mask
;
1045 buf
= so_info
->output
[i
].output_buffer
;
1047 /* pad with holes */
1048 assert(buffer_offsets
[buf
] <= so_info
->output
[i
].dst_offset
);
1049 while (buffer_offsets
[buf
] < so_info
->output
[i
].dst_offset
) {
1052 num_dwords
= so_info
->output
[i
].dst_offset
- buffer_offsets
[buf
];
1056 decl
= buf
<< GEN7_SO_DECL_OUTPUT_SLOT__SHIFT
|
1057 GEN7_SO_DECL_HOLE_FLAG
|
1058 ((1 << num_dwords
) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT
;
1060 so_decls
[num_entries
++] = decl
;
1061 buffer_offsets
[buf
] += num_dwords
;
1064 reg
= so_info
->output
[i
].register_index
;
1065 mask
= ((1 << so_info
->output
[i
].num_components
) - 1) <<
1066 so_info
->output
[i
].start_component
;
1068 decl
= buf
<< GEN7_SO_DECL_OUTPUT_SLOT__SHIFT
|
1069 reg
<< GEN7_SO_DECL_REG_INDEX__SHIFT
|
1070 mask
<< GEN7_SO_DECL_COMPONENT_MASK__SHIFT
;
1072 so_decls
[num_entries
++] = decl
;
1073 buffer_selects
|= 1 << buf
;
1074 buffer_offsets
[buf
] += so_info
->output
[i
].num_components
;
1079 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
1081 * "Errata: All 128 decls for all four streams must be included
1082 * whenever this command is issued. The "Num Entries [n]" fields still
1083 * contain the actual numbers of valid decls."
1085 * Also note that "DWord Length" has 9 bits for this command, and the type
1086 * of cmd_len is thus uint16_t.
1088 cmd_len
= 2 * 128 + 3;
1090 ilo_cp_begin(cp
, cmd_len
);
1091 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1092 ilo_cp_write(cp
, 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT
|
1093 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT
|
1094 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT
|
1095 buffer_selects
<< GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT
);
1096 ilo_cp_write(cp
, 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT
|
1097 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT
|
1098 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT
|
1099 num_entries
<< GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT
);
1101 for (i
= 0; i
< num_entries
; i
++) {
1102 ilo_cp_write(cp
, so_decls
[i
]);
1103 ilo_cp_write(cp
, 0);
1105 for (; i
< 128; i
++) {
1106 ilo_cp_write(cp
, 0);
1107 ilo_cp_write(cp
, 0);
/*
 * gen7_emit_3DSTATE_SO_BUFFER: builds the opcode with
 * ILO_GPE_CMD(0x3, 0x1, 0x18) and a command length of 4.
 *
 * When so_target or so_target->buffer is NULL it writes the header, the
 * index shifted by GEN7_SO_BUF_DW1_INDEX__SHIFT, and two zero dwords.
 * Otherwise it asserts that stride, base and so_target->buffer_offset are
 * multiples of 4, computes base = (base + buffer_offset) & ~3 and
 * end = (base + buffer_size) & ~3, writes the header and a dword starting
 * with the shifted index, and passes base and end to ilo_cp_write_bo()
 * together with buf->bo and INTEL_DOMAIN_RENDER for both domain arguments.
 *
 * NOTE(review): the declaration of end, the exit from the NULL branch and
 * the tail of the second dword (presumably the stride) are missing; the
 * extract omits lines of this definition, so only the visible statements
 * are described.
 */
1114 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info
*dev
,
1115 int index
, int base
, int stride
,
1116 const struct pipe_stream_output_target
*so_target
,
1119 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x18);
1120 const uint8_t cmd_len
= 4;
1121 struct ilo_buffer
*buf
;
1124 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1126 if (!so_target
|| !so_target
->buffer
) {
1127 ilo_cp_begin(cp
, cmd_len
);
1128 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1129 ilo_cp_write(cp
, index
<< GEN7_SO_BUF_DW1_INDEX__SHIFT
);
1130 ilo_cp_write(cp
, 0);
1131 ilo_cp_write(cp
, 0);
1136 buf
= ilo_buffer(so_target
->buffer
);
1139 assert(stride
% 4 == 0 && base
% 4 == 0);
1140 assert(so_target
->buffer_offset
% 4 == 0);
1143 base
= (base
+ so_target
->buffer_offset
) & ~3;
1144 end
= (base
+ so_target
->buffer_size
) & ~3;
1146 ilo_cp_begin(cp
, cmd_len
);
1147 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1148 ilo_cp_write(cp
, index
<< GEN7_SO_BUF_DW1_INDEX__SHIFT
|
1150 ilo_cp_write_bo(cp
, base
, buf
->bo
, INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1151 ilo_cp_write_bo(cp
, end
, buf
->bo
, INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
/*
 * gen7_emit_3DPRIMITIVE: builds the opcode with ILO_GPE_CMD(0x3, 0x3, 0x00)
 * and a command length of 7.
 *
 * prim is GEN6_3DPRIM_RECTLIST when rectlist is set, otherwise the result
 * of ilo_gpe_gen6_translate_pipe_prim(info->mode).  vb_access is
 * GEN7_3DPRIM_DW1_ACCESS_RANDOM for indexed draws and
 * GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL otherwise.  vb_start is info->start,
 * plus ib->draw_start_offset for indexed draws.
 *
 * Seven dwords are written, matching cmd_len: the header, vb_access | prim,
 * info->count, vb_start, info->instance_count, info->start_instance and
 * info->index_bias.
 *
 * NOTE(review): the declarations of rectlist and cp are not visible; the
 * extract omits lines of this definition, so only the visible statements
 * are described.
 */
1156 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
1157 const struct pipe_draw_info
*info
,
1158 const struct ilo_ib_state
*ib
,
1162 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x3, 0x00);
1163 const uint8_t cmd_len
= 7;
1164 const int prim
= (rectlist
) ?
1165 GEN6_3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
1166 const int vb_access
= (info
->indexed
) ?
1167 GEN7_3DPRIM_DW1_ACCESS_RANDOM
:
1168 GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL
;
1169 const uint32_t vb_start
= info
->start
+
1170 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
1172 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1174 ilo_cp_begin(cp
, cmd_len
);
1175 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1176 ilo_cp_write(cp
, vb_access
| prim
);
1177 ilo_cp_write(cp
, info
->count
);
1178 ilo_cp_write(cp
, vb_start
);
1179 ilo_cp_write(cp
, info
->instance_count
);
1180 ilo_cp_write(cp
, info
->start_instance
);
1181 ilo_cp_write(cp
, info
->index_bias
);
/*
 * gen7_emit_SF_CLIP_VIEWPORT: writes SF_CLIP_VIEWPORT state for
 * num_viewports viewports and returns state_offset, the value filled in by
 * ilo_cp_steal_ptr().
 *
 * state_align is 64 / 4 and state_len is 16 * num_viewports; num_viewports
 * is asserted to be in 1..16.  For each viewport, dw[0..5] receive
 * fui() of m00, m11, m22, m30, m31, m32 and dw[8..11] receive fui() of
 * min_gbx, max_gbx, min_gby, max_gby.
 *
 * NOTE(review): the declaration of i, the writes to dw[6], dw[7] and
 * dw[12..15], and the per-iteration advance of dw are not visible; the
 * extract omits lines of this definition, so only the visible statements
 * are described.
 */
1185 static inline uint32_t
1186 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
1187 const struct ilo_viewport_cso
*viewports
,
1188 unsigned num_viewports
,
1191 const int state_align
= 64 / 4;
1192 const int state_len
= 16 * num_viewports
;
1193 uint32_t state_offset
, *dw
;
1196 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1199 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1201 * "The viewport-specific state used by both the SF and CL units
1202 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1203 * of which contains the DWords described below. The start of each
1204 * element is spaced 16 DWords apart. The location of first element of
1205 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1206 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1208 assert(num_viewports
&& num_viewports
<= 16);
1210 dw
= ilo_cp_steal_ptr(cp
, "SF_CLIP_VIEWPORT",
1211 state_len
, state_align
, &state_offset
);
1213 for (i
= 0; i
< num_viewports
; i
++) {
1214 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
1216 dw
[0] = fui(vp
->m00
);
1217 dw
[1] = fui(vp
->m11
);
1218 dw
[2] = fui(vp
->m22
);
1219 dw
[3] = fui(vp
->m30
);
1220 dw
[4] = fui(vp
->m31
);
1221 dw
[5] = fui(vp
->m32
);
1224 dw
[8] = fui(vp
->min_gbx
);
1225 dw
[9] = fui(vp
->max_gbx
);
1226 dw
[10] = fui(vp
->min_gby
);
1227 dw
[11] = fui(vp
->max_gby
);
1236 return state_offset
;
1239 #endif /* ILO_GPE_GEN7_H */