2 * Mesa 3-D graphics library
4 * Copyright (C) 2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #ifndef ILO_GPE_GEN7_H
29 #define ILO_GPE_GEN7_H
31 #include "intel_winsys.h"
33 #include "ilo_common.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_gpe_gen6.h"
/*
 * GPGPU_WALKER is not supported here: the only statement visible in this
 * definition is an assertion that always fails.
 */
40 gen7_emit_GPGPU_WALKER(const struct ilo_dev_info
*dev
,
43 assert(!"GPGPU_WALKER unsupported");
/*
 * Write a 3DSTATE_CLEAR_PARAMS command (3D opcode 0x04, length 3) to cp:
 * the header dword (opcode OR-ed with cmd_len - 2) followed by clear_val.
 *
 * NOTE(review): cmd_len is 3 but only two writes are visible in this
 * listing, and the declarations of cp and clear_val are not shown either.
 * ILO_GPE_VALID_GEN(dev, 7, 7.5) is presumably a generation range check;
 * confirm against its definition.
 */
47 gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info
*dev
,
51 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x04);
52 const uint8_t cmd_len
= 3;
54 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
56 ilo_cp_begin(cp
, cmd_len
);
57 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
58 ilo_cp_write(cp
, clear_val
);
/*
 * Write a two-dword 3DSTATE_VF command (3D opcode 0x0c) to cp.
 *
 * The header dword additionally carries GEN75_VF_DW0_CUT_INDEX_ENABLE when
 * enable_cut_index is true; the second dword is cut_index.  Unlike the other
 * emitters in this file, the generation check is invoked with (7.5, 7.5).
 *
 * NOTE(review): the declarations of cut_index and cp are not visible here.
 */
64 gen7_emit_3DSTATE_VF(const struct ilo_dev_info
*dev
,
65 bool enable_cut_index
,
69 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x0c);
70 const uint8_t cmd_len
= 2;
72 ILO_GPE_VALID_GEN(dev
, 7.5, 7.5);
74 ilo_cp_begin(cp
, cmd_len
);
75 ilo_cp_write(cp
, cmd
| (cmd_len
- 2) |
76 ((enable_cut_index
) ? GEN75_VF_DW0_CUT_INDEX_ENABLE
: 0));
77 ilo_cp_write(cp
, cut_index
);
/*
 * Shared helper for the two-dword state pointer commands.
 *
 * subop selects the 3D opcode passed to ILO_GPE_CMD(0x3, 0x0, subop); the
 * command written to cp is the header dword followed by pointer, unmodified.
 * The *_POINTERS wrappers in this file differ only in the subop they pass.
 */
82 gen7_emit_3dstate_pointer(const struct ilo_dev_info
*dev
,
83 int subop
, uint32_t pointer
,
86 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, subop
);
87 const uint8_t cmd_len
= 2;
89 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
91 ilo_cp_begin(cp
, cmd_len
);
92 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
93 ilo_cp_write(cp
, pointer
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x0e and
 * color_calc_state as the pointer value.
 */
98 gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info
*dev
,
99 uint32_t color_calc_state
,
102 gen7_emit_3dstate_pointer(dev
, 0x0e, color_calc_state
, cp
);
/*
 * Write a 3DSTATE_GS command (3D opcode 0x11, length 7) to cp.
 *
 * Two emission sequences are visible:
 *  - a short one that writes the header and GEN7_GS_DW5_STATISTICS;
 *  - a full one that reads three payload words from the kernel CSO of gs,
 *    adds the sampler count to dw2, and writes the kernel offset, dw2,
 *    a zero scratch dword, dw4 and dw5.
 *
 * NOTE(review): the condition selecting between the two sequences, the
 * remaining writes of the short sequence, and the declarations of
 * num_samplers and cp are not visible in this listing.  Presumably the short
 * sequence is the no-GS case; confirm against the full source.
 */
106 gen7_emit_3DSTATE_GS(const struct ilo_dev_info
*dev
,
107 const struct ilo_shader_state
*gs
,
111 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x11);
112 const uint8_t cmd_len
= 7;
113 const struct ilo_shader_cso
*cso
;
114 uint32_t dw2
, dw4
, dw5
;
116 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
119 ilo_cp_begin(cp
, cmd_len
);
120 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
125 ilo_cp_write(cp
, GEN7_GS_DW5_STATISTICS
);
/* dw2, dw4 and dw5 start from kernel CSO payload words 0, 1 and 2 */
131 cso
= ilo_shader_get_kernel_cso(gs
);
132 dw2
= cso
->payload
[0];
133 dw4
= cso
->payload
[1];
134 dw5
= cso
->payload
[2];
/* the sampler count field is num_samplers divided by four, rounded up */
136 dw2
|= ((num_samplers
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
138 ilo_cp_begin(cp
, cmd_len
);
139 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
140 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(gs
));
141 ilo_cp_write(cp
, dw2
);
142 ilo_cp_write(cp
, 0); /* scratch */
143 ilo_cp_write(cp
, dw4
);
144 ilo_cp_write(cp
, dw5
);
/*
 * Write a 3DSTATE_SF command (3D opcode 0x13, length 7) to cp.
 *
 * The six payload dwords are produced by
 * ilo_gpe_gen6_fill_3dstate_sf_raster() from rasterizer, zs_format and a
 * fixed num_samples of 1, then written after the header dword.
 *
 * NOTE(review): the declaration of payload (and of cp) is not visible here;
 * Elements(payload) implies payload is an array.
 */
150 gen7_emit_3DSTATE_SF(const struct ilo_dev_info
*dev
,
151 const struct ilo_rasterizer_state
*rasterizer
,
152 enum pipe_format zs_format
,
155 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x13);
156 const uint8_t cmd_len
= 7;
157 const int num_samples
= 1;
160 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
162 ilo_gpe_gen6_fill_3dstate_sf_raster(dev
,
163 rasterizer
, num_samples
, zs_format
,
164 payload
, Elements(payload
));
166 ilo_cp_begin(cp
, cmd_len
);
167 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
168 ilo_cp_write_multi(cp
, payload
, 6);
/*
 * Write a 3DSTATE_WM command (3D opcode 0x14, length 3) to cp.
 *
 * dw1 and dw2 start from rasterizer->wm.payload[0] and [1].  The visible
 * code then ORs into dw1: GEN7_WM_DW1_STATISTICS, payload word 3 of the
 * kernel CSO of fs, and GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL.
 * The multisample words are only merged when num_samples > 1, and
 * num_samples is the constant 1 in this definition.
 *
 * NOTE(review): the conditions guarding the fs and PS_ENABLE/PS_KILL
 * statements, any use of cc_may_kill and hiz_op, and the declarations of
 * dw1, dw2 and cp are not visible in this listing.
 */
173 gen7_emit_3DSTATE_WM(const struct ilo_dev_info
*dev
,
174 const struct ilo_shader_state
*fs
,
175 const struct ilo_rasterizer_state
*rasterizer
,
176 bool cc_may_kill
, uint32_t hiz_op
,
179 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x14);
180 const uint8_t cmd_len
= 3;
181 const int num_samples
= 1;
184 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
186 /* see ilo_gpe_init_rasterizer_wm() */
188 dw1
= rasterizer
->wm
.payload
[0];
189 dw2
= rasterizer
->wm
.payload
[1];
192 dw1
|= GEN7_WM_DW1_STATISTICS
;
200 const struct ilo_shader_cso
*fs_cso
= ilo_shader_get_kernel_cso(fs
);
202 dw1
|= fs_cso
->payload
[3];
206 dw1
|= GEN7_WM_DW1_PS_ENABLE
| GEN7_WM_DW1_PS_KILL
;
208 if (num_samples
> 1) {
209 dw1
|= rasterizer
->wm
.dw_msaa_rast
;
210 dw2
|= rasterizer
->wm
.dw_msaa_disp
;
213 ilo_cp_begin(cp
, cmd_len
);
214 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
215 ilo_cp_write(cp
, dw1
);
216 ilo_cp_write(cp
, dw2
);
/*
 * Shared helper for the 3DSTATE_CONSTANT_* commands (length 7).
 *
 * subop must be in 0x15..0x1a excluding 0x18, and num_bufs must not exceed
 * four; both are asserted.  For each of the four buffer slots the visible
 * code computes a read length of (sizes[i] + 31) / 32, asserts that bufs[i]
 * is a multiple of 32, and packs the read length into dw[i / 2], using the
 * upper 16 bits for odd i and the lower 16 bits for even i.  The sum of the
 * read lengths is asserted to be at most 64.  The command written to cp is
 * the header dword followed by six dwords from dw.
 *
 * NOTE(review): the declarations of subop, num_bufs, dw, read_len and cp,
 * the body of the disabled-slot branch, and the statements that store the
 * buffer addresses into dw are not visible in this listing.
 */
221 gen7_emit_3dstate_constant(const struct ilo_dev_info
*dev
,
223 const uint32_t *bufs
, const int *sizes
,
227 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, subop
);
228 const uint8_t cmd_len
= 7;
230 int total_read_length
, i
;
232 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
234 /* VS, HS, DS, GS, and PS variants */
235 assert(subop
>= 0x15 && subop
<= 0x1a && subop
!= 0x18);
237 assert(num_bufs
<= 4);
242 total_read_length
= 0;
243 for (i
= 0; i
< 4; i
++) {
247 * From the Ivy Bridge PRM, volume 2 part 1, page 112:
249 * "Constant buffers must be enabled in order from Constant Buffer 0
250 * to Constant Buffer 3 within this command. For example, it is
251 * not allowed to enable Constant Buffer 1 by programming a
252 * non-zero value in the VS Constant Buffer 1 Read Length without a
253 * non-zero value in VS Constant Buffer 0 Read Length."
255 if (i
>= num_bufs
|| !sizes
[i
]) {
257 assert(i
>= num_bufs
|| !sizes
[i
]);
263 /* read lengths are in 256-bit units */
264 read_len
= (sizes
[i
] + 31) / 32;
265 /* the lower 5 bits are used for memory object control state */
266 assert(bufs
[i
] % 32 == 0);
268 dw
[i
/ 2] |= read_len
<< ((i
% 2) ? 16 : 0);
271 total_read_length
+= read_len
;
275 * From the Ivy Bridge PRM, volume 2 part 1, page 113:
277 * "The sum of all four read length fields must be less than or equal
280 assert(total_read_length
<= 64);
282 ilo_cp_begin(cp
, cmd_len
);
283 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
284 ilo_cp_write_multi(cp
, dw
, 6);
/*
 * Thin wrapper: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with subop 0x15.
 */
289 gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info
*dev
,
290 const uint32_t *bufs
, const int *sizes
,
294 gen7_emit_3dstate_constant(dev
, 0x15, bufs
, sizes
, num_bufs
, cp
);
/*
 * Thin wrapper: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with subop 0x16.
 */
298 gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info
*dev
,
299 const uint32_t *bufs
, const int *sizes
,
303 gen7_emit_3dstate_constant(dev
, 0x16, bufs
, sizes
, num_bufs
, cp
);
/*
 * Thin wrapper: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with subop 0x17.
 */
307 gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info
*dev
,
308 const uint32_t *bufs
, const int *sizes
,
312 gen7_emit_3dstate_constant(dev
, 0x17, bufs
, sizes
, num_bufs
, cp
);
/*
 * Write a two-dword 3DSTATE_SAMPLE_MASK command (3D opcode 0x18) to cp.
 *
 * sample_mask is first restricted to valid_mask, which has the low
 * num_samples bits set and always includes bit 0; the masked value is the
 * second dword of the command.
 *
 * NOTE(review): the declarations of num_samples and cp are not visible in
 * this listing.
 */
316 gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info
*dev
,
317 unsigned sample_mask
,
321 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x18);
322 const uint8_t cmd_len
= 2;
323 const unsigned valid_mask
= ((1 << num_samples
) - 1) | 0x1;
325 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
328 * From the Ivy Bridge PRM, volume 2 part 1, page 294:
330 * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
331 * (Sample Mask) must be zero.
333 * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
336 sample_mask
&= valid_mask
;
338 ilo_cp_begin(cp
, cmd_len
);
339 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
340 ilo_cp_write(cp
, sample_mask
);
/*
 * Thin wrapper: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with subop 0x19.
 */
345 gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info
*dev
,
346 const uint32_t *bufs
, const int *sizes
,
350 gen7_emit_3dstate_constant(dev
, 0x19, bufs
, sizes
, num_bufs
, cp
);
/*
 * Thin wrapper: forwards bufs, sizes, num_bufs and cp to
 * gen7_emit_3dstate_constant() with subop 0x1a.
 */
354 gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info
*dev
,
355 const uint32_t *bufs
, const int *sizes
,
359 gen7_emit_3dstate_constant(dev
, 0x1a, bufs
, sizes
, num_bufs
, cp
);
/*
 * Begin a 3DSTATE_HS command (3D opcode 0x1b, length 7) on cp and write its
 * header dword.
 *
 * NOTE(review): only the header write is visible in this listing; the
 * remaining six dwords, any use of hs, and the declaration of cp are on
 * lines that are not shown.
 */
363 gen7_emit_3DSTATE_HS(const struct ilo_dev_info
*dev
,
364 const struct ilo_shader_state
*hs
,
368 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1b);
369 const uint8_t cmd_len
= 7;
371 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
375 ilo_cp_begin(cp
, cmd_len
);
376 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
/*
 * Begin a 3DSTATE_TE command (3D opcode 0x1c, length 4) on cp and write its
 * header dword.
 *
 * NOTE(review): only the header write is visible in this listing; the
 * remaining three dwords and the declaration of cp are not shown.
 */
387 gen7_emit_3DSTATE_TE(const struct ilo_dev_info
*dev
,
390 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1c);
391 const uint8_t cmd_len
= 4;
393 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
395 ilo_cp_begin(cp
, cmd_len
);
396 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
/*
 * Begin a 3DSTATE_DS command (3D opcode 0x1d, length 6) on cp and write its
 * header dword.
 *
 * NOTE(review): only the header write is visible in this listing; the
 * remaining five dwords, any use of ds, and the declaration of cp are not
 * shown.
 */
404 gen7_emit_3DSTATE_DS(const struct ilo_dev_info
*dev
,
405 const struct ilo_shader_state
*ds
,
409 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1d);
410 const uint8_t cmd_len
= 6;
412 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
416 ilo_cp_begin(cp
, cmd_len
);
417 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
/*
 * Write a 3DSTATE_STREAMOUT command (3D opcode 0x1e, length 3) to cp.
 *
 * enable is true when buffer_mask is non-zero.  Two emission sequences are
 * visible:
 *  - a first one where dw1 selects render stream 0 and gains
 *    GEN7_SO_DW1_RENDER_DISABLE when rasterizer_discard is set;
 *  - a second one where dw1 also carries GEN7_SO_DW1_SO_ENABLE,
 *    GEN7_SO_DW1_STATISTICS and GEN7_SO_DW1_REORDER_TRAILING, and dw2
 *    programs only the stream 0 read length as read_len - 1.
 *
 * NOTE(review): the test on enable that presumably selects between the two
 * sequences, the assignment of dw2 in the first sequence, the tail of the
 * dw1 expression in the second one, and the declarations of dw1, dw2,
 * read_len and cp are not visible in this listing.
 */
428 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info
*dev
,
429 unsigned buffer_mask
,
430 int vertex_attrib_count
,
431 bool rasterizer_discard
,
434 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1e);
435 const uint8_t cmd_len
= 3;
436 const bool enable
= (buffer_mask
!= 0);
440 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
443 dw1
= 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT
;
444 if (rasterizer_discard
)
445 dw1
|= GEN7_SO_DW1_RENDER_DISABLE
;
449 ilo_cp_begin(cp
, cmd_len
);
450 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
451 ilo_cp_write(cp
, dw1
);
452 ilo_cp_write(cp
, dw2
);
/* half of vertex_attrib_count, rounded up */
457 read_len
= (vertex_attrib_count
+ 1) / 2;
461 dw1
= GEN7_SO_DW1_SO_ENABLE
|
462 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT
|
463 GEN7_SO_DW1_STATISTICS
|
466 if (rasterizer_discard
)
467 dw1
|= GEN7_SO_DW1_RENDER_DISABLE
;
471 dw1
|= GEN7_SO_DW1_REORDER_TRAILING
;
473 dw2
= 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT
|
474 0 << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT
|
475 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT
|
476 0 << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT
|
477 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT
|
478 0 << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT
|
479 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT
|
480 (read_len
- 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT
;
482 ilo_cp_begin(cp
, cmd_len
);
483 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
484 ilo_cp_write(cp
, dw1
);
485 ilo_cp_write(cp
, dw2
);
/*
 * Write a 3DSTATE_SBE command (3D opcode 0x1f, length 14) to cp.
 *
 * The thirteen payload dwords are filled by
 * ilo_gpe_gen6_fill_3dstate_sf_sbe() from rasterizer and fs, then written
 * after the header dword.
 *
 * NOTE(review): the declaration of dw (and of cp) is not visible here;
 * Elements(dw) implies dw is an array.
 */
490 gen7_emit_3DSTATE_SBE(const struct ilo_dev_info
*dev
,
491 const struct ilo_rasterizer_state
*rasterizer
,
492 const struct ilo_shader_state
*fs
,
495 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x1f);
496 const uint8_t cmd_len
= 14;
499 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
501 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev
, rasterizer
, fs
, dw
, Elements(dw
));
503 ilo_cp_begin(cp
, cmd_len
);
504 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
505 ilo_cp_write_multi(cp
, dw
, 13);
/*
 * Write a 3DSTATE_PS command (3D opcode 0x20, length 8) to cp.
 *
 * Two emission sequences are visible:
 *  - a first one where dw4 is GEN7_PS_DW4_8_PIXEL_DISPATCH plus a maximum
 *    thread count field of max_threads - 1.  max_threads is picked from
 *    dev->gt: 408/204/102 with the GEN75 shift, or 172/48 with the GEN7
 *    shift;
 *  - a second one that reads three payload words from the kernel CSO of fs,
 *    adds the sampler count to dw2 and GEN7_PS_DW4_DUAL_SOURCE_BLEND to dw4,
 *    and writes the kernel offset, dw2, a zero scratch dword, dw4, dw5 and
 *    two zero dwords for kernels 1 and 2.
 *
 * NOTE(review): the conditions selecting the sequence, the GEN75 versus GEN7
 * branch and the dual_blend test, the remaining writes of the first
 * sequence, and the declarations of max_threads and cp are not visible in
 * this listing.
 */
510 gen7_emit_3DSTATE_PS(const struct ilo_dev_info
*dev
,
511 const struct ilo_shader_state
*fs
,
512 int num_samplers
, bool dual_blend
,
515 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, 0x20);
516 const uint8_t cmd_len
= 8;
517 const struct ilo_shader_cso
*cso
;
518 uint32_t dw2
, dw4
, dw5
;
520 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
525 /* GPU hangs if none of the dispatch enable bits is set */
526 dw4
= GEN7_PS_DW4_8_PIXEL_DISPATCH
;
528 /* see brwCreateContext() */
531 max_threads
= (dev
->gt
== 3) ? 408 : (dev
->gt
== 2) ? 204 : 102;
532 dw4
|= (max_threads
- 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT
;
536 max_threads
= (dev
->gt
== 2) ? 172 : 48;
537 dw4
|= (max_threads
- 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT
;
541 ilo_cp_begin(cp
, cmd_len
);
542 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
546 ilo_cp_write(cp
, dw4
);
/* dw2, dw4 and dw5 start from kernel CSO payload words 0, 1 and 2 */
555 cso
= ilo_shader_get_kernel_cso(fs
);
556 dw2
= cso
->payload
[0];
557 dw4
= cso
->payload
[1];
558 dw5
= cso
->payload
[2];
/* division binds tighter than the shift: (num_samplers + 3) / 4 is shifted */
560 dw2
|= (num_samplers
+ 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
563 dw4
|= GEN7_PS_DW4_DUAL_SOURCE_BLEND
;
565 ilo_cp_begin(cp
, cmd_len
);
566 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
567 ilo_cp_write(cp
, ilo_shader_get_kernel_offset(fs
));
568 ilo_cp_write(cp
, dw2
);
569 ilo_cp_write(cp
, 0); /* scratch */
570 ilo_cp_write(cp
, dw4
);
571 ilo_cp_write(cp
, dw5
);
572 ilo_cp_write(cp
, 0); /* kernel 1 */
573 ilo_cp_write(cp
, 0); /* kernel 2 */
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x21 and
 * sf_clip_viewport as the pointer value.
 */
578 gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info
*dev
,
579 uint32_t sf_clip_viewport
,
582 gen7_emit_3dstate_pointer(dev
, 0x21, sf_clip_viewport
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x23 and
 * cc_viewport as the pointer value.
 */
586 gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info
*dev
,
587 uint32_t cc_viewport
,
590 gen7_emit_3dstate_pointer(dev
, 0x23, cc_viewport
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x24 and
 * blend_state as the pointer value.
 */
594 gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info
*dev
,
595 uint32_t blend_state
,
598 gen7_emit_3dstate_pointer(dev
, 0x24, blend_state
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x25 and
 * depth_stencil_state as the pointer value.
 */
602 gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info
*dev
,
603 uint32_t depth_stencil_state
,
606 gen7_emit_3dstate_pointer(dev
, 0x25, depth_stencil_state
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x26 and
 * binding_table as the pointer value.
 */
610 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info
*dev
,
611 uint32_t binding_table
,
614 gen7_emit_3dstate_pointer(dev
, 0x26, binding_table
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x27 and
 * binding_table as the pointer value.
 */
618 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info
*dev
,
619 uint32_t binding_table
,
622 gen7_emit_3dstate_pointer(dev
, 0x27, binding_table
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x28 and
 * binding_table as the pointer value.
 */
626 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info
*dev
,
627 uint32_t binding_table
,
630 gen7_emit_3dstate_pointer(dev
, 0x28, binding_table
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x29 and
 * binding_table as the pointer value.
 */
634 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info
*dev
,
635 uint32_t binding_table
,
638 gen7_emit_3dstate_pointer(dev
, 0x29, binding_table
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x2a and
 * binding_table as the pointer value.
 */
642 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info
*dev
,
643 uint32_t binding_table
,
646 gen7_emit_3dstate_pointer(dev
, 0x2a, binding_table
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x2b and
 * sampler_state as the pointer value.
 */
650 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info
*dev
,
651 uint32_t sampler_state
,
654 gen7_emit_3dstate_pointer(dev
, 0x2b, sampler_state
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x2c and
 * sampler_state as the pointer value.
 */
658 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info
*dev
,
659 uint32_t sampler_state
,
662 gen7_emit_3dstate_pointer(dev
, 0x2c, sampler_state
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x2d and
 * sampler_state as the pointer value.
 */
666 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info
*dev
,
667 uint32_t sampler_state
,
670 gen7_emit_3dstate_pointer(dev
, 0x2d, sampler_state
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x2e and
 * sampler_state as the pointer value.
 */
674 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info
*dev
,
675 uint32_t sampler_state
,
678 gen7_emit_3dstate_pointer(dev
, 0x2e, sampler_state
, cp
);
/*
 * Thin wrapper: forwards to gen7_emit_3dstate_pointer() with subop 0x2f and
 * sampler_state as the pointer value.
 */
682 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info
*dev
,
683 uint32_t sampler_state
,
686 gen7_emit_3dstate_pointer(dev
, 0x2f, sampler_state
, cp
);
/*
 * Shared helper for the two-dword 3DSTATE_URB_* commands.
 *
 * subop must be in 0x30..0x33 and offset must be a multiple of 8192; both
 * are asserted.  alloc_size is entry_size rounded up to whole 64-byte rows.
 * num_entries is size / row_size / alloc_size rounded down to a multiple of
 * eight, then limited per subop with maxima that depend on dev->gt.  The
 * second dword combines offset, alloc_size - 1 and further fields.
 *
 * NOTE(review): a large part of this definition is not visible in this
 * listing: the declarations of entry_size and cp, the statement guarded by
 * the alloc_size == 5 test (the quoted PRM text suggests six rows are
 * programmed instead), the switch header and the conditions choosing
 * between the two max_entries values per case, the assignments of
 * min_entries, and the tail of the final write.
 */
690 gen7_emit_3dstate_urb(const struct ilo_dev_info
*dev
,
691 int subop
, int offset
, int size
,
695 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x0, subop
);
696 const uint8_t cmd_len
= 2;
697 const int row_size
= 64; /* 512 bits */
698 int alloc_size
, num_entries
, min_entries
, max_entries
;
700 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
702 /* VS, HS, DS, and GS variants */
703 assert(subop
>= 0x30 && subop
<= 0x33);
705 /* in multiples of 8KB */
706 assert(offset
% 8192 == 0);
709 /* in multiple of 512-bit rows */
710 alloc_size
= (entry_size
+ row_size
- 1) / row_size
;
715 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
717 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
718 * cause performance to decrease due to banking in the URB. Element
719 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
721 if (subop
== 0x30 && alloc_size
== 5)
724 /* in multiples of 8 */
725 num_entries
= (size
/ row_size
/ alloc_size
) & ~7;
728 case 0x30: /* 3DSTATE_URB_VS */
733 max_entries
= (dev
->gt
>= 2) ? 1644 : 640;
737 max_entries
= (dev
->gt
== 2) ? 704 : 512;
741 assert(num_entries
>= min_entries
);
742 if (num_entries
> max_entries
)
743 num_entries
= max_entries
;
745 case 0x31: /* 3DSTATE_URB_HS */
746 max_entries
= (dev
->gt
== 2) ? 64 : 32;
747 if (num_entries
> max_entries
)
748 num_entries
= max_entries
;
750 case 0x32: /* 3DSTATE_URB_DS */
752 assert(num_entries
>= 138);
754 case 0x33: /* 3DSTATE_URB_GS */
757 max_entries
= (dev
->gt
>= 2) ? 640 : 256;
761 max_entries
= (dev
->gt
== 2) ? 320 : 192;
765 if (num_entries
> max_entries
)
766 num_entries
= max_entries
;
772 ilo_cp_begin(cp
, cmd_len
);
773 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
774 ilo_cp_write(cp
, offset
<< GEN7_URB_ANY_DW1_OFFSET__SHIFT
|
775 (alloc_size
- 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT
|
/*
 * Thin wrapper: forwards offset, size, entry_size and cp to
 * gen7_emit_3dstate_urb() with subop 0x30.
 */
781 gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info
*dev
,
782 int offset
, int size
, int entry_size
,
785 gen7_emit_3dstate_urb(dev
, 0x30, offset
, size
, entry_size
, cp
);
/*
 * Thin wrapper: forwards offset, size, entry_size and cp to
 * gen7_emit_3dstate_urb() with subop 0x31.
 */
789 gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info
*dev
,
790 int offset
, int size
, int entry_size
,
793 gen7_emit_3dstate_urb(dev
, 0x31, offset
, size
, entry_size
, cp
);
/*
 * Thin wrapper: forwards offset, size, entry_size and cp to
 * gen7_emit_3dstate_urb() with subop 0x32.
 */
797 gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info
*dev
,
798 int offset
, int size
, int entry_size
,
801 gen7_emit_3dstate_urb(dev
, 0x32, offset
, size
, entry_size
, cp
);
/*
 * Thin wrapper: forwards offset, size, entry_size and cp to
 * gen7_emit_3dstate_urb() with subop 0x33.
 */
805 gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info
*dev
,
806 int offset
, int size
, int entry_size
,
809 gen7_emit_3dstate_urb(dev
, 0x33, offset
, size
, entry_size
, cp
);
/*
 * Shared helper for the two-dword 3DSTATE_PUSH_CONSTANT_ALLOC_* commands.
 *
 * The opcode is built with ILO_GPE_CMD(0x3, 0x1, subop) and subop is
 * asserted to be in 0x12..0x16.  The buffer end is (offset + size) / 1024,
 * so it is rounded down to whole KB, while offset is rounded up to whole KB.
 * Three always-failing assertions flag an invalid end, offset or size.  The
 * second dword starts with offset shifted into its field.
 *
 * NOTE(review): the range tests guarding the three assertions, the clamping
 * that presumably follows them, the computation of size in KB, the tail of
 * the final write, and the declarations of end and cp are not visible in
 * this listing.
 */
813 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info
*dev
,
814 int subop
, int offset
, int size
,
817 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, subop
);
818 const uint8_t cmd_len
= 2;
821 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
823 /* VS, HS, DS, GS, and PS variants */
824 assert(subop
>= 0x12 && subop
<= 0x16);
827 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
829 * "(A table that says the maximum size of each constant buffer is
832 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
834 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
835 * may not exceed the maximum value of the Constant Buffer Size."
837 * Thus, the valid range of buffer end is [0KB, 16KB].
839 end
= (offset
+ size
) / 1024;
841 assert(!"invalid constant buffer end");
845 /* the valid range of buffer offset is [0KB, 15KB] */
846 offset
= (offset
+ 1023) / 1024;
848 assert(!"invalid constant buffer offset");
857 /* the valid range of buffer size is [0KB, 15KB] */
860 assert(!"invalid constant buffer size");
864 ilo_cp_begin(cp
, cmd_len
);
865 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
866 ilo_cp_write(cp
, offset
<< GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT
|
/*
 * Thin wrapper: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with subop 0x12.
 */
872 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info
*dev
,
873 int offset
, int size
,
876 gen7_emit_3dstate_push_constant_alloc(dev
, 0x12, offset
, size
, cp
);
/*
 * Thin wrapper: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with subop 0x13.
 */
880 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info
*dev
,
881 int offset
, int size
,
884 gen7_emit_3dstate_push_constant_alloc(dev
, 0x13, offset
, size
, cp
);
/*
 * Thin wrapper: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with subop 0x14.
 */
888 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info
*dev
,
889 int offset
, int size
,
892 gen7_emit_3dstate_push_constant_alloc(dev
, 0x14, offset
, size
, cp
);
/*
 * Thin wrapper: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with subop 0x15.
 */
896 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info
*dev
,
897 int offset
, int size
,
900 gen7_emit_3dstate_push_constant_alloc(dev
, 0x15, offset
, size
, cp
);
/*
 * Thin wrapper: forwards offset, size and cp to
 * gen7_emit_3dstate_push_constant_alloc() with subop 0x16.
 */
904 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info
*dev
,
905 int offset
, int size
,
908 gen7_emit_3dstate_push_constant_alloc(dev
, 0x16, offset
, size
, cp
);
/*
 * Write a 3DSTATE_SO_DECL_LIST command (opcode 0x3/0x1/0x17) to cp.
 *
 * For every output in so_info the visible code
 *  - pads the target buffer with hole declarations until its running dword
 *    offset reaches the dst_offset of the output, and
 *  - appends one declaration combining the buffer slot, the register index
 *    and a component mask of num_components bits starting at
 *    start_component, then records the buffer in buffer_selects and advances
 *    the running offset by num_components.
 *
 * The command length is fixed at 2 * 128 + 3 dwords.  Only stream 0 gets
 * non-zero buffer selects and entry count; the num_entries collected
 * declarations are written first and a second loop runs up to index 128.
 *
 * NOTE(review): the initialisation of buffer_selects and num_entries, the
 * declarations of cmd_len, num_dwords and cp, any cap applied to num_dwords,
 * any test on so_info, and the body of the final padding loop are not
 * visible in this listing.  Nothing visible bounds num_entries against the
 * 128-element so_decls array; confirm against the full source.
 */
912 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info
*dev
,
913 const struct pipe_stream_output_info
*so_info
,
916 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x17);
918 int buffer_selects
, num_entries
, i
;
919 uint16_t so_decls
[128];
921 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
927 int buffer_offsets
[PIPE_MAX_SO_BUFFERS
];
/* every buffer starts at running offset zero */
929 memset(buffer_offsets
, 0, sizeof(buffer_offsets
));
931 for (i
= 0; i
< so_info
->num_outputs
; i
++) {
932 unsigned decl
, buf
, reg
, mask
;
934 buf
= so_info
->output
[i
].output_buffer
;
/* outputs must not move backwards within a buffer */
937 assert(buffer_offsets
[buf
] <= so_info
->output
[i
].dst_offset
);
938 while (buffer_offsets
[buf
] < so_info
->output
[i
].dst_offset
) {
941 num_dwords
= so_info
->output
[i
].dst_offset
- buffer_offsets
[buf
];
945 decl
= buf
<< GEN7_SO_DECL_OUTPUT_SLOT__SHIFT
|
946 GEN7_SO_DECL_HOLE_FLAG
|
947 ((1 << num_dwords
) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT
;
949 so_decls
[num_entries
++] = decl
;
950 buffer_offsets
[buf
] += num_dwords
;
/* declaration for output i itself */
953 reg
= so_info
->output
[i
].register_index
;
954 mask
= ((1 << so_info
->output
[i
].num_components
) - 1) <<
955 so_info
->output
[i
].start_component
;
957 decl
= buf
<< GEN7_SO_DECL_OUTPUT_SLOT__SHIFT
|
958 reg
<< GEN7_SO_DECL_REG_INDEX__SHIFT
|
959 mask
<< GEN7_SO_DECL_COMPONENT_MASK__SHIFT
;
961 so_decls
[num_entries
++] = decl
;
962 buffer_selects
|= 1 << buf
;
963 buffer_offsets
[buf
] += so_info
->output
[i
].num_components
;
968 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
970 * "Errata: All 128 decls for all four streams must be included
971 * whenever this command is issued. The "Num Entries [n]" fields still
972 * contain the actual numbers of valid decls."
974 * Also note that "DWord Length" has 9 bits for this command, and the type
975 * of cmd_len is thus uint16_t.
977 cmd_len
= 2 * 128 + 3;
979 ilo_cp_begin(cp
, cmd_len
);
980 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
981 ilo_cp_write(cp
, 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT
|
982 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT
|
983 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT
|
984 buffer_selects
<< GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT
);
985 ilo_cp_write(cp
, 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT
|
986 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT
|
987 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT
|
988 num_entries
<< GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT
);
990 for (i
= 0; i
< num_entries
; i
++) {
991 ilo_cp_write(cp
, so_decls
[i
]);
994 for (; i
< 128; i
++) {
/*
 * Write a 3DSTATE_SO_BUFFER command (opcode 0x3/0x1/0x18, length 4) to cp.
 *
 * When so_target or so_target->buffer is NULL, the command carries only the
 * buffer index followed by two zero dwords.  Otherwise stride, base and
 * so_target->buffer_offset are asserted to be multiples of four, base is
 * advanced by the buffer offset, end is base plus buffer_size, and both are
 * written with ilo_cp_write_bo() against buf->bo.
 *
 * NOTE(review): the return that presumably ends the NULL-target path, the
 * tail of the dword combining index with further fields, and the
 * declarations of end and cp are not visible in this listing.
 */
1003 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info
*dev
,
1004 int index
, int base
, int stride
,
1005 const struct pipe_stream_output_target
*so_target
,
1008 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x1, 0x18);
1009 const uint8_t cmd_len
= 4;
1010 struct ilo_buffer
*buf
;
1013 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1015 if (!so_target
|| !so_target
->buffer
) {
1016 ilo_cp_begin(cp
, cmd_len
);
1017 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1018 ilo_cp_write(cp
, index
<< GEN7_SO_BUF_DW1_INDEX__SHIFT
);
1019 ilo_cp_write(cp
, 0);
1020 ilo_cp_write(cp
, 0);
1025 buf
= ilo_buffer(so_target
->buffer
);
1028 assert(stride
% 4 == 0 && base
% 4 == 0);
1029 assert(so_target
->buffer_offset
% 4 == 0);
/* clear the low two bits so both addresses are multiples of four */
1032 base
= (base
+ so_target
->buffer_offset
) & ~3;
1033 end
= (base
+ so_target
->buffer_size
) & ~3;
1035 ilo_cp_begin(cp
, cmd_len
);
1036 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1037 ilo_cp_write(cp
, index
<< GEN7_SO_BUF_DW1_INDEX__SHIFT
|
1039 ilo_cp_write_bo(cp
, base
, buf
->bo
, INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
1040 ilo_cp_write_bo(cp
, end
, buf
->bo
, INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
/*
 * Write a 3DPRIMITIVE command (opcode 0x3/0x3/0x00, length 7) to cp.
 *
 * The primitive type is GEN6_3DPRIM_RECTLIST when rectlist is set, otherwise
 * the translation of info->mode.  Indexed draws use random vertex buffer
 * access and add ib->draw_start_offset to info->start; non-indexed draws use
 * sequential access and info->start alone.  After the header, the dwords
 * are: access mode OR-ed with the primitive type, count, start, instance
 * count, start instance and index bias.
 *
 * NOTE(review): the declarations of rectlist and cp are not visible in this
 * listing.
 */
1045 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info
*dev
,
1046 const struct pipe_draw_info
*info
,
1047 const struct ilo_ib_state
*ib
,
1051 const uint32_t cmd
= ILO_GPE_CMD(0x3, 0x3, 0x00);
1052 const uint8_t cmd_len
= 7;
1053 const int prim
= (rectlist
) ?
1054 GEN6_3DPRIM_RECTLIST
: ilo_gpe_gen6_translate_pipe_prim(info
->mode
);
1055 const int vb_access
= (info
->indexed
) ?
1056 GEN7_3DPRIM_DW1_ACCESS_RANDOM
:
1057 GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL
;
1058 const uint32_t vb_start
= info
->start
+
1059 ((info
->indexed
) ? ib
->draw_start_offset
: 0);
1061 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1063 ilo_cp_begin(cp
, cmd_len
);
1064 ilo_cp_write(cp
, cmd
| (cmd_len
- 2));
1065 ilo_cp_write(cp
, vb_access
| prim
);
1066 ilo_cp_write(cp
, info
->count
);
1067 ilo_cp_write(cp
, vb_start
);
1068 ilo_cp_write(cp
, info
->instance_count
);
1069 ilo_cp_write(cp
, info
->start_instance
);
1070 ilo_cp_write(cp
, info
->index_bias
);
/*
 * Build the SF_CLIP_VIEWPORT state array for num_viewports viewports and
 * return state_offset, the offset filled in by ilo_cp_steal_ptr().
 *
 * num_viewports must be between 1 and 16 (asserted).  Space for
 * 16 * num_viewports dwords is requested with an alignment argument of
 * 64 / 4.  For each viewport the visible code stores, via fui(), the six
 * matrix terms m00, m11, m22, m30, m31, m32 in dw[0..5] and the guardband
 * bounds min_gbx, max_gbx, min_gby, max_gby in dw[8..11].
 *
 * NOTE(review): the declarations of i and cp, the stores to dw[6], dw[7]
 * and dw[12..15], and the per-viewport advance of dw are not visible in
 * this listing.
 */
1074 static inline uint32_t
1075 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info
*dev
,
1076 const struct ilo_viewport_cso
*viewports
,
1077 unsigned num_viewports
,
1080 const int state_align
= 64 / 4;
1081 const int state_len
= 16 * num_viewports
;
1082 uint32_t state_offset
, *dw
;
1085 ILO_GPE_VALID_GEN(dev
, 7, 7.5);
1088 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1090 * "The viewport-specific state used by both the SF and CL units
1091 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1092 * of which contains the DWords described below. The start of each
1093 * element is spaced 16 DWords apart. The location of first element of
1094 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1095 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1097 assert(num_viewports
&& num_viewports
<= 16);
1099 dw
= ilo_cp_steal_ptr(cp
, "SF_CLIP_VIEWPORT",
1100 state_len
, state_align
, &state_offset
);
1102 for (i
= 0; i
< num_viewports
; i
++) {
1103 const struct ilo_viewport_cso
*vp
= &viewports
[i
];
/* viewport matrix terms */
1105 dw
[0] = fui(vp
->m00
);
1106 dw
[1] = fui(vp
->m11
);
1107 dw
[2] = fui(vp
->m22
);
1108 dw
[3] = fui(vp
->m30
);
1109 dw
[4] = fui(vp
->m31
);
1110 dw
[5] = fui(vp
->m32
);
/* guardband bounds */
1113 dw
[8] = fui(vp
->min_gbx
);
1114 dw
[9] = fui(vp
->max_gbx
);
1115 dw
[10] = fui(vp
->min_gby
);
1116 dw
[11] = fui(vp
->max_gby
);
1125 return state_offset
;
1128 #endif /* ILO_GPE_GEN7_H */