ilo: remove GPE state size estimation
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen7.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_GPE_GEN7_H
29 #define ILO_GPE_GEN7_H
30
31 #include "intel_winsys.h"
32
33 #include "ilo_common.h"
34 #include "ilo_cp.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_gpe_gen6.h"
38
/**
 * Placeholder for GPGPU_WALKER.
 *
 * Nothing is written to \p cp; the function only trips an assertion in
 * builds where assert() is enabled.
 */
static inline void
gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
{
   /* a string literal is never NULL, so its negation always fails */
   assert(!"GPGPU_WALKER unsupported");
}
45
/**
 * Emit 3DSTATE_CLEAR_PARAMS (3 DWords).
 *
 * DWord 1 carries \p clear_val unchanged and DWord 2 is the constant 1
 * (NOTE(review): presumably the "value valid" bit -- confirm against the
 * PRM).
 */
static inline void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   /* the length field of the header excludes the first two DWords */
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x04) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, clear_val);
   ilo_cp_write(cp, 1);
   ilo_cp_end(cp);
}
62
63 static inline void
64 gen7_emit_3DSTATE_VF(const struct ilo_dev_info *dev,
65 bool enable_cut_index,
66 uint32_t cut_index,
67 struct ilo_cp *cp)
68 {
69 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0c);
70 const uint8_t cmd_len = 2;
71
72 ILO_GPE_VALID_GEN(dev, 7.5, 7.5);
73
74 ilo_cp_begin(cp, cmd_len);
75 ilo_cp_write(cp, cmd | (cmd_len - 2) |
76 ((enable_cut_index) ? GEN75_VF_DW0_CUT_INDEX_ENABLE : 0));
77 ilo_cp_write(cp, cut_index);
78 ilo_cp_end(cp);
79 }
80
/**
 * Shared helper for the two-DWord 3DSTATE "pointer" commands.
 *
 * The header is built from \p subop and \p pointer is written verbatim as
 * DWord 1.
 */
static inline void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
96
/**
 * Emit 3DSTATE_CC_STATE_POINTERS: the pointer command with sub-opcode 0x0e
 * and \p color_calc_state as its payload.
 */
static inline void
gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const int subop = 0x0e;

   gen7_emit_3dstate_pointer(dev, subop, color_calc_state, cp);
}
104
105 static inline void
106 gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
107 const struct ilo_shader_state *gs,
108 int num_samplers,
109 struct ilo_cp *cp)
110 {
111 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
112 const uint8_t cmd_len = 7;
113 const struct ilo_shader_cso *cso;
114 uint32_t dw2, dw4, dw5;
115
116 ILO_GPE_VALID_GEN(dev, 7, 7.5);
117
118 if (!gs) {
119 ilo_cp_begin(cp, cmd_len);
120 ilo_cp_write(cp, cmd | (cmd_len - 2));
121 ilo_cp_write(cp, 0);
122 ilo_cp_write(cp, 0);
123 ilo_cp_write(cp, 0);
124 ilo_cp_write(cp, 0);
125 ilo_cp_write(cp, GEN7_GS_DW5_STATISTICS);
126 ilo_cp_write(cp, 0);
127 ilo_cp_end(cp);
128 return;
129 }
130
131 cso = ilo_shader_get_kernel_cso(gs);
132 dw2 = cso->payload[0];
133 dw4 = cso->payload[1];
134 dw5 = cso->payload[2];
135
136 dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
137
138 ilo_cp_begin(cp, cmd_len);
139 ilo_cp_write(cp, cmd | (cmd_len - 2));
140 ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
141 ilo_cp_write(cp, dw2);
142 ilo_cp_write(cp, 0); /* scratch */
143 ilo_cp_write(cp, dw4);
144 ilo_cp_write(cp, dw5);
145 ilo_cp_write(cp, 0);
146 ilo_cp_end(cp);
147 }
148
149 static inline void
150 gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
151 const struct ilo_rasterizer_state *rasterizer,
152 enum pipe_format zs_format,
153 struct ilo_cp *cp)
154 {
155 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
156 const uint8_t cmd_len = 7;
157 const int num_samples = 1;
158 uint32_t payload[6];
159
160 ILO_GPE_VALID_GEN(dev, 7, 7.5);
161
162 ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
163 rasterizer, num_samples, zs_format,
164 payload, Elements(payload));
165
166 ilo_cp_begin(cp, cmd_len);
167 ilo_cp_write(cp, cmd | (cmd_len - 2));
168 ilo_cp_write_multi(cp, payload, 6);
169 ilo_cp_end(cp);
170 }
171
172 static inline void
173 gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
174 const struct ilo_shader_state *fs,
175 const struct ilo_rasterizer_state *rasterizer,
176 bool cc_may_kill, uint32_t hiz_op,
177 struct ilo_cp *cp)
178 {
179 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
180 const uint8_t cmd_len = 3;
181 const int num_samples = 1;
182 uint32_t dw1, dw2;
183
184 ILO_GPE_VALID_GEN(dev, 7, 7.5);
185
186 /* see ilo_gpe_init_rasterizer_wm() */
187 if (rasterizer) {
188 dw1 = rasterizer->wm.payload[0];
189 dw2 = rasterizer->wm.payload[1];
190
191 assert(!hiz_op);
192 dw1 |= GEN7_WM_DW1_STATISTICS;
193 }
194 else {
195 dw1 = hiz_op;
196 dw2 = 0;
197 }
198
199 if (fs) {
200 const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
201
202 dw1 |= fs_cso->payload[3];
203 }
204
205 if (cc_may_kill)
206 dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL;
207
208 if (num_samples > 1) {
209 dw1 |= rasterizer->wm.dw_msaa_rast;
210 dw2 |= rasterizer->wm.dw_msaa_disp;
211 }
212
213 ilo_cp_begin(cp, cmd_len);
214 ilo_cp_write(cp, cmd | (cmd_len - 2));
215 ilo_cp_write(cp, dw1);
216 ilo_cp_write(cp, dw2);
217 ilo_cp_end(cp);
218 }
219
/**
 * Shared helper for the 3DSTATE_CONSTANT_{VS,HS,DS,GS,PS} commands
 * (7 DWords).
 *
 * Up to four buffers are described by \p bufs (addresses) and \p sizes
 * (in bytes, converted here to 256-bit read lengths).  The first slot that
 * is beyond \p num_bufs or has a zero size ends the list; it and all later
 * slots are emitted as zero.
 */
static inline void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t cmd_len = 7;
   uint32_t dw[6] = { 0 };
   int total_read_length = 0;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7.5);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
    *
    *     "Constant buffers must be enabled in order from Constant Buffer 0
    *      to Constant Buffer 3 within this command.  For example, it is
    *      not allowed to enable Constant Buffer 1 by programming a
    *      non-zero value in the VS Constant Buffer 1 Read Length without a
    *      non-zero value in VS Constant Buffer 0 Read Length."
    *
    * Hence only the leading run of enabled buffers is programmed.
    */
   for (i = 0; i < 4 && i < num_bufs && sizes[i]; i++) {
      /* read lengths are in 256-bit units */
      const int read_len = (sizes[i] + 31) / 32;
      /* two 16-bit read lengths are packed per DWord */
      const int shift = (i & 1) ? 16 : 0;

      /* the lower 5 bits are used for memory object control state */
      assert(bufs[i] % 32 == 0);

      dw[i / 2] |= read_len << shift;
      dw[2 + i] = bufs[i];

      total_read_length += read_len;
   }

   /* every slot after the first disabled one must be disabled too */
   for (; i < 4; i++) {
      assert(i >= num_bufs || !sizes[i]);
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(total_read_length <= 64);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, dw, 6);
   ilo_cp_end(cp);
}
287
/**
 * Emit 3DSTATE_CONSTANT_VS: the constant-buffer command with sub-opcode
 * 0x15.
 */
static inline void
gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
296
/**
 * Emit 3DSTATE_CONSTANT_GS: the constant-buffer command with sub-opcode
 * 0x16.
 */
static inline void
gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
305
/**
 * Emit 3DSTATE_CONSTANT_PS: the constant-buffer command with sub-opcode
 * 0x17.
 */
static inline void
gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x17;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
314
/**
 * Emit 3DSTATE_SAMPLE_MASK (2 DWords).
 *
 * \p sample_mask is trimmed to the low \p num_samples bits (bit 0 is
 * always kept) before it is written.
 */
static inline void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (cmd_len - 2);
   const unsigned allowed_bits = ((1 << num_samples) - 1) | 0x1;

   ILO_GPE_VALID_GEN(dev, 7, 7.5);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & allowed_bits);
   ilo_cp_end(cp);
}
343
/**
 * Emit 3DSTATE_CONSTANT_HS: the constant-buffer command with sub-opcode
 * 0x19.
 */
static inline void
gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x19;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
352
/**
 * Emit 3DSTATE_CONSTANT_DS: the constant-buffer command with sub-opcode
 * 0x1a.
 */
static inline void
gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x1a;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
361
/**
 * Emit an all-zero 3DSTATE_HS (7 DWords).
 *
 * Hull shaders are not handled: \p hs must be NULL and \p num_samplers is
 * ignored.
 */
static inline void
gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *hs,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
   const uint8_t cmd_len = 7;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7.5);

   assert(!hs);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* everything after the header is zero */
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
385
/**
 * Emit an all-zero 3DSTATE_TE (4 DWords).
 */
static inline void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1c);
   const uint8_t cmd_len = 4;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* everything after the header is zero */
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
402
/**
 * Emit an all-zero 3DSTATE_DS (6 DWords).
 *
 * Domain shaders are not handled: \p ds must be NULL and \p num_samplers
 * is ignored.
 */
static inline void
gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *ds,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
   const uint8_t cmd_len = 6;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7.5);

   assert(!ds);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* everything after the header is zero */
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
426
427 static inline void
428 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
429 unsigned buffer_mask,
430 int vertex_attrib_count,
431 bool rasterizer_discard,
432 struct ilo_cp *cp)
433 {
434 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
435 const uint8_t cmd_len = 3;
436 const bool enable = (buffer_mask != 0);
437 uint32_t dw1, dw2;
438 int read_len;
439
440 ILO_GPE_VALID_GEN(dev, 7, 7.5);
441
442 if (!enable) {
443 dw1 = 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
444 if (rasterizer_discard)
445 dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
446
447 dw2 = 0;
448
449 ilo_cp_begin(cp, cmd_len);
450 ilo_cp_write(cp, cmd | (cmd_len - 2));
451 ilo_cp_write(cp, dw1);
452 ilo_cp_write(cp, dw2);
453 ilo_cp_end(cp);
454 return;
455 }
456
457 read_len = (vertex_attrib_count + 1) / 2;
458 if (!read_len)
459 read_len = 1;
460
461 dw1 = GEN7_SO_DW1_SO_ENABLE |
462 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
463 GEN7_SO_DW1_STATISTICS |
464 buffer_mask << 8;
465
466 if (rasterizer_discard)
467 dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
468
469 /* API_OPENGL */
470 if (true)
471 dw1 |= GEN7_SO_DW1_REORDER_TRAILING;
472
473 dw2 = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
474 0 << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
475 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
476 0 << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
477 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
478 0 << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
479 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
480 (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
481
482 ilo_cp_begin(cp, cmd_len);
483 ilo_cp_write(cp, cmd | (cmd_len - 2));
484 ilo_cp_write(cp, dw1);
485 ilo_cp_write(cp, dw2);
486 ilo_cp_end(cp);
487 }
488
/**
 * Emit 3DSTATE_SBE (14 DWords).
 *
 * The thirteen payload DWords are produced by
 * ilo_gpe_gen6_fill_3dstate_sf_sbe() from \p rasterizer and \p fs.
 */
static inline void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct ilo_rasterizer_state *rasterizer,
                      const struct ilo_shader_state *fs,
                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 14;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1f) | (cmd_len - 2);
   uint32_t body[13];

   ILO_GPE_VALID_GEN(dev, 7, 7.5);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, fs,
         body, Elements(body));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, body, 13);
   ilo_cp_end(cp);
}
508
509 static inline void
510 gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
511 const struct ilo_shader_state *fs,
512 int num_samplers, bool dual_blend,
513 struct ilo_cp *cp)
514 {
515 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
516 const uint8_t cmd_len = 8;
517 const struct ilo_shader_cso *cso;
518 uint32_t dw2, dw4, dw5;
519
520 ILO_GPE_VALID_GEN(dev, 7, 7.5);
521
522 if (!fs) {
523 int max_threads;
524
525 /* GPU hangs if none of the dispatch enable bits is set */
526 dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH;
527
528 /* see brwCreateContext() */
529 switch (dev->gen) {
530 case ILO_GEN(7.5):
531 max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
532 dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
533 break;
534 case ILO_GEN(7):
535 default:
536 max_threads = (dev->gt == 2) ? 172 : 48;
537 dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
538 break;
539 }
540
541 ilo_cp_begin(cp, cmd_len);
542 ilo_cp_write(cp, cmd | (cmd_len - 2));
543 ilo_cp_write(cp, 0);
544 ilo_cp_write(cp, 0);
545 ilo_cp_write(cp, 0);
546 ilo_cp_write(cp, dw4);
547 ilo_cp_write(cp, 0);
548 ilo_cp_write(cp, 0);
549 ilo_cp_write(cp, 0);
550 ilo_cp_end(cp);
551
552 return;
553 }
554
555 cso = ilo_shader_get_kernel_cso(fs);
556 dw2 = cso->payload[0];
557 dw4 = cso->payload[1];
558 dw5 = cso->payload[2];
559
560 dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
561
562 if (dual_blend)
563 dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
564
565 ilo_cp_begin(cp, cmd_len);
566 ilo_cp_write(cp, cmd | (cmd_len - 2));
567 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
568 ilo_cp_write(cp, dw2);
569 ilo_cp_write(cp, 0); /* scratch */
570 ilo_cp_write(cp, dw4);
571 ilo_cp_write(cp, dw5);
572 ilo_cp_write(cp, 0); /* kernel 1 */
573 ilo_cp_write(cp, 0); /* kernel 2 */
574 ilo_cp_end(cp);
575 }
576
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: the pointer command with
 * sub-opcode 0x21 and \p sf_clip_viewport as its payload.
 */
static inline void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                  uint32_t sf_clip_viewport,
                                                  struct ilo_cp *cp)
{
   const int subop = 0x21;

   gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
}
584
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC: the pointer command with
 * sub-opcode 0x23 and \p cc_viewport as its payload.
 */
static inline void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                             uint32_t cc_viewport,
                                             struct ilo_cp *cp)
{
   const int subop = 0x23;

   gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
}
592
/**
 * Emit 3DSTATE_BLEND_STATE_POINTERS: the pointer command with sub-opcode
 * 0x24 and \p blend_state as its payload.
 */
static inline void
gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                       uint32_t blend_state,
                                       struct ilo_cp *cp)
{
   const int subop = 0x24;

   gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
}
600
/**
 * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: the pointer command with
 * sub-opcode 0x25 and \p depth_stencil_state as its payload.
 */
static inline void
gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                               uint32_t depth_stencil_state,
                                               struct ilo_cp *cp)
{
   const int subop = 0x25;

   gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
}
608
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS: the pointer command with
 * sub-opcode 0x26 and \p binding_table as its payload.
 */
static inline void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x26;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
616
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS: the pointer command with
 * sub-opcode 0x27 and \p binding_table as its payload.
 */
static inline void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x27;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
624
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS: the pointer command with
 * sub-opcode 0x28 and \p binding_table as its payload.
 */
static inline void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x28;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
632
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS: the pointer command with
 * sub-opcode 0x29 and \p binding_table as its payload.
 */
static inline void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x29;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
640
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS: the pointer command with
 * sub-opcode 0x2a and \p binding_table as its payload.
 */
static inline void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2a;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
648
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS: the pointer command with
 * sub-opcode 0x2b and \p sampler_state as its payload.
 */
static inline void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2b;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
656
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS: the pointer command with
 * sub-opcode 0x2c and \p sampler_state as its payload.
 */
static inline void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2c;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
664
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS: the pointer command with
 * sub-opcode 0x2d and \p sampler_state as its payload.
 */
static inline void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2d;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
672
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS: the pointer command with
 * sub-opcode 0x2e and \p sampler_state as its payload.
 */
static inline void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2e;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
680
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS: the pointer command with
 * sub-opcode 0x2f and \p sampler_state as its payload.
 */
static inline void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2f;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
688
689 static inline void
690 gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
691 int subop, int offset, int size,
692 int entry_size,
693 struct ilo_cp *cp)
694 {
695 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
696 const uint8_t cmd_len = 2;
697 const int row_size = 64; /* 512 bits */
698 int alloc_size, num_entries, min_entries, max_entries;
699
700 ILO_GPE_VALID_GEN(dev, 7, 7.5);
701
702 /* VS, HS, DS, and GS variants */
703 assert(subop >= 0x30 && subop <= 0x33);
704
705 /* in multiples of 8KB */
706 assert(offset % 8192 == 0);
707 offset /= 8192;
708
709 /* in multiple of 512-bit rows */
710 alloc_size = (entry_size + row_size - 1) / row_size;
711 if (!alloc_size)
712 alloc_size = 1;
713
714 /*
715 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
716 *
717 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
718 * cause performance to decrease due to banking in the URB. Element
719 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
720 */
721 if (subop == 0x30 && alloc_size == 5)
722 alloc_size = 6;
723
724 /* in multiples of 8 */
725 num_entries = (size / row_size / alloc_size) & ~7;
726
727 switch (subop) {
728 case 0x30: /* 3DSTATE_URB_VS */
729 min_entries = 32;
730
731 switch (dev->gen) {
732 case ILO_GEN(7.5):
733 max_entries = (dev->gt >= 2) ? 1644 : 640;
734 break;
735 case ILO_GEN(7):
736 default:
737 max_entries = (dev->gt == 2) ? 704 : 512;
738 break;
739 }
740
741 assert(num_entries >= min_entries);
742 if (num_entries > max_entries)
743 num_entries = max_entries;
744 break;
745 case 0x31: /* 3DSTATE_URB_HS */
746 max_entries = (dev->gt == 2) ? 64 : 32;
747 if (num_entries > max_entries)
748 num_entries = max_entries;
749 break;
750 case 0x32: /* 3DSTATE_URB_DS */
751 if (num_entries)
752 assert(num_entries >= 138);
753 break;
754 case 0x33: /* 3DSTATE_URB_GS */
755 switch (dev->gen) {
756 case ILO_GEN(7.5):
757 max_entries = (dev->gt >= 2) ? 640 : 256;
758 break;
759 case ILO_GEN(7):
760 default:
761 max_entries = (dev->gt == 2) ? 320 : 192;
762 break;
763 }
764
765 if (num_entries > max_entries)
766 num_entries = max_entries;
767 break;
768 default:
769 break;
770 }
771
772 ilo_cp_begin(cp, cmd_len);
773 ilo_cp_write(cp, cmd | (cmd_len - 2));
774 ilo_cp_write(cp, offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT |
775 (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT |
776 num_entries);
777 ilo_cp_end(cp);
778 }
779
/**
 * Emit 3DSTATE_URB_VS: the URB allocation command with sub-opcode 0x30.
 */
static inline void
gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x30;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
787
/**
 * Emit 3DSTATE_URB_HS: the URB allocation command with sub-opcode 0x31.
 */
static inline void
gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x31;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
795
/**
 * Emit 3DSTATE_URB_DS: the URB allocation command with sub-opcode 0x32.
 */
static inline void
gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x32;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
803
/**
 * Emit 3DSTATE_URB_GS: the URB allocation command with sub-opcode 0x33.
 */
static inline void
gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x33;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
811
812 static inline void
813 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
814 int subop, int offset, int size,
815 struct ilo_cp *cp)
816 {
817 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
818 const uint8_t cmd_len = 2;
819 int end;
820
821 ILO_GPE_VALID_GEN(dev, 7, 7.5);
822
823 /* VS, HS, DS, GS, and PS variants */
824 assert(subop >= 0x12 && subop <= 0x16);
825
826 /*
827 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
828 *
829 * "(A table that says the maximum size of each constant buffer is
830 * 16KB")
831 *
832 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
833 *
834 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
835 * may not exceed the maximum value of the Constant Buffer Size."
836 *
837 * Thus, the valid range of buffer end is [0KB, 16KB].
838 */
839 end = (offset + size) / 1024;
840 if (end > 16) {
841 assert(!"invalid constant buffer end");
842 end = 16;
843 }
844
845 /* the valid range of buffer offset is [0KB, 15KB] */
846 offset = (offset + 1023) / 1024;
847 if (offset > 15) {
848 assert(!"invalid constant buffer offset");
849 offset = 15;
850 }
851
852 if (offset > end) {
853 assert(!size);
854 offset = end;
855 }
856
857 /* the valid range of buffer size is [0KB, 15KB] */
858 size = end - offset;
859 if (size > 15) {
860 assert(!"invalid constant buffer size");
861 size = 15;
862 }
863
864 ilo_cp_begin(cp, cmd_len);
865 ilo_cp_write(cp, cmd | (cmd_len - 2));
866 ilo_cp_write(cp, offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
867 size);
868 ilo_cp_end(cp);
869 }
870
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS: the push-constant allocation
 * command with sub-opcode 0x12.
 */
static inline void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x12;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
878
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS: the push-constant allocation
 * command with sub-opcode 0x13.
 */
static inline void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x13;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
886
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS: the push-constant allocation
 * command with sub-opcode 0x14.
 */
static inline void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x14;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
894
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS: the push-constant allocation
 * command with sub-opcode 0x15.
 */
static inline void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
902
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS: the push-constant allocation
 * command with sub-opcode 0x16.
 */
static inline void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
910
911 static inline void
912 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
913 const struct pipe_stream_output_info *so_info,
914 struct ilo_cp *cp)
915 {
916 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
917 uint16_t cmd_len;
918 int buffer_selects, num_entries, i;
919 uint16_t so_decls[128];
920
921 ILO_GPE_VALID_GEN(dev, 7, 7.5);
922
923 buffer_selects = 0;
924 num_entries = 0;
925
926 if (so_info) {
927 int buffer_offsets[PIPE_MAX_SO_BUFFERS];
928
929 memset(buffer_offsets, 0, sizeof(buffer_offsets));
930
931 for (i = 0; i < so_info->num_outputs; i++) {
932 unsigned decl, buf, reg, mask;
933
934 buf = so_info->output[i].output_buffer;
935
936 /* pad with holes */
937 assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
938 while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
939 int num_dwords;
940
941 num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
942 if (num_dwords > 4)
943 num_dwords = 4;
944
945 decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
946 GEN7_SO_DECL_HOLE_FLAG |
947 ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
948
949 so_decls[num_entries++] = decl;
950 buffer_offsets[buf] += num_dwords;
951 }
952
953 reg = so_info->output[i].register_index;
954 mask = ((1 << so_info->output[i].num_components) - 1) <<
955 so_info->output[i].start_component;
956
957 decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
958 reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
959 mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
960
961 so_decls[num_entries++] = decl;
962 buffer_selects |= 1 << buf;
963 buffer_offsets[buf] += so_info->output[i].num_components;
964 }
965 }
966
967 /*
968 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
969 *
970 * "Errata: All 128 decls for all four streams must be included
971 * whenever this command is issued. The "Num Entries [n]" fields still
972 * contain the actual numbers of valid decls."
973 *
974 * Also note that "DWord Length" has 9 bits for this command, and the type
975 * of cmd_len is thus uint16_t.
976 */
977 cmd_len = 2 * 128 + 3;
978
979 ilo_cp_begin(cp, cmd_len);
980 ilo_cp_write(cp, cmd | (cmd_len - 2));
981 ilo_cp_write(cp, 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
982 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
983 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
984 buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT);
985 ilo_cp_write(cp, 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
986 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
987 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
988 num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT);
989
990 for (i = 0; i < num_entries; i++) {
991 ilo_cp_write(cp, so_decls[i]);
992 ilo_cp_write(cp, 0);
993 }
994 for (; i < 128; i++) {
995 ilo_cp_write(cp, 0);
996 ilo_cp_write(cp, 0);
997 }
998
999 ilo_cp_end(cp);
1000 }
1001
1002 static inline void
1003 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
1004 int index, int base, int stride,
1005 const struct pipe_stream_output_target *so_target,
1006 struct ilo_cp *cp)
1007 {
1008 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
1009 const uint8_t cmd_len = 4;
1010 struct ilo_buffer *buf;
1011 int end;
1012
1013 ILO_GPE_VALID_GEN(dev, 7, 7.5);
1014
1015 if (!so_target || !so_target->buffer) {
1016 ilo_cp_begin(cp, cmd_len);
1017 ilo_cp_write(cp, cmd | (cmd_len - 2));
1018 ilo_cp_write(cp, index << GEN7_SO_BUF_DW1_INDEX__SHIFT);
1019 ilo_cp_write(cp, 0);
1020 ilo_cp_write(cp, 0);
1021 ilo_cp_end(cp);
1022 return;
1023 }
1024
1025 buf = ilo_buffer(so_target->buffer);
1026
1027 /* DWord-aligned */
1028 assert(stride % 4 == 0 && base % 4 == 0);
1029 assert(so_target->buffer_offset % 4 == 0);
1030
1031 stride &= ~3;
1032 base = (base + so_target->buffer_offset) & ~3;
1033 end = (base + so_target->buffer_size) & ~3;
1034
1035 ilo_cp_begin(cp, cmd_len);
1036 ilo_cp_write(cp, cmd | (cmd_len - 2));
1037 ilo_cp_write(cp, index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
1038 stride);
1039 ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1040 ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1041 ilo_cp_end(cp);
1042 }
1043
1044 static inline void
1045 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1046 const struct pipe_draw_info *info,
1047 const struct ilo_ib_state *ib,
1048 bool rectlist,
1049 struct ilo_cp *cp)
1050 {
1051 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1052 const uint8_t cmd_len = 7;
1053 const int prim = (rectlist) ?
1054 GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1055 const int vb_access = (info->indexed) ?
1056 GEN7_3DPRIM_DW1_ACCESS_RANDOM :
1057 GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL;
1058 const uint32_t vb_start = info->start +
1059 ((info->indexed) ? ib->draw_start_offset : 0);
1060
1061 ILO_GPE_VALID_GEN(dev, 7, 7.5);
1062
1063 ilo_cp_begin(cp, cmd_len);
1064 ilo_cp_write(cp, cmd | (cmd_len - 2));
1065 ilo_cp_write(cp, vb_access | prim);
1066 ilo_cp_write(cp, info->count);
1067 ilo_cp_write(cp, vb_start);
1068 ilo_cp_write(cp, info->instance_count);
1069 ilo_cp_write(cp, info->start_instance);
1070 ilo_cp_write(cp, info->index_bias);
1071 ilo_cp_end(cp);
1072 }
1073
1074 static inline uint32_t
1075 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1076 const struct ilo_viewport_cso *viewports,
1077 unsigned num_viewports,
1078 struct ilo_cp *cp)
1079 {
1080 const int state_align = 64 / 4;
1081 const int state_len = 16 * num_viewports;
1082 uint32_t state_offset, *dw;
1083 unsigned i;
1084
1085 ILO_GPE_VALID_GEN(dev, 7, 7.5);
1086
1087 /*
1088 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1089 *
1090 * "The viewport-specific state used by both the SF and CL units
1091 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1092 * of which contains the DWords described below. The start of each
1093 * element is spaced 16 DWords apart. The location of first element of
1094 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1095 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1096 */
1097 assert(num_viewports && num_viewports <= 16);
1098
1099 dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
1100 state_len, state_align, &state_offset);
1101
1102 for (i = 0; i < num_viewports; i++) {
1103 const struct ilo_viewport_cso *vp = &viewports[i];
1104
1105 dw[0] = fui(vp->m00);
1106 dw[1] = fui(vp->m11);
1107 dw[2] = fui(vp->m22);
1108 dw[3] = fui(vp->m30);
1109 dw[4] = fui(vp->m31);
1110 dw[5] = fui(vp->m32);
1111 dw[6] = 0;
1112 dw[7] = 0;
1113 dw[8] = fui(vp->min_gbx);
1114 dw[9] = fui(vp->max_gbx);
1115 dw[10] = fui(vp->min_gby);
1116 dw[11] = fui(vp->max_gby);
1117 dw[12] = 0;
1118 dw[13] = 0;
1119 dw[14] = 0;
1120 dw[15] = 0;
1121
1122 dw += 16;
1123 }
1124
1125 return state_offset;
1126 }
1127
1128 #endif /* ILO_GPE_GEN7_H */