79e01d7968f91089374cc017a46a301ba3d8df1e
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_resource.h"
29 #include "brw_defines.h"
30 #include "intel_reg.h"
31
32 #include "shader/ilo_shader_internal.h"
33 #include "ilo_cp.h"
34 #include "ilo_format.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_gpe_gen7.h"
38
/**
 * Placeholder for the GPGPU_WALKER command.  The command is not implemented;
 * reaching this function trips an assertion.
 */
static void
gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
{
   /* a string literal is never NULL, so its negation always fails */
   assert(!"GPGPU_WALKER unsupported");
}
45
/**
 * Emit 3DSTATE_CLEAR_PARAMS: a three-dword command made of the header,
 * clear_val, and a final dword with bit 0 set.
 */
static void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x04);
   const uint8_t num_dwords = 3;
   const uint32_t header = opcode | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, clear_val);
   /* presumably the "clear value valid" bit -- confirm against the PRM */
   ilo_cp_write(cp, 1);
   ilo_cp_end(cp);
}
62
/**
 * Emit a two-dword 3DSTATE_* command whose only payload is a state pointer.
 *
 * subop selects which command is built and pointer is written verbatim as
 * the second dword.
 */
static void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t num_dwords = 2;
   const uint32_t header = opcode | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
78
/** Emit 3DSTATE_CC_STATE_POINTERS (sub-opcode 0x0e) with the given pointer. */
static void
gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const int subop = 0x0e;

   gen7_emit_3dstate_pointer(dev, subop, color_calc_state, cp);
}
86
87 void
88 ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
89 const struct ilo_shader_state *gs,
90 struct ilo_shader_cso *cso)
91 {
92 int start_grf, vue_read_len, max_threads;
93 uint32_t dw2, dw4, dw5;
94
95 ILO_GPE_VALID_GEN(dev, 7, 7);
96
97 start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
98 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
99
100 /* in pairs */
101 vue_read_len = (vue_read_len + 1) / 2;
102
103 switch (dev->gen) {
104 case ILO_GEN(7):
105 max_threads = (dev->gt == 2) ? 128 : 36;
106 break;
107 default:
108 max_threads = 1;
109 break;
110 }
111
112 dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
113
114 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
115 GEN7_GS_INCLUDE_VERTEX_HANDLES |
116 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
117 start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
118
119 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
120 GEN6_GS_STATISTICS_ENABLE |
121 GEN6_GS_ENABLE;
122
123 STATIC_ASSERT(Elements(cso->payload) >= 3);
124 cso->payload[0] = dw2;
125 cso->payload[1] = dw4;
126 cso->payload[2] = dw5;
127 }
128
129 static void
130 gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
131 const struct ilo_shader_state *gs,
132 int num_samplers,
133 struct ilo_cp *cp)
134 {
135 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
136 const uint8_t cmd_len = 7;
137 const struct ilo_shader_cso *cso;
138 uint32_t dw2, dw4, dw5;
139
140 ILO_GPE_VALID_GEN(dev, 7, 7);
141
142 if (!gs) {
143 ilo_cp_begin(cp, cmd_len);
144 ilo_cp_write(cp, cmd | (cmd_len - 2));
145 ilo_cp_write(cp, 0);
146 ilo_cp_write(cp, 0);
147 ilo_cp_write(cp, 0);
148 ilo_cp_write(cp, 0);
149 ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
150 ilo_cp_write(cp, 0);
151 ilo_cp_end(cp);
152 return;
153 }
154
155 cso = ilo_shader_get_kernel_cso(gs);
156 dw2 = cso->payload[0];
157 dw4 = cso->payload[1];
158 dw5 = cso->payload[2];
159
160 dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
161
162 ilo_cp_begin(cp, cmd_len);
163 ilo_cp_write(cp, cmd | (cmd_len - 2));
164 ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
165 ilo_cp_write(cp, dw2);
166 ilo_cp_write(cp, 0); /* scratch */
167 ilo_cp_write(cp, dw4);
168 ilo_cp_write(cp, dw5);
169 ilo_cp_write(cp, 0);
170 ilo_cp_end(cp);
171 }
172
173 static void
174 gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
175 const struct ilo_rasterizer_state *rasterizer,
176 const struct pipe_surface *zs_surf,
177 struct ilo_cp *cp)
178 {
179 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
180 const uint8_t cmd_len = 7;
181 const int num_samples = 1;
182 uint32_t payload[6];
183
184 ILO_GPE_VALID_GEN(dev, 7, 7);
185
186 ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
187 &rasterizer->sf, num_samples,
188 (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
189 payload, Elements(payload));
190
191 ilo_cp_begin(cp, cmd_len);
192 ilo_cp_write(cp, cmd | (cmd_len - 2));
193 ilo_cp_write_multi(cp, payload, 6);
194 ilo_cp_end(cp);
195 }
196
197 void
198 ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
199 const struct pipe_rasterizer_state *state,
200 struct ilo_rasterizer_wm *wm)
201 {
202 uint32_t dw1, dw2;
203
204 ILO_GPE_VALID_GEN(dev, 7, 7);
205
206 dw1 = GEN7_WM_POSITION_ZW_PIXEL |
207 GEN7_WM_LINE_AA_WIDTH_2_0 |
208 GEN7_WM_MSRAST_OFF_PIXEL;
209
210 /* same value as in 3DSTATE_SF */
211 if (state->line_smooth)
212 dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
213
214 if (state->poly_stipple_enable)
215 dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
216 if (state->line_stipple_enable)
217 dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
218
219 if (state->bottom_edge_rule)
220 dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
221
222 dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
223
224 /*
225 * assertion that makes sure
226 *
227 * dw1 |= wm->dw_msaa_rast;
228 * dw2 |= wm->dw_msaa_disp;
229 *
230 * is valid
231 */
232 STATIC_ASSERT(GEN7_WM_MSRAST_OFF_PIXEL == 0 &&
233 GEN7_WM_MSDISPMODE_PERSAMPLE == 0);
234
235 wm->dw_msaa_rast =
236 (state->multisample) ? GEN7_WM_MSRAST_ON_PATTERN : 0;
237 wm->dw_msaa_disp = GEN7_WM_MSDISPMODE_PERPIXEL;
238
239 STATIC_ASSERT(Elements(wm->payload) >= 2);
240 wm->payload[0] = dw1;
241 wm->payload[1] = dw2;
242 }
243
244 static void
245 gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
246 const struct ilo_shader *fs,
247 const struct ilo_rasterizer_state *rasterizer,
248 bool cc_may_kill,
249 struct ilo_cp *cp)
250 {
251 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
252 const uint8_t cmd_len = 3;
253 const int num_samples = 1;
254 uint32_t dw1, dw2;
255
256 ILO_GPE_VALID_GEN(dev, 7, 7);
257
258 /* see ilo_gpe_init_rasterizer_wm() */
259 dw1 = rasterizer->wm.payload[0];
260 dw2 = rasterizer->wm.payload[1];
261
262 dw1 |= GEN7_WM_STATISTICS_ENABLE;
263
264 if (false) {
265 dw1 |= GEN7_WM_DEPTH_CLEAR;
266 dw1 |= GEN7_WM_DEPTH_RESOLVE;
267 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
268 }
269
270 if (fs) {
271 /*
272 * Set this bit if
273 *
274 * a) fs writes colors and color is not masked, or
275 * b) fs writes depth, or
276 * c) fs or cc kills
277 */
278 dw1 |= GEN7_WM_DISPATCH_ENABLE;
279
280 /*
281 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
282 *
283 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
284 * the PS kernel or color calculator has the ability to kill
285 * (discard) pixels or samples, other than due to depth or stencil
286 * testing. This bit is required to be ENABLED in the following
287 * situations:
288 *
289 * - The API pixel shader program contains "killpix" or "discard"
290 * instructions, or other code in the pixel shader kernel that
291 * can cause the final pixel mask to differ from the pixel mask
292 * received on dispatch.
293 *
294 * - A sampler with chroma key enabled with kill pixel mode is used
295 * by the pixel shader.
296 *
297 * - Any render target has Alpha Test Enable or AlphaToCoverage
298 * Enable enabled.
299 *
300 * - The pixel shader kernel generates and outputs oMask.
301 *
302 * Note: As ClipDistance clipping is fully supported in hardware
303 * and therefore not via PS instructions, there should be no need
304 * to ENABLE this bit due to ClipDistance clipping."
305 */
306 if (fs->has_kill || cc_may_kill)
307 dw1 |= GEN7_WM_KILL_ENABLE;
308
309 if (fs->out.has_pos)
310 dw1 |= GEN7_WM_PSCDEPTH_ON;
311 if (fs->in.has_pos)
312 dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
313
314 dw1 |= fs->in.barycentric_interpolation_mode <<
315 GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
316 }
317 else if (cc_may_kill) {
318 dw1 |= GEN7_WM_DISPATCH_ENABLE |
319 GEN7_WM_KILL_ENABLE;
320 }
321
322 if (num_samples > 1) {
323 dw1 |= rasterizer->wm.dw_msaa_rast;
324 dw2 |= rasterizer->wm.dw_msaa_disp;
325 }
326
327 ilo_cp_begin(cp, cmd_len);
328 ilo_cp_write(cp, cmd | (cmd_len - 2));
329 ilo_cp_write(cp, dw1);
330 ilo_cp_write(cp, dw2);
331 ilo_cp_end(cp);
332 }
333
/**
 * Emit one of the 3DSTATE_CONSTANT_* commands (seven dwords).
 *
 * bufs[i] and sizes[i] describe up to four constant buffers; sizes are in
 * bytes and are converted to read lengths in 256-bit units.  The first two
 * payload dwords pack the four read lengths (16 bits each) and the last
 * four hold the buffer addresses.  Buffers after the first disabled one are
 * emitted as disabled.
 */
static void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t num_dwords = 7;
   const uint32_t header = opcode | (num_dwords - 2);
   uint32_t payload[6] = { 0, 0, 0, 0, 0, 0 };
   int read_len_sum = 0;
   int slot;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
    *
    *     "Constant buffers must be enabled in order from Constant Buffer 0
    *      to Constant Buffer 3 within this command.  For example, it is
    *      not allowed to enable Constant Buffer 1 by programming a
    *      non-zero value in the VS Constant Buffer 1 Read Length without a
    *      non-zero value in VS Constant Buffer 0 Read Length."
    *
    * So only the leading run of non-empty buffers is programmed.
    */
   for (slot = 0; slot < 4; slot++) {
      const int shift = (slot & 1) ? 16 : 0;
      int read_len;

      if (slot >= num_bufs || !sizes[slot])
         break;

      /* read lengths are in 256-bit units */
      read_len = (sizes[slot] + 31) / 32;
      /* the lower 5 bits are used for memory object control state */
      assert(bufs[slot] % 32 == 0);

      payload[slot / 2] |= read_len << shift;
      payload[2 + slot] = bufs[slot];

      read_len_sum += read_len;
   }

   /* every remaining slot must be disabled as well */
   for (; slot < 4; slot++)
      assert(slot >= num_bufs || !sizes[slot]);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(read_len_sum <= 64);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, payload, 6);
   ilo_cp_end(cp);
}
401
/** Emit 3DSTATE_CONSTANT_VS (sub-opcode 0x15). */
static void
gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
410
/** Emit 3DSTATE_CONSTANT_GS (sub-opcode 0x16). */
static void
gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
419
/** Emit 3DSTATE_CONSTANT_PS (sub-opcode 0x17). */
static void
gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x17;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
428
/**
 * Emit 3DSTATE_SAMPLE_MASK (two dwords).  Bits of sample_mask at or above
 * num_samples are cleared before the mask is written; bit 0 is always
 * allowed through.
 */
static void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t num_dwords = 2;
   const uint32_t header = opcode | (num_dwords - 2);
   const unsigned allowed_bits = ((1 << num_samples) - 1) | 0x1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & allowed_bits);
   ilo_cp_end(cp);
}
457
/** Emit 3DSTATE_CONSTANT_HS (sub-opcode 0x19). */
static void
gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x19;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
466
/** Emit 3DSTATE_CONSTANT_DS (sub-opcode 0x1a). */
static void
gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x1a;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
475
476 static void
477 gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
478 const struct ilo_shader *hs,
479 int max_threads, int num_samplers,
480 struct ilo_cp *cp)
481 {
482 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
483 const uint8_t cmd_len = 7;
484 uint32_t dw1, dw2, dw5;
485
486 ILO_GPE_VALID_GEN(dev, 7, 7);
487
488 if (!hs) {
489 ilo_cp_begin(cp, cmd_len);
490 ilo_cp_write(cp, cmd | (cmd_len - 2));
491 ilo_cp_write(cp, 0);
492 ilo_cp_write(cp, 0);
493 ilo_cp_write(cp, 0);
494 ilo_cp_write(cp, 0);
495 ilo_cp_write(cp, 0);
496 ilo_cp_write(cp, 0);
497 ilo_cp_end(cp);
498
499 return;
500 }
501
502 dw1 = (num_samplers + 3) / 4 << 27 |
503 0 << 18 |
504 (max_threads - 1);
505 if (false)
506 dw1 |= 1 << 16;
507
508 dw2 = 1 << 31 | /* HS Enable */
509 1 << 29 | /* HS Statistics Enable */
510 0; /* Instance Count */
511
512 dw5 = hs->in.start_grf << 19 |
513 0 << 11 |
514 0 << 4;
515
516 ilo_cp_begin(cp, cmd_len);
517 ilo_cp_write(cp, cmd | (cmd_len - 2));
518 ilo_cp_write(cp, dw1);
519 ilo_cp_write(cp, dw2);
520 ilo_cp_write(cp, hs->cache_offset);
521 ilo_cp_write(cp, 0);
522 ilo_cp_write(cp, dw5);
523 ilo_cp_write(cp, 0);
524 ilo_cp_end(cp);
525 }
526
/** Emit 3DSTATE_TE (four dwords) with an all-zero payload. */
static void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1c);
   const uint8_t num_dwords = 4;
   const uint32_t header = opcode | (num_dwords - 2);
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   /* three zero dwords follow the header */
   for (i = 1; i < num_dwords; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
543
544 static void
545 gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
546 const struct ilo_shader *ds,
547 int max_threads, int num_samplers,
548 struct ilo_cp *cp)
549 {
550 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
551 const uint8_t cmd_len = 6;
552 uint32_t dw2, dw4, dw5;
553
554 ILO_GPE_VALID_GEN(dev, 7, 7);
555
556 if (!ds) {
557 ilo_cp_begin(cp, cmd_len);
558 ilo_cp_write(cp, cmd | (cmd_len - 2));
559 ilo_cp_write(cp, 0);
560 ilo_cp_write(cp, 0);
561 ilo_cp_write(cp, 0);
562 ilo_cp_write(cp, 0);
563 ilo_cp_write(cp, 0);
564 ilo_cp_end(cp);
565
566 return;
567 }
568
569 dw2 = (num_samplers + 3) / 4 << 27 |
570 0 << 18 |
571 (max_threads - 1);
572 if (false)
573 dw2 |= 1 << 16;
574
575 dw4 = ds->in.start_grf << 20 |
576 0 << 11 |
577 0 << 4;
578
579 dw5 = (max_threads - 1) << 25 |
580 1 << 10 |
581 1;
582
583 ilo_cp_begin(cp, cmd_len);
584 ilo_cp_write(cp, cmd | (cmd_len - 2));
585 ilo_cp_write(cp, ds->cache_offset);
586 ilo_cp_write(cp, dw2);
587 ilo_cp_write(cp, 0);
588 ilo_cp_write(cp, dw4);
589 ilo_cp_write(cp, dw5);
590 ilo_cp_end(cp);
591 }
592
593 static void
594 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
595 unsigned buffer_mask,
596 int vertex_attrib_count,
597 bool rasterizer_discard,
598 struct ilo_cp *cp)
599 {
600 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
601 const uint8_t cmd_len = 3;
602 const bool enable = (buffer_mask != 0);
603 uint32_t dw1, dw2;
604 int read_len;
605
606 ILO_GPE_VALID_GEN(dev, 7, 7);
607
608 if (!enable) {
609 dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
610 if (rasterizer_discard)
611 dw1 |= SO_RENDERING_DISABLE;
612
613 dw2 = 0;
614
615 ilo_cp_begin(cp, cmd_len);
616 ilo_cp_write(cp, cmd | (cmd_len - 2));
617 ilo_cp_write(cp, dw1);
618 ilo_cp_write(cp, dw2);
619 ilo_cp_end(cp);
620 return;
621 }
622
623 read_len = (vertex_attrib_count + 1) / 2;
624 if (!read_len)
625 read_len = 1;
626
627 dw1 = SO_FUNCTION_ENABLE |
628 0 << SO_RENDER_STREAM_SELECT_SHIFT |
629 SO_STATISTICS_ENABLE |
630 buffer_mask << 8;
631
632 if (rasterizer_discard)
633 dw1 |= SO_RENDERING_DISABLE;
634
635 /* API_OPENGL */
636 if (true)
637 dw1 |= SO_REORDER_TRAILING;
638
639 dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
640 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
641 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
642 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
643 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
644 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
645 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
646 (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
647
648 ilo_cp_begin(cp, cmd_len);
649 ilo_cp_write(cp, cmd | (cmd_len - 2));
650 ilo_cp_write(cp, dw1);
651 ilo_cp_write(cp, dw2);
652 ilo_cp_end(cp);
653 }
654
/**
 * Emit 3DSTATE_SBE (fourteen dwords).  The thirteen payload dwords are
 * produced by ilo_gpe_gen6_fill_3dstate_sf_sbe().
 */
static void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct pipe_rasterizer_state *rasterizer,
                      const struct ilo_shader *fs,
                      const struct ilo_shader *last_sh,
                      struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x1f);
   const uint8_t num_dwords = 14;
   const uint32_t header = opcode | (num_dwords - 2);
   uint32_t payload[13];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
         fs, last_sh, payload, Elements(payload));

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, payload, 13);
   ilo_cp_end(cp);
}
676
677 static void
678 gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
679 const struct ilo_shader *fs,
680 int num_samplers, bool dual_blend,
681 struct ilo_cp *cp)
682 {
683 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
684 const uint8_t cmd_len = 8;
685 uint32_t dw2, dw4, dw5;
686 int max_threads;
687
688 ILO_GPE_VALID_GEN(dev, 7, 7);
689
690 /* see brwCreateContext() */
691 max_threads = (dev->gt == 2) ? 172 : 48;
692
693 if (!fs) {
694 ilo_cp_begin(cp, cmd_len);
695 ilo_cp_write(cp, cmd | (cmd_len - 2));
696 ilo_cp_write(cp, 0);
697 ilo_cp_write(cp, 0);
698 ilo_cp_write(cp, 0);
699 /* GPU hangs if none of the dispatch enable bits is set */
700 ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
701 GEN7_PS_8_DISPATCH_ENABLE);
702 ilo_cp_write(cp, 0);
703 ilo_cp_write(cp, 0);
704 ilo_cp_write(cp, 0);
705 ilo_cp_end(cp);
706
707 return;
708 }
709
710 dw2 = (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT |
711 0 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
712 if (false)
713 dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
714
715 dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
716 GEN7_PS_POSOFFSET_NONE;
717
718 if (false)
719 dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
720 if (fs->in.count)
721 dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
722 if (dual_blend)
723 dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
724
725 if (fs->dispatch_16)
726 dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
727 else
728 dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
729
730 dw5 = fs->in.start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
731 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
732 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
733
734 ilo_cp_begin(cp, cmd_len);
735 ilo_cp_write(cp, cmd | (cmd_len - 2));
736 ilo_cp_write(cp, fs->cache_offset);
737 ilo_cp_write(cp, dw2);
738 ilo_cp_write(cp, 0); /* scratch */
739 ilo_cp_write(cp, dw4);
740 ilo_cp_write(cp, dw5);
741 ilo_cp_write(cp, 0); /* kernel 1 */
742 ilo_cp_write(cp, 0); /* kernel 2 */
743 ilo_cp_end(cp);
744 }
745
/** Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP (sub-opcode 0x21). */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                  uint32_t sf_clip_viewport,
                                                  struct ilo_cp *cp)
{
   const int subop = 0x21;

   gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
}
753
/** Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC (sub-opcode 0x23). */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                             uint32_t cc_viewport,
                                             struct ilo_cp *cp)
{
   const int subop = 0x23;

   gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
}
761
/** Emit 3DSTATE_BLEND_STATE_POINTERS (sub-opcode 0x24). */
static void
gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                       uint32_t blend_state,
                                       struct ilo_cp *cp)
{
   const int subop = 0x24;

   gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
}
769
/** Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS (sub-opcode 0x25). */
static void
gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                               uint32_t depth_stencil_state,
                                               struct ilo_cp *cp)
{
   const int subop = 0x25;

   gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
}
777
/** Emit 3DSTATE_BINDING_TABLE_POINTERS_VS (sub-opcode 0x26). */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x26;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
785
/** Emit 3DSTATE_BINDING_TABLE_POINTERS_HS (sub-opcode 0x27). */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x27;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
793
/** Emit 3DSTATE_BINDING_TABLE_POINTERS_DS (sub-opcode 0x28). */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x28;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
801
/** Emit 3DSTATE_BINDING_TABLE_POINTERS_GS (sub-opcode 0x29). */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x29;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
809
/** Emit 3DSTATE_BINDING_TABLE_POINTERS_PS (sub-opcode 0x2a). */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2a;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
817
/** Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS (sub-opcode 0x2b). */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2b;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
825
/** Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS (sub-opcode 0x2c). */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2c;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
833
/** Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS (sub-opcode 0x2d). */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2d;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
841
/** Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS (sub-opcode 0x2e). */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2e;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
849
/** Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS (sub-opcode 0x2f). */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2f;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
857
858 static void
859 gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
860 int subop, int offset, int size,
861 int entry_size,
862 struct ilo_cp *cp)
863 {
864 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
865 const uint8_t cmd_len = 2;
866 const int row_size = 64; /* 512 bits */
867 int alloc_size, num_entries, min_entries, max_entries;
868
869 ILO_GPE_VALID_GEN(dev, 7, 7);
870
871 /* VS, HS, DS, and GS variants */
872 assert(subop >= 0x30 && subop <= 0x33);
873
874 /* in multiples of 8KB */
875 assert(offset % 8192 == 0);
876 offset /= 8192;
877
878 /* in multiple of 512-bit rows */
879 alloc_size = (entry_size + row_size - 1) / row_size;
880 if (!alloc_size)
881 alloc_size = 1;
882
883 /*
884 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
885 *
886 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
887 * cause performance to decrease due to banking in the URB. Element
888 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
889 */
890 if (subop == 0x30 && alloc_size == 5)
891 alloc_size = 6;
892
893 /* in multiples of 8 */
894 num_entries = (size / row_size / alloc_size) & ~7;
895
896 switch (subop) {
897 case 0x30: /* 3DSTATE_URB_VS */
898 min_entries = 32;
899 max_entries = (dev->gt == 2) ? 704 : 512;
900
901 assert(num_entries >= min_entries);
902 if (num_entries > max_entries)
903 num_entries = max_entries;
904 break;
905 case 0x31: /* 3DSTATE_URB_HS */
906 max_entries = (dev->gt == 2) ? 64 : 32;
907 if (num_entries > max_entries)
908 num_entries = max_entries;
909 break;
910 case 0x32: /* 3DSTATE_URB_DS */
911 if (num_entries)
912 assert(num_entries >= 138);
913 break;
914 case 0x33: /* 3DSTATE_URB_GS */
915 max_entries = (dev->gt == 2) ? 320 : 192;
916 if (num_entries > max_entries)
917 num_entries = max_entries;
918 break;
919 default:
920 break;
921 }
922
923 ilo_cp_begin(cp, cmd_len);
924 ilo_cp_write(cp, cmd | (cmd_len - 2));
925 ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
926 (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
927 num_entries);
928 ilo_cp_end(cp);
929 }
930
/** Emit 3DSTATE_URB_VS (sub-opcode 0x30). */
static void
gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x30;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
938
/** Emit 3DSTATE_URB_HS (sub-opcode 0x31). */
static void
gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x31;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
946
/** Emit 3DSTATE_URB_DS (sub-opcode 0x32). */
static void
gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x32;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
954
/** Emit 3DSTATE_URB_GS (sub-opcode 0x33). */
static void
gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x33;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
962
963 static void
964 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
965 int subop, int offset, int size,
966 struct ilo_cp *cp)
967 {
968 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
969 const uint8_t cmd_len = 2;
970 int end;
971
972 ILO_GPE_VALID_GEN(dev, 7, 7);
973
974 /* VS, HS, DS, GS, and PS variants */
975 assert(subop >= 0x12 && subop <= 0x16);
976
977 /*
978 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
979 *
980 * "(A table that says the maximum size of each constant buffer is
981 * 16KB")
982 *
983 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
984 *
985 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
986 * may not exceed the maximum value of the Constant Buffer Size."
987 *
988 * Thus, the valid range of buffer end is [0KB, 16KB].
989 */
990 end = (offset + size) / 1024;
991 if (end > 16) {
992 assert(!"invalid constant buffer end");
993 end = 16;
994 }
995
996 /* the valid range of buffer offset is [0KB, 15KB] */
997 offset = (offset + 1023) / 1024;
998 if (offset > 15) {
999 assert(!"invalid constant buffer offset");
1000 offset = 15;
1001 }
1002
1003 if (offset > end) {
1004 assert(!size);
1005 offset = end;
1006 }
1007
1008 /* the valid range of buffer size is [0KB, 15KB] */
1009 size = end - offset;
1010 if (size > 15) {
1011 assert(!"invalid constant buffer size");
1012 size = 15;
1013 }
1014
1015 ilo_cp_begin(cp, cmd_len);
1016 ilo_cp_write(cp, cmd | (cmd_len - 2));
1017 ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
1018 size);
1019 ilo_cp_end(cp);
1020 }
1021
/** Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS (sub-opcode 0x12). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x12;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1029
/** Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS (sub-opcode 0x13). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x13;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1037
/** Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS (sub-opcode 0x14). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x14;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1045
/** Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS (sub-opcode 0x15). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1053
/** Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS (sub-opcode 0x16). */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1061
1062 static void
1063 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
1064 const struct pipe_stream_output_info *so_info,
1065 const struct ilo_shader *sh,
1066 struct ilo_cp *cp)
1067 {
1068 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
1069 uint16_t cmd_len;
1070 int buffer_selects, num_entries, i;
1071 uint16_t so_decls[128];
1072
1073 ILO_GPE_VALID_GEN(dev, 7, 7);
1074
1075 buffer_selects = 0;
1076 num_entries = 0;
1077
1078 if (so_info) {
1079 int buffer_offsets[PIPE_MAX_SO_BUFFERS];
1080
1081 memset(buffer_offsets, 0, sizeof(buffer_offsets));
1082
1083 for (i = 0; i < so_info->num_outputs; i++) {
1084 unsigned decl, buf, attr, mask;
1085
1086 buf = so_info->output[i].output_buffer;
1087
1088 /* pad with holes */
1089 assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
1090 while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
1091 int num_dwords;
1092
1093 num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
1094 if (num_dwords > 4)
1095 num_dwords = 4;
1096
1097 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
1098 SO_DECL_HOLE_FLAG |
1099 ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
1100
1101 so_decls[num_entries++] = decl;
1102 buffer_offsets[buf] += num_dwords;
1103 }
1104
1105 /* figure out which attribute is sourced */
1106 for (attr = 0; attr < sh->out.count; attr++) {
1107 const int idx = sh->out.register_indices[attr];
1108 if (idx == so_info->output[i].register_index)
1109 break;
1110 }
1111
1112 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
1113
1114 if (attr < sh->out.count) {
1115 mask = ((1 << so_info->output[i].num_components) - 1) <<
1116 so_info->output[i].start_component;
1117
1118 /* PSIZE is at W channel */
1119 if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
1120 assert(mask == 0x1);
1121 mask = (mask << 3) & 0xf;
1122 }
1123
1124 decl |= attr << SO_DECL_REGISTER_INDEX_SHIFT |
1125 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1126 }
1127 else {
1128 assert(!"stream output an undefined register");
1129 mask = (1 << so_info->output[i].num_components) - 1;
1130 decl |= SO_DECL_HOLE_FLAG |
1131 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1132 }
1133
1134 so_decls[num_entries++] = decl;
1135 buffer_selects |= 1 << buf;
1136 buffer_offsets[buf] += so_info->output[i].num_components;
1137 }
1138 }
1139
1140 /*
1141 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
1142 *
1143 * "Errata: All 128 decls for all four streams must be included
1144 * whenever this command is issued. The "Num Entries [n]" fields still
1145 * contain the actual numbers of valid decls."
1146 *
1147 * Also note that "DWord Length" has 9 bits for this command, and the type
1148 * of cmd_len is thus uint16_t.
1149 */
1150 cmd_len = 2 * 128 + 3;
1151
1152 ilo_cp_begin(cp, cmd_len);
1153 ilo_cp_write(cp, cmd | (cmd_len - 2));
1154 ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
1155 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
1156 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
1157 buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
1158 ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
1159 0 << SO_NUM_ENTRIES_2_SHIFT |
1160 0 << SO_NUM_ENTRIES_1_SHIFT |
1161 num_entries << SO_NUM_ENTRIES_0_SHIFT);
1162
1163 for (i = 0; i < num_entries; i++) {
1164 ilo_cp_write(cp, so_decls[i]);
1165 ilo_cp_write(cp, 0);
1166 }
1167 for (; i < 128; i++) {
1168 ilo_cp_write(cp, 0);
1169 ilo_cp_write(cp, 0);
1170 }
1171
1172 ilo_cp_end(cp);
1173 }
1174
1175 static void
1176 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
1177 int index, int base, int stride,
1178 const struct pipe_stream_output_target *so_target,
1179 struct ilo_cp *cp)
1180 {
1181 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
1182 const uint8_t cmd_len = 4;
1183 struct ilo_buffer *buf;
1184 int end;
1185
1186 ILO_GPE_VALID_GEN(dev, 7, 7);
1187
1188 if (!so_target || !so_target->buffer) {
1189 ilo_cp_begin(cp, cmd_len);
1190 ilo_cp_write(cp, cmd | (cmd_len - 2));
1191 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
1192 ilo_cp_write(cp, 0);
1193 ilo_cp_write(cp, 0);
1194 ilo_cp_end(cp);
1195 return;
1196 }
1197
1198 buf = ilo_buffer(so_target->buffer);
1199
1200 /* DWord-aligned */
1201 assert(stride % 4 == 0 && base % 4 == 0);
1202 assert(so_target->buffer_offset % 4 == 0);
1203
1204 stride &= ~3;
1205 base = (base + so_target->buffer_offset) & ~3;
1206 end = (base + so_target->buffer_size) & ~3;
1207
1208 ilo_cp_begin(cp, cmd_len);
1209 ilo_cp_write(cp, cmd | (cmd_len - 2));
1210 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
1211 stride);
1212 ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1213 ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1214 ilo_cp_end(cp);
1215 }
1216
1217 static void
1218 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1219 const struct pipe_draw_info *info,
1220 bool rectlist,
1221 struct ilo_cp *cp)
1222 {
1223 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1224 const uint8_t cmd_len = 7;
1225 const int prim = (rectlist) ?
1226 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1227 const int vb_access = (info->indexed) ?
1228 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1229 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1230
1231 ILO_GPE_VALID_GEN(dev, 7, 7);
1232
1233 ilo_cp_begin(cp, cmd_len);
1234 ilo_cp_write(cp, cmd | (cmd_len - 2));
1235 ilo_cp_write(cp, vb_access | prim);
1236 ilo_cp_write(cp, info->count);
1237 ilo_cp_write(cp, info->start);
1238 ilo_cp_write(cp, info->instance_count);
1239 ilo_cp_write(cp, info->start_instance);
1240 ilo_cp_write(cp, info->index_bias);
1241 ilo_cp_end(cp);
1242 }
1243
1244 static uint32_t
1245 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1246 const struct ilo_viewport_cso *viewports,
1247 unsigned num_viewports,
1248 struct ilo_cp *cp)
1249 {
1250 const int state_align = 64 / 4;
1251 const int state_len = 16 * num_viewports;
1252 uint32_t state_offset, *dw;
1253 unsigned i;
1254
1255 ILO_GPE_VALID_GEN(dev, 7, 7);
1256
1257 /*
1258 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1259 *
1260 * "The viewport-specific state used by both the SF and CL units
1261 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1262 * of which contains the DWords described below. The start of each
1263 * element is spaced 16 DWords apart. The location of first element of
1264 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1265 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1266 */
1267 assert(num_viewports && num_viewports <= 16);
1268
1269 dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
1270 state_len, state_align, &state_offset);
1271
1272 for (i = 0; i < num_viewports; i++) {
1273 const struct ilo_viewport_cso *vp = &viewports[i];
1274
1275 dw[0] = fui(vp->m00);
1276 dw[1] = fui(vp->m11);
1277 dw[2] = fui(vp->m22);
1278 dw[3] = fui(vp->m30);
1279 dw[4] = fui(vp->m31);
1280 dw[5] = fui(vp->m32);
1281 dw[6] = 0;
1282 dw[7] = 0;
1283 dw[8] = fui(vp->min_gbx);
1284 dw[9] = fui(vp->max_gbx);
1285 dw[10] = fui(vp->min_gby);
1286 dw[11] = fui(vp->max_gby);
1287 dw[12] = 0;
1288 dw[13] = 0;
1289 dw[14] = 0;
1290 dw[15] = 0;
1291
1292 dw += 16;
1293 }
1294
1295 return state_offset;
1296 }
1297
1298 void
1299 ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
1300 unsigned width, unsigned height,
1301 unsigned depth, unsigned level,
1302 struct ilo_view_surface *surf)
1303 {
1304 uint32_t *dw;
1305
1306 ILO_GPE_VALID_GEN(dev, 7, 7);
1307
1308 /*
1309 * From the Ivy Bridge PRM, volume 4 part 1, page 62:
1310 *
1311 * "A null surface is used in instances where an actual surface is not
1312 * bound. When a write message is generated to a null surface, no
1313 * actual surface is written to. When a read message (including any
1314 * sampling engine message) is generated to a null surface, the result
1315 * is all zeros. Note that a null surface type is allowed to be used
1316 * with all messages, even if it is not specificially indicated as
1317 * supported. All of the remaining fields in surface state are ignored
1318 * for null surfaces, with the following exceptions:
1319 *
1320 * * Width, Height, Depth, LOD, and Render Target View Extent fields
1321 * must match the depth buffer's corresponding state for all render
1322 * target surfaces, including null.
1323 * * All sampling engine and data port messages support null surfaces
1324 * with the above behavior, even if not mentioned as specifically
1325 * supported, except for the following:
1326 * * Data Port Media Block Read/Write messages.
1327 * * The Surface Type of a surface used as a render target (accessed
1328 * via the Data Port's Render Target Write message) must be the same
1329 * as the Surface Type of all other render targets and of the depth
1330 * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
1331 * buffer or render targets are SURFTYPE_NULL."
1332 *
1333 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
1334 *
1335 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1336 * true"
1337 */
1338
1339 STATIC_ASSERT(Elements(surf->payload) >= 8);
1340 dw = surf->payload;
1341
1342 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1343 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
1344 BRW_SURFACE_TILED << 13;
1345
1346 dw[1] = 0;
1347
1348 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1349 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1350
1351 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
1352
1353 dw[4] = 0;
1354 dw[5] = level;
1355
1356 dw[6] = 0;
1357 dw[7] = 0;
1358
1359 surf->bo = NULL;
1360 }
1361
1362 void
1363 ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
1364 const struct ilo_buffer *buf,
1365 unsigned offset, unsigned size,
1366 unsigned struct_size,
1367 enum pipe_format elem_format,
1368 bool is_rt, bool render_cache_rw,
1369 struct ilo_view_surface *surf)
1370 {
1371 const bool typed = (elem_format != PIPE_FORMAT_NONE);
1372 const bool structured = (!typed && struct_size > 1);
1373 const int elem_size = (typed) ?
1374 util_format_get_blocksize(elem_format) : 1;
1375 int width, height, depth, pitch;
1376 int surface_type, surface_format, num_entries;
1377 uint32_t *dw;
1378
1379 ILO_GPE_VALID_GEN(dev, 7, 7);
1380
1381 surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
1382
1383 surface_format = (typed) ?
1384 ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
1385
1386 num_entries = size / struct_size;
1387 /* see if there is enough space to fit another element */
1388 if (size % struct_size >= elem_size && !structured)
1389 num_entries++;
1390
1391 /*
1392 * From the Ivy Bridge PRM, volume 4 part 1, page 67:
1393 *
1394 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1395 * Address) specifies the base address of first element of the
1396 * surface. The surface is interpreted as a simple array of that
1397 * single element type. The address must be naturally-aligned to the
1398 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1399 * must be 16-byte aligned)
1400 *
1401 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1402 * the base address of the first element of the surface, computed in
1403 * software by adding the surface base address to the byte offset of
1404 * the element in the buffer."
1405 */
1406 if (is_rt)
1407 assert(offset % elem_size == 0);
1408
1409 /*
1410 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1411 *
1412 * "For typed buffer and structured buffer surfaces, the number of
1413 * entries in the buffer ranges from 1 to 2^27. For raw buffer
1414 * surfaces, the number of entries in the buffer is the number of
1415 * bytes which can range from 1 to 2^30."
1416 */
1417 assert(num_entries >= 1 &&
1418 num_entries <= 1 << ((typed || structured) ? 27 : 30));
1419
1420 /*
1421 * From the Ivy Bridge PRM, volume 4 part 1, page 69:
1422 *
1423 * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
1424 * 11 if the Surface Format is RAW (the size of the buffer must be a
1425 * multiple of 4 bytes)."
1426 *
1427 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1428 *
1429 * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
1430 * field (Surface Pitch) indicates the size of the structure."
1431 *
1432 * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
1433 * must be a multiple of 4 bytes."
1434 */
1435 if (structured)
1436 assert(struct_size % 4 == 0);
1437 else if (!typed)
1438 assert(num_entries % 4 == 0);
1439
1440 pitch = struct_size;
1441
1442 pitch--;
1443 num_entries--;
1444 /* bits [6:0] */
1445 width = (num_entries & 0x0000007f);
1446 /* bits [20:7] */
1447 height = (num_entries & 0x001fff80) >> 7;
1448 /* bits [30:21] */
1449 depth = (num_entries & 0x7fe00000) >> 21;
1450 /* limit to [26:21] */
1451 if (typed || structured)
1452 depth &= 0x3f;
1453
1454 STATIC_ASSERT(Elements(surf->payload) >= 8);
1455 dw = surf->payload;
1456
1457 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1458 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1459 if (render_cache_rw)
1460 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1461
1462 dw[1] = offset;
1463
1464 dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
1465 SET_FIELD(width, GEN7_SURFACE_WIDTH);
1466
1467 dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
1468 pitch;
1469
1470 dw[4] = 0;
1471 dw[5] = 0;
1472
1473 dw[6] = 0;
1474 dw[7] = 0;
1475
1476 /* do not increment reference count */
1477 surf->bo = buf->bo;
1478 }
1479
1480 void
1481 ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
1482 const struct ilo_texture *tex,
1483 enum pipe_format format,
1484 unsigned first_level,
1485 unsigned num_levels,
1486 unsigned first_layer,
1487 unsigned num_layers,
1488 bool is_rt, bool render_cache_rw,
1489 struct ilo_view_surface *surf)
1490 {
1491 int surface_type, surface_format;
1492 int width, height, depth, pitch, lod;
1493 unsigned layer_offset, x_offset, y_offset;
1494 uint32_t *dw;
1495
1496 ILO_GPE_VALID_GEN(dev, 7, 7);
1497
1498 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
1499 assert(surface_type != BRW_SURFACE_BUFFER);
1500
1501 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
1502 format = PIPE_FORMAT_Z32_FLOAT;
1503
1504 if (is_rt)
1505 surface_format = ilo_translate_render_format(format);
1506 else
1507 surface_format = ilo_translate_texture_format(format);
1508 assert(surface_format >= 0);
1509
1510 width = tex->base.width0;
1511 height = tex->base.height0;
1512 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1513 tex->base.depth0 : num_layers;
1514 pitch = tex->bo_stride;
1515
1516 if (surface_type == BRW_SURFACE_CUBE) {
1517 /*
1518 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1519 *
1520 * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
1521 * this field is [0,340], indicating the number of cube array
1522 * elements (equal to the number of underlying 2D array elements
1523 * divided by 6). For other surfaces, this field must be zero."
1524 *
1525 * When is_rt is true, we treat the texture as a 2D one to avoid the
1526 * restriction.
1527 */
1528 if (is_rt) {
1529 surface_type = BRW_SURFACE_2D;
1530 }
1531 else {
1532 assert(num_layers % 6 == 0);
1533 depth = num_layers / 6;
1534 }
1535 }
1536
1537 /* sanity check the size */
1538 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1539 assert(first_layer < 2048 && num_layers <= 2048);
1540 switch (surface_type) {
1541 case BRW_SURFACE_1D:
1542 assert(width <= 16384 && height == 1 && depth <= 2048);
1543 break;
1544 case BRW_SURFACE_2D:
1545 assert(width <= 16384 && height <= 16384 && depth <= 2048);
1546 break;
1547 case BRW_SURFACE_3D:
1548 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1549 if (!is_rt)
1550 assert(first_layer == 0);
1551 break;
1552 case BRW_SURFACE_CUBE:
1553 assert(width <= 16384 && height <= 16384 && depth <= 86);
1554 assert(width == height);
1555 if (is_rt)
1556 assert(first_layer == 0);
1557 break;
1558 default:
1559 assert(!"unexpected surface type");
1560 break;
1561 }
1562
1563 if (is_rt) {
1564 /*
1565 * Compute the offset to the layer manually.
1566 *
1567 * For rendering, the hardware requires LOD to be the same for all
1568 * render targets and the depth buffer. We need to compute the offset
1569 * to the layer manually and always set LOD to 0.
1570 */
1571 if (true) {
1572 /* we lose the capability for layered rendering */
1573 assert(num_layers == 1);
1574
1575 layer_offset = ilo_texture_get_slice_offset(tex,
1576 first_level, first_layer, &x_offset, &y_offset);
1577
1578 assert(x_offset % 4 == 0);
1579 assert(y_offset % 2 == 0);
1580 x_offset /= 4;
1581 y_offset /= 2;
1582
1583 /* derive the size for the LOD */
1584 width = u_minify(width, first_level);
1585 height = u_minify(height, first_level);
1586 if (surface_type == BRW_SURFACE_3D)
1587 depth = u_minify(depth, first_level);
1588 else
1589 depth = 1;
1590
1591 first_level = 0;
1592 first_layer = 0;
1593 lod = 0;
1594 }
1595 else {
1596 layer_offset = 0;
1597 x_offset = 0;
1598 y_offset = 0;
1599 }
1600
1601 assert(num_levels == 1);
1602 lod = first_level;
1603 }
1604 else {
1605 layer_offset = 0;
1606 x_offset = 0;
1607 y_offset = 0;
1608
1609 lod = num_levels - 1;
1610 }
1611
1612 /*
1613 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1614 *
1615 * "The Base Address for linear render target surfaces and surfaces
1616 * accessed with the typed surface read/write data port messages must
1617 * be element-size aligned, for non-YUV surface formats, or a multiple
1618 * of 2 element-sizes for YUV surface formats. Other linear surfaces
1619 * have no alignment requirements (byte alignment is sufficient)."
1620 *
1621 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1622 *
1623 * "For linear render target surfaces and surfaces accessed with the
1624 * typed data port messages, the pitch must be a multiple of the
1625 * element size for non-YUV surface formats. Pitch must be a multiple
1626 * of 2 * element size for YUV surface formats. For linear surfaces
1627 * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
1628 * of 4 bytes.For other linear surfaces, the pitch can be any multiple
1629 * of bytes."
1630 *
1631 * From the Ivy Bridge PRM, volume 4 part 1, page 74:
1632 *
1633 * "For linear surfaces, this field (X Offset) must be zero."
1634 */
1635 if (tex->tiling == INTEL_TILING_NONE) {
1636 if (is_rt) {
1637 const int elem_size = util_format_get_blocksize(format);
1638 assert(layer_offset % elem_size == 0);
1639 assert(pitch % elem_size == 0);
1640 }
1641
1642 assert(!x_offset);
1643 }
1644
1645 STATIC_ASSERT(Elements(surf->payload) >= 8);
1646 dw = surf->payload;
1647
1648 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1649 surface_format << BRW_SURFACE_FORMAT_SHIFT |
1650 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;
1651
1652 /*
1653 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
1654 *
1655 * "If this field (Surface Array) is enabled, the Surface Type must be
1656 * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
1657 * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
1658 * SURFTYPE_CUBE, the Depth field must be set to zero."
1659 *
1660 * For non-3D sampler surfaces, resinfo (the sampler message) always
1661 * returns zero for the number of layers when this field is not set.
1662 */
1663 if (surface_type != BRW_SURFACE_3D) {
1664 if (util_resource_is_array_texture(&tex->base))
1665 dw[0] |= GEN7_SURFACE_IS_ARRAY;
1666 else
1667 assert(depth == 1);
1668 }
1669
1670 if (tex->valign_4)
1671 dw[0] |= GEN7_SURFACE_VALIGN_4;
1672
1673 if (tex->halign_8)
1674 dw[0] |= GEN7_SURFACE_HALIGN_8;
1675
1676 if (tex->array_spacing_full)
1677 dw[0] |= GEN7_SURFACE_ARYSPC_FULL;
1678 else
1679 dw[0] |= GEN7_SURFACE_ARYSPC_LOD0;
1680
1681 if (render_cache_rw)
1682 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1683
1684 if (surface_type == BRW_SURFACE_CUBE && !is_rt)
1685 dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
1686
1687 dw[1] = layer_offset;
1688
1689 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1690 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1691
1692 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
1693 (pitch - 1);
1694
1695 dw[4] = first_layer << 18 |
1696 (num_layers - 1) << 7;
1697
1698 /*
1699 * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
1700 * means the samples are interleaved. The layouts are the same when the
1701 * number of samples is 1.
1702 */
1703 if (tex->interleaved && tex->base.nr_samples > 1) {
1704 assert(!is_rt);
1705 dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
1706 }
1707 else {
1708 dw[4] |= GEN7_SURFACE_MSFMT_MSS;
1709 }
1710
1711 if (tex->base.nr_samples > 4)
1712 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
1713 else if (tex->base.nr_samples > 2)
1714 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
1715 else
1716 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;
1717
1718 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
1719 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
1720 SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
1721 lod;
1722
1723 dw[6] = 0;
1724 dw[7] = 0;
1725
1726 /* do not increment reference count */
1727 surf->bo = tex->bo;
1728 }
1729
1730 static int
1731 gen7_estimate_command_size(const struct ilo_dev_info *dev,
1732 enum ilo_gpe_gen7_command cmd,
1733 int arg)
1734 {
1735 static const struct {
1736 int header;
1737 int body;
1738 } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
1739 [ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 },
1740 [ILO_GPE_GEN7_STATE_SIP] = { 0, 2 },
1741 [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 },
1742 [ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 },
1743 [ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 },
1744 [ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 },
1745 [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
1746 [ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 },
1747 [ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 },
1748 [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 },
1749 [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
1750 [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
1751 [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
1752 [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
1753 [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
1754 [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 },
1755 [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 },
1756 [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
1757 [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 },
1758 [ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 },
1759 [ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 },
1760 [ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 },
1761 [ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 },
1762 [ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 },
1763 [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 },
1764 [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 },
1765 [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 },
1766 [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 },
1767 [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 },
1768 [ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 },
1769 [ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 },
1770 [ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 },
1771 [ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 },
1772 [ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 },
1773 [ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 },
1774 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 },
1775 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 },
1776 [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 },
1777 [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 },
1778 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 },
1779 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 },
1780 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 },
1781 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 },
1782 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 },
1783 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 },
1784 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 },
1785 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 },
1786 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 },
1787 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 },
1788 [ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 },
1789 [ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 },
1790 [ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 },
1791 [ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 },
1792 [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
1793 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
1794 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, },
1795 [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 },
1796 [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
1797 [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 },
1798 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 },
1799 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 },
1800 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 },
1801 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 },
1802 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 },
1803 [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 },
1804 [ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 },
1805 [ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 },
1806 [ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 },
1807 };
1808 const int header = gen7_command_size_table[cmd].header;
1809 const int body = gen7_command_size_table[cmd].body;
1810 const int count = arg;
1811
1812 ILO_GPE_VALID_GEN(dev, 7, 7);
1813 assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
1814
1815 return (likely(count)) ? header + body * count : 0;
1816 }
1817
1818 static int
1819 gen7_estimate_state_size(const struct ilo_dev_info *dev,
1820 enum ilo_gpe_gen7_state state,
1821 int arg)
1822 {
1823 static const struct {
1824 int alignment;
1825 int body;
1826 bool is_array;
1827 } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = {
1828 [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
1829 [ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true },
1830 [ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true },
1831 [ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false },
1832 [ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true },
1833 [ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false },
1834 [ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true },
1835 [ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true },
1836 [ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false },
1837 [ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true },
1838 [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false },
1839 [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
1840 };
1841 const int alignment = gen7_state_size_table[state].alignment;
1842 const int body = gen7_state_size_table[state].body;
1843 const bool is_array = gen7_state_size_table[state].is_array;
1844 const int count = arg;
1845 int estimate;
1846
1847 ILO_GPE_VALID_GEN(dev, 7, 7);
1848 assert(state < ILO_GPE_GEN7_STATE_COUNT);
1849
1850 if (likely(count)) {
1851 if (is_array) {
1852 estimate = (alignment - 1) + body * count;
1853 }
1854 else {
1855 estimate = (alignment - 1) + body;
1856 /* all states are aligned */
1857 if (count > 1)
1858 estimate += util_align_npot(body, alignment) * (count - 1);
1859 }
1860 }
1861 else {
1862 estimate = 0;
1863 }
1864
1865 return estimate;
1866 }
1867
1868 static void
1869 gen7_init(struct ilo_gpe_gen7 *gen7)
1870 {
1871 const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
1872
1873 gen7->estimate_command_size = gen7_estimate_command_size;
1874 gen7->estimate_state_size = gen7_estimate_state_size;
1875
1876 #define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
1877 #define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name
1878 GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
1879 GEN7_USE(gen7, STATE_SIP, gen6);
1880 GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
1881 GEN7_USE(gen7, PIPELINE_SELECT, gen6);
1882 GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
1883 GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
1884 GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
1885 GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
1886 GEN7_SET(gen7, GPGPU_WALKER);
1887 GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
1888 GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
1889 GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
1890 GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
1891 GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
1892 GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
1893 GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
1894 GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
1895 GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
1896 GEN7_USE(gen7, 3DSTATE_VS, gen6);
1897 GEN7_SET(gen7, 3DSTATE_GS);
1898 GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
1899 GEN7_SET(gen7, 3DSTATE_SF);
1900 GEN7_SET(gen7, 3DSTATE_WM);
1901 GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
1902 GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
1903 GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
1904 GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
1905 GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
1906 GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
1907 GEN7_SET(gen7, 3DSTATE_HS);
1908 GEN7_SET(gen7, 3DSTATE_TE);
1909 GEN7_SET(gen7, 3DSTATE_DS);
1910 GEN7_SET(gen7, 3DSTATE_STREAMOUT);
1911 GEN7_SET(gen7, 3DSTATE_SBE);
1912 GEN7_SET(gen7, 3DSTATE_PS);
1913 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1914 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1915 GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
1916 GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
1917 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
1918 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
1919 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
1920 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
1921 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
1922 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
1923 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
1924 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
1925 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
1926 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
1927 GEN7_SET(gen7, 3DSTATE_URB_VS);
1928 GEN7_SET(gen7, 3DSTATE_URB_HS);
1929 GEN7_SET(gen7, 3DSTATE_URB_DS);
1930 GEN7_SET(gen7, 3DSTATE_URB_GS);
1931 GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
1932 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
1933 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
1934 GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
1935 GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
1936 GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
1937 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1938 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1939 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1940 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1941 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1942 GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
1943 GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
1944 GEN7_USE(gen7, PIPE_CONTROL, gen6);
1945 GEN7_SET(gen7, 3DPRIMITIVE);
1946 GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
1947 GEN7_SET(gen7, SF_CLIP_VIEWPORT);
1948 GEN7_USE(gen7, CC_VIEWPORT, gen6);
1949 GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
1950 GEN7_USE(gen7, BLEND_STATE, gen6);
1951 GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
1952 GEN7_USE(gen7, SCISSOR_RECT, gen6);
1953 GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
1954 GEN7_USE(gen7, SURFACE_STATE, gen6);
1955 GEN7_USE(gen7, SAMPLER_STATE, gen6);
1956 GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
1957 GEN7_USE(gen7, push_constant_buffer, gen6);
1958 #undef GEN7_USE
1959 #undef GEN7_SET
1960 }
1961
1962 static struct ilo_gpe_gen7 gen7_gpe;
1963
1964 const struct ilo_gpe_gen7 *
1965 ilo_gpe_gen7_get(void)
1966 {
1967 if (!gen7_gpe.estimate_command_size)
1968 gen7_init(&gen7_gpe);
1969
1970 return &gen7_gpe;
1971 }