f374473b29103e2fc048c4848088ecb3dba356da
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_resource.h"
29 #include "brw_defines.h"
30 #include "intel_reg.h"
31
32 #include "shader/ilo_shader_internal.h"
33 #include "ilo_cp.h"
34 #include "ilo_format.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_gpe_gen7.h"
38
/**
 * Placeholder for the GPGPU_WALKER command.
 *
 * The command is not implemented: calling this function trips an assertion
 * when assertions are enabled and emits nothing.
 */
static void
gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
{
   /* neither argument is consulted by this stub */
   (void) dev;
   (void) cp;

   assert(!"GPGPU_WALKER unsupported");
}
45
/**
 * Emit 3DSTATE_CLEAR_PARAMS.
 *
 * The command is three dwords long: the header, the clear value supplied by
 * the caller, and a trailing dword that is always 1.
 *
 * \param dev        device info; must describe a Gen7 device
 * \param clear_val  value written to the second dword
 * \param cp         command parser that receives the dwords
 */
static void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x04) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, clear_val);
   /* NOTE(review): presumably the "clear value valid" flag -- confirm */
   ilo_cp_write(cp, 1);
   ilo_cp_end(cp);
}
62
/**
 * Shared helper for the two-dword 3DSTATE_*_POINTERS style commands.
 *
 * Writes a header built from \p subop followed by \p pointer.
 *
 * \param dev      device info; must describe a Gen7 device
 * \param subop    sub-opcode placed in the command header
 * \param pointer  the single payload dword
 * \param cp       command parser that receives the dwords
 */
static void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
78
/**
 * Emit 3DSTATE_CC_STATE_POINTERS: forwards \p color_calc_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x0e.
 */
static void
gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const int subop = 0x0e;

   gen7_emit_3dstate_pointer(dev, subop, color_calc_state, cp);
}
86
87 void
88 ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
89 const struct ilo_shader_state *gs,
90 struct ilo_shader_cso *cso)
91 {
92 int start_grf, vue_read_len, max_threads;
93 uint32_t dw2, dw4, dw5;
94
95 ILO_GPE_VALID_GEN(dev, 7, 7);
96
97 start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
98 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
99
100 /* in pairs */
101 vue_read_len = (vue_read_len + 1) / 2;
102
103 switch (dev->gen) {
104 case ILO_GEN(7):
105 max_threads = (dev->gt == 2) ? 128 : 36;
106 break;
107 default:
108 max_threads = 1;
109 break;
110 }
111
112 dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
113
114 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
115 GEN7_GS_INCLUDE_VERTEX_HANDLES |
116 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
117 start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
118
119 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
120 GEN6_GS_STATISTICS_ENABLE |
121 GEN6_GS_ENABLE;
122
123 STATIC_ASSERT(Elements(cso->payload) >= 3);
124 cso->payload[0] = dw2;
125 cso->payload[1] = dw4;
126 cso->payload[2] = dw5;
127 }
128
129 static void
130 gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
131 const struct ilo_shader_state *gs,
132 int num_samplers,
133 struct ilo_cp *cp)
134 {
135 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
136 const uint8_t cmd_len = 7;
137 const struct ilo_shader_cso *cso;
138 uint32_t dw2, dw4, dw5;
139
140 ILO_GPE_VALID_GEN(dev, 7, 7);
141
142 if (!gs) {
143 ilo_cp_begin(cp, cmd_len);
144 ilo_cp_write(cp, cmd | (cmd_len - 2));
145 ilo_cp_write(cp, 0);
146 ilo_cp_write(cp, 0);
147 ilo_cp_write(cp, 0);
148 ilo_cp_write(cp, 0);
149 ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
150 ilo_cp_write(cp, 0);
151 ilo_cp_end(cp);
152 return;
153 }
154
155 cso = ilo_shader_get_kernel_cso(gs);
156 dw2 = cso->payload[0];
157 dw4 = cso->payload[1];
158 dw5 = cso->payload[2];
159
160 dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
161
162 ilo_cp_begin(cp, cmd_len);
163 ilo_cp_write(cp, cmd | (cmd_len - 2));
164 ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
165 ilo_cp_write(cp, dw2);
166 ilo_cp_write(cp, 0); /* scratch */
167 ilo_cp_write(cp, dw4);
168 ilo_cp_write(cp, dw5);
169 ilo_cp_write(cp, 0);
170 ilo_cp_end(cp);
171 }
172
173 static void
174 gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
175 const struct ilo_rasterizer_state *rasterizer,
176 const struct pipe_surface *zs_surf,
177 struct ilo_cp *cp)
178 {
179 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
180 const uint8_t cmd_len = 7;
181 const int num_samples = 1;
182 uint32_t payload[6];
183
184 ILO_GPE_VALID_GEN(dev, 7, 7);
185
186 ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
187 &rasterizer->sf, num_samples,
188 (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
189 payload, Elements(payload));
190
191 ilo_cp_begin(cp, cmd_len);
192 ilo_cp_write(cp, cmd | (cmd_len - 2));
193 ilo_cp_write_multi(cp, payload, 6);
194 ilo_cp_end(cp);
195 }
196
197 static void
198 gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
199 const struct ilo_shader *fs,
200 const struct pipe_rasterizer_state *rasterizer,
201 bool cc_may_kill,
202 struct ilo_cp *cp)
203 {
204 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
205 const uint8_t cmd_len = 3;
206 const int num_samples = 1;
207 uint32_t dw1, dw2;
208
209 ILO_GPE_VALID_GEN(dev, 7, 7);
210
211 dw1 = GEN7_WM_STATISTICS_ENABLE |
212 GEN7_WM_LINE_AA_WIDTH_2_0;
213
214 if (false) {
215 dw1 |= GEN7_WM_DEPTH_CLEAR;
216 dw1 |= GEN7_WM_DEPTH_RESOLVE;
217 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
218 }
219
220 if (fs) {
221 /*
222 * Set this bit if
223 *
224 * a) fs writes colors and color is not masked, or
225 * b) fs writes depth, or
226 * c) fs or cc kills
227 */
228 dw1 |= GEN7_WM_DISPATCH_ENABLE;
229
230 /*
231 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
232 *
233 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
234 * the PS kernel or color calculator has the ability to kill
235 * (discard) pixels or samples, other than due to depth or stencil
236 * testing. This bit is required to be ENABLED in the following
237 * situations:
238 *
239 * - The API pixel shader program contains "killpix" or "discard"
240 * instructions, or other code in the pixel shader kernel that
241 * can cause the final pixel mask to differ from the pixel mask
242 * received on dispatch.
243 *
244 * - A sampler with chroma key enabled with kill pixel mode is used
245 * by the pixel shader.
246 *
247 * - Any render target has Alpha Test Enable or AlphaToCoverage
248 * Enable enabled.
249 *
250 * - The pixel shader kernel generates and outputs oMask.
251 *
252 * Note: As ClipDistance clipping is fully supported in hardware
253 * and therefore not via PS instructions, there should be no need
254 * to ENABLE this bit due to ClipDistance clipping."
255 */
256 if (fs->has_kill || cc_may_kill)
257 dw1 |= GEN7_WM_KILL_ENABLE;
258
259 if (fs->out.has_pos)
260 dw1 |= GEN7_WM_PSCDEPTH_ON;
261 if (fs->in.has_pos)
262 dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
263
264 dw1 |= fs->in.barycentric_interpolation_mode <<
265 GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
266 }
267 else if (cc_may_kill) {
268 dw1 |= GEN7_WM_DISPATCH_ENABLE |
269 GEN7_WM_KILL_ENABLE;
270 }
271
272 dw1 |= GEN7_WM_POSITION_ZW_PIXEL;
273
274 /* same value as in 3DSTATE_SF */
275 if (rasterizer->line_smooth)
276 dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
277
278 if (rasterizer->poly_stipple_enable)
279 dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
280 if (rasterizer->line_stipple_enable)
281 dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
282
283 if (rasterizer->bottom_edge_rule)
284 dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
285
286 if (num_samples > 1) {
287 if (rasterizer->multisample)
288 dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
289 else
290 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
291
292 dw2 = GEN7_WM_MSDISPMODE_PERPIXEL;
293 }
294 else {
295 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
296
297 dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
298 }
299
300 ilo_cp_begin(cp, cmd_len);
301 ilo_cp_write(cp, cmd | (cmd_len - 2));
302 ilo_cp_write(cp, dw1);
303 ilo_cp_write(cp, dw2);
304 ilo_cp_end(cp);
305 }
306
/**
 * Shared helper for the 3DSTATE_CONSTANT_{VS,HS,DS,GS,PS} commands
 * (seven dwords).
 *
 * Up to four constant buffers are programmed.  The first two payload dwords
 * pack the read lengths (16 bits each, in 256-bit units); the next four hold
 * the buffer values from \p bufs.
 *
 * \param dev       device info; must describe a Gen7 device
 * \param subop     sub-opcode; 0x15..0x1a excluding 0x18
 * \param bufs      per-buffer dwords; each must be a multiple of 32
 * \param sizes     per-buffer sizes in bytes; zero disables a buffer
 * \param num_bufs  number of valid entries in \p bufs / \p sizes (at most 4)
 * \param cp        command parser that receives the dwords
 */
static void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint8_t cmd_len = 7;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);
   uint32_t dw[6] = { 0, 0, 0, 0, 0, 0 };
   int read_len_sum = 0;
   int slot;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
    *
    *     "Constant buffers must be enabled in order from Constant Buffer 0
    *      to Constant Buffer 3 within this command.  For example, it is
    *      not allowed to enable Constant Buffer 1 by programming a
    *      non-zero value in the VS Constant Buffer 1 Read Length without a
    *      non-zero value in VS Constant Buffer 0 Read Length."
    *
    * Hence only the leading run of non-empty buffers is enabled.
    */
   for (slot = 0; slot < 4 && slot < num_bufs && sizes[slot]; slot++) {
      /* read lengths are in 256-bit units */
      const int read_len = (sizes[slot] + 31) / 32;

      /* the lower 5 bits are used for memory object control state */
      assert(bufs[slot] % 32 == 0);

      /* even slots use the low half of the dword, odd slots the high half */
      dw[slot / 2] |= read_len << ((slot % 2) ? 16 : 0);
      dw[2 + slot] = bufs[slot];

      read_len_sum += read_len;
   }

   /* every slot after the leading run must be disabled */
   for (; slot < 4; slot++) {
      assert(slot >= num_bufs || !sizes[slot]);
      dw[2 + slot] = 0;
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(read_len_sum <= 64);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, dw, 6);
   ilo_cp_end(cp);
}
374
/**
 * Emit 3DSTATE_CONSTANT_VS: forwards to gen7_emit_3dstate_constant() with
 * sub-opcode 0x15.
 */
static void
gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
383
/**
 * Emit 3DSTATE_CONSTANT_GS: forwards to gen7_emit_3dstate_constant() with
 * sub-opcode 0x16.
 */
static void
gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
392
/**
 * Emit 3DSTATE_CONSTANT_PS: forwards to gen7_emit_3dstate_constant() with
 * sub-opcode 0x17.
 */
static void
gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x17;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
401
/**
 * Emit 3DSTATE_SAMPLE_MASK (two dwords).
 *
 * The mask is trimmed to the low \p num_samples bits (bit 0 is always kept)
 * before being written.
 *
 * \param dev          device info; must describe a Gen7 device
 * \param sample_mask  requested sample mask
 * \param num_samples  number of samples of the render target
 * \param cp           command parser that receives the dwords
 */
static void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (cmd_len - 2);
   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   const unsigned allowed_bits = ((1 << num_samples) - 1) | 0x1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & allowed_bits);
   ilo_cp_end(cp);
}
430
/**
 * Emit 3DSTATE_CONSTANT_HS: forwards to gen7_emit_3dstate_constant() with
 * sub-opcode 0x19.
 */
static void
gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x19;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
439
/**
 * Emit 3DSTATE_CONSTANT_DS: forwards to gen7_emit_3dstate_constant() with
 * sub-opcode 0x1a.
 */
static void
gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x1a;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
448
449 static void
450 gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
451 const struct ilo_shader *hs,
452 int max_threads, int num_samplers,
453 struct ilo_cp *cp)
454 {
455 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
456 const uint8_t cmd_len = 7;
457 uint32_t dw1, dw2, dw5;
458
459 ILO_GPE_VALID_GEN(dev, 7, 7);
460
461 if (!hs) {
462 ilo_cp_begin(cp, cmd_len);
463 ilo_cp_write(cp, cmd | (cmd_len - 2));
464 ilo_cp_write(cp, 0);
465 ilo_cp_write(cp, 0);
466 ilo_cp_write(cp, 0);
467 ilo_cp_write(cp, 0);
468 ilo_cp_write(cp, 0);
469 ilo_cp_write(cp, 0);
470 ilo_cp_end(cp);
471
472 return;
473 }
474
475 dw1 = (num_samplers + 3) / 4 << 27 |
476 0 << 18 |
477 (max_threads - 1);
478 if (false)
479 dw1 |= 1 << 16;
480
481 dw2 = 1 << 31 | /* HS Enable */
482 1 << 29 | /* HS Statistics Enable */
483 0; /* Instance Count */
484
485 dw5 = hs->in.start_grf << 19 |
486 0 << 11 |
487 0 << 4;
488
489 ilo_cp_begin(cp, cmd_len);
490 ilo_cp_write(cp, cmd | (cmd_len - 2));
491 ilo_cp_write(cp, dw1);
492 ilo_cp_write(cp, dw2);
493 ilo_cp_write(cp, hs->cache_offset);
494 ilo_cp_write(cp, 0);
495 ilo_cp_write(cp, dw5);
496 ilo_cp_write(cp, 0);
497 ilo_cp_end(cp);
498 }
499
/**
 * Emit 3DSTATE_TE (four dwords) with an all-zero payload.
 *
 * \param dev  device info; must describe a Gen7 device
 * \param cp   command parser that receives the dwords
 */
static void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1c) | (cmd_len - 2);
   int dw;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* every dword after the header is zero */
   for (dw = 1; dw < cmd_len; dw++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
516
517 static void
518 gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
519 const struct ilo_shader *ds,
520 int max_threads, int num_samplers,
521 struct ilo_cp *cp)
522 {
523 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
524 const uint8_t cmd_len = 6;
525 uint32_t dw2, dw4, dw5;
526
527 ILO_GPE_VALID_GEN(dev, 7, 7);
528
529 if (!ds) {
530 ilo_cp_begin(cp, cmd_len);
531 ilo_cp_write(cp, cmd | (cmd_len - 2));
532 ilo_cp_write(cp, 0);
533 ilo_cp_write(cp, 0);
534 ilo_cp_write(cp, 0);
535 ilo_cp_write(cp, 0);
536 ilo_cp_write(cp, 0);
537 ilo_cp_end(cp);
538
539 return;
540 }
541
542 dw2 = (num_samplers + 3) / 4 << 27 |
543 0 << 18 |
544 (max_threads - 1);
545 if (false)
546 dw2 |= 1 << 16;
547
548 dw4 = ds->in.start_grf << 20 |
549 0 << 11 |
550 0 << 4;
551
552 dw5 = (max_threads - 1) << 25 |
553 1 << 10 |
554 1;
555
556 ilo_cp_begin(cp, cmd_len);
557 ilo_cp_write(cp, cmd | (cmd_len - 2));
558 ilo_cp_write(cp, ds->cache_offset);
559 ilo_cp_write(cp, dw2);
560 ilo_cp_write(cp, 0);
561 ilo_cp_write(cp, dw4);
562 ilo_cp_write(cp, dw5);
563 ilo_cp_end(cp);
564 }
565
566 static void
567 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
568 unsigned buffer_mask,
569 int vertex_attrib_count,
570 bool rasterizer_discard,
571 struct ilo_cp *cp)
572 {
573 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
574 const uint8_t cmd_len = 3;
575 const bool enable = (buffer_mask != 0);
576 uint32_t dw1, dw2;
577 int read_len;
578
579 ILO_GPE_VALID_GEN(dev, 7, 7);
580
581 if (!enable) {
582 dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
583 if (rasterizer_discard)
584 dw1 |= SO_RENDERING_DISABLE;
585
586 dw2 = 0;
587
588 ilo_cp_begin(cp, cmd_len);
589 ilo_cp_write(cp, cmd | (cmd_len - 2));
590 ilo_cp_write(cp, dw1);
591 ilo_cp_write(cp, dw2);
592 ilo_cp_end(cp);
593 return;
594 }
595
596 read_len = (vertex_attrib_count + 1) / 2;
597 if (!read_len)
598 read_len = 1;
599
600 dw1 = SO_FUNCTION_ENABLE |
601 0 << SO_RENDER_STREAM_SELECT_SHIFT |
602 SO_STATISTICS_ENABLE |
603 buffer_mask << 8;
604
605 if (rasterizer_discard)
606 dw1 |= SO_RENDERING_DISABLE;
607
608 /* API_OPENGL */
609 if (true)
610 dw1 |= SO_REORDER_TRAILING;
611
612 dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
613 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
614 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
615 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
616 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
617 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
618 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
619 (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
620
621 ilo_cp_begin(cp, cmd_len);
622 ilo_cp_write(cp, cmd | (cmd_len - 2));
623 ilo_cp_write(cp, dw1);
624 ilo_cp_write(cp, dw2);
625 ilo_cp_end(cp);
626 }
627
/**
 * Emit 3DSTATE_SBE (fourteen dwords).
 *
 * The thirteen payload dwords are produced by
 * ilo_gpe_gen6_fill_3dstate_sf_sbe().
 *
 * \param dev         device info; must describe a Gen7 device
 * \param rasterizer  rasterizer state passed to the fill helper
 * \param fs          fragment shader passed to the fill helper
 * \param last_sh     last shader passed to the fill helper
 * \param cp          command parser that receives the dwords
 */
static void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct pipe_rasterizer_state *rasterizer,
                      const struct ilo_shader *fs,
                      const struct ilo_shader *last_sh,
                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 14;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1f) | (cmd_len - 2);
   uint32_t payload[13];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, fs, last_sh,
         payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, payload, 13);
   ilo_cp_end(cp);
}
649
650 static void
651 gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
652 const struct ilo_shader *fs,
653 int num_samplers, bool dual_blend,
654 struct ilo_cp *cp)
655 {
656 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
657 const uint8_t cmd_len = 8;
658 uint32_t dw2, dw4, dw5;
659 int max_threads;
660
661 ILO_GPE_VALID_GEN(dev, 7, 7);
662
663 /* see brwCreateContext() */
664 max_threads = (dev->gt == 2) ? 172 : 48;
665
666 if (!fs) {
667 ilo_cp_begin(cp, cmd_len);
668 ilo_cp_write(cp, cmd | (cmd_len - 2));
669 ilo_cp_write(cp, 0);
670 ilo_cp_write(cp, 0);
671 ilo_cp_write(cp, 0);
672 /* GPU hangs if none of the dispatch enable bits is set */
673 ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
674 GEN7_PS_8_DISPATCH_ENABLE);
675 ilo_cp_write(cp, 0);
676 ilo_cp_write(cp, 0);
677 ilo_cp_write(cp, 0);
678 ilo_cp_end(cp);
679
680 return;
681 }
682
683 dw2 = (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT |
684 0 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
685 if (false)
686 dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
687
688 dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
689 GEN7_PS_POSOFFSET_NONE;
690
691 if (false)
692 dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
693 if (fs->in.count)
694 dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
695 if (dual_blend)
696 dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
697
698 if (fs->dispatch_16)
699 dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
700 else
701 dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
702
703 dw5 = fs->in.start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
704 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
705 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
706
707 ilo_cp_begin(cp, cmd_len);
708 ilo_cp_write(cp, cmd | (cmd_len - 2));
709 ilo_cp_write(cp, fs->cache_offset);
710 ilo_cp_write(cp, dw2);
711 ilo_cp_write(cp, 0); /* scratch */
712 ilo_cp_write(cp, dw4);
713 ilo_cp_write(cp, dw5);
714 ilo_cp_write(cp, 0); /* kernel 1 */
715 ilo_cp_write(cp, 0); /* kernel 2 */
716 ilo_cp_end(cp);
717 }
718
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: forwards
 * \p sf_clip_viewport to gen7_emit_3dstate_pointer() with sub-opcode 0x21.
 */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                  uint32_t sf_clip_viewport,
                                                  struct ilo_cp *cp)
{
   const int subop = 0x21;

   gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
}
726
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC: forwards \p cc_viewport to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x23.
 */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                             uint32_t cc_viewport,
                                             struct ilo_cp *cp)
{
   const int subop = 0x23;

   gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
}
734
/**
 * Emit 3DSTATE_BLEND_STATE_POINTERS: forwards \p blend_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x24.
 */
static void
gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                       uint32_t blend_state,
                                       struct ilo_cp *cp)
{
   const int subop = 0x24;

   gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
}
742
/**
 * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: forwards
 * \p depth_stencil_state to gen7_emit_3dstate_pointer() with sub-opcode
 * 0x25.
 */
static void
gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                               uint32_t depth_stencil_state,
                                               struct ilo_cp *cp)
{
   const int subop = 0x25;

   gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
}
750
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS: forwards \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x26.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x26;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
758
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS: forwards \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x27.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x27;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
766
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS: forwards \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x28.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x28;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
774
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS: forwards \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x29.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x29;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
782
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS: forwards \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2a.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2a;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
790
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS: forwards \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2b.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2b;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
798
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS: forwards \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2c.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2c;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
806
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS: forwards \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2d.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2d;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
814
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS: forwards \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2e.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2e;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
822
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS: forwards \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2f.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2f;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
830
831 static void
832 gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
833 int subop, int offset, int size,
834 int entry_size,
835 struct ilo_cp *cp)
836 {
837 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
838 const uint8_t cmd_len = 2;
839 const int row_size = 64; /* 512 bits */
840 int alloc_size, num_entries, min_entries, max_entries;
841
842 ILO_GPE_VALID_GEN(dev, 7, 7);
843
844 /* VS, HS, DS, and GS variants */
845 assert(subop >= 0x30 && subop <= 0x33);
846
847 /* in multiples of 8KB */
848 assert(offset % 8192 == 0);
849 offset /= 8192;
850
851 /* in multiple of 512-bit rows */
852 alloc_size = (entry_size + row_size - 1) / row_size;
853 if (!alloc_size)
854 alloc_size = 1;
855
856 /*
857 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
858 *
859 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
860 * cause performance to decrease due to banking in the URB. Element
861 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
862 */
863 if (subop == 0x30 && alloc_size == 5)
864 alloc_size = 6;
865
866 /* in multiples of 8 */
867 num_entries = (size / row_size / alloc_size) & ~7;
868
869 switch (subop) {
870 case 0x30: /* 3DSTATE_URB_VS */
871 min_entries = 32;
872 max_entries = (dev->gt == 2) ? 704 : 512;
873
874 assert(num_entries >= min_entries);
875 if (num_entries > max_entries)
876 num_entries = max_entries;
877 break;
878 case 0x31: /* 3DSTATE_URB_HS */
879 max_entries = (dev->gt == 2) ? 64 : 32;
880 if (num_entries > max_entries)
881 num_entries = max_entries;
882 break;
883 case 0x32: /* 3DSTATE_URB_DS */
884 if (num_entries)
885 assert(num_entries >= 138);
886 break;
887 case 0x33: /* 3DSTATE_URB_GS */
888 max_entries = (dev->gt == 2) ? 320 : 192;
889 if (num_entries > max_entries)
890 num_entries = max_entries;
891 break;
892 default:
893 break;
894 }
895
896 ilo_cp_begin(cp, cmd_len);
897 ilo_cp_write(cp, cmd | (cmd_len - 2));
898 ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
899 (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
900 num_entries);
901 ilo_cp_end(cp);
902 }
903
/**
 * Emit 3DSTATE_URB_VS: forwards to gen7_emit_3dstate_urb() with sub-opcode
 * 0x30.
 */
static void
gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x30;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
911
/**
 * Emit 3DSTATE_URB_HS: forwards to gen7_emit_3dstate_urb() with sub-opcode
 * 0x31.
 */
static void
gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x31;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
919
/**
 * Emit 3DSTATE_URB_DS: forwards to gen7_emit_3dstate_urb() with sub-opcode
 * 0x32.
 */
static void
gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x32;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
927
/**
 * Emit 3DSTATE_URB_GS: forwards to gen7_emit_3dstate_urb() with sub-opcode
 * 0x33.
 */
static void
gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x33;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
935
936 static void
937 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
938 int subop, int offset, int size,
939 struct ilo_cp *cp)
940 {
941 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
942 const uint8_t cmd_len = 2;
943 int end;
944
945 ILO_GPE_VALID_GEN(dev, 7, 7);
946
947 /* VS, HS, DS, GS, and PS variants */
948 assert(subop >= 0x12 && subop <= 0x16);
949
950 /*
951 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
952 *
953 * "(A table that says the maximum size of each constant buffer is
954 * 16KB")
955 *
956 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
957 *
958 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
959 * may not exceed the maximum value of the Constant Buffer Size."
960 *
961 * Thus, the valid range of buffer end is [0KB, 16KB].
962 */
963 end = (offset + size) / 1024;
964 if (end > 16) {
965 assert(!"invalid constant buffer end");
966 end = 16;
967 }
968
969 /* the valid range of buffer offset is [0KB, 15KB] */
970 offset = (offset + 1023) / 1024;
971 if (offset > 15) {
972 assert(!"invalid constant buffer offset");
973 offset = 15;
974 }
975
976 if (offset > end) {
977 assert(!size);
978 offset = end;
979 }
980
981 /* the valid range of buffer size is [0KB, 15KB] */
982 size = end - offset;
983 if (size > 15) {
984 assert(!"invalid constant buffer size");
985 size = 15;
986 }
987
988 ilo_cp_begin(cp, cmd_len);
989 ilo_cp_write(cp, cmd | (cmd_len - 2));
990 ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
991 size);
992 ilo_cp_end(cp);
993 }
994
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS: forwards to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x12.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x12;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1002
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS: forwards to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x13.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x13;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1010
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS: forwards to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x14.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x14;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1018
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS: forwards to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x15.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1026
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS: forwards to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x16.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1034
1035 static void
1036 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
1037 const struct pipe_stream_output_info *so_info,
1038 const struct ilo_shader *sh,
1039 struct ilo_cp *cp)
1040 {
1041 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
1042 uint16_t cmd_len;
1043 int buffer_selects, num_entries, i;
1044 uint16_t so_decls[128];
1045
1046 ILO_GPE_VALID_GEN(dev, 7, 7);
1047
1048 buffer_selects = 0;
1049 num_entries = 0;
1050
1051 if (so_info) {
1052 int buffer_offsets[PIPE_MAX_SO_BUFFERS];
1053
1054 memset(buffer_offsets, 0, sizeof(buffer_offsets));
1055
1056 for (i = 0; i < so_info->num_outputs; i++) {
1057 unsigned decl, buf, attr, mask;
1058
1059 buf = so_info->output[i].output_buffer;
1060
1061 /* pad with holes */
1062 assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
1063 while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
1064 int num_dwords;
1065
1066 num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
1067 if (num_dwords > 4)
1068 num_dwords = 4;
1069
1070 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
1071 SO_DECL_HOLE_FLAG |
1072 ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
1073
1074 so_decls[num_entries++] = decl;
1075 buffer_offsets[buf] += num_dwords;
1076 }
1077
1078 /* figure out which attribute is sourced */
1079 for (attr = 0; attr < sh->out.count; attr++) {
1080 const int idx = sh->out.register_indices[attr];
1081 if (idx == so_info->output[i].register_index)
1082 break;
1083 }
1084
1085 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
1086
1087 if (attr < sh->out.count) {
1088 mask = ((1 << so_info->output[i].num_components) - 1) <<
1089 so_info->output[i].start_component;
1090
1091 /* PSIZE is at W channel */
1092 if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
1093 assert(mask == 0x1);
1094 mask = (mask << 3) & 0xf;
1095 }
1096
1097 decl |= attr << SO_DECL_REGISTER_INDEX_SHIFT |
1098 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1099 }
1100 else {
1101 assert(!"stream output an undefined register");
1102 mask = (1 << so_info->output[i].num_components) - 1;
1103 decl |= SO_DECL_HOLE_FLAG |
1104 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1105 }
1106
1107 so_decls[num_entries++] = decl;
1108 buffer_selects |= 1 << buf;
1109 buffer_offsets[buf] += so_info->output[i].num_components;
1110 }
1111 }
1112
1113 /*
1114 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
1115 *
1116 * "Errata: All 128 decls for all four streams must be included
1117 * whenever this command is issued. The "Num Entries [n]" fields still
1118 * contain the actual numbers of valid decls."
1119 *
1120 * Also note that "DWord Length" has 9 bits for this command, and the type
1121 * of cmd_len is thus uint16_t.
1122 */
1123 cmd_len = 2 * 128 + 3;
1124
1125 ilo_cp_begin(cp, cmd_len);
1126 ilo_cp_write(cp, cmd | (cmd_len - 2));
1127 ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
1128 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
1129 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
1130 buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
1131 ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
1132 0 << SO_NUM_ENTRIES_2_SHIFT |
1133 0 << SO_NUM_ENTRIES_1_SHIFT |
1134 num_entries << SO_NUM_ENTRIES_0_SHIFT);
1135
1136 for (i = 0; i < num_entries; i++) {
1137 ilo_cp_write(cp, so_decls[i]);
1138 ilo_cp_write(cp, 0);
1139 }
1140 for (; i < 128; i++) {
1141 ilo_cp_write(cp, 0);
1142 ilo_cp_write(cp, 0);
1143 }
1144
1145 ilo_cp_end(cp);
1146 }
1147
1148 static void
1149 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
1150 int index, int base, int stride,
1151 const struct pipe_stream_output_target *so_target,
1152 struct ilo_cp *cp)
1153 {
1154 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
1155 const uint8_t cmd_len = 4;
1156 struct ilo_buffer *buf;
1157 int end;
1158
1159 ILO_GPE_VALID_GEN(dev, 7, 7);
1160
1161 if (!so_target || !so_target->buffer) {
1162 ilo_cp_begin(cp, cmd_len);
1163 ilo_cp_write(cp, cmd | (cmd_len - 2));
1164 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
1165 ilo_cp_write(cp, 0);
1166 ilo_cp_write(cp, 0);
1167 ilo_cp_end(cp);
1168 return;
1169 }
1170
1171 buf = ilo_buffer(so_target->buffer);
1172
1173 /* DWord-aligned */
1174 assert(stride % 4 == 0 && base % 4 == 0);
1175 assert(so_target->buffer_offset % 4 == 0);
1176
1177 stride &= ~3;
1178 base = (base + so_target->buffer_offset) & ~3;
1179 end = (base + so_target->buffer_size) & ~3;
1180
1181 ilo_cp_begin(cp, cmd_len);
1182 ilo_cp_write(cp, cmd | (cmd_len - 2));
1183 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
1184 stride);
1185 ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1186 ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1187 ilo_cp_end(cp);
1188 }
1189
1190 static void
1191 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1192 const struct pipe_draw_info *info,
1193 bool rectlist,
1194 struct ilo_cp *cp)
1195 {
1196 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1197 const uint8_t cmd_len = 7;
1198 const int prim = (rectlist) ?
1199 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1200 const int vb_access = (info->indexed) ?
1201 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1202 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1203
1204 ILO_GPE_VALID_GEN(dev, 7, 7);
1205
1206 ilo_cp_begin(cp, cmd_len);
1207 ilo_cp_write(cp, cmd | (cmd_len - 2));
1208 ilo_cp_write(cp, vb_access | prim);
1209 ilo_cp_write(cp, info->count);
1210 ilo_cp_write(cp, info->start);
1211 ilo_cp_write(cp, info->instance_count);
1212 ilo_cp_write(cp, info->start_instance);
1213 ilo_cp_write(cp, info->index_bias);
1214 ilo_cp_end(cp);
1215 }
1216
1217 static uint32_t
1218 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1219 const struct ilo_viewport_cso *viewports,
1220 unsigned num_viewports,
1221 struct ilo_cp *cp)
1222 {
1223 const int state_align = 64 / 4;
1224 const int state_len = 16 * num_viewports;
1225 uint32_t state_offset, *dw;
1226 unsigned i;
1227
1228 ILO_GPE_VALID_GEN(dev, 7, 7);
1229
1230 /*
1231 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1232 *
1233 * "The viewport-specific state used by both the SF and CL units
1234 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1235 * of which contains the DWords described below. The start of each
1236 * element is spaced 16 DWords apart. The location of first element of
1237 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1238 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1239 */
1240 assert(num_viewports && num_viewports <= 16);
1241
1242 dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
1243 state_len, state_align, &state_offset);
1244
1245 for (i = 0; i < num_viewports; i++) {
1246 const struct ilo_viewport_cso *vp = &viewports[i];
1247
1248 dw[0] = fui(vp->m00);
1249 dw[1] = fui(vp->m11);
1250 dw[2] = fui(vp->m22);
1251 dw[3] = fui(vp->m30);
1252 dw[4] = fui(vp->m31);
1253 dw[5] = fui(vp->m32);
1254 dw[6] = 0;
1255 dw[7] = 0;
1256 dw[8] = fui(vp->min_gbx);
1257 dw[9] = fui(vp->max_gbx);
1258 dw[10] = fui(vp->min_gby);
1259 dw[11] = fui(vp->max_gby);
1260 dw[12] = 0;
1261 dw[13] = 0;
1262 dw[14] = 0;
1263 dw[15] = 0;
1264
1265 dw += 16;
1266 }
1267
1268 return state_offset;
1269 }
1270
1271 void
1272 ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
1273 unsigned width, unsigned height,
1274 unsigned depth, unsigned level,
1275 struct ilo_view_surface *surf)
1276 {
1277 uint32_t *dw;
1278
1279 ILO_GPE_VALID_GEN(dev, 7, 7);
1280
1281 /*
1282 * From the Ivy Bridge PRM, volume 4 part 1, page 62:
1283 *
1284 * "A null surface is used in instances where an actual surface is not
1285 * bound. When a write message is generated to a null surface, no
1286 * actual surface is written to. When a read message (including any
1287 * sampling engine message) is generated to a null surface, the result
1288 * is all zeros. Note that a null surface type is allowed to be used
1289 * with all messages, even if it is not specificially indicated as
1290 * supported. All of the remaining fields in surface state are ignored
1291 * for null surfaces, with the following exceptions:
1292 *
1293 * * Width, Height, Depth, LOD, and Render Target View Extent fields
1294 * must match the depth buffer's corresponding state for all render
1295 * target surfaces, including null.
1296 * * All sampling engine and data port messages support null surfaces
1297 * with the above behavior, even if not mentioned as specifically
1298 * supported, except for the following:
1299 * * Data Port Media Block Read/Write messages.
1300 * * The Surface Type of a surface used as a render target (accessed
1301 * via the Data Port's Render Target Write message) must be the same
1302 * as the Surface Type of all other render targets and of the depth
1303 * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
1304 * buffer or render targets are SURFTYPE_NULL."
1305 *
1306 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
1307 *
1308 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1309 * true"
1310 */
1311
1312 STATIC_ASSERT(Elements(surf->payload) >= 8);
1313 dw = surf->payload;
1314
1315 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1316 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
1317 BRW_SURFACE_TILED << 13;
1318
1319 dw[1] = 0;
1320
1321 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1322 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1323
1324 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
1325
1326 dw[4] = 0;
1327 dw[5] = level;
1328
1329 dw[6] = 0;
1330 dw[7] = 0;
1331
1332 surf->bo = NULL;
1333 }
1334
1335 void
1336 ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
1337 const struct ilo_buffer *buf,
1338 unsigned offset, unsigned size,
1339 unsigned struct_size,
1340 enum pipe_format elem_format,
1341 bool is_rt, bool render_cache_rw,
1342 struct ilo_view_surface *surf)
1343 {
1344 const bool typed = (elem_format != PIPE_FORMAT_NONE);
1345 const bool structured = (!typed && struct_size > 1);
1346 const int elem_size = (typed) ?
1347 util_format_get_blocksize(elem_format) : 1;
1348 int width, height, depth, pitch;
1349 int surface_type, surface_format, num_entries;
1350 uint32_t *dw;
1351
1352 ILO_GPE_VALID_GEN(dev, 7, 7);
1353
1354 surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
1355
1356 surface_format = (typed) ?
1357 ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
1358
1359 num_entries = size / struct_size;
1360 /* see if there is enough space to fit another element */
1361 if (size % struct_size >= elem_size && !structured)
1362 num_entries++;
1363
1364 /*
1365 * From the Ivy Bridge PRM, volume 4 part 1, page 67:
1366 *
1367 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1368 * Address) specifies the base address of first element of the
1369 * surface. The surface is interpreted as a simple array of that
1370 * single element type. The address must be naturally-aligned to the
1371 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1372 * must be 16-byte aligned)
1373 *
1374 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1375 * the base address of the first element of the surface, computed in
1376 * software by adding the surface base address to the byte offset of
1377 * the element in the buffer."
1378 */
1379 if (is_rt)
1380 assert(offset % elem_size == 0);
1381
1382 /*
1383 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1384 *
1385 * "For typed buffer and structured buffer surfaces, the number of
1386 * entries in the buffer ranges from 1 to 2^27. For raw buffer
1387 * surfaces, the number of entries in the buffer is the number of
1388 * bytes which can range from 1 to 2^30."
1389 */
1390 assert(num_entries >= 1 &&
1391 num_entries <= 1 << ((typed || structured) ? 27 : 30));
1392
1393 /*
1394 * From the Ivy Bridge PRM, volume 4 part 1, page 69:
1395 *
1396 * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
1397 * 11 if the Surface Format is RAW (the size of the buffer must be a
1398 * multiple of 4 bytes)."
1399 *
1400 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1401 *
1402 * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
1403 * field (Surface Pitch) indicates the size of the structure."
1404 *
1405 * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
1406 * must be a multiple of 4 bytes."
1407 */
1408 if (structured)
1409 assert(struct_size % 4 == 0);
1410 else if (!typed)
1411 assert(num_entries % 4 == 0);
1412
1413 pitch = struct_size;
1414
1415 pitch--;
1416 num_entries--;
1417 /* bits [6:0] */
1418 width = (num_entries & 0x0000007f);
1419 /* bits [20:7] */
1420 height = (num_entries & 0x001fff80) >> 7;
1421 /* bits [30:21] */
1422 depth = (num_entries & 0x7fe00000) >> 21;
1423 /* limit to [26:21] */
1424 if (typed || structured)
1425 depth &= 0x3f;
1426
1427 STATIC_ASSERT(Elements(surf->payload) >= 8);
1428 dw = surf->payload;
1429
1430 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1431 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1432 if (render_cache_rw)
1433 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1434
1435 dw[1] = offset;
1436
1437 dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
1438 SET_FIELD(width, GEN7_SURFACE_WIDTH);
1439
1440 dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
1441 pitch;
1442
1443 dw[4] = 0;
1444 dw[5] = 0;
1445
1446 dw[6] = 0;
1447 dw[7] = 0;
1448
1449 /* do not increment reference count */
1450 surf->bo = buf->bo;
1451 }
1452
1453 void
1454 ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
1455 const struct ilo_texture *tex,
1456 enum pipe_format format,
1457 unsigned first_level,
1458 unsigned num_levels,
1459 unsigned first_layer,
1460 unsigned num_layers,
1461 bool is_rt, bool render_cache_rw,
1462 struct ilo_view_surface *surf)
1463 {
1464 int surface_type, surface_format;
1465 int width, height, depth, pitch, lod;
1466 unsigned layer_offset, x_offset, y_offset;
1467 uint32_t *dw;
1468
1469 ILO_GPE_VALID_GEN(dev, 7, 7);
1470
1471 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
1472 assert(surface_type != BRW_SURFACE_BUFFER);
1473
1474 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
1475 format = PIPE_FORMAT_Z32_FLOAT;
1476
1477 if (is_rt)
1478 surface_format = ilo_translate_render_format(format);
1479 else
1480 surface_format = ilo_translate_texture_format(format);
1481 assert(surface_format >= 0);
1482
1483 width = tex->base.width0;
1484 height = tex->base.height0;
1485 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1486 tex->base.depth0 : num_layers;
1487 pitch = tex->bo_stride;
1488
1489 if (surface_type == BRW_SURFACE_CUBE) {
1490 /*
1491 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1492 *
1493 * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
1494 * this field is [0,340], indicating the number of cube array
1495 * elements (equal to the number of underlying 2D array elements
1496 * divided by 6). For other surfaces, this field must be zero."
1497 *
1498 * When is_rt is true, we treat the texture as a 2D one to avoid the
1499 * restriction.
1500 */
1501 if (is_rt) {
1502 surface_type = BRW_SURFACE_2D;
1503 }
1504 else {
1505 assert(num_layers % 6 == 0);
1506 depth = num_layers / 6;
1507 }
1508 }
1509
1510 /* sanity check the size */
1511 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1512 assert(first_layer < 2048 && num_layers <= 2048);
1513 switch (surface_type) {
1514 case BRW_SURFACE_1D:
1515 assert(width <= 16384 && height == 1 && depth <= 2048);
1516 break;
1517 case BRW_SURFACE_2D:
1518 assert(width <= 16384 && height <= 16384 && depth <= 2048);
1519 break;
1520 case BRW_SURFACE_3D:
1521 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1522 if (!is_rt)
1523 assert(first_layer == 0);
1524 break;
1525 case BRW_SURFACE_CUBE:
1526 assert(width <= 16384 && height <= 16384 && depth <= 86);
1527 assert(width == height);
1528 if (is_rt)
1529 assert(first_layer == 0);
1530 break;
1531 default:
1532 assert(!"unexpected surface type");
1533 break;
1534 }
1535
1536 if (is_rt) {
1537 /*
1538 * Compute the offset to the layer manually.
1539 *
1540 * For rendering, the hardware requires LOD to be the same for all
1541 * render targets and the depth buffer. We need to compute the offset
1542 * to the layer manually and always set LOD to 0.
1543 */
1544 if (true) {
1545 /* we lose the capability for layered rendering */
1546 assert(num_layers == 1);
1547
1548 layer_offset = ilo_texture_get_slice_offset(tex,
1549 first_level, first_layer, &x_offset, &y_offset);
1550
1551 assert(x_offset % 4 == 0);
1552 assert(y_offset % 2 == 0);
1553 x_offset /= 4;
1554 y_offset /= 2;
1555
1556 /* derive the size for the LOD */
1557 width = u_minify(width, first_level);
1558 height = u_minify(height, first_level);
1559 if (surface_type == BRW_SURFACE_3D)
1560 depth = u_minify(depth, first_level);
1561 else
1562 depth = 1;
1563
1564 first_level = 0;
1565 first_layer = 0;
1566 lod = 0;
1567 }
1568 else {
1569 layer_offset = 0;
1570 x_offset = 0;
1571 y_offset = 0;
1572 }
1573
1574 assert(num_levels == 1);
1575 lod = first_level;
1576 }
1577 else {
1578 layer_offset = 0;
1579 x_offset = 0;
1580 y_offset = 0;
1581
1582 lod = num_levels - 1;
1583 }
1584
1585 /*
1586 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1587 *
1588 * "The Base Address for linear render target surfaces and surfaces
1589 * accessed with the typed surface read/write data port messages must
1590 * be element-size aligned, for non-YUV surface formats, or a multiple
1591 * of 2 element-sizes for YUV surface formats. Other linear surfaces
1592 * have no alignment requirements (byte alignment is sufficient)."
1593 *
1594 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1595 *
1596 * "For linear render target surfaces and surfaces accessed with the
1597 * typed data port messages, the pitch must be a multiple of the
1598 * element size for non-YUV surface formats. Pitch must be a multiple
1599 * of 2 * element size for YUV surface formats. For linear surfaces
1600 * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
1601 * of 4 bytes.For other linear surfaces, the pitch can be any multiple
1602 * of bytes."
1603 *
1604 * From the Ivy Bridge PRM, volume 4 part 1, page 74:
1605 *
1606 * "For linear surfaces, this field (X Offset) must be zero."
1607 */
1608 if (tex->tiling == INTEL_TILING_NONE) {
1609 if (is_rt) {
1610 const int elem_size = util_format_get_blocksize(format);
1611 assert(layer_offset % elem_size == 0);
1612 assert(pitch % elem_size == 0);
1613 }
1614
1615 assert(!x_offset);
1616 }
1617
1618 STATIC_ASSERT(Elements(surf->payload) >= 8);
1619 dw = surf->payload;
1620
1621 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1622 surface_format << BRW_SURFACE_FORMAT_SHIFT |
1623 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;
1624
1625 /*
1626 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
1627 *
1628 * "If this field (Surface Array) is enabled, the Surface Type must be
1629 * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
1630 * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
1631 * SURFTYPE_CUBE, the Depth field must be set to zero."
1632 *
1633 * For non-3D sampler surfaces, resinfo (the sampler message) always
1634 * returns zero for the number of layers when this field is not set.
1635 */
1636 if (surface_type != BRW_SURFACE_3D) {
1637 if (util_resource_is_array_texture(&tex->base))
1638 dw[0] |= GEN7_SURFACE_IS_ARRAY;
1639 else
1640 assert(depth == 1);
1641 }
1642
1643 if (tex->valign_4)
1644 dw[0] |= GEN7_SURFACE_VALIGN_4;
1645
1646 if (tex->halign_8)
1647 dw[0] |= GEN7_SURFACE_HALIGN_8;
1648
1649 if (tex->array_spacing_full)
1650 dw[0] |= GEN7_SURFACE_ARYSPC_FULL;
1651 else
1652 dw[0] |= GEN7_SURFACE_ARYSPC_LOD0;
1653
1654 if (render_cache_rw)
1655 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1656
1657 if (surface_type == BRW_SURFACE_CUBE && !is_rt)
1658 dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
1659
1660 dw[1] = layer_offset;
1661
1662 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1663 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1664
1665 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
1666 (pitch - 1);
1667
1668 dw[4] = first_layer << 18 |
1669 (num_layers - 1) << 7;
1670
1671 /*
1672 * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
1673 * means the samples are interleaved. The layouts are the same when the
1674 * number of samples is 1.
1675 */
1676 if (tex->interleaved && tex->base.nr_samples > 1) {
1677 assert(!is_rt);
1678 dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
1679 }
1680 else {
1681 dw[4] |= GEN7_SURFACE_MSFMT_MSS;
1682 }
1683
1684 if (tex->base.nr_samples > 4)
1685 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
1686 else if (tex->base.nr_samples > 2)
1687 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
1688 else
1689 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;
1690
1691 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
1692 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
1693 SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
1694 lod;
1695
1696 dw[6] = 0;
1697 dw[7] = 0;
1698
1699 /* do not increment reference count */
1700 surf->bo = tex->bo;
1701 }
1702
1703 static int
1704 gen7_estimate_command_size(const struct ilo_dev_info *dev,
1705 enum ilo_gpe_gen7_command cmd,
1706 int arg)
1707 {
1708 static const struct {
1709 int header;
1710 int body;
1711 } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
1712 [ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 },
1713 [ILO_GPE_GEN7_STATE_SIP] = { 0, 2 },
1714 [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 },
1715 [ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 },
1716 [ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 },
1717 [ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 },
1718 [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
1719 [ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 },
1720 [ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 },
1721 [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 },
1722 [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
1723 [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
1724 [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
1725 [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
1726 [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
1727 [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 },
1728 [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 },
1729 [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
1730 [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 },
1731 [ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 },
1732 [ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 },
1733 [ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 },
1734 [ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 },
1735 [ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 },
1736 [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 },
1737 [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 },
1738 [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 },
1739 [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 },
1740 [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 },
1741 [ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 },
1742 [ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 },
1743 [ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 },
1744 [ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 },
1745 [ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 },
1746 [ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 },
1747 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 },
1748 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 },
1749 [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 },
1750 [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 },
1751 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 },
1752 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 },
1753 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 },
1754 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 },
1755 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 },
1756 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 },
1757 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 },
1758 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 },
1759 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 },
1760 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 },
1761 [ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 },
1762 [ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 },
1763 [ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 },
1764 [ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 },
1765 [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
1766 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
1767 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, },
1768 [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 },
1769 [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
1770 [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 },
1771 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 },
1772 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 },
1773 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 },
1774 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 },
1775 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 },
1776 [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 },
1777 [ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 },
1778 [ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 },
1779 [ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 },
1780 };
1781 const int header = gen7_command_size_table[cmd].header;
1782 const int body = gen7_command_size_table[cmd].body;
1783 const int count = arg;
1784
1785 ILO_GPE_VALID_GEN(dev, 7, 7);
1786 assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
1787
1788 return (likely(count)) ? header + body * count : 0;
1789 }
1790
1791 static int
1792 gen7_estimate_state_size(const struct ilo_dev_info *dev,
1793 enum ilo_gpe_gen7_state state,
1794 int arg)
1795 {
1796 static const struct {
1797 int alignment;
1798 int body;
1799 bool is_array;
1800 } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = {
1801 [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
1802 [ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true },
1803 [ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true },
1804 [ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false },
1805 [ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true },
1806 [ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false },
1807 [ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true },
1808 [ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true },
1809 [ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false },
1810 [ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true },
1811 [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false },
1812 [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
1813 };
1814 const int alignment = gen7_state_size_table[state].alignment;
1815 const int body = gen7_state_size_table[state].body;
1816 const bool is_array = gen7_state_size_table[state].is_array;
1817 const int count = arg;
1818 int estimate;
1819
1820 ILO_GPE_VALID_GEN(dev, 7, 7);
1821 assert(state < ILO_GPE_GEN7_STATE_COUNT);
1822
1823 if (likely(count)) {
1824 if (is_array) {
1825 estimate = (alignment - 1) + body * count;
1826 }
1827 else {
1828 estimate = (alignment - 1) + body;
1829 /* all states are aligned */
1830 if (count > 1)
1831 estimate += util_align_npot(body, alignment) * (count - 1);
1832 }
1833 }
1834 else {
1835 estimate = 0;
1836 }
1837
1838 return estimate;
1839 }
1840
1841 static void
1842 gen7_init(struct ilo_gpe_gen7 *gen7)
1843 {
1844 const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
1845
1846 gen7->estimate_command_size = gen7_estimate_command_size;
1847 gen7->estimate_state_size = gen7_estimate_state_size;
1848
1849 #define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
1850 #define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name
1851 GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
1852 GEN7_USE(gen7, STATE_SIP, gen6);
1853 GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
1854 GEN7_USE(gen7, PIPELINE_SELECT, gen6);
1855 GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
1856 GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
1857 GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
1858 GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
1859 GEN7_SET(gen7, GPGPU_WALKER);
1860 GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
1861 GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
1862 GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
1863 GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
1864 GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
1865 GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
1866 GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
1867 GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
1868 GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
1869 GEN7_USE(gen7, 3DSTATE_VS, gen6);
1870 GEN7_SET(gen7, 3DSTATE_GS);
1871 GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
1872 GEN7_SET(gen7, 3DSTATE_SF);
1873 GEN7_SET(gen7, 3DSTATE_WM);
1874 GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
1875 GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
1876 GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
1877 GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
1878 GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
1879 GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
1880 GEN7_SET(gen7, 3DSTATE_HS);
1881 GEN7_SET(gen7, 3DSTATE_TE);
1882 GEN7_SET(gen7, 3DSTATE_DS);
1883 GEN7_SET(gen7, 3DSTATE_STREAMOUT);
1884 GEN7_SET(gen7, 3DSTATE_SBE);
1885 GEN7_SET(gen7, 3DSTATE_PS);
1886 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1887 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1888 GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
1889 GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
1890 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
1891 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
1892 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
1893 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
1894 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
1895 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
1896 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
1897 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
1898 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
1899 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
1900 GEN7_SET(gen7, 3DSTATE_URB_VS);
1901 GEN7_SET(gen7, 3DSTATE_URB_HS);
1902 GEN7_SET(gen7, 3DSTATE_URB_DS);
1903 GEN7_SET(gen7, 3DSTATE_URB_GS);
1904 GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
1905 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
1906 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
1907 GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
1908 GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
1909 GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
1910 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1911 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1912 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1913 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1914 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1915 GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
1916 GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
1917 GEN7_USE(gen7, PIPE_CONTROL, gen6);
1918 GEN7_SET(gen7, 3DPRIMITIVE);
1919 GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
1920 GEN7_SET(gen7, SF_CLIP_VIEWPORT);
1921 GEN7_USE(gen7, CC_VIEWPORT, gen6);
1922 GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
1923 GEN7_USE(gen7, BLEND_STATE, gen6);
1924 GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
1925 GEN7_USE(gen7, SCISSOR_RECT, gen6);
1926 GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
1927 GEN7_USE(gen7, SURFACE_STATE, gen6);
1928 GEN7_USE(gen7, SAMPLER_STATE, gen6);
1929 GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
1930 GEN7_USE(gen7, push_constant_buffer, gen6);
1931 #undef GEN7_USE
1932 #undef GEN7_SET
1933 }
1934
1935 static struct ilo_gpe_gen7 gen7_gpe;
1936
1937 const struct ilo_gpe_gen7 *
1938 ilo_gpe_gen7_get(void)
1939 {
1940 if (!gen7_gpe.estimate_command_size)
1941 gen7_init(&gen7_gpe);
1942
1943 return &gen7_gpe;
1944 }