2a590be2ddcf01fa25ab008bf7aa944ebdbf5832
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_resource.h"
29 #include "brw_defines.h"
30 #include "intel_reg.h"
31
32 #include "ilo_cp.h"
33 #include "ilo_format.h"
34 #include "ilo_resource.h"
35 #include "ilo_shader.h"
36 #include "ilo_gpe_gen7.h"
37
/**
 * Placeholder for GPGPU_WALKER.
 *
 * Nothing is written to \p cp; the function only trips an assertion because
 * the command is not supported.
 */
static void
gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
{
   /* a string literal is never NULL, so its negation always fails */
   assert(!"GPGPU_WALKER unsupported");
}
44
/**
 * Emit the three-dword 3DSTATE_CLEAR_PARAMS command.
 *
 * \param clear_val  value written verbatim as the second dword
 */
static void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   /* the length field of the header is biased by two */
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x04) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, clear_val);
   /* NOTE(review): presumably flags the clear value as valid -- confirm */
   ilo_cp_write(cp, 1);
   ilo_cp_end(cp);
}
61
/**
 * Shared emitter for the two-dword 3DSTATE_* commands whose entire payload
 * is a single pointer value.
 *
 * \param subop    sub-opcode folded into the command header
 * \param pointer  value written verbatim as the second dword
 */
static void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   /* the length field of the header is biased by two */
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
77
/**
 * Emit 3DSTATE_CC_STATE_POINTERS by forwarding \p color_calc_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x0e.
 */
static void
gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const int subop = 0x0e;

   gen7_emit_3dstate_pointer(dev, subop, color_calc_state, cp);
}
85
86 void
87 ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev,
88 const struct ilo_shader_state *gs,
89 struct ilo_shader_cso *cso)
90 {
91 int start_grf, vue_read_len, max_threads;
92 uint32_t dw2, dw4, dw5;
93
94 ILO_GPE_VALID_GEN(dev, 7, 7);
95
96 start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
97 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
98
99 /* in pairs */
100 vue_read_len = (vue_read_len + 1) / 2;
101
102 switch (dev->gen) {
103 case ILO_GEN(7):
104 max_threads = (dev->gt == 2) ? 128 : 36;
105 break;
106 default:
107 max_threads = 1;
108 break;
109 }
110
111 dw2 = (true) ? 0 : GEN6_GS_FLOATING_POINT_MODE_ALT;
112
113 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
114 GEN7_GS_INCLUDE_VERTEX_HANDLES |
115 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
116 start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
117
118 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
119 GEN6_GS_STATISTICS_ENABLE |
120 GEN6_GS_ENABLE;
121
122 STATIC_ASSERT(Elements(cso->payload) >= 3);
123 cso->payload[0] = dw2;
124 cso->payload[1] = dw4;
125 cso->payload[2] = dw5;
126 }
127
128 static void
129 gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
130 const struct ilo_shader_state *gs,
131 int num_samplers,
132 struct ilo_cp *cp)
133 {
134 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
135 const uint8_t cmd_len = 7;
136 const struct ilo_shader_cso *cso;
137 uint32_t dw2, dw4, dw5;
138
139 ILO_GPE_VALID_GEN(dev, 7, 7);
140
141 if (!gs) {
142 ilo_cp_begin(cp, cmd_len);
143 ilo_cp_write(cp, cmd | (cmd_len - 2));
144 ilo_cp_write(cp, 0);
145 ilo_cp_write(cp, 0);
146 ilo_cp_write(cp, 0);
147 ilo_cp_write(cp, 0);
148 ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
149 ilo_cp_write(cp, 0);
150 ilo_cp_end(cp);
151 return;
152 }
153
154 cso = ilo_shader_get_kernel_cso(gs);
155 dw2 = cso->payload[0];
156 dw4 = cso->payload[1];
157 dw5 = cso->payload[2];
158
159 dw2 |= ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
160
161 ilo_cp_begin(cp, cmd_len);
162 ilo_cp_write(cp, cmd | (cmd_len - 2));
163 ilo_cp_write(cp, ilo_shader_get_kernel_offset(gs));
164 ilo_cp_write(cp, dw2);
165 ilo_cp_write(cp, 0); /* scratch */
166 ilo_cp_write(cp, dw4);
167 ilo_cp_write(cp, dw5);
168 ilo_cp_write(cp, 0);
169 ilo_cp_end(cp);
170 }
171
172 static void
173 gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
174 const struct ilo_rasterizer_state *rasterizer,
175 const struct pipe_surface *zs_surf,
176 struct ilo_cp *cp)
177 {
178 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
179 const uint8_t cmd_len = 7;
180 const int num_samples = 1;
181 uint32_t payload[6];
182
183 ILO_GPE_VALID_GEN(dev, 7, 7);
184
185 ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
186 rasterizer, num_samples,
187 (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
188 payload, Elements(payload));
189
190 ilo_cp_begin(cp, cmd_len);
191 ilo_cp_write(cp, cmd | (cmd_len - 2));
192 ilo_cp_write_multi(cp, payload, 6);
193 ilo_cp_end(cp);
194 }
195
196 void
197 ilo_gpe_init_rasterizer_wm_gen7(const struct ilo_dev_info *dev,
198 const struct pipe_rasterizer_state *state,
199 struct ilo_rasterizer_wm *wm)
200 {
201 uint32_t dw1, dw2;
202
203 ILO_GPE_VALID_GEN(dev, 7, 7);
204
205 dw1 = GEN7_WM_POSITION_ZW_PIXEL |
206 GEN7_WM_LINE_AA_WIDTH_2_0 |
207 GEN7_WM_MSRAST_OFF_PIXEL;
208
209 /* same value as in 3DSTATE_SF */
210 if (state->line_smooth)
211 dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
212
213 if (state->poly_stipple_enable)
214 dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
215 if (state->line_stipple_enable)
216 dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
217
218 if (state->bottom_edge_rule)
219 dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
220
221 dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
222
223 /*
224 * assertion that makes sure
225 *
226 * dw1 |= wm->dw_msaa_rast;
227 * dw2 |= wm->dw_msaa_disp;
228 *
229 * is valid
230 */
231 STATIC_ASSERT(GEN7_WM_MSRAST_OFF_PIXEL == 0 &&
232 GEN7_WM_MSDISPMODE_PERSAMPLE == 0);
233
234 wm->dw_msaa_rast =
235 (state->multisample) ? GEN7_WM_MSRAST_ON_PATTERN : 0;
236 wm->dw_msaa_disp = GEN7_WM_MSDISPMODE_PERPIXEL;
237
238 STATIC_ASSERT(Elements(wm->payload) >= 2);
239 wm->payload[0] = dw1;
240 wm->payload[1] = dw2;
241 }
242
243 void
244 ilo_gpe_init_fs_cso_gen7(const struct ilo_dev_info *dev,
245 const struct ilo_shader_state *fs,
246 struct ilo_shader_cso *cso)
247 {
248 int start_grf, max_threads;
249 uint32_t dw2, dw4, dw5;
250 uint32_t wm_interps, wm_dw1;
251
252 ILO_GPE_VALID_GEN(dev, 7, 7);
253
254 start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
255 /* see brwCreateContext() */
256 max_threads = (dev->gt == 2) ? 172 : 48;
257
258 dw2 = (true) ? 0 : GEN7_PS_FLOATING_POINT_MODE_ALT;
259
260 dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
261 GEN7_PS_POSOFFSET_NONE;
262
263 if (false)
264 dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
265
266 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
267 dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
268
269 assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
270 dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
271
272 dw5 = start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
273 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
274 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
275
276 /* FS affects 3DSTATE_WM too */
277 wm_dw1 = 0;
278
279 /*
280 * TODO set this bit only when
281 *
282 * a) fs writes colors and color is not masked, or
283 * b) fs writes depth, or
284 * c) fs or cc kills
285 */
286 wm_dw1 |= GEN7_WM_DISPATCH_ENABLE;
287
288 /*
289 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
290 *
291 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
292 * the PS kernel or color calculator has the ability to kill
293 * (discard) pixels or samples, other than due to depth or stencil
294 * testing. This bit is required to be ENABLED in the following
295 * situations:
296 *
297 * - The API pixel shader program contains "killpix" or "discard"
298 * instructions, or other code in the pixel shader kernel that
299 * can cause the final pixel mask to differ from the pixel mask
300 * received on dispatch.
301 *
302 * - A sampler with chroma key enabled with kill pixel mode is used
303 * by the pixel shader.
304 *
305 * - Any render target has Alpha Test Enable or AlphaToCoverage
306 * Enable enabled.
307 *
308 * - The pixel shader kernel generates and outputs oMask.
309 *
310 * Note: As ClipDistance clipping is fully supported in hardware
311 * and therefore not via PS instructions, there should be no need
312 * to ENABLE this bit due to ClipDistance clipping."
313 */
314 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
315 wm_dw1 |= GEN7_WM_KILL_ENABLE;
316
317 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
318 wm_dw1 |= GEN7_WM_PSCDEPTH_ON;
319
320 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
321 wm_dw1 |= GEN7_WM_USES_SOURCE_DEPTH;
322
323 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
324 wm_dw1 |= GEN7_WM_USES_SOURCE_W;
325
326 wm_interps = ilo_shader_get_kernel_param(fs,
327 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
328
329 wm_dw1 |= wm_interps << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
330
331 STATIC_ASSERT(Elements(cso->payload) >= 4);
332 cso->payload[0] = dw2;
333 cso->payload[1] = dw4;
334 cso->payload[2] = dw5;
335 cso->payload[3] = wm_dw1;
336 }
337
338 static void
339 gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
340 const struct ilo_shader_state *fs,
341 const struct ilo_rasterizer_state *rasterizer,
342 bool cc_may_kill,
343 struct ilo_cp *cp)
344 {
345 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
346 const uint8_t cmd_len = 3;
347 const int num_samples = 1;
348 uint32_t dw1, dw2;
349
350 ILO_GPE_VALID_GEN(dev, 7, 7);
351
352 /* see ilo_gpe_init_rasterizer_wm() */
353 dw1 = rasterizer->wm.payload[0];
354 dw2 = rasterizer->wm.payload[1];
355
356 dw1 |= GEN7_WM_STATISTICS_ENABLE;
357
358 if (false) {
359 dw1 |= GEN7_WM_DEPTH_CLEAR;
360 dw1 |= GEN7_WM_DEPTH_RESOLVE;
361 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
362 }
363
364 if (fs) {
365 const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs);
366
367 dw1 |= fs_cso->payload[3];
368 }
369
370 if (cc_may_kill) {
371 dw1 |= GEN7_WM_DISPATCH_ENABLE |
372 GEN7_WM_KILL_ENABLE;
373 }
374
375 if (num_samples > 1) {
376 dw1 |= rasterizer->wm.dw_msaa_rast;
377 dw2 |= rasterizer->wm.dw_msaa_disp;
378 }
379
380 ilo_cp_begin(cp, cmd_len);
381 ilo_cp_write(cp, cmd | (cmd_len - 2));
382 ilo_cp_write(cp, dw1);
383 ilo_cp_write(cp, dw2);
384 ilo_cp_end(cp);
385 }
386
/**
 * Shared emitter for the seven-dword 3DSTATE_CONSTANT_* commands.
 *
 * \param subop     sub-opcode selecting the VS, HS, DS, GS or PS variant
 * \param bufs      per-buffer values written to DW3..DW6; each must be a
 *                  multiple of 32
 * \param sizes     per-buffer sizes, converted to read lengths in 256-bit
 *                  units (rounded up)
 * \param num_bufs  number of valid entries in \p bufs and \p sizes, at most 4
 *
 * Buffers are enabled in order; the first slot that is out of range or has a
 * zero size ends the enabled run and all remaining slots are zeroed.
 */
static void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint8_t cmd_len = 7;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);
   uint32_t payload[6];
   int read_len_sum, slot;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   /* the first two dwords pack the four 16-bit read lengths */
   payload[0] = 0;
   payload[1] = 0;

   read_len_sum = 0;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
    *
    *     "Constant buffers must be enabled in order from Constant Buffer 0
    *      to Constant Buffer 3 within this command.  For example, it is
    *      not allowed to enable Constant Buffer 1 by programming a
    *      non-zero value in the VS Constant Buffer 1 Read Length without a
    *      non-zero value in VS Constant Buffer 0 Read Length."
    */
   slot = 0;
   while (slot < 4 && slot < num_bufs && sizes[slot]) {
      /* read lengths are in 256-bit units */
      const int read_len = (sizes[slot] + 31) / 32;

      /* the lower 5 bits are used for memory object control state */
      assert(bufs[slot] % 32 == 0);

      /* even slots use the low half of the dword, odd slots the high half */
      payload[slot / 2] |= read_len << ((slot % 2) ? 16 : 0);
      payload[2 + slot] = bufs[slot];

      read_len_sum += read_len;
      slot++;
   }

   /* every slot after the enabled run must be disabled */
   for (; slot < 4; slot++) {
      assert(slot >= num_bufs || !sizes[slot]);
      payload[2 + slot] = 0;
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(read_len_sum <= 64);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, payload, 6);
   ilo_cp_end(cp);
}
454
/**
 * Emit 3DSTATE_CONSTANT_VS by forwarding to gen7_emit_3dstate_constant()
 * with sub-opcode 0x15.
 */
static void
gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
463
/**
 * Emit 3DSTATE_CONSTANT_GS by forwarding to gen7_emit_3dstate_constant()
 * with sub-opcode 0x16.
 */
static void
gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
472
/**
 * Emit 3DSTATE_CONSTANT_PS by forwarding to gen7_emit_3dstate_constant()
 * with sub-opcode 0x17.
 */
static void
gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x17;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
481
/**
 * Emit the two-dword 3DSTATE_SAMPLE_MASK command.
 *
 * \param sample_mask  requested mask; bits beyond \p num_samples are cleared
 *                     before the value is written
 * \param num_samples  number of samples the mask may cover
 */
static void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (cmd_len - 2);
   /* one bit per sample; bit 0 is always allowed */
   const unsigned allowed_bits = ((1 << num_samples) - 1) | 0x1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & allowed_bits);
   ilo_cp_end(cp);
}
510
/**
 * Emit 3DSTATE_CONSTANT_HS by forwarding to gen7_emit_3dstate_constant()
 * with sub-opcode 0x19.
 */
static void
gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x19;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
519
/**
 * Emit 3DSTATE_CONSTANT_DS by forwarding to gen7_emit_3dstate_constant()
 * with sub-opcode 0x1a.
 */
static void
gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x1a;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
528
/**
 * Emit a seven-dword 3DSTATE_HS command whose payload is all zeros.
 *
 * \param hs            must be NULL; a non-NULL shader trips an assertion
 * \param num_samplers  not used
 */
static void
gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *hs,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 7;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1b) | (cmd_len - 2);
   int dw;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   assert(!hs);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* every dword after the header is zero */
   for (dw = 1; dw < cmd_len; dw++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
552
/**
 * Emit a four-dword 3DSTATE_TE command whose payload is all zeros.
 */
static void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1c) | (cmd_len - 2);
   int dw;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* every dword after the header is zero */
   for (dw = 1; dw < cmd_len; dw++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
569
/**
 * Emit a six-dword 3DSTATE_DS command whose payload is all zeros.
 *
 * \param ds            must be NULL; a non-NULL shader trips an assertion
 * \param num_samplers  not used
 */
static void
gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
                     const struct ilo_shader_state *ds,
                     int num_samplers,
                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 6;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1d) | (cmd_len - 2);
   int dw;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   assert(!ds);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* every dword after the header is zero */
   for (dw = 1; dw < cmd_len; dw++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
593
594 static void
595 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
596 unsigned buffer_mask,
597 int vertex_attrib_count,
598 bool rasterizer_discard,
599 struct ilo_cp *cp)
600 {
601 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
602 const uint8_t cmd_len = 3;
603 const bool enable = (buffer_mask != 0);
604 uint32_t dw1, dw2;
605 int read_len;
606
607 ILO_GPE_VALID_GEN(dev, 7, 7);
608
609 if (!enable) {
610 dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
611 if (rasterizer_discard)
612 dw1 |= SO_RENDERING_DISABLE;
613
614 dw2 = 0;
615
616 ilo_cp_begin(cp, cmd_len);
617 ilo_cp_write(cp, cmd | (cmd_len - 2));
618 ilo_cp_write(cp, dw1);
619 ilo_cp_write(cp, dw2);
620 ilo_cp_end(cp);
621 return;
622 }
623
624 read_len = (vertex_attrib_count + 1) / 2;
625 if (!read_len)
626 read_len = 1;
627
628 dw1 = SO_FUNCTION_ENABLE |
629 0 << SO_RENDER_STREAM_SELECT_SHIFT |
630 SO_STATISTICS_ENABLE |
631 buffer_mask << 8;
632
633 if (rasterizer_discard)
634 dw1 |= SO_RENDERING_DISABLE;
635
636 /* API_OPENGL */
637 if (true)
638 dw1 |= SO_REORDER_TRAILING;
639
640 dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
641 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
642 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
643 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
644 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
645 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
646 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
647 (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
648
649 ilo_cp_begin(cp, cmd_len);
650 ilo_cp_write(cp, cmd | (cmd_len - 2));
651 ilo_cp_write(cp, dw1);
652 ilo_cp_write(cp, dw2);
653 ilo_cp_end(cp);
654 }
655
/**
 * Emit the fourteen-dword 3DSTATE_SBE command.
 *
 * The thirteen payload dwords are produced by
 * ilo_gpe_gen6_fill_3dstate_sf_sbe() from the rasterizer state, the fragment
 * shader and the last shader stage before it.
 */
static void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct ilo_rasterizer_state *rasterizer,
                      const struct ilo_shader_state *fs,
                      const struct ilo_shader_state *last_sh,
                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 14;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1f) | (cmd_len - 2);
   uint32_t sbe_dw[13];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
         fs, last_sh, sbe_dw, Elements(sbe_dw));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, sbe_dw, 13);
   ilo_cp_end(cp);
}
677
678 static void
679 gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
680 const struct ilo_shader_state *fs,
681 int num_samplers, bool dual_blend,
682 struct ilo_cp *cp)
683 {
684 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
685 const uint8_t cmd_len = 8;
686 const struct ilo_shader_cso *cso;
687 uint32_t dw2, dw4, dw5;
688
689 ILO_GPE_VALID_GEN(dev, 7, 7);
690
691 if (!fs) {
692 /* see brwCreateContext() */
693 const int max_threads = (dev->gt == 2) ? 172 : 48;
694
695 ilo_cp_begin(cp, cmd_len);
696 ilo_cp_write(cp, cmd | (cmd_len - 2));
697 ilo_cp_write(cp, 0);
698 ilo_cp_write(cp, 0);
699 ilo_cp_write(cp, 0);
700 /* GPU hangs if none of the dispatch enable bits is set */
701 ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
702 GEN7_PS_8_DISPATCH_ENABLE);
703 ilo_cp_write(cp, 0);
704 ilo_cp_write(cp, 0);
705 ilo_cp_write(cp, 0);
706 ilo_cp_end(cp);
707
708 return;
709 }
710
711 cso = ilo_shader_get_kernel_cso(fs);
712 dw2 = cso->payload[0];
713 dw4 = cso->payload[1];
714 dw5 = cso->payload[2];
715
716 dw2 |= (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT;
717
718 if (dual_blend)
719 dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
720
721 ilo_cp_begin(cp, cmd_len);
722 ilo_cp_write(cp, cmd | (cmd_len - 2));
723 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
724 ilo_cp_write(cp, dw2);
725 ilo_cp_write(cp, 0); /* scratch */
726 ilo_cp_write(cp, dw4);
727 ilo_cp_write(cp, dw5);
728 ilo_cp_write(cp, 0); /* kernel 1 */
729 ilo_cp_write(cp, 0); /* kernel 2 */
730 ilo_cp_end(cp);
731 }
732
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP by forwarding
 * \p sf_clip_viewport to gen7_emit_3dstate_pointer() with sub-opcode 0x21.
 */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                  uint32_t sf_clip_viewport,
                                                  struct ilo_cp *cp)
{
   const int subop = 0x21;

   gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
}
740
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC by forwarding \p cc_viewport to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x23.
 */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                             uint32_t cc_viewport,
                                             struct ilo_cp *cp)
{
   const int subop = 0x23;

   gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
}
748
/**
 * Emit 3DSTATE_BLEND_STATE_POINTERS by forwarding \p blend_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x24.
 */
static void
gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                       uint32_t blend_state,
                                       struct ilo_cp *cp)
{
   const int subop = 0x24;

   gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
}
756
/**
 * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS by forwarding
 * \p depth_stencil_state to gen7_emit_3dstate_pointer() with sub-opcode
 * 0x25.
 */
static void
gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                               uint32_t depth_stencil_state,
                                               struct ilo_cp *cp)
{
   const int subop = 0x25;

   gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
}
764
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS by forwarding \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x26.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x26;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
772
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS by forwarding \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x27.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x27;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
780
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS by forwarding \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x28.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x28;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
788
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS by forwarding \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x29.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x29;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
796
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS by forwarding \p binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2a.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2a;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
804
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS by forwarding \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2b.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2b;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
812
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS by forwarding \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2c.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2c;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
820
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS by forwarding \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2d.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2d;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
828
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS by forwarding \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2e.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2e;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
836
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS by forwarding \p sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2f.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2f;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
844
845 static void
846 gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
847 int subop, int offset, int size,
848 int entry_size,
849 struct ilo_cp *cp)
850 {
851 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
852 const uint8_t cmd_len = 2;
853 const int row_size = 64; /* 512 bits */
854 int alloc_size, num_entries, min_entries, max_entries;
855
856 ILO_GPE_VALID_GEN(dev, 7, 7);
857
858 /* VS, HS, DS, and GS variants */
859 assert(subop >= 0x30 && subop <= 0x33);
860
861 /* in multiples of 8KB */
862 assert(offset % 8192 == 0);
863 offset /= 8192;
864
865 /* in multiple of 512-bit rows */
866 alloc_size = (entry_size + row_size - 1) / row_size;
867 if (!alloc_size)
868 alloc_size = 1;
869
870 /*
871 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
872 *
873 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
874 * cause performance to decrease due to banking in the URB. Element
875 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
876 */
877 if (subop == 0x30 && alloc_size == 5)
878 alloc_size = 6;
879
880 /* in multiples of 8 */
881 num_entries = (size / row_size / alloc_size) & ~7;
882
883 switch (subop) {
884 case 0x30: /* 3DSTATE_URB_VS */
885 min_entries = 32;
886 max_entries = (dev->gt == 2) ? 704 : 512;
887
888 assert(num_entries >= min_entries);
889 if (num_entries > max_entries)
890 num_entries = max_entries;
891 break;
892 case 0x31: /* 3DSTATE_URB_HS */
893 max_entries = (dev->gt == 2) ? 64 : 32;
894 if (num_entries > max_entries)
895 num_entries = max_entries;
896 break;
897 case 0x32: /* 3DSTATE_URB_DS */
898 if (num_entries)
899 assert(num_entries >= 138);
900 break;
901 case 0x33: /* 3DSTATE_URB_GS */
902 max_entries = (dev->gt == 2) ? 320 : 192;
903 if (num_entries > max_entries)
904 num_entries = max_entries;
905 break;
906 default:
907 break;
908 }
909
910 ilo_cp_begin(cp, cmd_len);
911 ilo_cp_write(cp, cmd | (cmd_len - 2));
912 ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
913 (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
914 num_entries);
915 ilo_cp_end(cp);
916 }
917
/**
 * Emit 3DSTATE_URB_VS by forwarding to gen7_emit_3dstate_urb() with
 * sub-opcode 0x30.
 */
static void
gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x30;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
925
/**
 * Emit 3DSTATE_URB_HS by forwarding to gen7_emit_3dstate_urb() with
 * sub-opcode 0x31.
 */
static void
gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x31;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
933
/**
 * Emit 3DSTATE_URB_DS by forwarding to gen7_emit_3dstate_urb() with
 * sub-opcode 0x32.
 */
static void
gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x32;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
941
/**
 * Emit 3DSTATE_URB_GS by forwarding to gen7_emit_3dstate_urb() with
 * sub-opcode 0x33.
 */
static void
gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x33;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
949
950 static void
951 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
952 int subop, int offset, int size,
953 struct ilo_cp *cp)
954 {
955 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
956 const uint8_t cmd_len = 2;
957 int end;
958
959 ILO_GPE_VALID_GEN(dev, 7, 7);
960
961 /* VS, HS, DS, GS, and PS variants */
962 assert(subop >= 0x12 && subop <= 0x16);
963
964 /*
965 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
966 *
967 * "(A table that says the maximum size of each constant buffer is
968 * 16KB")
969 *
970 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
971 *
972 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
973 * may not exceed the maximum value of the Constant Buffer Size."
974 *
975 * Thus, the valid range of buffer end is [0KB, 16KB].
976 */
977 end = (offset + size) / 1024;
978 if (end > 16) {
979 assert(!"invalid constant buffer end");
980 end = 16;
981 }
982
983 /* the valid range of buffer offset is [0KB, 15KB] */
984 offset = (offset + 1023) / 1024;
985 if (offset > 15) {
986 assert(!"invalid constant buffer offset");
987 offset = 15;
988 }
989
990 if (offset > end) {
991 assert(!size);
992 offset = end;
993 }
994
995 /* the valid range of buffer size is [0KB, 15KB] */
996 size = end - offset;
997 if (size > 15) {
998 assert(!"invalid constant buffer size");
999 size = 15;
1000 }
1001
1002 ilo_cp_begin(cp, cmd_len);
1003 ilo_cp_write(cp, cmd | (cmd_len - 2));
1004 ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
1005 size);
1006 ilo_cp_end(cp);
1007 }
1008
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS by forwarding to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x12.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x12;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1016
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS by forwarding to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x13.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x13;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1024
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS by forwarding to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x14.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x14;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1032
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS by forwarding to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x15.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1040
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS by forwarding to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x16.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1048
1049 static void
1050 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
1051 const struct pipe_stream_output_info *so_info,
1052 struct ilo_cp *cp)
1053 {
1054 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
1055 uint16_t cmd_len;
1056 int buffer_selects, num_entries, i;
1057 uint16_t so_decls[128];
1058
1059 ILO_GPE_VALID_GEN(dev, 7, 7);
1060
1061 buffer_selects = 0;
1062 num_entries = 0;
1063
1064 if (so_info) {
1065 int buffer_offsets[PIPE_MAX_SO_BUFFERS];
1066
1067 memset(buffer_offsets, 0, sizeof(buffer_offsets));
1068
1069 for (i = 0; i < so_info->num_outputs; i++) {
1070 unsigned decl, buf, reg, mask;
1071
1072 buf = so_info->output[i].output_buffer;
1073
1074 /* pad with holes */
1075 assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
1076 while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
1077 int num_dwords;
1078
1079 num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
1080 if (num_dwords > 4)
1081 num_dwords = 4;
1082
1083 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
1084 SO_DECL_HOLE_FLAG |
1085 ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
1086
1087 so_decls[num_entries++] = decl;
1088 buffer_offsets[buf] += num_dwords;
1089 }
1090
1091 reg = so_info->output[i].register_index;
1092 mask = ((1 << so_info->output[i].num_components) - 1) <<
1093 so_info->output[i].start_component;
1094
1095 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
1096 reg << SO_DECL_REGISTER_INDEX_SHIFT |
1097 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1098
1099 so_decls[num_entries++] = decl;
1100 buffer_selects |= 1 << buf;
1101 buffer_offsets[buf] += so_info->output[i].num_components;
1102 }
1103 }
1104
1105 /*
1106 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
1107 *
1108 * "Errata: All 128 decls for all four streams must be included
1109 * whenever this command is issued. The "Num Entries [n]" fields still
1110 * contain the actual numbers of valid decls."
1111 *
1112 * Also note that "DWord Length" has 9 bits for this command, and the type
1113 * of cmd_len is thus uint16_t.
1114 */
1115 cmd_len = 2 * 128 + 3;
1116
1117 ilo_cp_begin(cp, cmd_len);
1118 ilo_cp_write(cp, cmd | (cmd_len - 2));
1119 ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
1120 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
1121 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
1122 buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
1123 ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
1124 0 << SO_NUM_ENTRIES_2_SHIFT |
1125 0 << SO_NUM_ENTRIES_1_SHIFT |
1126 num_entries << SO_NUM_ENTRIES_0_SHIFT);
1127
1128 for (i = 0; i < num_entries; i++) {
1129 ilo_cp_write(cp, so_decls[i]);
1130 ilo_cp_write(cp, 0);
1131 }
1132 for (; i < 128; i++) {
1133 ilo_cp_write(cp, 0);
1134 ilo_cp_write(cp, 0);
1135 }
1136
1137 ilo_cp_end(cp);
1138 }
1139
1140 static void
1141 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
1142 int index, int base, int stride,
1143 const struct pipe_stream_output_target *so_target,
1144 struct ilo_cp *cp)
1145 {
1146 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
1147 const uint8_t cmd_len = 4;
1148 struct ilo_buffer *buf;
1149 int end;
1150
1151 ILO_GPE_VALID_GEN(dev, 7, 7);
1152
1153 if (!so_target || !so_target->buffer) {
1154 ilo_cp_begin(cp, cmd_len);
1155 ilo_cp_write(cp, cmd | (cmd_len - 2));
1156 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
1157 ilo_cp_write(cp, 0);
1158 ilo_cp_write(cp, 0);
1159 ilo_cp_end(cp);
1160 return;
1161 }
1162
1163 buf = ilo_buffer(so_target->buffer);
1164
1165 /* DWord-aligned */
1166 assert(stride % 4 == 0 && base % 4 == 0);
1167 assert(so_target->buffer_offset % 4 == 0);
1168
1169 stride &= ~3;
1170 base = (base + so_target->buffer_offset) & ~3;
1171 end = (base + so_target->buffer_size) & ~3;
1172
1173 ilo_cp_begin(cp, cmd_len);
1174 ilo_cp_write(cp, cmd | (cmd_len - 2));
1175 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
1176 stride);
1177 ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1178 ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1179 ilo_cp_end(cp);
1180 }
1181
1182 static void
1183 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1184 const struct pipe_draw_info *info,
1185 const struct ilo_ib_state *ib,
1186 bool rectlist,
1187 struct ilo_cp *cp)
1188 {
1189 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1190 const uint8_t cmd_len = 7;
1191 const int prim = (rectlist) ?
1192 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1193 const int vb_access = (info->indexed) ?
1194 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1195 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1196 const uint32_t vb_start = info->start +
1197 ((info->indexed) ? ib->draw_start_offset : 0);
1198
1199 ILO_GPE_VALID_GEN(dev, 7, 7);
1200
1201 ilo_cp_begin(cp, cmd_len);
1202 ilo_cp_write(cp, cmd | (cmd_len - 2));
1203 ilo_cp_write(cp, vb_access | prim);
1204 ilo_cp_write(cp, info->count);
1205 ilo_cp_write(cp, vb_start);
1206 ilo_cp_write(cp, info->instance_count);
1207 ilo_cp_write(cp, info->start_instance);
1208 ilo_cp_write(cp, info->index_bias);
1209 ilo_cp_end(cp);
1210 }
1211
1212 static uint32_t
1213 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1214 const struct ilo_viewport_cso *viewports,
1215 unsigned num_viewports,
1216 struct ilo_cp *cp)
1217 {
1218 const int state_align = 64 / 4;
1219 const int state_len = 16 * num_viewports;
1220 uint32_t state_offset, *dw;
1221 unsigned i;
1222
1223 ILO_GPE_VALID_GEN(dev, 7, 7);
1224
1225 /*
1226 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1227 *
1228 * "The viewport-specific state used by both the SF and CL units
1229 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1230 * of which contains the DWords described below. The start of each
1231 * element is spaced 16 DWords apart. The location of first element of
1232 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1233 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1234 */
1235 assert(num_viewports && num_viewports <= 16);
1236
1237 dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
1238 state_len, state_align, &state_offset);
1239
1240 for (i = 0; i < num_viewports; i++) {
1241 const struct ilo_viewport_cso *vp = &viewports[i];
1242
1243 dw[0] = fui(vp->m00);
1244 dw[1] = fui(vp->m11);
1245 dw[2] = fui(vp->m22);
1246 dw[3] = fui(vp->m30);
1247 dw[4] = fui(vp->m31);
1248 dw[5] = fui(vp->m32);
1249 dw[6] = 0;
1250 dw[7] = 0;
1251 dw[8] = fui(vp->min_gbx);
1252 dw[9] = fui(vp->max_gbx);
1253 dw[10] = fui(vp->min_gby);
1254 dw[11] = fui(vp->max_gby);
1255 dw[12] = 0;
1256 dw[13] = 0;
1257 dw[14] = 0;
1258 dw[15] = 0;
1259
1260 dw += 16;
1261 }
1262
1263 return state_offset;
1264 }
1265
1266 void
1267 ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
1268 unsigned width, unsigned height,
1269 unsigned depth, unsigned level,
1270 struct ilo_view_surface *surf)
1271 {
1272 uint32_t *dw;
1273
1274 ILO_GPE_VALID_GEN(dev, 7, 7);
1275
1276 /*
1277 * From the Ivy Bridge PRM, volume 4 part 1, page 62:
1278 *
1279 * "A null surface is used in instances where an actual surface is not
1280 * bound. When a write message is generated to a null surface, no
1281 * actual surface is written to. When a read message (including any
1282 * sampling engine message) is generated to a null surface, the result
1283 * is all zeros. Note that a null surface type is allowed to be used
1284 * with all messages, even if it is not specificially indicated as
1285 * supported. All of the remaining fields in surface state are ignored
1286 * for null surfaces, with the following exceptions:
1287 *
1288 * * Width, Height, Depth, LOD, and Render Target View Extent fields
1289 * must match the depth buffer's corresponding state for all render
1290 * target surfaces, including null.
1291 * * All sampling engine and data port messages support null surfaces
1292 * with the above behavior, even if not mentioned as specifically
1293 * supported, except for the following:
1294 * * Data Port Media Block Read/Write messages.
1295 * * The Surface Type of a surface used as a render target (accessed
1296 * via the Data Port's Render Target Write message) must be the same
1297 * as the Surface Type of all other render targets and of the depth
1298 * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
1299 * buffer or render targets are SURFTYPE_NULL."
1300 *
1301 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
1302 *
1303 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1304 * true"
1305 */
1306
1307 STATIC_ASSERT(Elements(surf->payload) >= 8);
1308 dw = surf->payload;
1309
1310 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1311 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
1312 BRW_SURFACE_TILED << 13;
1313
1314 dw[1] = 0;
1315
1316 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1317 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1318
1319 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
1320
1321 dw[4] = 0;
1322 dw[5] = level;
1323
1324 dw[6] = 0;
1325 dw[7] = 0;
1326
1327 surf->bo = NULL;
1328 }
1329
1330 void
1331 ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
1332 const struct ilo_buffer *buf,
1333 unsigned offset, unsigned size,
1334 unsigned struct_size,
1335 enum pipe_format elem_format,
1336 bool is_rt, bool render_cache_rw,
1337 struct ilo_view_surface *surf)
1338 {
1339 const bool typed = (elem_format != PIPE_FORMAT_NONE);
1340 const bool structured = (!typed && struct_size > 1);
1341 const int elem_size = (typed) ?
1342 util_format_get_blocksize(elem_format) : 1;
1343 int width, height, depth, pitch;
1344 int surface_type, surface_format, num_entries;
1345 uint32_t *dw;
1346
1347 ILO_GPE_VALID_GEN(dev, 7, 7);
1348
1349 surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
1350
1351 surface_format = (typed) ?
1352 ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
1353
1354 num_entries = size / struct_size;
1355 /* see if there is enough space to fit another element */
1356 if (size % struct_size >= elem_size && !structured)
1357 num_entries++;
1358
1359 /*
1360 * From the Ivy Bridge PRM, volume 4 part 1, page 67:
1361 *
1362 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1363 * Address) specifies the base address of first element of the
1364 * surface. The surface is interpreted as a simple array of that
1365 * single element type. The address must be naturally-aligned to the
1366 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1367 * must be 16-byte aligned)
1368 *
1369 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1370 * the base address of the first element of the surface, computed in
1371 * software by adding the surface base address to the byte offset of
1372 * the element in the buffer."
1373 */
1374 if (is_rt)
1375 assert(offset % elem_size == 0);
1376
1377 /*
1378 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1379 *
1380 * "For typed buffer and structured buffer surfaces, the number of
1381 * entries in the buffer ranges from 1 to 2^27. For raw buffer
1382 * surfaces, the number of entries in the buffer is the number of
1383 * bytes which can range from 1 to 2^30."
1384 */
1385 assert(num_entries >= 1 &&
1386 num_entries <= 1 << ((typed || structured) ? 27 : 30));
1387
1388 /*
1389 * From the Ivy Bridge PRM, volume 4 part 1, page 69:
1390 *
1391 * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
1392 * 11 if the Surface Format is RAW (the size of the buffer must be a
1393 * multiple of 4 bytes)."
1394 *
1395 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1396 *
1397 * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
1398 * field (Surface Pitch) indicates the size of the structure."
1399 *
1400 * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
1401 * must be a multiple of 4 bytes."
1402 */
1403 if (structured)
1404 assert(struct_size % 4 == 0);
1405 else if (!typed)
1406 assert(num_entries % 4 == 0);
1407
1408 pitch = struct_size;
1409
1410 pitch--;
1411 num_entries--;
1412 /* bits [6:0] */
1413 width = (num_entries & 0x0000007f);
1414 /* bits [20:7] */
1415 height = (num_entries & 0x001fff80) >> 7;
1416 /* bits [30:21] */
1417 depth = (num_entries & 0x7fe00000) >> 21;
1418 /* limit to [26:21] */
1419 if (typed || structured)
1420 depth &= 0x3f;
1421
1422 STATIC_ASSERT(Elements(surf->payload) >= 8);
1423 dw = surf->payload;
1424
1425 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1426 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1427 if (render_cache_rw)
1428 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1429
1430 dw[1] = offset;
1431
1432 dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
1433 SET_FIELD(width, GEN7_SURFACE_WIDTH);
1434
1435 dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
1436 pitch;
1437
1438 dw[4] = 0;
1439 dw[5] = 0;
1440
1441 dw[6] = 0;
1442 dw[7] = 0;
1443
1444 /* do not increment reference count */
1445 surf->bo = buf->bo;
1446 }
1447
1448 void
1449 ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
1450 const struct ilo_texture *tex,
1451 enum pipe_format format,
1452 unsigned first_level,
1453 unsigned num_levels,
1454 unsigned first_layer,
1455 unsigned num_layers,
1456 bool is_rt, bool render_cache_rw,
1457 struct ilo_view_surface *surf)
1458 {
1459 int surface_type, surface_format;
1460 int width, height, depth, pitch, lod;
1461 unsigned layer_offset, x_offset, y_offset;
1462 uint32_t *dw;
1463
1464 ILO_GPE_VALID_GEN(dev, 7, 7);
1465
1466 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
1467 assert(surface_type != BRW_SURFACE_BUFFER);
1468
1469 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
1470 format = PIPE_FORMAT_Z32_FLOAT;
1471
1472 if (is_rt)
1473 surface_format = ilo_translate_render_format(format);
1474 else
1475 surface_format = ilo_translate_texture_format(format);
1476 assert(surface_format >= 0);
1477
1478 width = tex->base.width0;
1479 height = tex->base.height0;
1480 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1481 tex->base.depth0 : num_layers;
1482 pitch = tex->bo_stride;
1483
1484 if (surface_type == BRW_SURFACE_CUBE) {
1485 /*
1486 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1487 *
1488 * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
1489 * this field is [0,340], indicating the number of cube array
1490 * elements (equal to the number of underlying 2D array elements
1491 * divided by 6). For other surfaces, this field must be zero."
1492 *
1493 * When is_rt is true, we treat the texture as a 2D one to avoid the
1494 * restriction.
1495 */
1496 if (is_rt) {
1497 surface_type = BRW_SURFACE_2D;
1498 }
1499 else {
1500 assert(num_layers % 6 == 0);
1501 depth = num_layers / 6;
1502 }
1503 }
1504
1505 /* sanity check the size */
1506 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1507 assert(first_layer < 2048 && num_layers <= 2048);
1508 switch (surface_type) {
1509 case BRW_SURFACE_1D:
1510 assert(width <= 16384 && height == 1 && depth <= 2048);
1511 break;
1512 case BRW_SURFACE_2D:
1513 assert(width <= 16384 && height <= 16384 && depth <= 2048);
1514 break;
1515 case BRW_SURFACE_3D:
1516 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1517 if (!is_rt)
1518 assert(first_layer == 0);
1519 break;
1520 case BRW_SURFACE_CUBE:
1521 assert(width <= 16384 && height <= 16384 && depth <= 86);
1522 assert(width == height);
1523 if (is_rt)
1524 assert(first_layer == 0);
1525 break;
1526 default:
1527 assert(!"unexpected surface type");
1528 break;
1529 }
1530
1531 if (is_rt) {
1532 /*
1533 * Compute the offset to the layer manually.
1534 *
1535 * For rendering, the hardware requires LOD to be the same for all
1536 * render targets and the depth buffer. We need to compute the offset
1537 * to the layer manually and always set LOD to 0.
1538 */
1539 if (true) {
1540 /* we lose the capability for layered rendering */
1541 assert(num_layers == 1);
1542
1543 layer_offset = ilo_texture_get_slice_offset(tex,
1544 first_level, first_layer, &x_offset, &y_offset);
1545
1546 assert(x_offset % 4 == 0);
1547 assert(y_offset % 2 == 0);
1548 x_offset /= 4;
1549 y_offset /= 2;
1550
1551 /* derive the size for the LOD */
1552 width = u_minify(width, first_level);
1553 height = u_minify(height, first_level);
1554 if (surface_type == BRW_SURFACE_3D)
1555 depth = u_minify(depth, first_level);
1556 else
1557 depth = 1;
1558
1559 first_level = 0;
1560 first_layer = 0;
1561 lod = 0;
1562 }
1563 else {
1564 layer_offset = 0;
1565 x_offset = 0;
1566 y_offset = 0;
1567 }
1568
1569 assert(num_levels == 1);
1570 lod = first_level;
1571 }
1572 else {
1573 layer_offset = 0;
1574 x_offset = 0;
1575 y_offset = 0;
1576
1577 lod = num_levels - 1;
1578 }
1579
1580 /*
1581 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1582 *
1583 * "The Base Address for linear render target surfaces and surfaces
1584 * accessed with the typed surface read/write data port messages must
1585 * be element-size aligned, for non-YUV surface formats, or a multiple
1586 * of 2 element-sizes for YUV surface formats. Other linear surfaces
1587 * have no alignment requirements (byte alignment is sufficient)."
1588 *
1589 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1590 *
1591 * "For linear render target surfaces and surfaces accessed with the
1592 * typed data port messages, the pitch must be a multiple of the
1593 * element size for non-YUV surface formats. Pitch must be a multiple
1594 * of 2 * element size for YUV surface formats. For linear surfaces
1595 * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
1596 * of 4 bytes.For other linear surfaces, the pitch can be any multiple
1597 * of bytes."
1598 *
1599 * From the Ivy Bridge PRM, volume 4 part 1, page 74:
1600 *
1601 * "For linear surfaces, this field (X Offset) must be zero."
1602 */
1603 if (tex->tiling == INTEL_TILING_NONE) {
1604 if (is_rt) {
1605 const int elem_size = util_format_get_blocksize(format);
1606 assert(layer_offset % elem_size == 0);
1607 assert(pitch % elem_size == 0);
1608 }
1609
1610 assert(!x_offset);
1611 }
1612
1613 STATIC_ASSERT(Elements(surf->payload) >= 8);
1614 dw = surf->payload;
1615
1616 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1617 surface_format << BRW_SURFACE_FORMAT_SHIFT |
1618 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;
1619
1620 /*
1621 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
1622 *
1623 * "If this field (Surface Array) is enabled, the Surface Type must be
1624 * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
1625 * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
1626 * SURFTYPE_CUBE, the Depth field must be set to zero."
1627 *
1628 * For non-3D sampler surfaces, resinfo (the sampler message) always
1629 * returns zero for the number of layers when this field is not set.
1630 */
1631 if (surface_type != BRW_SURFACE_3D) {
1632 if (util_resource_is_array_texture(&tex->base))
1633 dw[0] |= GEN7_SURFACE_IS_ARRAY;
1634 else
1635 assert(depth == 1);
1636 }
1637
1638 if (tex->valign_4)
1639 dw[0] |= GEN7_SURFACE_VALIGN_4;
1640
1641 if (tex->halign_8)
1642 dw[0] |= GEN7_SURFACE_HALIGN_8;
1643
1644 if (tex->array_spacing_full)
1645 dw[0] |= GEN7_SURFACE_ARYSPC_FULL;
1646 else
1647 dw[0] |= GEN7_SURFACE_ARYSPC_LOD0;
1648
1649 if (render_cache_rw)
1650 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1651
1652 if (surface_type == BRW_SURFACE_CUBE && !is_rt)
1653 dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
1654
1655 dw[1] = layer_offset;
1656
1657 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1658 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1659
1660 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
1661 (pitch - 1);
1662
1663 dw[4] = first_layer << 18 |
1664 (num_layers - 1) << 7;
1665
1666 /*
1667 * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
1668 * means the samples are interleaved. The layouts are the same when the
1669 * number of samples is 1.
1670 */
1671 if (tex->interleaved && tex->base.nr_samples > 1) {
1672 assert(!is_rt);
1673 dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
1674 }
1675 else {
1676 dw[4] |= GEN7_SURFACE_MSFMT_MSS;
1677 }
1678
1679 if (tex->base.nr_samples > 4)
1680 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
1681 else if (tex->base.nr_samples > 2)
1682 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
1683 else
1684 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;
1685
1686 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
1687 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
1688 SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
1689 lod;
1690
1691 dw[6] = 0;
1692 dw[7] = 0;
1693
1694 /* do not increment reference count */
1695 surf->bo = tex->bo;
1696 }
1697
1698 static int
1699 gen7_estimate_command_size(const struct ilo_dev_info *dev,
1700 enum ilo_gpe_gen7_command cmd,
1701 int arg)
1702 {
1703 static const struct {
1704 int header;
1705 int body;
1706 } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
1707 [ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 },
1708 [ILO_GPE_GEN7_STATE_SIP] = { 0, 2 },
1709 [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 },
1710 [ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 },
1711 [ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 },
1712 [ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 },
1713 [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
1714 [ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 },
1715 [ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 },
1716 [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 },
1717 [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
1718 [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
1719 [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
1720 [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
1721 [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
1722 [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 },
1723 [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 },
1724 [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
1725 [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 },
1726 [ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 },
1727 [ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 },
1728 [ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 },
1729 [ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 },
1730 [ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 },
1731 [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 },
1732 [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 },
1733 [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 },
1734 [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 },
1735 [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 },
1736 [ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 },
1737 [ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 },
1738 [ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 },
1739 [ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 },
1740 [ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 },
1741 [ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 },
1742 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 },
1743 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 },
1744 [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 },
1745 [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 },
1746 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 },
1747 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 },
1748 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 },
1749 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 },
1750 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 },
1751 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 },
1752 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 },
1753 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 },
1754 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 },
1755 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 },
1756 [ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 },
1757 [ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 },
1758 [ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 },
1759 [ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 },
1760 [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
1761 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
1762 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, },
1763 [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 },
1764 [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
1765 [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 },
1766 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 },
1767 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 },
1768 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 },
1769 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 },
1770 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 },
1771 [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 },
1772 [ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 },
1773 [ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 },
1774 [ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 },
1775 };
1776 const int header = gen7_command_size_table[cmd].header;
1777 const int body = gen7_command_size_table[cmd].body;
1778 const int count = arg;
1779
1780 ILO_GPE_VALID_GEN(dev, 7, 7);
1781 assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
1782
1783 return (likely(count)) ? header + body * count : 0;
1784 }
1785
1786 static int
1787 gen7_estimate_state_size(const struct ilo_dev_info *dev,
1788 enum ilo_gpe_gen7_state state,
1789 int arg)
1790 {
1791 static const struct {
1792 int alignment;
1793 int body;
1794 bool is_array;
1795 } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = {
1796 [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
1797 [ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true },
1798 [ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true },
1799 [ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false },
1800 [ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true },
1801 [ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false },
1802 [ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true },
1803 [ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true },
1804 [ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false },
1805 [ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true },
1806 [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false },
1807 [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
1808 };
1809 const int alignment = gen7_state_size_table[state].alignment;
1810 const int body = gen7_state_size_table[state].body;
1811 const bool is_array = gen7_state_size_table[state].is_array;
1812 const int count = arg;
1813 int estimate;
1814
1815 ILO_GPE_VALID_GEN(dev, 7, 7);
1816 assert(state < ILO_GPE_GEN7_STATE_COUNT);
1817
1818 if (likely(count)) {
1819 if (is_array) {
1820 estimate = (alignment - 1) + body * count;
1821 }
1822 else {
1823 estimate = (alignment - 1) + body;
1824 /* all states are aligned */
1825 if (count > 1)
1826 estimate += util_align_npot(body, alignment) * (count - 1);
1827 }
1828 }
1829 else {
1830 estimate = 0;
1831 }
1832
1833 return estimate;
1834 }
1835
1836 static void
1837 gen7_init(struct ilo_gpe_gen7 *gen7)
1838 {
1839 const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
1840
1841 gen7->estimate_command_size = gen7_estimate_command_size;
1842 gen7->estimate_state_size = gen7_estimate_state_size;
1843
1844 #define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
1845 #define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name
1846 GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
1847 GEN7_USE(gen7, STATE_SIP, gen6);
1848 GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
1849 GEN7_USE(gen7, PIPELINE_SELECT, gen6);
1850 GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
1851 GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
1852 GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
1853 GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
1854 GEN7_SET(gen7, GPGPU_WALKER);
1855 GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
1856 GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
1857 GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
1858 GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
1859 GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
1860 GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
1861 GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
1862 GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
1863 GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
1864 GEN7_USE(gen7, 3DSTATE_VS, gen6);
1865 GEN7_SET(gen7, 3DSTATE_GS);
1866 GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
1867 GEN7_SET(gen7, 3DSTATE_SF);
1868 GEN7_SET(gen7, 3DSTATE_WM);
1869 GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
1870 GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
1871 GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
1872 GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
1873 GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
1874 GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
1875 GEN7_SET(gen7, 3DSTATE_HS);
1876 GEN7_SET(gen7, 3DSTATE_TE);
1877 GEN7_SET(gen7, 3DSTATE_DS);
1878 GEN7_SET(gen7, 3DSTATE_STREAMOUT);
1879 GEN7_SET(gen7, 3DSTATE_SBE);
1880 GEN7_SET(gen7, 3DSTATE_PS);
1881 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1882 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1883 GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
1884 GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
1885 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
1886 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
1887 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
1888 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
1889 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
1890 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
1891 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
1892 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
1893 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
1894 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
1895 GEN7_SET(gen7, 3DSTATE_URB_VS);
1896 GEN7_SET(gen7, 3DSTATE_URB_HS);
1897 GEN7_SET(gen7, 3DSTATE_URB_DS);
1898 GEN7_SET(gen7, 3DSTATE_URB_GS);
1899 GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
1900 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
1901 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
1902 GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
1903 GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
1904 GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
1905 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1906 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1907 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1908 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1909 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1910 GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
1911 GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
1912 GEN7_USE(gen7, PIPE_CONTROL, gen6);
1913 GEN7_SET(gen7, 3DPRIMITIVE);
1914 GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
1915 GEN7_SET(gen7, SF_CLIP_VIEWPORT);
1916 GEN7_USE(gen7, CC_VIEWPORT, gen6);
1917 GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
1918 GEN7_USE(gen7, BLEND_STATE, gen6);
1919 GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
1920 GEN7_USE(gen7, SCISSOR_RECT, gen6);
1921 GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
1922 GEN7_USE(gen7, SURFACE_STATE, gen6);
1923 GEN7_USE(gen7, SAMPLER_STATE, gen6);
1924 GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
1925 GEN7_USE(gen7, push_constant_buffer, gen6);
1926 #undef GEN7_USE
1927 #undef GEN7_SET
1928 }
1929
1930 static struct ilo_gpe_gen7 gen7_gpe;
1931
1932 const struct ilo_gpe_gen7 *
1933 ilo_gpe_gen7_get(void)
1934 {
1935 if (!gen7_gpe.estimate_command_size)
1936 gen7_init(&gen7_gpe);
1937
1938 return &gen7_gpe;
1939 }