74ba793dea1569ac2cc6b6dce058cde3c33b36ab
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_resource.h"
29 #include "brw_defines.h"
30 #include "intel_reg.h"
31
32 #include "shader/ilo_shader_internal.h"
33 #include "ilo_cp.h"
34 #include "ilo_format.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_gpe_gen7.h"
38
/*
 * Placeholder for the GPGPU_WALKER command.  Nothing is written to the
 * command parser; the only statement is an assertion that always fails,
 * flagging the command as unsupported.
 */
39 static void
40 gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
41 struct ilo_cp *cp)
42 {
43 assert(!"GPGPU_WALKER unsupported");
44 }
45
/**
 * Emit 3DSTATE_CLEAR_PARAMS.
 *
 * The packet is three DWords long: the header, \p clear_val, and a final
 * DWord holding the constant 1 (presumably the "clear value valid" flag;
 * confirm against the PRM).
 */
static void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x04) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);     /* DW0 */
   ilo_cp_write(cp, clear_val);  /* DW1 */
   ilo_cp_write(cp, 1);          /* DW2 */
   ilo_cp_end(cp);
}
62
/**
 * Shared emitter for the two-DWord 3D packets whose only payload is one
 * 32-bit value.  \p subop selects the concrete command and \p pointer is
 * written unmodified as the second DWord.
 */
static void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
78
/*
 * Emit 3DSTATE_CC_STATE_POINTERS: forwards color_calc_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x0e.
 */
79 static void
80 gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
81 uint32_t color_calc_state,
82 struct ilo_cp *cp)
83 {
84 gen7_emit_3dstate_pointer(dev, 0x0e, color_calc_state, cp);
85 }
86
87 static void
88 gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
89 const struct ilo_shader *gs,
90 int num_samplers,
91 struct ilo_cp *cp)
92 {
93 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
94 const uint8_t cmd_len = 7;
95 uint32_t dw2, dw4, dw5;
96 int max_threads;
97
98 ILO_GPE_VALID_GEN(dev, 7, 7);
99
100 switch (dev->gen) {
101 case ILO_GEN(7):
102 max_threads = (dev->gt == 2) ? 128 : 36;
103 break;
104 default:
105 max_threads = 1;
106 break;
107 }
108
109 if (!gs) {
110 ilo_cp_begin(cp, cmd_len);
111 ilo_cp_write(cp, cmd | (cmd_len - 2));
112 ilo_cp_write(cp, 0);
113 ilo_cp_write(cp, 0);
114 ilo_cp_write(cp, 0);
115 ilo_cp_write(cp, 0);
116 ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
117 ilo_cp_write(cp, 0);
118 ilo_cp_end(cp);
119 return;
120 }
121
122 dw2 = ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
123
124 dw4 = ((gs->in.count + 1) / 2) << GEN6_GS_URB_READ_LENGTH_SHIFT |
125 GEN7_GS_INCLUDE_VERTEX_HANDLES |
126 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
127 gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
128
129 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
130 GEN6_GS_STATISTICS_ENABLE |
131 GEN6_GS_ENABLE;
132
133 ilo_cp_begin(cp, cmd_len);
134 ilo_cp_write(cp, cmd | (cmd_len - 2));
135 ilo_cp_write(cp, gs->cache_offset);
136 ilo_cp_write(cp, dw2);
137 ilo_cp_write(cp, 0); /* scratch */
138 ilo_cp_write(cp, dw4);
139 ilo_cp_write(cp, dw5);
140 ilo_cp_write(cp, 0);
141 ilo_cp_end(cp);
142 }
143
144 static void
145 gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
146 const struct ilo_rasterizer_state *rasterizer,
147 const struct pipe_surface *zs_surf,
148 struct ilo_cp *cp)
149 {
150 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
151 const uint8_t cmd_len = 7;
152 const int num_samples = 1;
153 uint32_t payload[6];
154
155 ILO_GPE_VALID_GEN(dev, 7, 7);
156
157 ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
158 &rasterizer->sf, num_samples,
159 (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
160 payload, Elements(payload));
161
162 ilo_cp_begin(cp, cmd_len);
163 ilo_cp_write(cp, cmd | (cmd_len - 2));
164 ilo_cp_write_multi(cp, payload, 6);
165 ilo_cp_end(cp);
166 }
167
168 static void
169 gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
170 const struct ilo_shader *fs,
171 const struct pipe_rasterizer_state *rasterizer,
172 bool cc_may_kill,
173 struct ilo_cp *cp)
174 {
175 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
176 const uint8_t cmd_len = 3;
177 const int num_samples = 1;
178 uint32_t dw1, dw2;
179
180 ILO_GPE_VALID_GEN(dev, 7, 7);
181
182 dw1 = GEN7_WM_STATISTICS_ENABLE |
183 GEN7_WM_LINE_AA_WIDTH_2_0;
184
185 if (false) {
186 dw1 |= GEN7_WM_DEPTH_CLEAR;
187 dw1 |= GEN7_WM_DEPTH_RESOLVE;
188 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
189 }
190
191 if (fs) {
192 /*
193 * Set this bit if
194 *
195 * a) fs writes colors and color is not masked, or
196 * b) fs writes depth, or
197 * c) fs or cc kills
198 */
199 dw1 |= GEN7_WM_DISPATCH_ENABLE;
200
201 /*
202 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
203 *
204 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
205 * the PS kernel or color calculator has the ability to kill
206 * (discard) pixels or samples, other than due to depth or stencil
207 * testing. This bit is required to be ENABLED in the following
208 * situations:
209 *
210 * - The API pixel shader program contains "killpix" or "discard"
211 * instructions, or other code in the pixel shader kernel that
212 * can cause the final pixel mask to differ from the pixel mask
213 * received on dispatch.
214 *
215 * - A sampler with chroma key enabled with kill pixel mode is used
216 * by the pixel shader.
217 *
218 * - Any render target has Alpha Test Enable or AlphaToCoverage
219 * Enable enabled.
220 *
221 * - The pixel shader kernel generates and outputs oMask.
222 *
223 * Note: As ClipDistance clipping is fully supported in hardware
224 * and therefore not via PS instructions, there should be no need
225 * to ENABLE this bit due to ClipDistance clipping."
226 */
227 if (fs->has_kill || cc_may_kill)
228 dw1 |= GEN7_WM_KILL_ENABLE;
229
230 if (fs->out.has_pos)
231 dw1 |= GEN7_WM_PSCDEPTH_ON;
232 if (fs->in.has_pos)
233 dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
234
235 dw1 |= fs->in.barycentric_interpolation_mode <<
236 GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
237 }
238 else if (cc_may_kill) {
239 dw1 |= GEN7_WM_DISPATCH_ENABLE |
240 GEN7_WM_KILL_ENABLE;
241 }
242
243 dw1 |= GEN7_WM_POSITION_ZW_PIXEL;
244
245 /* same value as in 3DSTATE_SF */
246 if (rasterizer->line_smooth)
247 dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
248
249 if (rasterizer->poly_stipple_enable)
250 dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
251 if (rasterizer->line_stipple_enable)
252 dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
253
254 if (rasterizer->bottom_edge_rule)
255 dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
256
257 if (num_samples > 1) {
258 if (rasterizer->multisample)
259 dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
260 else
261 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
262
263 dw2 = GEN7_WM_MSDISPMODE_PERPIXEL;
264 }
265 else {
266 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
267
268 dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
269 }
270
271 ilo_cp_begin(cp, cmd_len);
272 ilo_cp_write(cp, cmd | (cmd_len - 2));
273 ilo_cp_write(cp, dw1);
274 ilo_cp_write(cp, dw2);
275 ilo_cp_end(cp);
276 }
277
/**
 * Shared emitter for the 3DSTATE_CONSTANT_{VS,HS,DS,GS,PS} packets.
 *
 * \param subop     command sub-opcode; must be 0x15..0x1a excluding 0x18
 * \param bufs      per-buffer values written to DW3..DW6; each must be a
 *                  multiple of 32
 * \param sizes     per-buffer sizes in bytes
 * \param num_bufs  number of valid entries in \p bufs / \p sizes (at most 4)
 *
 * Buffers are enabled in order; the first slot that is past \p num_bufs or
 * has a zero size ends the enabled range, and all later slots must be
 * disabled as well.
 */
static void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint8_t cmd_len = 7;
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   uint32_t dw[6] = { 0 };
   int total_read_length = 0;
   int slot, rest;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
    *
    *     "Constant buffers must be enabled in order from Constant Buffer 0
    *      to Constant Buffer 3 within this command.  For example, it is
    *      not allowed to enable Constant Buffer 1 by programming a
    *      non-zero value in the VS Constant Buffer 1 Read Length without a
    *      non-zero value in VS Constant Buffer 0 Read Length."
    */
   for (slot = 0; slot < 4 && slot < num_bufs && sizes[slot]; slot++) {
      /* read lengths are in 256-bit units */
      const int read_len = (sizes[slot] + 31) / 32;

      /* the lower 5 bits are used for memory object control state */
      assert(bufs[slot] % 32 == 0);

      /* two 16-bit read lengths are packed per DWord */
      dw[slot / 2] |= read_len << ((slot & 1) * 16);
      dw[2 + slot] = bufs[slot];

      total_read_length += read_len;
   }

   /* every slot after the enabled range must be disabled */
   for (rest = slot; rest < 4; rest++)
      assert(rest >= num_bufs || !sizes[rest]);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(total_read_length <= 64);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, dw, 6);
   ilo_cp_end(cp);
}
345
/*
 * Emit 3DSTATE_CONSTANT_VS: forwards all arguments to
 * gen7_emit_3dstate_constant() with sub-opcode 0x15.
 */
346 static void
347 gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
348 const uint32_t *bufs, const int *sizes,
349 int num_bufs,
350 struct ilo_cp *cp)
351 {
352 gen7_emit_3dstate_constant(dev, 0x15, bufs, sizes, num_bufs, cp);
353 }
354
/*
 * Emit 3DSTATE_CONSTANT_GS: forwards all arguments to
 * gen7_emit_3dstate_constant() with sub-opcode 0x16.
 */
355 static void
356 gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
357 const uint32_t *bufs, const int *sizes,
358 int num_bufs,
359 struct ilo_cp *cp)
360 {
361 gen7_emit_3dstate_constant(dev, 0x16, bufs, sizes, num_bufs, cp);
362 }
363
/*
 * Emit 3DSTATE_CONSTANT_PS: forwards all arguments to
 * gen7_emit_3dstate_constant() with sub-opcode 0x17.
 */
364 static void
365 gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
366 const uint32_t *bufs, const int *sizes,
367 int num_bufs,
368 struct ilo_cp *cp)
369 {
370 gen7_emit_3dstate_constant(dev, 0x17, bufs, sizes, num_bufs, cp);
371 }
372
/**
 * Emit 3DSTATE_SAMPLE_MASK.
 *
 * \p sample_mask is trimmed to the low \p num_samples bits before it is
 * written; bit 0 is always kept, even when \p num_samples is zero.
 */
static void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (cmd_len - 2);
   unsigned allowed_bits = (1 << num_samples) - 1;

   /* sample 0 is always selectable */
   allowed_bits |= 0x1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & allowed_bits);
   ilo_cp_end(cp);
}
401
/*
 * Emit 3DSTATE_CONSTANT_HS: forwards all arguments to
 * gen7_emit_3dstate_constant() with sub-opcode 0x19.
 */
402 static void
403 gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
404 const uint32_t *bufs, const int *sizes,
405 int num_bufs,
406 struct ilo_cp *cp)
407 {
408 gen7_emit_3dstate_constant(dev, 0x19, bufs, sizes, num_bufs, cp);
409 }
410
/*
 * Emit 3DSTATE_CONSTANT_DS: forwards all arguments to
 * gen7_emit_3dstate_constant() with sub-opcode 0x1a.
 */
411 static void
412 gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
413 const uint32_t *bufs, const int *sizes,
414 int num_bufs,
415 struct ilo_cp *cp)
416 {
417 gen7_emit_3dstate_constant(dev, 0x1a, bufs, sizes, num_bufs, cp);
418 }
419
420 static void
421 gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
422 const struct ilo_shader *hs,
423 int max_threads, int num_samplers,
424 struct ilo_cp *cp)
425 {
426 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
427 const uint8_t cmd_len = 7;
428 uint32_t dw1, dw2, dw5;
429
430 ILO_GPE_VALID_GEN(dev, 7, 7);
431
432 if (!hs) {
433 ilo_cp_begin(cp, cmd_len);
434 ilo_cp_write(cp, cmd | (cmd_len - 2));
435 ilo_cp_write(cp, 0);
436 ilo_cp_write(cp, 0);
437 ilo_cp_write(cp, 0);
438 ilo_cp_write(cp, 0);
439 ilo_cp_write(cp, 0);
440 ilo_cp_write(cp, 0);
441 ilo_cp_end(cp);
442
443 return;
444 }
445
446 dw1 = (num_samplers + 3) / 4 << 27 |
447 0 << 18 |
448 (max_threads - 1);
449 if (false)
450 dw1 |= 1 << 16;
451
452 dw2 = 1 << 31 | /* HS Enable */
453 1 << 29 | /* HS Statistics Enable */
454 0; /* Instance Count */
455
456 dw5 = hs->in.start_grf << 19 |
457 0 << 11 |
458 0 << 4;
459
460 ilo_cp_begin(cp, cmd_len);
461 ilo_cp_write(cp, cmd | (cmd_len - 2));
462 ilo_cp_write(cp, dw1);
463 ilo_cp_write(cp, dw2);
464 ilo_cp_write(cp, hs->cache_offset);
465 ilo_cp_write(cp, 0);
466 ilo_cp_write(cp, dw5);
467 ilo_cp_write(cp, 0);
468 ilo_cp_end(cp);
469 }
470
/**
 * Emit 3DSTATE_TE with every payload DWord set to zero.
 */
static void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1c) | (cmd_len - 2);
   int dw;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* DW1..DW3 are all zero */
   for (dw = 1; dw < cmd_len; dw++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
487
488 static void
489 gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
490 const struct ilo_shader *ds,
491 int max_threads, int num_samplers,
492 struct ilo_cp *cp)
493 {
494 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
495 const uint8_t cmd_len = 6;
496 uint32_t dw2, dw4, dw5;
497
498 ILO_GPE_VALID_GEN(dev, 7, 7);
499
500 if (!ds) {
501 ilo_cp_begin(cp, cmd_len);
502 ilo_cp_write(cp, cmd | (cmd_len - 2));
503 ilo_cp_write(cp, 0);
504 ilo_cp_write(cp, 0);
505 ilo_cp_write(cp, 0);
506 ilo_cp_write(cp, 0);
507 ilo_cp_write(cp, 0);
508 ilo_cp_end(cp);
509
510 return;
511 }
512
513 dw2 = (num_samplers + 3) / 4 << 27 |
514 0 << 18 |
515 (max_threads - 1);
516 if (false)
517 dw2 |= 1 << 16;
518
519 dw4 = ds->in.start_grf << 20 |
520 0 << 11 |
521 0 << 4;
522
523 dw5 = (max_threads - 1) << 25 |
524 1 << 10 |
525 1;
526
527 ilo_cp_begin(cp, cmd_len);
528 ilo_cp_write(cp, cmd | (cmd_len - 2));
529 ilo_cp_write(cp, ds->cache_offset);
530 ilo_cp_write(cp, dw2);
531 ilo_cp_write(cp, 0);
532 ilo_cp_write(cp, dw4);
533 ilo_cp_write(cp, dw5);
534 ilo_cp_end(cp);
535 }
536
537 static void
538 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
539 unsigned buffer_mask,
540 int vertex_attrib_count,
541 bool rasterizer_discard,
542 struct ilo_cp *cp)
543 {
544 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
545 const uint8_t cmd_len = 3;
546 const bool enable = (buffer_mask != 0);
547 uint32_t dw1, dw2;
548 int read_len;
549
550 ILO_GPE_VALID_GEN(dev, 7, 7);
551
552 if (!enable) {
553 dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
554 if (rasterizer_discard)
555 dw1 |= SO_RENDERING_DISABLE;
556
557 dw2 = 0;
558
559 ilo_cp_begin(cp, cmd_len);
560 ilo_cp_write(cp, cmd | (cmd_len - 2));
561 ilo_cp_write(cp, dw1);
562 ilo_cp_write(cp, dw2);
563 ilo_cp_end(cp);
564 return;
565 }
566
567 read_len = (vertex_attrib_count + 1) / 2;
568 if (!read_len)
569 read_len = 1;
570
571 dw1 = SO_FUNCTION_ENABLE |
572 0 << SO_RENDER_STREAM_SELECT_SHIFT |
573 SO_STATISTICS_ENABLE |
574 buffer_mask << 8;
575
576 if (rasterizer_discard)
577 dw1 |= SO_RENDERING_DISABLE;
578
579 /* API_OPENGL */
580 if (true)
581 dw1 |= SO_REORDER_TRAILING;
582
583 dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
584 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
585 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
586 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
587 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
588 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
589 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
590 (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
591
592 ilo_cp_begin(cp, cmd_len);
593 ilo_cp_write(cp, cmd | (cmd_len - 2));
594 ilo_cp_write(cp, dw1);
595 ilo_cp_write(cp, dw2);
596 ilo_cp_end(cp);
597 }
598
/**
 * Emit 3DSTATE_SBE.
 *
 * All 13 payload DWords are filled in by ilo_gpe_gen6_fill_3dstate_sf_sbe()
 * from the rasterizer state, the fragment shader \p fs and the last
 * enabled shader stage \p last_sh.
 */
static void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct pipe_rasterizer_state *rasterizer,
                      const struct ilo_shader *fs,
                      const struct ilo_shader *last_sh,
                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 14;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1f) | (cmd_len - 2);
   uint32_t payload[13];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer, fs, last_sh,
         payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, payload, 13);
   ilo_cp_end(cp);
}
620
621 static void
622 gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
623 const struct ilo_shader *fs,
624 int num_samplers, bool dual_blend,
625 struct ilo_cp *cp)
626 {
627 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
628 const uint8_t cmd_len = 8;
629 uint32_t dw2, dw4, dw5;
630 int max_threads;
631
632 ILO_GPE_VALID_GEN(dev, 7, 7);
633
634 /* see brwCreateContext() */
635 max_threads = (dev->gt == 2) ? 172 : 48;
636
637 if (!fs) {
638 ilo_cp_begin(cp, cmd_len);
639 ilo_cp_write(cp, cmd | (cmd_len - 2));
640 ilo_cp_write(cp, 0);
641 ilo_cp_write(cp, 0);
642 ilo_cp_write(cp, 0);
643 /* GPU hangs if none of the dispatch enable bits is set */
644 ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
645 GEN7_PS_8_DISPATCH_ENABLE);
646 ilo_cp_write(cp, 0);
647 ilo_cp_write(cp, 0);
648 ilo_cp_write(cp, 0);
649 ilo_cp_end(cp);
650
651 return;
652 }
653
654 dw2 = (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT |
655 0 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
656 if (false)
657 dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
658
659 dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
660 GEN7_PS_POSOFFSET_NONE;
661
662 if (false)
663 dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
664 if (fs->in.count)
665 dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
666 if (dual_blend)
667 dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
668
669 if (fs->dispatch_16)
670 dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
671 else
672 dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
673
674 dw5 = fs->in.start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
675 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
676 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
677
678 ilo_cp_begin(cp, cmd_len);
679 ilo_cp_write(cp, cmd | (cmd_len - 2));
680 ilo_cp_write(cp, fs->cache_offset);
681 ilo_cp_write(cp, dw2);
682 ilo_cp_write(cp, 0); /* scratch */
683 ilo_cp_write(cp, dw4);
684 ilo_cp_write(cp, dw5);
685 ilo_cp_write(cp, 0); /* kernel 1 */
686 ilo_cp_write(cp, 0); /* kernel 2 */
687 ilo_cp_end(cp);
688 }
689
/*
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: forwards sf_clip_viewport
 * to gen7_emit_3dstate_pointer() with sub-opcode 0x21.
 */
690 static void
691 gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
692 uint32_t sf_clip_viewport,
693 struct ilo_cp *cp)
694 {
695 gen7_emit_3dstate_pointer(dev, 0x21, sf_clip_viewport, cp);
696 }
697
/*
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC: forwards cc_viewport to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x23.
 */
698 static void
699 gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
700 uint32_t cc_viewport,
701 struct ilo_cp *cp)
702 {
703 gen7_emit_3dstate_pointer(dev, 0x23, cc_viewport, cp);
704 }
705
/*
 * Emit 3DSTATE_BLEND_STATE_POINTERS: forwards blend_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x24.
 */
706 static void
707 gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
708 uint32_t blend_state,
709 struct ilo_cp *cp)
710 {
711 gen7_emit_3dstate_pointer(dev, 0x24, blend_state, cp);
712 }
713
/*
 * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: forwards depth_stencil_state
 * to gen7_emit_3dstate_pointer() with sub-opcode 0x25.
 */
714 static void
715 gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
716 uint32_t depth_stencil_state,
717 struct ilo_cp *cp)
718 {
719 gen7_emit_3dstate_pointer(dev, 0x25, depth_stencil_state, cp);
720 }
721
/*
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x26.
 */
722 static void
723 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
724 uint32_t binding_table,
725 struct ilo_cp *cp)
726 {
727 gen7_emit_3dstate_pointer(dev, 0x26, binding_table, cp);
728 }
729
/*
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x27.
 */
730 static void
731 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
732 uint32_t binding_table,
733 struct ilo_cp *cp)
734 {
735 gen7_emit_3dstate_pointer(dev, 0x27, binding_table, cp);
736 }
737
/*
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x28.
 */
738 static void
739 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
740 uint32_t binding_table,
741 struct ilo_cp *cp)
742 {
743 gen7_emit_3dstate_pointer(dev, 0x28, binding_table, cp);
744 }
745
/*
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x29.
 */
746 static void
747 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
748 uint32_t binding_table,
749 struct ilo_cp *cp)
750 {
751 gen7_emit_3dstate_pointer(dev, 0x29, binding_table, cp);
752 }
753
/*
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS: forwards binding_table to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2a.
 */
754 static void
755 gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
756 uint32_t binding_table,
757 struct ilo_cp *cp)
758 {
759 gen7_emit_3dstate_pointer(dev, 0x2a, binding_table, cp);
760 }
761
/*
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2b.
 */
762 static void
763 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
764 uint32_t sampler_state,
765 struct ilo_cp *cp)
766 {
767 gen7_emit_3dstate_pointer(dev, 0x2b, sampler_state, cp);
768 }
769
/*
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2c.
 */
770 static void
771 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
772 uint32_t sampler_state,
773 struct ilo_cp *cp)
774 {
775 gen7_emit_3dstate_pointer(dev, 0x2c, sampler_state, cp);
776 }
777
/*
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2d.
 */
778 static void
779 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
780 uint32_t sampler_state,
781 struct ilo_cp *cp)
782 {
783 gen7_emit_3dstate_pointer(dev, 0x2d, sampler_state, cp);
784 }
785
/*
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2e.
 */
786 static void
787 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
788 uint32_t sampler_state,
789 struct ilo_cp *cp)
790 {
791 gen7_emit_3dstate_pointer(dev, 0x2e, sampler_state, cp);
792 }
793
/*
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS: forwards sampler_state to
 * gen7_emit_3dstate_pointer() with sub-opcode 0x2f.
 */
794 static void
795 gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
796 uint32_t sampler_state,
797 struct ilo_cp *cp)
798 {
799 gen7_emit_3dstate_pointer(dev, 0x2f, sampler_state, cp);
800 }
801
802 static void
803 gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
804 int subop, int offset, int size,
805 int entry_size,
806 struct ilo_cp *cp)
807 {
808 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
809 const uint8_t cmd_len = 2;
810 const int row_size = 64; /* 512 bits */
811 int alloc_size, num_entries, min_entries, max_entries;
812
813 ILO_GPE_VALID_GEN(dev, 7, 7);
814
815 /* VS, HS, DS, and GS variants */
816 assert(subop >= 0x30 && subop <= 0x33);
817
818 /* in multiples of 8KB */
819 assert(offset % 8192 == 0);
820 offset /= 8192;
821
822 /* in multiple of 512-bit rows */
823 alloc_size = (entry_size + row_size - 1) / row_size;
824 if (!alloc_size)
825 alloc_size = 1;
826
827 /*
828 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
829 *
830 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
831 * cause performance to decrease due to banking in the URB. Element
832 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
833 */
834 if (subop == 0x30 && alloc_size == 5)
835 alloc_size = 6;
836
837 /* in multiples of 8 */
838 num_entries = (size / row_size / alloc_size) & ~7;
839
840 switch (subop) {
841 case 0x30: /* 3DSTATE_URB_VS */
842 min_entries = 32;
843 max_entries = (dev->gt == 2) ? 704 : 512;
844
845 assert(num_entries >= min_entries);
846 if (num_entries > max_entries)
847 num_entries = max_entries;
848 break;
849 case 0x31: /* 3DSTATE_URB_HS */
850 max_entries = (dev->gt == 2) ? 64 : 32;
851 if (num_entries > max_entries)
852 num_entries = max_entries;
853 break;
854 case 0x32: /* 3DSTATE_URB_DS */
855 if (num_entries)
856 assert(num_entries >= 138);
857 break;
858 case 0x33: /* 3DSTATE_URB_GS */
859 max_entries = (dev->gt == 2) ? 320 : 192;
860 if (num_entries > max_entries)
861 num_entries = max_entries;
862 break;
863 default:
864 break;
865 }
866
867 ilo_cp_begin(cp, cmd_len);
868 ilo_cp_write(cp, cmd | (cmd_len - 2));
869 ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
870 (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
871 num_entries);
872 ilo_cp_end(cp);
873 }
874
/*
 * Emit 3DSTATE_URB_VS: forwards all arguments to gen7_emit_3dstate_urb()
 * with sub-opcode 0x30.
 */
875 static void
876 gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
877 int offset, int size, int entry_size,
878 struct ilo_cp *cp)
879 {
880 gen7_emit_3dstate_urb(dev, 0x30, offset, size, entry_size, cp);
881 }
882
/*
 * Emit 3DSTATE_URB_HS: forwards all arguments to gen7_emit_3dstate_urb()
 * with sub-opcode 0x31.
 */
883 static void
884 gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
885 int offset, int size, int entry_size,
886 struct ilo_cp *cp)
887 {
888 gen7_emit_3dstate_urb(dev, 0x31, offset, size, entry_size, cp);
889 }
890
/*
 * Emit 3DSTATE_URB_DS: forwards all arguments to gen7_emit_3dstate_urb()
 * with sub-opcode 0x32.
 */
891 static void
892 gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
893 int offset, int size, int entry_size,
894 struct ilo_cp *cp)
895 {
896 gen7_emit_3dstate_urb(dev, 0x32, offset, size, entry_size, cp);
897 }
898
/*
 * Emit 3DSTATE_URB_GS: forwards all arguments to gen7_emit_3dstate_urb()
 * with sub-opcode 0x33.
 */
899 static void
900 gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
901 int offset, int size, int entry_size,
902 struct ilo_cp *cp)
903 {
904 gen7_emit_3dstate_urb(dev, 0x33, offset, size, entry_size, cp);
905 }
906
907 static void
908 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
909 int subop, int offset, int size,
910 struct ilo_cp *cp)
911 {
912 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
913 const uint8_t cmd_len = 2;
914 int end;
915
916 ILO_GPE_VALID_GEN(dev, 7, 7);
917
918 /* VS, HS, DS, GS, and PS variants */
919 assert(subop >= 0x12 && subop <= 0x16);
920
921 /*
922 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
923 *
924 * "(A table that says the maximum size of each constant buffer is
925 * 16KB")
926 *
927 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
928 *
929 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
930 * may not exceed the maximum value of the Constant Buffer Size."
931 *
932 * Thus, the valid range of buffer end is [0KB, 16KB].
933 */
934 end = (offset + size) / 1024;
935 if (end > 16) {
936 assert(!"invalid constant buffer end");
937 end = 16;
938 }
939
940 /* the valid range of buffer offset is [0KB, 15KB] */
941 offset = (offset + 1023) / 1024;
942 if (offset > 15) {
943 assert(!"invalid constant buffer offset");
944 offset = 15;
945 }
946
947 if (offset > end) {
948 assert(!size);
949 offset = end;
950 }
951
952 /* the valid range of buffer size is [0KB, 15KB] */
953 size = end - offset;
954 if (size > 15) {
955 assert(!"invalid constant buffer size");
956 size = 15;
957 }
958
959 ilo_cp_begin(cp, cmd_len);
960 ilo_cp_write(cp, cmd | (cmd_len - 2));
961 ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
962 size);
963 ilo_cp_end(cp);
964 }
965
/*
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS: forwards offset and size to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x12.
 */
966 static void
967 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
968 int offset, int size,
969 struct ilo_cp *cp)
970 {
971 gen7_emit_3dstate_push_constant_alloc(dev, 0x12, offset, size, cp);
972 }
973
/*
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS: forwards offset and size to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x13.
 */
974 static void
975 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
976 int offset, int size,
977 struct ilo_cp *cp)
978 {
979 gen7_emit_3dstate_push_constant_alloc(dev, 0x13, offset, size, cp);
980 }
981
/*
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS: forwards offset and size to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x14.
 */
982 static void
983 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
984 int offset, int size,
985 struct ilo_cp *cp)
986 {
987 gen7_emit_3dstate_push_constant_alloc(dev, 0x14, offset, size, cp);
988 }
989
/*
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS: forwards offset and size to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x15.
 */
990 static void
991 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
992 int offset, int size,
993 struct ilo_cp *cp)
994 {
995 gen7_emit_3dstate_push_constant_alloc(dev, 0x15, offset, size, cp);
996 }
997
/*
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS: forwards offset and size to
 * gen7_emit_3dstate_push_constant_alloc() with sub-opcode 0x16.
 */
998 static void
999 gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
1000 int offset, int size,
1001 struct ilo_cp *cp)
1002 {
1003 gen7_emit_3dstate_push_constant_alloc(dev, 0x16, offset, size, cp);
1004 }
1005
1006 static void
1007 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
1008 const struct pipe_stream_output_info *so_info,
1009 const struct ilo_shader *sh,
1010 struct ilo_cp *cp)
1011 {
1012 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
1013 uint16_t cmd_len;
1014 int buffer_selects, num_entries, i;
1015 uint16_t so_decls[128];
1016
1017 ILO_GPE_VALID_GEN(dev, 7, 7);
1018
1019 buffer_selects = 0;
1020 num_entries = 0;
1021
1022 if (so_info) {
1023 int buffer_offsets[PIPE_MAX_SO_BUFFERS];
1024
1025 memset(buffer_offsets, 0, sizeof(buffer_offsets));
1026
1027 for (i = 0; i < so_info->num_outputs; i++) {
1028 unsigned decl, buf, attr, mask;
1029
1030 buf = so_info->output[i].output_buffer;
1031
1032 /* pad with holes */
1033 assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
1034 while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
1035 int num_dwords;
1036
1037 num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
1038 if (num_dwords > 4)
1039 num_dwords = 4;
1040
1041 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
1042 SO_DECL_HOLE_FLAG |
1043 ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
1044
1045 so_decls[num_entries++] = decl;
1046 buffer_offsets[buf] += num_dwords;
1047 }
1048
1049 /* figure out which attribute is sourced */
1050 for (attr = 0; attr < sh->out.count; attr++) {
1051 const int idx = sh->out.register_indices[attr];
1052 if (idx == so_info->output[i].register_index)
1053 break;
1054 }
1055
1056 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
1057
1058 if (attr < sh->out.count) {
1059 mask = ((1 << so_info->output[i].num_components) - 1) <<
1060 so_info->output[i].start_component;
1061
1062 /* PSIZE is at W channel */
1063 if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
1064 assert(mask == 0x1);
1065 mask = (mask << 3) & 0xf;
1066 }
1067
1068 decl |= attr << SO_DECL_REGISTER_INDEX_SHIFT |
1069 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1070 }
1071 else {
1072 assert(!"stream output an undefined register");
1073 mask = (1 << so_info->output[i].num_components) - 1;
1074 decl |= SO_DECL_HOLE_FLAG |
1075 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1076 }
1077
1078 so_decls[num_entries++] = decl;
1079 buffer_selects |= 1 << buf;
1080 buffer_offsets[buf] += so_info->output[i].num_components;
1081 }
1082 }
1083
1084 /*
1085 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
1086 *
1087 * "Errata: All 128 decls for all four streams must be included
1088 * whenever this command is issued. The "Num Entries [n]" fields still
1089 * contain the actual numbers of valid decls."
1090 *
1091 * Also note that "DWord Length" has 9 bits for this command, and the type
1092 * of cmd_len is thus uint16_t.
1093 */
1094 cmd_len = 2 * 128 + 3;
1095
1096 ilo_cp_begin(cp, cmd_len);
1097 ilo_cp_write(cp, cmd | (cmd_len - 2));
1098 ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
1099 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
1100 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
1101 buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
1102 ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
1103 0 << SO_NUM_ENTRIES_2_SHIFT |
1104 0 << SO_NUM_ENTRIES_1_SHIFT |
1105 num_entries << SO_NUM_ENTRIES_0_SHIFT);
1106
1107 for (i = 0; i < num_entries; i++) {
1108 ilo_cp_write(cp, so_decls[i]);
1109 ilo_cp_write(cp, 0);
1110 }
1111 for (; i < 128; i++) {
1112 ilo_cp_write(cp, 0);
1113 ilo_cp_write(cp, 0);
1114 }
1115
1116 ilo_cp_end(cp);
1117 }
1118
1119 static void
1120 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
1121 int index, int base, int stride,
1122 const struct pipe_stream_output_target *so_target,
1123 struct ilo_cp *cp)
1124 {
1125 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
1126 const uint8_t cmd_len = 4;
1127 struct ilo_buffer *buf;
1128 int end;
1129
1130 ILO_GPE_VALID_GEN(dev, 7, 7);
1131
1132 if (!so_target || !so_target->buffer) {
1133 ilo_cp_begin(cp, cmd_len);
1134 ilo_cp_write(cp, cmd | (cmd_len - 2));
1135 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
1136 ilo_cp_write(cp, 0);
1137 ilo_cp_write(cp, 0);
1138 ilo_cp_end(cp);
1139 return;
1140 }
1141
1142 buf = ilo_buffer(so_target->buffer);
1143
1144 /* DWord-aligned */
1145 assert(stride % 4 == 0 && base % 4 == 0);
1146 assert(so_target->buffer_offset % 4 == 0);
1147
1148 stride &= ~3;
1149 base = (base + so_target->buffer_offset) & ~3;
1150 end = (base + so_target->buffer_size) & ~3;
1151
1152 ilo_cp_begin(cp, cmd_len);
1153 ilo_cp_write(cp, cmd | (cmd_len - 2));
1154 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
1155 stride);
1156 ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1157 ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1158 ilo_cp_end(cp);
1159 }
1160
1161 static void
1162 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1163 const struct pipe_draw_info *info,
1164 bool rectlist,
1165 struct ilo_cp *cp)
1166 {
1167 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1168 const uint8_t cmd_len = 7;
1169 const int prim = (rectlist) ?
1170 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1171 const int vb_access = (info->indexed) ?
1172 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1173 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1174
1175 ILO_GPE_VALID_GEN(dev, 7, 7);
1176
1177 ilo_cp_begin(cp, cmd_len);
1178 ilo_cp_write(cp, cmd | (cmd_len - 2));
1179 ilo_cp_write(cp, vb_access | prim);
1180 ilo_cp_write(cp, info->count);
1181 ilo_cp_write(cp, info->start);
1182 ilo_cp_write(cp, info->instance_count);
1183 ilo_cp_write(cp, info->start_instance);
1184 ilo_cp_write(cp, info->index_bias);
1185 ilo_cp_end(cp);
1186 }
1187
1188 static uint32_t
1189 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1190 const struct ilo_viewport_cso *viewports,
1191 unsigned num_viewports,
1192 struct ilo_cp *cp)
1193 {
1194 const int state_align = 64 / 4;
1195 const int state_len = 16 * num_viewports;
1196 uint32_t state_offset, *dw;
1197 unsigned i;
1198
1199 ILO_GPE_VALID_GEN(dev, 7, 7);
1200
1201 /*
1202 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1203 *
1204 * "The viewport-specific state used by both the SF and CL units
1205 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1206 * of which contains the DWords described below. The start of each
1207 * element is spaced 16 DWords apart. The location of first element of
1208 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1209 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1210 */
1211 assert(num_viewports && num_viewports <= 16);
1212
1213 dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
1214 state_len, state_align, &state_offset);
1215
1216 for (i = 0; i < num_viewports; i++) {
1217 const struct ilo_viewport_cso *vp = &viewports[i];
1218
1219 dw[0] = fui(vp->m00);
1220 dw[1] = fui(vp->m11);
1221 dw[2] = fui(vp->m22);
1222 dw[3] = fui(vp->m30);
1223 dw[4] = fui(vp->m31);
1224 dw[5] = fui(vp->m32);
1225 dw[6] = 0;
1226 dw[7] = 0;
1227 dw[8] = fui(vp->min_gbx);
1228 dw[9] = fui(vp->max_gbx);
1229 dw[10] = fui(vp->min_gby);
1230 dw[11] = fui(vp->max_gby);
1231 dw[12] = 0;
1232 dw[13] = 0;
1233 dw[14] = 0;
1234 dw[15] = 0;
1235
1236 dw += 16;
1237 }
1238
1239 return state_offset;
1240 }
1241
1242 void
1243 ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
1244 unsigned width, unsigned height,
1245 unsigned depth, unsigned level,
1246 struct ilo_view_surface *surf)
1247 {
1248 uint32_t *dw;
1249
1250 ILO_GPE_VALID_GEN(dev, 7, 7);
1251
1252 /*
1253 * From the Ivy Bridge PRM, volume 4 part 1, page 62:
1254 *
1255 * "A null surface is used in instances where an actual surface is not
1256 * bound. When a write message is generated to a null surface, no
1257 * actual surface is written to. When a read message (including any
1258 * sampling engine message) is generated to a null surface, the result
1259 * is all zeros. Note that a null surface type is allowed to be used
1260 * with all messages, even if it is not specificially indicated as
1261 * supported. All of the remaining fields in surface state are ignored
1262 * for null surfaces, with the following exceptions:
1263 *
1264 * * Width, Height, Depth, LOD, and Render Target View Extent fields
1265 * must match the depth buffer's corresponding state for all render
1266 * target surfaces, including null.
1267 * * All sampling engine and data port messages support null surfaces
1268 * with the above behavior, even if not mentioned as specifically
1269 * supported, except for the following:
1270 * * Data Port Media Block Read/Write messages.
1271 * * The Surface Type of a surface used as a render target (accessed
1272 * via the Data Port's Render Target Write message) must be the same
1273 * as the Surface Type of all other render targets and of the depth
1274 * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
1275 * buffer or render targets are SURFTYPE_NULL."
1276 *
1277 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
1278 *
1279 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1280 * true"
1281 */
1282
1283 STATIC_ASSERT(Elements(surf->payload) >= 8);
1284 dw = surf->payload;
1285
1286 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1287 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
1288 BRW_SURFACE_TILED << 13;
1289
1290 dw[1] = 0;
1291
1292 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1293 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1294
1295 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
1296
1297 dw[4] = 0;
1298 dw[5] = level;
1299
1300 dw[6] = 0;
1301 dw[7] = 0;
1302
1303 surf->bo = NULL;
1304 }
1305
1306 void
1307 ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
1308 const struct ilo_buffer *buf,
1309 unsigned offset, unsigned size,
1310 unsigned struct_size,
1311 enum pipe_format elem_format,
1312 bool is_rt, bool render_cache_rw,
1313 struct ilo_view_surface *surf)
1314 {
1315 const bool typed = (elem_format != PIPE_FORMAT_NONE);
1316 const bool structured = (!typed && struct_size > 1);
1317 const int elem_size = (typed) ?
1318 util_format_get_blocksize(elem_format) : 1;
1319 int width, height, depth, pitch;
1320 int surface_type, surface_format, num_entries;
1321 uint32_t *dw;
1322
1323 ILO_GPE_VALID_GEN(dev, 7, 7);
1324
1325 surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
1326
1327 surface_format = (typed) ?
1328 ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
1329
1330 num_entries = size / struct_size;
1331 /* see if there is enough space to fit another element */
1332 if (size % struct_size >= elem_size && !structured)
1333 num_entries++;
1334
1335 /*
1336 * From the Ivy Bridge PRM, volume 4 part 1, page 67:
1337 *
1338 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1339 * Address) specifies the base address of first element of the
1340 * surface. The surface is interpreted as a simple array of that
1341 * single element type. The address must be naturally-aligned to the
1342 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1343 * must be 16-byte aligned)
1344 *
1345 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1346 * the base address of the first element of the surface, computed in
1347 * software by adding the surface base address to the byte offset of
1348 * the element in the buffer."
1349 */
1350 if (is_rt)
1351 assert(offset % elem_size == 0);
1352
1353 /*
1354 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1355 *
1356 * "For typed buffer and structured buffer surfaces, the number of
1357 * entries in the buffer ranges from 1 to 2^27. For raw buffer
1358 * surfaces, the number of entries in the buffer is the number of
1359 * bytes which can range from 1 to 2^30."
1360 */
1361 assert(num_entries >= 1 &&
1362 num_entries <= 1 << ((typed || structured) ? 27 : 30));
1363
1364 /*
1365 * From the Ivy Bridge PRM, volume 4 part 1, page 69:
1366 *
1367 * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
1368 * 11 if the Surface Format is RAW (the size of the buffer must be a
1369 * multiple of 4 bytes)."
1370 *
1371 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1372 *
1373 * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
1374 * field (Surface Pitch) indicates the size of the structure."
1375 *
1376 * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
1377 * must be a multiple of 4 bytes."
1378 */
1379 if (structured)
1380 assert(struct_size % 4 == 0);
1381 else if (!typed)
1382 assert(num_entries % 4 == 0);
1383
1384 pitch = struct_size;
1385
1386 pitch--;
1387 num_entries--;
1388 /* bits [6:0] */
1389 width = (num_entries & 0x0000007f);
1390 /* bits [20:7] */
1391 height = (num_entries & 0x001fff80) >> 7;
1392 /* bits [30:21] */
1393 depth = (num_entries & 0x7fe00000) >> 21;
1394 /* limit to [26:21] */
1395 if (typed || structured)
1396 depth &= 0x3f;
1397
1398 STATIC_ASSERT(Elements(surf->payload) >= 8);
1399 dw = surf->payload;
1400
1401 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1402 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1403 if (render_cache_rw)
1404 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1405
1406 dw[1] = offset;
1407
1408 dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
1409 SET_FIELD(width, GEN7_SURFACE_WIDTH);
1410
1411 dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
1412 pitch;
1413
1414 dw[4] = 0;
1415 dw[5] = 0;
1416
1417 dw[6] = 0;
1418 dw[7] = 0;
1419
1420 /* do not increment reference count */
1421 surf->bo = buf->bo;
1422 }
1423
1424 void
1425 ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
1426 const struct ilo_texture *tex,
1427 enum pipe_format format,
1428 unsigned first_level,
1429 unsigned num_levels,
1430 unsigned first_layer,
1431 unsigned num_layers,
1432 bool is_rt, bool render_cache_rw,
1433 struct ilo_view_surface *surf)
1434 {
1435 int surface_type, surface_format;
1436 int width, height, depth, pitch, lod;
1437 unsigned layer_offset, x_offset, y_offset;
1438 uint32_t *dw;
1439
1440 ILO_GPE_VALID_GEN(dev, 7, 7);
1441
1442 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
1443 assert(surface_type != BRW_SURFACE_BUFFER);
1444
1445 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
1446 format = PIPE_FORMAT_Z32_FLOAT;
1447
1448 if (is_rt)
1449 surface_format = ilo_translate_render_format(format);
1450 else
1451 surface_format = ilo_translate_texture_format(format);
1452 assert(surface_format >= 0);
1453
1454 width = tex->base.width0;
1455 height = tex->base.height0;
1456 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1457 tex->base.depth0 : num_layers;
1458 pitch = tex->bo_stride;
1459
1460 if (surface_type == BRW_SURFACE_CUBE) {
1461 /*
1462 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1463 *
1464 * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
1465 * this field is [0,340], indicating the number of cube array
1466 * elements (equal to the number of underlying 2D array elements
1467 * divided by 6). For other surfaces, this field must be zero."
1468 *
1469 * When is_rt is true, we treat the texture as a 2D one to avoid the
1470 * restriction.
1471 */
1472 if (is_rt) {
1473 surface_type = BRW_SURFACE_2D;
1474 }
1475 else {
1476 assert(num_layers % 6 == 0);
1477 depth = num_layers / 6;
1478 }
1479 }
1480
1481 /* sanity check the size */
1482 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1483 assert(first_layer < 2048 && num_layers <= 2048);
1484 switch (surface_type) {
1485 case BRW_SURFACE_1D:
1486 assert(width <= 16384 && height == 1 && depth <= 2048);
1487 break;
1488 case BRW_SURFACE_2D:
1489 assert(width <= 16384 && height <= 16384 && depth <= 2048);
1490 break;
1491 case BRW_SURFACE_3D:
1492 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1493 if (!is_rt)
1494 assert(first_layer == 0);
1495 break;
1496 case BRW_SURFACE_CUBE:
1497 assert(width <= 16384 && height <= 16384 && depth <= 86);
1498 assert(width == height);
1499 if (is_rt)
1500 assert(first_layer == 0);
1501 break;
1502 default:
1503 assert(!"unexpected surface type");
1504 break;
1505 }
1506
1507 if (is_rt) {
1508 /*
1509 * Compute the offset to the layer manually.
1510 *
1511 * For rendering, the hardware requires LOD to be the same for all
1512 * render targets and the depth buffer. We need to compute the offset
1513 * to the layer manually and always set LOD to 0.
1514 */
1515 if (true) {
1516 /* we lose the capability for layered rendering */
1517 assert(num_layers == 1);
1518
1519 layer_offset = ilo_texture_get_slice_offset(tex,
1520 first_level, first_layer, &x_offset, &y_offset);
1521
1522 assert(x_offset % 4 == 0);
1523 assert(y_offset % 2 == 0);
1524 x_offset /= 4;
1525 y_offset /= 2;
1526
1527 /* derive the size for the LOD */
1528 width = u_minify(width, first_level);
1529 height = u_minify(height, first_level);
1530 if (surface_type == BRW_SURFACE_3D)
1531 depth = u_minify(depth, first_level);
1532 else
1533 depth = 1;
1534
1535 first_level = 0;
1536 first_layer = 0;
1537 lod = 0;
1538 }
1539 else {
1540 layer_offset = 0;
1541 x_offset = 0;
1542 y_offset = 0;
1543 }
1544
1545 assert(num_levels == 1);
1546 lod = first_level;
1547 }
1548 else {
1549 layer_offset = 0;
1550 x_offset = 0;
1551 y_offset = 0;
1552
1553 lod = num_levels - 1;
1554 }
1555
1556 /*
1557 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1558 *
1559 * "The Base Address for linear render target surfaces and surfaces
1560 * accessed with the typed surface read/write data port messages must
1561 * be element-size aligned, for non-YUV surface formats, or a multiple
1562 * of 2 element-sizes for YUV surface formats. Other linear surfaces
1563 * have no alignment requirements (byte alignment is sufficient)."
1564 *
1565 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1566 *
1567 * "For linear render target surfaces and surfaces accessed with the
1568 * typed data port messages, the pitch must be a multiple of the
1569 * element size for non-YUV surface formats. Pitch must be a multiple
1570 * of 2 * element size for YUV surface formats. For linear surfaces
1571 * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
1572 * of 4 bytes.For other linear surfaces, the pitch can be any multiple
1573 * of bytes."
1574 *
1575 * From the Ivy Bridge PRM, volume 4 part 1, page 74:
1576 *
1577 * "For linear surfaces, this field (X Offset) must be zero."
1578 */
1579 if (tex->tiling == INTEL_TILING_NONE) {
1580 if (is_rt) {
1581 const int elem_size = util_format_get_blocksize(format);
1582 assert(layer_offset % elem_size == 0);
1583 assert(pitch % elem_size == 0);
1584 }
1585
1586 assert(!x_offset);
1587 }
1588
1589 STATIC_ASSERT(Elements(surf->payload) >= 8);
1590 dw = surf->payload;
1591
1592 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1593 surface_format << BRW_SURFACE_FORMAT_SHIFT |
1594 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;
1595
1596 /*
1597 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
1598 *
1599 * "If this field (Surface Array) is enabled, the Surface Type must be
1600 * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
1601 * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
1602 * SURFTYPE_CUBE, the Depth field must be set to zero."
1603 *
1604 * For non-3D sampler surfaces, resinfo (the sampler message) always
1605 * returns zero for the number of layers when this field is not set.
1606 */
1607 if (surface_type != BRW_SURFACE_3D) {
1608 if (util_resource_is_array_texture(&tex->base))
1609 dw[0] |= GEN7_SURFACE_IS_ARRAY;
1610 else
1611 assert(depth == 1);
1612 }
1613
1614 if (tex->valign_4)
1615 dw[0] |= GEN7_SURFACE_VALIGN_4;
1616
1617 if (tex->halign_8)
1618 dw[0] |= GEN7_SURFACE_HALIGN_8;
1619
1620 if (tex->array_spacing_full)
1621 dw[0] |= GEN7_SURFACE_ARYSPC_FULL;
1622 else
1623 dw[0] |= GEN7_SURFACE_ARYSPC_LOD0;
1624
1625 if (render_cache_rw)
1626 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1627
1628 if (surface_type == BRW_SURFACE_CUBE && !is_rt)
1629 dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
1630
1631 dw[1] = layer_offset;
1632
1633 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1634 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1635
1636 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
1637 (pitch - 1);
1638
1639 dw[4] = first_layer << 18 |
1640 (num_layers - 1) << 7;
1641
1642 /*
1643 * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
1644 * means the samples are interleaved. The layouts are the same when the
1645 * number of samples is 1.
1646 */
1647 if (tex->interleaved && tex->base.nr_samples > 1) {
1648 assert(!is_rt);
1649 dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
1650 }
1651 else {
1652 dw[4] |= GEN7_SURFACE_MSFMT_MSS;
1653 }
1654
1655 if (tex->base.nr_samples > 4)
1656 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
1657 else if (tex->base.nr_samples > 2)
1658 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
1659 else
1660 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;
1661
1662 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
1663 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
1664 SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
1665 lod;
1666
1667 dw[6] = 0;
1668 dw[7] = 0;
1669
1670 /* do not increment reference count */
1671 surf->bo = tex->bo;
1672 }
1673
1674 static int
1675 gen7_estimate_command_size(const struct ilo_dev_info *dev,
1676 enum ilo_gpe_gen7_command cmd,
1677 int arg)
1678 {
1679 static const struct {
1680 int header;
1681 int body;
1682 } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
1683 [ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 },
1684 [ILO_GPE_GEN7_STATE_SIP] = { 0, 2 },
1685 [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 },
1686 [ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 },
1687 [ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 },
1688 [ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 },
1689 [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
1690 [ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 },
1691 [ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 },
1692 [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 },
1693 [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
1694 [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
1695 [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
1696 [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
1697 [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
1698 [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 },
1699 [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 },
1700 [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
1701 [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 },
1702 [ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 },
1703 [ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 },
1704 [ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 },
1705 [ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 },
1706 [ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 },
1707 [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 },
1708 [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 },
1709 [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 },
1710 [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 },
1711 [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 },
1712 [ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 },
1713 [ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 },
1714 [ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 },
1715 [ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 },
1716 [ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 },
1717 [ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 },
1718 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 },
1719 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 },
1720 [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 },
1721 [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 },
1722 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 },
1723 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 },
1724 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 },
1725 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 },
1726 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 },
1727 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 },
1728 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 },
1729 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 },
1730 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 },
1731 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 },
1732 [ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 },
1733 [ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 },
1734 [ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 },
1735 [ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 },
1736 [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
1737 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
1738 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, },
1739 [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 },
1740 [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
1741 [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 },
1742 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 },
1743 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 },
1744 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 },
1745 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 },
1746 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 },
1747 [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 },
1748 [ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 },
1749 [ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 },
1750 [ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 },
1751 };
1752 const int header = gen7_command_size_table[cmd].header;
1753 const int body = gen7_command_size_table[cmd].body;
1754 const int count = arg;
1755
1756 ILO_GPE_VALID_GEN(dev, 7, 7);
1757 assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
1758
1759 return (likely(count)) ? header + body * count : 0;
1760 }
1761
1762 static int
1763 gen7_estimate_state_size(const struct ilo_dev_info *dev,
1764 enum ilo_gpe_gen7_state state,
1765 int arg)
1766 {
1767 static const struct {
1768 int alignment;
1769 int body;
1770 bool is_array;
1771 } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = {
1772 [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
1773 [ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true },
1774 [ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true },
1775 [ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false },
1776 [ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true },
1777 [ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false },
1778 [ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true },
1779 [ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true },
1780 [ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false },
1781 [ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true },
1782 [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false },
1783 [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
1784 };
1785 const int alignment = gen7_state_size_table[state].alignment;
1786 const int body = gen7_state_size_table[state].body;
1787 const bool is_array = gen7_state_size_table[state].is_array;
1788 const int count = arg;
1789 int estimate;
1790
1791 ILO_GPE_VALID_GEN(dev, 7, 7);
1792 assert(state < ILO_GPE_GEN7_STATE_COUNT);
1793
1794 if (likely(count)) {
1795 if (is_array) {
1796 estimate = (alignment - 1) + body * count;
1797 }
1798 else {
1799 estimate = (alignment - 1) + body;
1800 /* all states are aligned */
1801 if (count > 1)
1802 estimate += util_align_npot(body, alignment) * (count - 1);
1803 }
1804 }
1805 else {
1806 estimate = 0;
1807 }
1808
1809 return estimate;
1810 }
1811
1812 static void
1813 gen7_init(struct ilo_gpe_gen7 *gen7)
1814 {
1815 const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
1816
1817 gen7->estimate_command_size = gen7_estimate_command_size;
1818 gen7->estimate_state_size = gen7_estimate_state_size;
1819
1820 #define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
1821 #define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name
1822 GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
1823 GEN7_USE(gen7, STATE_SIP, gen6);
1824 GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
1825 GEN7_USE(gen7, PIPELINE_SELECT, gen6);
1826 GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
1827 GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
1828 GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
1829 GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
1830 GEN7_SET(gen7, GPGPU_WALKER);
1831 GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
1832 GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
1833 GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
1834 GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
1835 GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
1836 GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
1837 GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
1838 GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
1839 GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
1840 GEN7_USE(gen7, 3DSTATE_VS, gen6);
1841 GEN7_SET(gen7, 3DSTATE_GS);
1842 GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
1843 GEN7_SET(gen7, 3DSTATE_SF);
1844 GEN7_SET(gen7, 3DSTATE_WM);
1845 GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
1846 GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
1847 GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
1848 GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
1849 GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
1850 GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
1851 GEN7_SET(gen7, 3DSTATE_HS);
1852 GEN7_SET(gen7, 3DSTATE_TE);
1853 GEN7_SET(gen7, 3DSTATE_DS);
1854 GEN7_SET(gen7, 3DSTATE_STREAMOUT);
1855 GEN7_SET(gen7, 3DSTATE_SBE);
1856 GEN7_SET(gen7, 3DSTATE_PS);
1857 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1858 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1859 GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
1860 GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
1861 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
1862 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
1863 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
1864 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
1865 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
1866 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
1867 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
1868 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
1869 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
1870 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
1871 GEN7_SET(gen7, 3DSTATE_URB_VS);
1872 GEN7_SET(gen7, 3DSTATE_URB_HS);
1873 GEN7_SET(gen7, 3DSTATE_URB_DS);
1874 GEN7_SET(gen7, 3DSTATE_URB_GS);
1875 GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
1876 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
1877 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
1878 GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
1879 GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
1880 GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
1881 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1882 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1883 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1884 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1885 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1886 GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
1887 GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
1888 GEN7_USE(gen7, PIPE_CONTROL, gen6);
1889 GEN7_SET(gen7, 3DPRIMITIVE);
1890 GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
1891 GEN7_SET(gen7, SF_CLIP_VIEWPORT);
1892 GEN7_USE(gen7, CC_VIEWPORT, gen6);
1893 GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
1894 GEN7_USE(gen7, BLEND_STATE, gen6);
1895 GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
1896 GEN7_USE(gen7, SCISSOR_RECT, gen6);
1897 GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
1898 GEN7_USE(gen7, SURFACE_STATE, gen6);
1899 GEN7_USE(gen7, SAMPLER_STATE, gen6);
1900 GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
1901 GEN7_USE(gen7, push_constant_buffer, gen6);
1902 #undef GEN7_USE
1903 #undef GEN7_SET
1904 }
1905
1906 static struct ilo_gpe_gen7 gen7_gpe;
1907
1908 const struct ilo_gpe_gen7 *
1909 ilo_gpe_gen7_get(void)
1910 {
1911 if (!gen7_gpe.estimate_command_size)
1912 gen7_init(&gen7_gpe);
1913
1914 return &gen7_gpe;
1915 }