ilo: construct 3DSTATE_SF in create_rasterizer_state()
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "brw_defines.h"
29 #include "intel_reg.h"
30
31 #include "ilo_cp.h"
32 #include "ilo_format.h"
33 #include "ilo_resource.h"
34 #include "ilo_shader.h"
35 #include "ilo_gpe_gen7.h"
36
/**
 * Placeholder for the GPGPU_WALKER command.
 *
 * Nothing is written to \p cp; the function only trips an assertion (when
 * assertions are compiled in) to flag that the command is unsupported.
 */
static void
gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
{
   /* neither argument is consumed */
   (void) dev;
   (void) cp;

   assert(!"GPGPU_WALKER unsupported");
}
43
/**
 * Emit 3DSTATE_CLEAR_PARAMS (3 dwords) into \p cp.
 *
 * \param dev        device info; checked to be GEN7
 * \param clear_val  value written verbatim as the second dword
 * \param cp         command parser receiving the command
 */
static void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x04) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, clear_val);
   /* NOTE(review): presumably the "clear value valid" bit -- confirm in PRM */
   ilo_cp_write(cp, 1);
   ilo_cp_end(cp);
}
60
/**
 * Shared emitter for the two-dword 3DSTATE_*_POINTERS style commands.
 *
 * \param dev      device info; checked to be GEN7
 * \param subop    sub-opcode selecting which pointer command is emitted
 * \param pointer  value written verbatim as the single payload dword
 * \param cp       command parser receiving the command
 */
static void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
76
/**
 * Emit 3DSTATE_CC_STATE_POINTERS: the generic pointer command with
 * sub-opcode 0x0e and \p color_calc_state as its payload.
 */
static void
gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const int subop = 0x0e;

   gen7_emit_3dstate_pointer(dev, subop, color_calc_state, cp);
}
84
85 static void
86 gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
87 const struct ilo_shader *gs,
88 int num_samplers,
89 struct ilo_cp *cp)
90 {
91 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
92 const uint8_t cmd_len = 7;
93 uint32_t dw2, dw4, dw5;
94 int max_threads;
95
96 ILO_GPE_VALID_GEN(dev, 7, 7);
97
98 switch (dev->gen) {
99 case ILO_GEN(7):
100 max_threads = (dev->gt == 2) ? 128 : 36;
101 break;
102 default:
103 max_threads = 1;
104 break;
105 }
106
107 if (!gs) {
108 ilo_cp_begin(cp, cmd_len);
109 ilo_cp_write(cp, cmd | (cmd_len - 2));
110 ilo_cp_write(cp, 0);
111 ilo_cp_write(cp, 0);
112 ilo_cp_write(cp, 0);
113 ilo_cp_write(cp, 0);
114 ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
115 ilo_cp_write(cp, 0);
116 ilo_cp_end(cp);
117 return;
118 }
119
120 dw2 = ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
121
122 dw4 = ((gs->in.count + 1) / 2) << GEN6_GS_URB_READ_LENGTH_SHIFT |
123 GEN7_GS_INCLUDE_VERTEX_HANDLES |
124 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
125 gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
126
127 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
128 GEN6_GS_STATISTICS_ENABLE |
129 GEN6_GS_ENABLE;
130
131 ilo_cp_begin(cp, cmd_len);
132 ilo_cp_write(cp, cmd | (cmd_len - 2));
133 ilo_cp_write(cp, gs->cache_offset);
134 ilo_cp_write(cp, dw2);
135 ilo_cp_write(cp, 0); /* scratch */
136 ilo_cp_write(cp, dw4);
137 ilo_cp_write(cp, dw5);
138 ilo_cp_write(cp, 0);
139 ilo_cp_end(cp);
140 }
141
142 static void
143 gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
144 const struct ilo_rasterizer_state *rasterizer,
145 const struct pipe_surface *zs_surf,
146 struct ilo_cp *cp)
147 {
148 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
149 const uint8_t cmd_len = 7;
150 const int num_samples = 1;
151 uint32_t payload[6];
152
153 ILO_GPE_VALID_GEN(dev, 7, 7);
154
155 ilo_gpe_gen6_fill_3dstate_sf_raster(dev,
156 &rasterizer->sf, num_samples,
157 (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE,
158 payload, Elements(payload));
159
160 ilo_cp_begin(cp, cmd_len);
161 ilo_cp_write(cp, cmd | (cmd_len - 2));
162 ilo_cp_write_multi(cp, payload, 6);
163 ilo_cp_end(cp);
164 }
165
166 static void
167 gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
168 const struct ilo_shader *fs,
169 const struct pipe_rasterizer_state *rasterizer,
170 bool cc_may_kill,
171 struct ilo_cp *cp)
172 {
173 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
174 const uint8_t cmd_len = 3;
175 const int num_samples = 1;
176 uint32_t dw1, dw2;
177
178 ILO_GPE_VALID_GEN(dev, 7, 7);
179
180 dw1 = GEN7_WM_STATISTICS_ENABLE |
181 GEN7_WM_LINE_AA_WIDTH_2_0;
182
183 if (false) {
184 dw1 |= GEN7_WM_DEPTH_CLEAR;
185 dw1 |= GEN7_WM_DEPTH_RESOLVE;
186 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
187 }
188
189 if (fs) {
190 /*
191 * Set this bit if
192 *
193 * a) fs writes colors and color is not masked, or
194 * b) fs writes depth, or
195 * c) fs or cc kills
196 */
197 dw1 |= GEN7_WM_DISPATCH_ENABLE;
198
199 /*
200 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
201 *
202 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
203 * the PS kernel or color calculator has the ability to kill
204 * (discard) pixels or samples, other than due to depth or stencil
205 * testing. This bit is required to be ENABLED in the following
206 * situations:
207 *
208 * - The API pixel shader program contains "killpix" or "discard"
209 * instructions, or other code in the pixel shader kernel that
210 * can cause the final pixel mask to differ from the pixel mask
211 * received on dispatch.
212 *
213 * - A sampler with chroma key enabled with kill pixel mode is used
214 * by the pixel shader.
215 *
216 * - Any render target has Alpha Test Enable or AlphaToCoverage
217 * Enable enabled.
218 *
219 * - The pixel shader kernel generates and outputs oMask.
220 *
221 * Note: As ClipDistance clipping is fully supported in hardware
222 * and therefore not via PS instructions, there should be no need
223 * to ENABLE this bit due to ClipDistance clipping."
224 */
225 if (fs->has_kill || cc_may_kill)
226 dw1 |= GEN7_WM_KILL_ENABLE;
227
228 if (fs->out.has_pos)
229 dw1 |= GEN7_WM_PSCDEPTH_ON;
230 if (fs->in.has_pos)
231 dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
232
233 dw1 |= fs->in.barycentric_interpolation_mode <<
234 GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
235 }
236 else if (cc_may_kill) {
237 dw1 |= GEN7_WM_DISPATCH_ENABLE |
238 GEN7_WM_KILL_ENABLE;
239 }
240
241 dw1 |= GEN7_WM_POSITION_ZW_PIXEL;
242
243 /* same value as in 3DSTATE_SF */
244 if (rasterizer->line_smooth)
245 dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
246
247 if (rasterizer->poly_stipple_enable)
248 dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
249 if (rasterizer->line_stipple_enable)
250 dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
251
252 if (rasterizer->bottom_edge_rule)
253 dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
254
255 if (num_samples > 1) {
256 if (rasterizer->multisample)
257 dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
258 else
259 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
260
261 dw2 = GEN7_WM_MSDISPMODE_PERPIXEL;
262 }
263 else {
264 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
265
266 dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
267 }
268
269 ilo_cp_begin(cp, cmd_len);
270 ilo_cp_write(cp, cmd | (cmd_len - 2));
271 ilo_cp_write(cp, dw1);
272 ilo_cp_write(cp, dw2);
273 ilo_cp_end(cp);
274 }
275
/**
 * Shared emitter for the 3DSTATE_CONSTANT_{VS,HS,DS,GS,PS} commands
 * (7 dwords).
 *
 * \param dev       device info; checked to be GEN7
 * \param subop     sub-opcode; must be 0x15..0x1a excluding 0x18
 * \param bufs      per-buffer values written as DW3..DW6; each must be a
 *                  multiple of 32
 * \param sizes     per-buffer sizes in bytes; a zero size ends the list
 * \param num_bufs  number of valid entries in \p bufs / \p sizes (at most 4)
 * \param cp        command parser receiving the command
 */
static void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t cmd_len = 7;
   /* dw[0..1] hold the packed read lengths, dw[2..5] the buffers */
   uint32_t dw[6] = { 0 };
   int total_read_length = 0;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
    *
    *     "Constant buffers must be enabled in order from Constant Buffer 0
    *      to Constant Buffer 3 within this command.  For example, it is
    *      not allowed to enable Constant Buffer 1 by programming a
    *      non-zero value in the VS Constant Buffer 1 Read Length without a
    *      non-zero value in VS Constant Buffer 0 Read Length."
    *
    * So consume buffers only up to the first missing or empty one.
    */
   for (i = 0; i < 4 && i < num_bufs && sizes[i]; i++) {
      /* read lengths are in 256-bit units */
      const int read_len = (sizes[i] + 31) / 32;

      /* the lower 5 bits are used for memory object control state */
      assert(bufs[i] % 32 == 0);

      dw[i / 2] |= read_len << ((i % 2) ? 16 : 0);
      dw[2 + i] = bufs[i];

      total_read_length += read_len;
   }

   /* every remaining slot must be disabled; its dwords stay zero */
   for (; i < 4; i++)
      assert(i >= num_bufs || !sizes[i]);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(total_read_length <= 64);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, dw, 6);
   ilo_cp_end(cp);
}
343
/**
 * Emit 3DSTATE_CONSTANT_VS: the generic constant command with sub-opcode
 * 0x15.
 */
static void
gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
352
/**
 * Emit 3DSTATE_CONSTANT_GS: the generic constant command with sub-opcode
 * 0x16.
 */
static void
gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
361
/**
 * Emit 3DSTATE_CONSTANT_PS: the generic constant command with sub-opcode
 * 0x17.
 */
static void
gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x17;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
370
/**
 * Emit 3DSTATE_SAMPLE_MASK (2 dwords) into \p cp.
 *
 * \param dev          device info; checked to be GEN7
 * \param sample_mask  requested mask; bits beyond \p num_samples are dropped
 * \param num_samples  number of samples; bit 0 is always kept valid
 * \param cp           command parser receiving the command
 */
static void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (cmd_len - 2);
   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
   unsigned masked;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   masked = sample_mask & valid_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, masked);
   ilo_cp_end(cp);
}
399
/**
 * Emit 3DSTATE_CONSTANT_HS: the generic constant command with sub-opcode
 * 0x19.
 */
static void
gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x19;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
408
/**
 * Emit 3DSTATE_CONSTANT_DS: the generic constant command with sub-opcode
 * 0x1a.
 */
static void
gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x1a;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
417
418 static void
419 gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
420 const struct ilo_shader *hs,
421 int max_threads, int num_samplers,
422 struct ilo_cp *cp)
423 {
424 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
425 const uint8_t cmd_len = 7;
426 uint32_t dw1, dw2, dw5;
427
428 ILO_GPE_VALID_GEN(dev, 7, 7);
429
430 if (!hs) {
431 ilo_cp_begin(cp, cmd_len);
432 ilo_cp_write(cp, cmd | (cmd_len - 2));
433 ilo_cp_write(cp, 0);
434 ilo_cp_write(cp, 0);
435 ilo_cp_write(cp, 0);
436 ilo_cp_write(cp, 0);
437 ilo_cp_write(cp, 0);
438 ilo_cp_write(cp, 0);
439 ilo_cp_end(cp);
440
441 return;
442 }
443
444 dw1 = (num_samplers + 3) / 4 << 27 |
445 0 << 18 |
446 (max_threads - 1);
447 if (false)
448 dw1 |= 1 << 16;
449
450 dw2 = 1 << 31 | /* HS Enable */
451 1 << 29 | /* HS Statistics Enable */
452 0; /* Instance Count */
453
454 dw5 = hs->in.start_grf << 19 |
455 0 << 11 |
456 0 << 4;
457
458 ilo_cp_begin(cp, cmd_len);
459 ilo_cp_write(cp, cmd | (cmd_len - 2));
460 ilo_cp_write(cp, dw1);
461 ilo_cp_write(cp, dw2);
462 ilo_cp_write(cp, hs->cache_offset);
463 ilo_cp_write(cp, 0);
464 ilo_cp_write(cp, dw5);
465 ilo_cp_write(cp, 0);
466 ilo_cp_end(cp);
467 }
468
/**
 * Emit 3DSTATE_TE (4 dwords) into \p cp with an all-zero payload.
 */
static void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1c) | (cmd_len - 2);
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* the three payload dwords are all zero */
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
485
486 static void
487 gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
488 const struct ilo_shader *ds,
489 int max_threads, int num_samplers,
490 struct ilo_cp *cp)
491 {
492 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
493 const uint8_t cmd_len = 6;
494 uint32_t dw2, dw4, dw5;
495
496 ILO_GPE_VALID_GEN(dev, 7, 7);
497
498 if (!ds) {
499 ilo_cp_begin(cp, cmd_len);
500 ilo_cp_write(cp, cmd | (cmd_len - 2));
501 ilo_cp_write(cp, 0);
502 ilo_cp_write(cp, 0);
503 ilo_cp_write(cp, 0);
504 ilo_cp_write(cp, 0);
505 ilo_cp_write(cp, 0);
506 ilo_cp_end(cp);
507
508 return;
509 }
510
511 dw2 = (num_samplers + 3) / 4 << 27 |
512 0 << 18 |
513 (max_threads - 1);
514 if (false)
515 dw2 |= 1 << 16;
516
517 dw4 = ds->in.start_grf << 20 |
518 0 << 11 |
519 0 << 4;
520
521 dw5 = (max_threads - 1) << 25 |
522 1 << 10 |
523 1;
524
525 ilo_cp_begin(cp, cmd_len);
526 ilo_cp_write(cp, cmd | (cmd_len - 2));
527 ilo_cp_write(cp, ds->cache_offset);
528 ilo_cp_write(cp, dw2);
529 ilo_cp_write(cp, 0);
530 ilo_cp_write(cp, dw4);
531 ilo_cp_write(cp, dw5);
532 ilo_cp_end(cp);
533 }
534
535 static void
536 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
537 unsigned buffer_mask,
538 int vertex_attrib_count,
539 bool rasterizer_discard,
540 struct ilo_cp *cp)
541 {
542 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
543 const uint8_t cmd_len = 3;
544 const bool enable = (buffer_mask != 0);
545 uint32_t dw1, dw2;
546 int read_len;
547
548 ILO_GPE_VALID_GEN(dev, 7, 7);
549
550 if (!enable) {
551 dw1 = 0 << SO_RENDER_STREAM_SELECT_SHIFT;
552 if (rasterizer_discard)
553 dw1 |= SO_RENDERING_DISABLE;
554
555 dw2 = 0;
556
557 ilo_cp_begin(cp, cmd_len);
558 ilo_cp_write(cp, cmd | (cmd_len - 2));
559 ilo_cp_write(cp, dw1);
560 ilo_cp_write(cp, dw2);
561 ilo_cp_end(cp);
562 return;
563 }
564
565 read_len = (vertex_attrib_count + 1) / 2;
566 if (!read_len)
567 read_len = 1;
568
569 dw1 = SO_FUNCTION_ENABLE |
570 0 << SO_RENDER_STREAM_SELECT_SHIFT |
571 SO_STATISTICS_ENABLE |
572 buffer_mask << 8;
573
574 if (rasterizer_discard)
575 dw1 |= SO_RENDERING_DISABLE;
576
577 /* API_OPENGL */
578 if (true)
579 dw1 |= SO_REORDER_TRAILING;
580
581 dw2 = 0 << SO_STREAM_3_VERTEX_READ_OFFSET_SHIFT |
582 0 << SO_STREAM_3_VERTEX_READ_LENGTH_SHIFT |
583 0 << SO_STREAM_2_VERTEX_READ_OFFSET_SHIFT |
584 0 << SO_STREAM_2_VERTEX_READ_LENGTH_SHIFT |
585 0 << SO_STREAM_1_VERTEX_READ_OFFSET_SHIFT |
586 0 << SO_STREAM_1_VERTEX_READ_LENGTH_SHIFT |
587 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
588 (read_len - 1) << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
589
590 ilo_cp_begin(cp, cmd_len);
591 ilo_cp_write(cp, cmd | (cmd_len - 2));
592 ilo_cp_write(cp, dw1);
593 ilo_cp_write(cp, dw2);
594 ilo_cp_end(cp);
595 }
596
/**
 * Emit 3DSTATE_SBE (14 dwords) into \p cp.
 *
 * The thirteen payload dwords are produced by
 * ilo_gpe_gen6_fill_3dstate_sf_sbe() from the rasterizer state, the
 * fragment shader and the last enabled shader stage.
 */
static void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct pipe_rasterizer_state *rasterizer,
                      const struct ilo_shader *fs,
                      const struct ilo_shader *last_sh,
                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 14;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1f) | (cmd_len - 2);
   uint32_t payload[13];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
         fs, last_sh, payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, payload, 13);
   ilo_cp_end(cp);
}
618
619 static void
620 gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
621 const struct ilo_shader *fs,
622 int num_samplers, bool dual_blend,
623 struct ilo_cp *cp)
624 {
625 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
626 const uint8_t cmd_len = 8;
627 uint32_t dw2, dw4, dw5;
628 int max_threads;
629
630 ILO_GPE_VALID_GEN(dev, 7, 7);
631
632 /* see brwCreateContext() */
633 max_threads = (dev->gt == 2) ? 172 : 48;
634
635 if (!fs) {
636 ilo_cp_begin(cp, cmd_len);
637 ilo_cp_write(cp, cmd | (cmd_len - 2));
638 ilo_cp_write(cp, 0);
639 ilo_cp_write(cp, 0);
640 ilo_cp_write(cp, 0);
641 /* GPU hangs if none of the dispatch enable bits is set */
642 ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
643 GEN7_PS_8_DISPATCH_ENABLE);
644 ilo_cp_write(cp, 0);
645 ilo_cp_write(cp, 0);
646 ilo_cp_write(cp, 0);
647 ilo_cp_end(cp);
648
649 return;
650 }
651
652 dw2 = (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT |
653 0 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
654 if (false)
655 dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
656
657 dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
658 GEN7_PS_POSOFFSET_NONE;
659
660 if (false)
661 dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
662 if (fs->in.count)
663 dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
664 if (dual_blend)
665 dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
666
667 if (fs->dispatch_16)
668 dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
669 else
670 dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
671
672 dw5 = fs->in.start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
673 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
674 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
675
676 ilo_cp_begin(cp, cmd_len);
677 ilo_cp_write(cp, cmd | (cmd_len - 2));
678 ilo_cp_write(cp, fs->cache_offset);
679 ilo_cp_write(cp, dw2);
680 ilo_cp_write(cp, 0); /* scratch */
681 ilo_cp_write(cp, dw4);
682 ilo_cp_write(cp, dw5);
683 ilo_cp_write(cp, 0); /* kernel 1 */
684 ilo_cp_write(cp, 0); /* kernel 2 */
685 ilo_cp_end(cp);
686 }
687
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: the generic pointer command
 * with sub-opcode 0x21 and \p sf_clip_viewport as its payload.
 */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                  uint32_t sf_clip_viewport,
                                                  struct ilo_cp *cp)
{
   const int subop = 0x21;

   gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
}
695
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC: the generic pointer command with
 * sub-opcode 0x23 and \p cc_viewport as its payload.
 */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                             uint32_t cc_viewport,
                                             struct ilo_cp *cp)
{
   const int subop = 0x23;

   gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
}
703
/**
 * Emit 3DSTATE_BLEND_STATE_POINTERS: the generic pointer command with
 * sub-opcode 0x24 and \p blend_state as its payload.
 */
static void
gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                       uint32_t blend_state,
                                       struct ilo_cp *cp)
{
   const int subop = 0x24;

   gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
}
711
/**
 * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: the generic pointer command
 * with sub-opcode 0x25 and \p depth_stencil_state as its payload.
 */
static void
gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                               uint32_t depth_stencil_state,
                                               struct ilo_cp *cp)
{
   const int subop = 0x25;

   gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
}
719
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS: the generic pointer command with
 * sub-opcode 0x26 and \p binding_table as its payload.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x26;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
727
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS: the generic pointer command with
 * sub-opcode 0x27 and \p binding_table as its payload.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x27;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
735
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS: the generic pointer command with
 * sub-opcode 0x28 and \p binding_table as its payload.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x28;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
743
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS: the generic pointer command with
 * sub-opcode 0x29 and \p binding_table as its payload.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x29;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
751
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS: the generic pointer command with
 * sub-opcode 0x2a and \p binding_table as its payload.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2a;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
759
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS: the generic pointer command with
 * sub-opcode 0x2b and \p sampler_state as its payload.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2b;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
767
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS: the generic pointer command with
 * sub-opcode 0x2c and \p sampler_state as its payload.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2c;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
775
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS: the generic pointer command with
 * sub-opcode 0x2d and \p sampler_state as its payload.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2d;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
783
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS: the generic pointer command with
 * sub-opcode 0x2e and \p sampler_state as its payload.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2e;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
791
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS: the generic pointer command with
 * sub-opcode 0x2f and \p sampler_state as its payload.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2f;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
799
800 static void
801 gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
802 int subop, int offset, int size,
803 int entry_size,
804 struct ilo_cp *cp)
805 {
806 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
807 const uint8_t cmd_len = 2;
808 const int row_size = 64; /* 512 bits */
809 int alloc_size, num_entries, min_entries, max_entries;
810
811 ILO_GPE_VALID_GEN(dev, 7, 7);
812
813 /* VS, HS, DS, and GS variants */
814 assert(subop >= 0x30 && subop <= 0x33);
815
816 /* in multiples of 8KB */
817 assert(offset % 8192 == 0);
818 offset /= 8192;
819
820 /* in multiple of 512-bit rows */
821 alloc_size = (entry_size + row_size - 1) / row_size;
822 if (!alloc_size)
823 alloc_size = 1;
824
825 /*
826 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
827 *
828 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
829 * cause performance to decrease due to banking in the URB. Element
830 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
831 */
832 if (subop == 0x30 && alloc_size == 5)
833 alloc_size = 6;
834
835 /* in multiples of 8 */
836 num_entries = (size / row_size / alloc_size) & ~7;
837
838 switch (subop) {
839 case 0x30: /* 3DSTATE_URB_VS */
840 min_entries = 32;
841 max_entries = (dev->gt == 2) ? 704 : 512;
842
843 assert(num_entries >= min_entries);
844 if (num_entries > max_entries)
845 num_entries = max_entries;
846 break;
847 case 0x31: /* 3DSTATE_URB_HS */
848 max_entries = (dev->gt == 2) ? 64 : 32;
849 if (num_entries > max_entries)
850 num_entries = max_entries;
851 break;
852 case 0x32: /* 3DSTATE_URB_DS */
853 if (num_entries)
854 assert(num_entries >= 138);
855 break;
856 case 0x33: /* 3DSTATE_URB_GS */
857 max_entries = (dev->gt == 2) ? 320 : 192;
858 if (num_entries > max_entries)
859 num_entries = max_entries;
860 break;
861 default:
862 break;
863 }
864
865 ilo_cp_begin(cp, cmd_len);
866 ilo_cp_write(cp, cmd | (cmd_len - 2));
867 ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
868 (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
869 num_entries);
870 ilo_cp_end(cp);
871 }
872
/**
 * Emit 3DSTATE_URB_VS: the generic URB command with sub-opcode 0x30.
 */
static void
gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x30;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
880
/**
 * Emit 3DSTATE_URB_HS: the generic URB command with sub-opcode 0x31.
 */
static void
gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x31;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
888
/**
 * Emit 3DSTATE_URB_DS: the generic URB command with sub-opcode 0x32.
 */
static void
gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x32;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
896
/**
 * Emit 3DSTATE_URB_GS: the generic URB command with sub-opcode 0x33.
 */
static void
gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x33;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
904
905 static void
906 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
907 int subop, int offset, int size,
908 struct ilo_cp *cp)
909 {
910 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
911 const uint8_t cmd_len = 2;
912 int end;
913
914 ILO_GPE_VALID_GEN(dev, 7, 7);
915
916 /* VS, HS, DS, GS, and PS variants */
917 assert(subop >= 0x12 && subop <= 0x16);
918
919 /*
920 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
921 *
922 * "(A table that says the maximum size of each constant buffer is
923 * 16KB")
924 *
925 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
926 *
927 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
928 * may not exceed the maximum value of the Constant Buffer Size."
929 *
930 * Thus, the valid range of buffer end is [0KB, 16KB].
931 */
932 end = (offset + size) / 1024;
933 if (end > 16) {
934 assert(!"invalid constant buffer end");
935 end = 16;
936 }
937
938 /* the valid range of buffer offset is [0KB, 15KB] */
939 offset = (offset + 1023) / 1024;
940 if (offset > 15) {
941 assert(!"invalid constant buffer offset");
942 offset = 15;
943 }
944
945 if (offset > end) {
946 assert(!size);
947 offset = end;
948 }
949
950 /* the valid range of buffer size is [0KB, 15KB] */
951 size = end - offset;
952 if (size > 15) {
953 assert(!"invalid constant buffer size");
954 size = 15;
955 }
956
957 ilo_cp_begin(cp, cmd_len);
958 ilo_cp_write(cp, cmd | (cmd_len - 2));
959 ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
960 size);
961 ilo_cp_end(cp);
962 }
963
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS: the generic push constant allocation
 * command with sub-opcode 0x12.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x12;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
971
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS: the generic push constant allocation
 * command with sub-opcode 0x13.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x13;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
979
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS: the generic push constant allocation
 * command with sub-opcode 0x14.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x14;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
987
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS: the generic push constant allocation
 * command with sub-opcode 0x15.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
995
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS: the generic push constant allocation
 * command with sub-opcode 0x16.
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
1003
1004 static void
1005 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
1006 const struct pipe_stream_output_info *so_info,
1007 const struct ilo_shader *sh,
1008 struct ilo_cp *cp)
1009 {
1010 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
1011 uint16_t cmd_len;
1012 int buffer_selects, num_entries, i;
1013 uint16_t so_decls[128];
1014
1015 ILO_GPE_VALID_GEN(dev, 7, 7);
1016
1017 buffer_selects = 0;
1018 num_entries = 0;
1019
1020 if (so_info) {
1021 int buffer_offsets[PIPE_MAX_SO_BUFFERS];
1022
1023 memset(buffer_offsets, 0, sizeof(buffer_offsets));
1024
1025 for (i = 0; i < so_info->num_outputs; i++) {
1026 unsigned decl, buf, attr, mask;
1027
1028 buf = so_info->output[i].output_buffer;
1029
1030 /* pad with holes */
1031 assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
1032 while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
1033 int num_dwords;
1034
1035 num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
1036 if (num_dwords > 4)
1037 num_dwords = 4;
1038
1039 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT |
1040 SO_DECL_HOLE_FLAG |
1041 ((1 << num_dwords) - 1) << SO_DECL_COMPONENT_MASK_SHIFT;
1042
1043 so_decls[num_entries++] = decl;
1044 buffer_offsets[buf] += num_dwords;
1045 }
1046
1047 /* figure out which attribute is sourced */
1048 for (attr = 0; attr < sh->out.count; attr++) {
1049 const int idx = sh->out.register_indices[attr];
1050 if (idx == so_info->output[i].register_index)
1051 break;
1052 }
1053
1054 decl = buf << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
1055
1056 if (attr < sh->out.count) {
1057 mask = ((1 << so_info->output[i].num_components) - 1) <<
1058 so_info->output[i].start_component;
1059
1060 /* PSIZE is at W channel */
1061 if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
1062 assert(mask == 0x1);
1063 mask = (mask << 3) & 0xf;
1064 }
1065
1066 decl |= attr << SO_DECL_REGISTER_INDEX_SHIFT |
1067 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1068 }
1069 else {
1070 assert(!"stream output an undefined register");
1071 mask = (1 << so_info->output[i].num_components) - 1;
1072 decl |= SO_DECL_HOLE_FLAG |
1073 mask << SO_DECL_COMPONENT_MASK_SHIFT;
1074 }
1075
1076 so_decls[num_entries++] = decl;
1077 buffer_selects |= 1 << buf;
1078 buffer_offsets[buf] += so_info->output[i].num_components;
1079 }
1080 }
1081
1082 /*
1083 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
1084 *
1085 * "Errata: All 128 decls for all four streams must be included
1086 * whenever this command is issued. The "Num Entries [n]" fields still
1087 * contain the actual numbers of valid decls."
1088 *
1089 * Also note that "DWord Length" has 9 bits for this command, and the type
1090 * of cmd_len is thus uint16_t.
1091 */
1092 cmd_len = 2 * 128 + 3;
1093
1094 ilo_cp_begin(cp, cmd_len);
1095 ilo_cp_write(cp, cmd | (cmd_len - 2));
1096 ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT |
1097 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
1098 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
1099 buffer_selects << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT);
1100 ilo_cp_write(cp, 0 << SO_NUM_ENTRIES_3_SHIFT |
1101 0 << SO_NUM_ENTRIES_2_SHIFT |
1102 0 << SO_NUM_ENTRIES_1_SHIFT |
1103 num_entries << SO_NUM_ENTRIES_0_SHIFT);
1104
1105 for (i = 0; i < num_entries; i++) {
1106 ilo_cp_write(cp, so_decls[i]);
1107 ilo_cp_write(cp, 0);
1108 }
1109 for (; i < 128; i++) {
1110 ilo_cp_write(cp, 0);
1111 ilo_cp_write(cp, 0);
1112 }
1113
1114 ilo_cp_end(cp);
1115 }
1116
1117 static void
1118 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
1119 int index, int base, int stride,
1120 const struct pipe_stream_output_target *so_target,
1121 struct ilo_cp *cp)
1122 {
1123 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
1124 const uint8_t cmd_len = 4;
1125 struct ilo_buffer *buf;
1126 int end;
1127
1128 ILO_GPE_VALID_GEN(dev, 7, 7);
1129
1130 if (!so_target || !so_target->buffer) {
1131 ilo_cp_begin(cp, cmd_len);
1132 ilo_cp_write(cp, cmd | (cmd_len - 2));
1133 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
1134 ilo_cp_write(cp, 0);
1135 ilo_cp_write(cp, 0);
1136 ilo_cp_end(cp);
1137 return;
1138 }
1139
1140 buf = ilo_buffer(so_target->buffer);
1141
1142 /* DWord-aligned */
1143 assert(stride % 4 == 0 && base % 4 == 0);
1144 assert(so_target->buffer_offset % 4 == 0);
1145
1146 stride &= ~3;
1147 base = (base + so_target->buffer_offset) & ~3;
1148 end = (base + so_target->buffer_size) & ~3;
1149
1150 ilo_cp_begin(cp, cmd_len);
1151 ilo_cp_write(cp, cmd | (cmd_len - 2));
1152 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT |
1153 stride);
1154 ilo_cp_write_bo(cp, base, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1155 ilo_cp_write_bo(cp, end, buf->bo, INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1156 ilo_cp_end(cp);
1157 }
1158
1159 static void
1160 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1161 const struct pipe_draw_info *info,
1162 bool rectlist,
1163 struct ilo_cp *cp)
1164 {
1165 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1166 const uint8_t cmd_len = 7;
1167 const int prim = (rectlist) ?
1168 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1169 const int vb_access = (info->indexed) ?
1170 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1171 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1172
1173 ILO_GPE_VALID_GEN(dev, 7, 7);
1174
1175 ilo_cp_begin(cp, cmd_len);
1176 ilo_cp_write(cp, cmd | (cmd_len - 2));
1177 ilo_cp_write(cp, vb_access | prim);
1178 ilo_cp_write(cp, info->count);
1179 ilo_cp_write(cp, info->start);
1180 ilo_cp_write(cp, info->instance_count);
1181 ilo_cp_write(cp, info->start_instance);
1182 ilo_cp_write(cp, info->index_bias);
1183 ilo_cp_end(cp);
1184 }
1185
1186 static uint32_t
1187 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1188 const struct ilo_viewport_cso *viewports,
1189 unsigned num_viewports,
1190 struct ilo_cp *cp)
1191 {
1192 const int state_align = 64 / 4;
1193 const int state_len = 16 * num_viewports;
1194 uint32_t state_offset, *dw;
1195 unsigned i;
1196
1197 ILO_GPE_VALID_GEN(dev, 7, 7);
1198
1199 /*
1200 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1201 *
1202 * "The viewport-specific state used by both the SF and CL units
1203 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1204 * of which contains the DWords described below. The start of each
1205 * element is spaced 16 DWords apart. The location of first element of
1206 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1207 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1208 */
1209 assert(num_viewports && num_viewports <= 16);
1210
1211 dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
1212 state_len, state_align, &state_offset);
1213
1214 for (i = 0; i < num_viewports; i++) {
1215 const struct ilo_viewport_cso *vp = &viewports[i];
1216
1217 dw[0] = fui(vp->m00);
1218 dw[1] = fui(vp->m11);
1219 dw[2] = fui(vp->m22);
1220 dw[3] = fui(vp->m30);
1221 dw[4] = fui(vp->m31);
1222 dw[5] = fui(vp->m32);
1223 dw[6] = 0;
1224 dw[7] = 0;
1225 dw[8] = fui(vp->min_gbx);
1226 dw[9] = fui(vp->max_gbx);
1227 dw[10] = fui(vp->min_gby);
1228 dw[11] = fui(vp->max_gby);
1229 dw[12] = 0;
1230 dw[13] = 0;
1231 dw[14] = 0;
1232 dw[15] = 0;
1233
1234 dw += 16;
1235 }
1236
1237 return state_offset;
1238 }
1239
1240 void
1241 ilo_gpe_init_view_surface_null_gen7(const struct ilo_dev_info *dev,
1242 unsigned width, unsigned height,
1243 unsigned depth, unsigned level,
1244 struct ilo_view_surface *surf)
1245 {
1246 uint32_t *dw;
1247
1248 ILO_GPE_VALID_GEN(dev, 7, 7);
1249
1250 /*
1251 * From the Ivy Bridge PRM, volume 4 part 1, page 62:
1252 *
1253 * "A null surface is used in instances where an actual surface is not
1254 * bound. When a write message is generated to a null surface, no
1255 * actual surface is written to. When a read message (including any
1256 * sampling engine message) is generated to a null surface, the result
1257 * is all zeros. Note that a null surface type is allowed to be used
1258 * with all messages, even if it is not specificially indicated as
1259 * supported. All of the remaining fields in surface state are ignored
1260 * for null surfaces, with the following exceptions:
1261 *
1262 * * Width, Height, Depth, LOD, and Render Target View Extent fields
1263 * must match the depth buffer's corresponding state for all render
1264 * target surfaces, including null.
1265 * * All sampling engine and data port messages support null surfaces
1266 * with the above behavior, even if not mentioned as specifically
1267 * supported, except for the following:
1268 * * Data Port Media Block Read/Write messages.
1269 * * The Surface Type of a surface used as a render target (accessed
1270 * via the Data Port's Render Target Write message) must be the same
1271 * as the Surface Type of all other render targets and of the depth
1272 * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
1273 * buffer or render targets are SURFTYPE_NULL."
1274 *
1275 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
1276 *
1277 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1278 * true"
1279 */
1280
1281 STATIC_ASSERT(Elements(surf->payload) >= 8);
1282 dw = surf->payload;
1283
1284 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1285 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
1286 BRW_SURFACE_TILED << 13;
1287
1288 dw[1] = 0;
1289
1290 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1291 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1292
1293 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
1294
1295 dw[4] = 0;
1296 dw[5] = level;
1297
1298 dw[6] = 0;
1299 dw[7] = 0;
1300
1301 surf->bo = NULL;
1302 }
1303
1304 void
1305 ilo_gpe_init_view_surface_for_buffer_gen7(const struct ilo_dev_info *dev,
1306 const struct ilo_buffer *buf,
1307 unsigned offset, unsigned size,
1308 unsigned struct_size,
1309 enum pipe_format elem_format,
1310 bool is_rt, bool render_cache_rw,
1311 struct ilo_view_surface *surf)
1312 {
1313 const bool typed = (elem_format != PIPE_FORMAT_NONE);
1314 const bool structured = (!typed && struct_size > 1);
1315 const int elem_size = (typed) ?
1316 util_format_get_blocksize(elem_format) : 1;
1317 int width, height, depth, pitch;
1318 int surface_type, surface_format, num_entries;
1319 uint32_t *dw;
1320
1321 ILO_GPE_VALID_GEN(dev, 7, 7);
1322
1323 surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
1324
1325 surface_format = (typed) ?
1326 ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
1327
1328 num_entries = size / struct_size;
1329 /* see if there is enough space to fit another element */
1330 if (size % struct_size >= elem_size && !structured)
1331 num_entries++;
1332
1333 /*
1334 * From the Ivy Bridge PRM, volume 4 part 1, page 67:
1335 *
1336 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1337 * Address) specifies the base address of first element of the
1338 * surface. The surface is interpreted as a simple array of that
1339 * single element type. The address must be naturally-aligned to the
1340 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1341 * must be 16-byte aligned)
1342 *
1343 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1344 * the base address of the first element of the surface, computed in
1345 * software by adding the surface base address to the byte offset of
1346 * the element in the buffer."
1347 */
1348 if (is_rt)
1349 assert(offset % elem_size == 0);
1350
1351 /*
1352 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1353 *
1354 * "For typed buffer and structured buffer surfaces, the number of
1355 * entries in the buffer ranges from 1 to 2^27. For raw buffer
1356 * surfaces, the number of entries in the buffer is the number of
1357 * bytes which can range from 1 to 2^30."
1358 */
1359 assert(num_entries >= 1 &&
1360 num_entries <= 1 << ((typed || structured) ? 27 : 30));
1361
1362 /*
1363 * From the Ivy Bridge PRM, volume 4 part 1, page 69:
1364 *
1365 * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
1366 * 11 if the Surface Format is RAW (the size of the buffer must be a
1367 * multiple of 4 bytes)."
1368 *
1369 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1370 *
1371 * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
1372 * field (Surface Pitch) indicates the size of the structure."
1373 *
1374 * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
1375 * must be a multiple of 4 bytes."
1376 */
1377 if (structured)
1378 assert(struct_size % 4 == 0);
1379 else if (!typed)
1380 assert(num_entries % 4 == 0);
1381
1382 pitch = struct_size;
1383
1384 pitch--;
1385 num_entries--;
1386 /* bits [6:0] */
1387 width = (num_entries & 0x0000007f);
1388 /* bits [20:7] */
1389 height = (num_entries & 0x001fff80) >> 7;
1390 /* bits [30:21] */
1391 depth = (num_entries & 0x7fe00000) >> 21;
1392 /* limit to [26:21] */
1393 if (typed || structured)
1394 depth &= 0x3f;
1395
1396 STATIC_ASSERT(Elements(surf->payload) >= 8);
1397 dw = surf->payload;
1398
1399 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1400 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1401 if (render_cache_rw)
1402 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1403
1404 dw[1] = offset;
1405
1406 dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
1407 SET_FIELD(width, GEN7_SURFACE_WIDTH);
1408
1409 dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
1410 pitch;
1411
1412 dw[4] = 0;
1413 dw[5] = 0;
1414
1415 dw[6] = 0;
1416 dw[7] = 0;
1417
1418 /* do not increment reference count */
1419 surf->bo = buf->bo;
1420 }
1421
1422 void
1423 ilo_gpe_init_view_surface_for_texture_gen7(const struct ilo_dev_info *dev,
1424 const struct ilo_texture *tex,
1425 enum pipe_format format,
1426 unsigned first_level,
1427 unsigned num_levels,
1428 unsigned first_layer,
1429 unsigned num_layers,
1430 bool is_rt, bool render_cache_rw,
1431 struct ilo_view_surface *surf)
1432 {
1433 int surface_type, surface_format;
1434 int width, height, depth, pitch, lod;
1435 unsigned layer_offset, x_offset, y_offset;
1436 uint32_t *dw;
1437
1438 ILO_GPE_VALID_GEN(dev, 7, 7);
1439
1440 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
1441 assert(surface_type != BRW_SURFACE_BUFFER);
1442
1443 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
1444 format = PIPE_FORMAT_Z32_FLOAT;
1445
1446 if (is_rt)
1447 surface_format = ilo_translate_render_format(format);
1448 else
1449 surface_format = ilo_translate_texture_format(format);
1450 assert(surface_format >= 0);
1451
1452 width = tex->base.width0;
1453 height = tex->base.height0;
1454 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1455 tex->base.depth0 : num_layers;
1456 pitch = tex->bo_stride;
1457
1458 if (surface_type == BRW_SURFACE_CUBE) {
1459 /*
1460 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1461 *
1462 * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
1463 * this field is [0,340], indicating the number of cube array
1464 * elements (equal to the number of underlying 2D array elements
1465 * divided by 6). For other surfaces, this field must be zero."
1466 *
1467 * When is_rt is true, we treat the texture as a 2D one to avoid the
1468 * restriction.
1469 */
1470 if (is_rt) {
1471 surface_type = BRW_SURFACE_2D;
1472 }
1473 else {
1474 assert(num_layers % 6 == 0);
1475 depth = num_layers / 6;
1476 }
1477 }
1478
1479 /* sanity check the size */
1480 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1481 assert(first_layer < 2048 && num_layers <= 2048);
1482 switch (surface_type) {
1483 case BRW_SURFACE_1D:
1484 assert(width <= 16384 && height == 1 && depth <= 2048);
1485 break;
1486 case BRW_SURFACE_2D:
1487 assert(width <= 16384 && height <= 16384 && depth <= 2048);
1488 break;
1489 case BRW_SURFACE_3D:
1490 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1491 if (!is_rt)
1492 assert(first_layer == 0);
1493 break;
1494 case BRW_SURFACE_CUBE:
1495 assert(width <= 16384 && height <= 16384 && depth <= 86);
1496 assert(width == height);
1497 if (is_rt)
1498 assert(first_layer == 0);
1499 break;
1500 default:
1501 assert(!"unexpected surface type");
1502 break;
1503 }
1504
1505 if (is_rt) {
1506 /*
1507 * Compute the offset to the layer manually.
1508 *
1509 * For rendering, the hardware requires LOD to be the same for all
1510 * render targets and the depth buffer. We need to compute the offset
1511 * to the layer manually and always set LOD to 0.
1512 */
1513 if (true) {
1514 /* we lose the capability for layered rendering */
1515 assert(num_layers == 1);
1516
1517 layer_offset = ilo_texture_get_slice_offset(tex,
1518 first_level, first_layer, &x_offset, &y_offset);
1519
1520 assert(x_offset % 4 == 0);
1521 assert(y_offset % 2 == 0);
1522 x_offset /= 4;
1523 y_offset /= 2;
1524
1525 /* derive the size for the LOD */
1526 width = u_minify(width, first_level);
1527 height = u_minify(height, first_level);
1528 if (surface_type == BRW_SURFACE_3D)
1529 depth = u_minify(depth, first_level);
1530 else
1531 depth = 1;
1532
1533 first_level = 0;
1534 first_layer = 0;
1535 lod = 0;
1536 }
1537 else {
1538 layer_offset = 0;
1539 x_offset = 0;
1540 y_offset = 0;
1541 }
1542
1543 assert(num_levels == 1);
1544 lod = first_level;
1545 }
1546 else {
1547 layer_offset = 0;
1548 x_offset = 0;
1549 y_offset = 0;
1550
1551 lod = num_levels - 1;
1552 }
1553
1554 /*
1555 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1556 *
1557 * "The Base Address for linear render target surfaces and surfaces
1558 * accessed with the typed surface read/write data port messages must
1559 * be element-size aligned, for non-YUV surface formats, or a multiple
1560 * of 2 element-sizes for YUV surface formats. Other linear surfaces
1561 * have no alignment requirements (byte alignment is sufficient)."
1562 *
1563 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1564 *
1565 * "For linear render target surfaces and surfaces accessed with the
1566 * typed data port messages, the pitch must be a multiple of the
1567 * element size for non-YUV surface formats. Pitch must be a multiple
1568 * of 2 * element size for YUV surface formats. For linear surfaces
1569 * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
1570 * of 4 bytes.For other linear surfaces, the pitch can be any multiple
1571 * of bytes."
1572 *
1573 * From the Ivy Bridge PRM, volume 4 part 1, page 74:
1574 *
1575 * "For linear surfaces, this field (X Offset) must be zero."
1576 */
1577 if (tex->tiling == INTEL_TILING_NONE) {
1578 if (is_rt) {
1579 const int elem_size = util_format_get_blocksize(format);
1580 assert(layer_offset % elem_size == 0);
1581 assert(pitch % elem_size == 0);
1582 }
1583
1584 assert(!x_offset);
1585 }
1586
1587 STATIC_ASSERT(Elements(surf->payload) >= 8);
1588 dw = surf->payload;
1589
1590 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1591 surface_format << BRW_SURFACE_FORMAT_SHIFT |
1592 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling) << 13;
1593
1594 if (surface_type != BRW_SURFACE_3D && depth > 1)
1595 dw[0] |= GEN7_SURFACE_IS_ARRAY;
1596
1597 if (tex->valign_4)
1598 dw[0] |= GEN7_SURFACE_VALIGN_4;
1599
1600 if (tex->halign_8)
1601 dw[0] |= GEN7_SURFACE_HALIGN_8;
1602
1603 if (tex->array_spacing_full)
1604 dw[0] |= GEN7_SURFACE_ARYSPC_FULL;
1605 else
1606 dw[0] |= GEN7_SURFACE_ARYSPC_LOD0;
1607
1608 if (render_cache_rw)
1609 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1610
1611 if (surface_type == BRW_SURFACE_CUBE && !is_rt)
1612 dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
1613
1614 dw[1] = layer_offset;
1615
1616 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1617 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1618
1619 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
1620 (pitch - 1);
1621
1622 dw[4] = first_layer << 18 |
1623 (num_layers - 1) << 7;
1624
1625 /*
1626 * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
1627 * means the samples are interleaved. The layouts are the same when the
1628 * number of samples is 1.
1629 */
1630 if (tex->interleaved && tex->base.nr_samples > 1) {
1631 assert(!is_rt);
1632 dw[4] |= GEN7_SURFACE_MSFMT_DEPTH_STENCIL;
1633 }
1634 else {
1635 dw[4] |= GEN7_SURFACE_MSFMT_MSS;
1636 }
1637
1638 if (tex->base.nr_samples > 4)
1639 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
1640 else if (tex->base.nr_samples > 2)
1641 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
1642 else
1643 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;
1644
1645 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
1646 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
1647 SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
1648 lod;
1649
1650 dw[6] = 0;
1651 dw[7] = 0;
1652
1653 /* do not increment reference count */
1654 surf->bo = tex->bo;
1655 }
1656
1657 static int
1658 gen7_estimate_command_size(const struct ilo_dev_info *dev,
1659 enum ilo_gpe_gen7_command cmd,
1660 int arg)
1661 {
1662 static const struct {
1663 int header;
1664 int body;
1665 } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
1666 [ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 },
1667 [ILO_GPE_GEN7_STATE_SIP] = { 0, 2 },
1668 [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 },
1669 [ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 },
1670 [ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 },
1671 [ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 },
1672 [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
1673 [ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 },
1674 [ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 },
1675 [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 },
1676 [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
1677 [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
1678 [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
1679 [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
1680 [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
1681 [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 },
1682 [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 },
1683 [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
1684 [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 },
1685 [ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 },
1686 [ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 },
1687 [ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 },
1688 [ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 },
1689 [ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 },
1690 [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 },
1691 [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 },
1692 [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 },
1693 [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 },
1694 [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 },
1695 [ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 },
1696 [ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 },
1697 [ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 },
1698 [ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 },
1699 [ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 },
1700 [ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 },
1701 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 },
1702 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 },
1703 [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 },
1704 [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 },
1705 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 },
1706 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 },
1707 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 },
1708 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 },
1709 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 },
1710 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 },
1711 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 },
1712 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 },
1713 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 },
1714 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 },
1715 [ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 },
1716 [ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 },
1717 [ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 },
1718 [ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 },
1719 [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
1720 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
1721 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, },
1722 [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 },
1723 [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
1724 [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 },
1725 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 },
1726 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 },
1727 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 },
1728 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 },
1729 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 },
1730 [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 },
1731 [ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 },
1732 [ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 },
1733 [ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 },
1734 };
1735 const int header = gen7_command_size_table[cmd].header;
1736 const int body = gen7_command_size_table[cmd].body;
1737 const int count = arg;
1738
1739 ILO_GPE_VALID_GEN(dev, 7, 7);
1740 assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
1741
1742 return (likely(count)) ? header + body * count : 0;
1743 }
1744
1745 static int
1746 gen7_estimate_state_size(const struct ilo_dev_info *dev,
1747 enum ilo_gpe_gen7_state state,
1748 int arg)
1749 {
1750 static const struct {
1751 int alignment;
1752 int body;
1753 bool is_array;
1754 } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = {
1755 [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
1756 [ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true },
1757 [ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true },
1758 [ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false },
1759 [ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true },
1760 [ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false },
1761 [ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true },
1762 [ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true },
1763 [ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false },
1764 [ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true },
1765 [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false },
1766 [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
1767 };
1768 const int alignment = gen7_state_size_table[state].alignment;
1769 const int body = gen7_state_size_table[state].body;
1770 const bool is_array = gen7_state_size_table[state].is_array;
1771 const int count = arg;
1772 int estimate;
1773
1774 ILO_GPE_VALID_GEN(dev, 7, 7);
1775 assert(state < ILO_GPE_GEN7_STATE_COUNT);
1776
1777 if (likely(count)) {
1778 if (is_array) {
1779 estimate = (alignment - 1) + body * count;
1780 }
1781 else {
1782 estimate = (alignment - 1) + body;
1783 /* all states are aligned */
1784 if (count > 1)
1785 estimate += util_align_npot(body, alignment) * (count - 1);
1786 }
1787 }
1788 else {
1789 estimate = 0;
1790 }
1791
1792 return estimate;
1793 }
1794
1795 static void
1796 gen7_init(struct ilo_gpe_gen7 *gen7)
1797 {
1798 const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
1799
1800 gen7->estimate_command_size = gen7_estimate_command_size;
1801 gen7->estimate_state_size = gen7_estimate_state_size;
1802
1803 #define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
1804 #define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name
1805 GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
1806 GEN7_USE(gen7, STATE_SIP, gen6);
1807 GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
1808 GEN7_USE(gen7, PIPELINE_SELECT, gen6);
1809 GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
1810 GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
1811 GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
1812 GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
1813 GEN7_SET(gen7, GPGPU_WALKER);
1814 GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
1815 GEN7_USE(gen7, 3DSTATE_DEPTH_BUFFER, gen6);
1816 GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
1817 GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
1818 GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
1819 GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
1820 GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
1821 GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
1822 GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
1823 GEN7_USE(gen7, 3DSTATE_VS, gen6);
1824 GEN7_SET(gen7, 3DSTATE_GS);
1825 GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
1826 GEN7_SET(gen7, 3DSTATE_SF);
1827 GEN7_SET(gen7, 3DSTATE_WM);
1828 GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
1829 GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
1830 GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
1831 GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
1832 GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
1833 GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
1834 GEN7_SET(gen7, 3DSTATE_HS);
1835 GEN7_SET(gen7, 3DSTATE_TE);
1836 GEN7_SET(gen7, 3DSTATE_DS);
1837 GEN7_SET(gen7, 3DSTATE_STREAMOUT);
1838 GEN7_SET(gen7, 3DSTATE_SBE);
1839 GEN7_SET(gen7, 3DSTATE_PS);
1840 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1841 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1842 GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
1843 GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
1844 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
1845 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
1846 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
1847 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
1848 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
1849 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
1850 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
1851 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
1852 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
1853 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
1854 GEN7_SET(gen7, 3DSTATE_URB_VS);
1855 GEN7_SET(gen7, 3DSTATE_URB_HS);
1856 GEN7_SET(gen7, 3DSTATE_URB_DS);
1857 GEN7_SET(gen7, 3DSTATE_URB_GS);
1858 GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
1859 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
1860 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
1861 GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
1862 GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
1863 GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
1864 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1865 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1866 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1867 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1868 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1869 GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
1870 GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
1871 GEN7_USE(gen7, PIPE_CONTROL, gen6);
1872 GEN7_SET(gen7, 3DPRIMITIVE);
1873 GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
1874 GEN7_SET(gen7, SF_CLIP_VIEWPORT);
1875 GEN7_USE(gen7, CC_VIEWPORT, gen6);
1876 GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
1877 GEN7_USE(gen7, BLEND_STATE, gen6);
1878 GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
1879 GEN7_USE(gen7, SCISSOR_RECT, gen6);
1880 GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
1881 GEN7_USE(gen7, SURFACE_STATE, gen6);
1882 GEN7_USE(gen7, SAMPLER_STATE, gen6);
1883 GEN7_USE(gen7, SAMPLER_BORDER_COLOR_STATE, gen6);
1884 GEN7_USE(gen7, push_constant_buffer, gen6);
1885 #undef GEN7_USE
1886 #undef GEN7_SET
1887 }
1888
1889 static struct ilo_gpe_gen7 gen7_gpe;
1890
1891 const struct ilo_gpe_gen7 *
1892 ilo_gpe_gen7_get(void)
1893 {
1894 if (!gen7_gpe.estimate_command_size)
1895 gen7_init(&gen7_gpe);
1896
1897 return &gen7_gpe;
1898 }