613933290a5ab643778a02fb551123788f209d64
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "brw_defines.h"
29 #include "intel_reg.h"
30
31 #include "ilo_cp.h"
32 #include "ilo_format.h"
33 #include "ilo_resource.h"
34 #include "ilo_shader.h"
35 #include "ilo_gpe_gen7.h"
36
/**
 * Placeholder for the GPGPU_WALKER command.
 *
 * Nothing is written to the command parser; the function only trips an
 * assertion to flag that the command is not supported.
 */
static void
gen7_emit_GPGPU_WALKER(const struct ilo_dev_info *dev,
                       struct ilo_cp *cp)
{
   /* neither argument is consulted */
   (void) dev;
   (void) cp;

   assert(!"GPGPU_WALKER unsupported");
}
43
/**
 * Emit 3DSTATE_CLEAR_PARAMS.
 *
 * The command is three dwords long: the header, the caller-supplied
 * clear_val, and a trailing dword with only bit 0 set.
 */
static void
gen7_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
                               uint32_t clear_val,
                               struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x04) | (cmd_len - 2);
   /* NOTE(review): bit 0 is presumably the "clear value valid" flag */
   const uint32_t dw2 = 1;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, clear_val);
   ilo_cp_write(cp, dw2);
   ilo_cp_end(cp);
}
60
/**
 * Emit 3DSTATE_DEPTH_BUFFER.
 *
 * This is a straight pass-through: all arguments are handed unchanged to
 * ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(), which does the actual work.
 */
static void
gen7_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
                               const struct pipe_surface *surface,
                               const struct pipe_depth_stencil_alpha_state *dsa,
                               bool hiz,
                               struct ilo_cp *cp)
{
   /* the emission is delegated entirely to the GEN6 code */
   ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(dev, surface, dsa, hiz, cp);
}
70
/**
 * Emit a two-dword 3DSTATE command that carries a single 32-bit pointer.
 *
 * \param subop    sub-opcode that selects which command is emitted
 * \param pointer  value written verbatim as the command's only payload dword
 */
static void
gen7_emit_3dstate_pointer(const struct ilo_dev_info *dev,
                          int subop, uint32_t pointer,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, subop) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, pointer);
   ilo_cp_end(cp);
}
86
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (sub-opcode 0x0e) with color_calc_state
 * as its pointer dword.
 */
static void
gen7_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const int subop = 0x0e;

   gen7_emit_3dstate_pointer(dev, subop, color_calc_state, cp);
}
94
95 static void
96 gen7_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
97 const struct ilo_shader *gs,
98 int num_samplers,
99 struct ilo_cp *cp)
100 {
101 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
102 const uint8_t cmd_len = 7;
103 uint32_t dw2, dw4, dw5;
104 int max_threads;
105
106 ILO_GPE_VALID_GEN(dev, 7, 7);
107
108 switch (dev->gen) {
109 case ILO_GEN(7):
110 max_threads = (dev->gt == 2) ? 128 : 36;
111 break;
112 default:
113 max_threads = 1;
114 break;
115 }
116
117 if (!gs) {
118 ilo_cp_begin(cp, cmd_len);
119 ilo_cp_write(cp, cmd | (cmd_len - 2));
120 ilo_cp_write(cp, 0);
121 ilo_cp_write(cp, 0);
122 ilo_cp_write(cp, 0);
123 ilo_cp_write(cp, 0);
124 ilo_cp_write(cp, GEN6_GS_STATISTICS_ENABLE);
125 ilo_cp_write(cp, 0);
126 ilo_cp_end(cp);
127 return;
128 }
129
130 dw2 = ((num_samplers + 3) / 4) << GEN6_GS_SAMPLER_COUNT_SHIFT;
131
132 dw4 = ((gs->in.count + 1) / 2) << GEN6_GS_URB_READ_LENGTH_SHIFT |
133 GEN7_GS_INCLUDE_VERTEX_HANDLES |
134 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
135 gs->in.start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
136
137 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
138 GEN6_GS_STATISTICS_ENABLE |
139 GEN6_GS_ENABLE;
140
141 ilo_cp_begin(cp, cmd_len);
142 ilo_cp_write(cp, cmd | (cmd_len - 2));
143 ilo_cp_write(cp, gs->cache_offset);
144 ilo_cp_write(cp, dw2);
145 ilo_cp_write(cp, 0); /* scratch */
146 ilo_cp_write(cp, dw4);
147 ilo_cp_write(cp, dw5);
148 ilo_cp_write(cp, 0);
149 ilo_cp_end(cp);
150 }
151
152 static void
153 gen7_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
154 const struct pipe_rasterizer_state *rasterizer,
155 const struct pipe_surface *zs_surf,
156 struct ilo_cp *cp)
157 {
158 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
159 const uint8_t cmd_len = 7;
160 uint32_t dw[6];
161
162 ILO_GPE_VALID_GEN(dev, 7, 7);
163
164 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
165 1, (zs_surf) ? zs_surf->format : PIPE_FORMAT_NONE, true,
166 dw, Elements(dw));
167
168 ilo_cp_begin(cp, cmd_len);
169 ilo_cp_write(cp, cmd | (cmd_len - 2));
170 ilo_cp_write_multi(cp, dw, 6);
171 ilo_cp_end(cp);
172 }
173
174 static void
175 gen7_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
176 const struct ilo_shader *fs,
177 const struct pipe_rasterizer_state *rasterizer,
178 bool cc_may_kill,
179 struct ilo_cp *cp)
180 {
181 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
182 const uint8_t cmd_len = 3;
183 const int num_samples = 1;
184 uint32_t dw1, dw2;
185
186 ILO_GPE_VALID_GEN(dev, 7, 7);
187
188 dw1 = GEN7_WM_STATISTICS_ENABLE |
189 GEN7_WM_LINE_AA_WIDTH_2_0;
190
191 if (false) {
192 dw1 |= GEN7_WM_DEPTH_CLEAR;
193 dw1 |= GEN7_WM_DEPTH_RESOLVE;
194 dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE;
195 }
196
197 if (fs) {
198 /*
199 * Set this bit if
200 *
201 * a) fs writes colors and color is not masked, or
202 * b) fs writes depth, or
203 * c) fs or cc kills
204 */
205 dw1 |= GEN7_WM_DISPATCH_ENABLE;
206
207 /*
208 * From the Ivy Bridge PRM, volume 2 part 1, page 278:
209 *
210 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
211 * the PS kernel or color calculator has the ability to kill
212 * (discard) pixels or samples, other than due to depth or stencil
213 * testing. This bit is required to be ENABLED in the following
214 * situations:
215 *
216 * - The API pixel shader program contains "killpix" or "discard"
217 * instructions, or other code in the pixel shader kernel that
218 * can cause the final pixel mask to differ from the pixel mask
219 * received on dispatch.
220 *
221 * - A sampler with chroma key enabled with kill pixel mode is used
222 * by the pixel shader.
223 *
224 * - Any render target has Alpha Test Enable or AlphaToCoverage
225 * Enable enabled.
226 *
227 * - The pixel shader kernel generates and outputs oMask.
228 *
229 * Note: As ClipDistance clipping is fully supported in hardware
230 * and therefore not via PS instructions, there should be no need
231 * to ENABLE this bit due to ClipDistance clipping."
232 */
233 if (fs->has_kill || cc_may_kill)
234 dw1 |= GEN7_WM_KILL_ENABLE;
235
236 if (fs->out.has_pos)
237 dw1 |= GEN7_WM_PSCDEPTH_ON;
238 if (fs->in.has_pos)
239 dw1 |= GEN7_WM_USES_SOURCE_DEPTH | GEN7_WM_USES_SOURCE_W;
240
241 dw1 |= fs->in.barycentric_interpolation_mode <<
242 GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
243 }
244 else if (cc_may_kill) {
245 dw1 |= GEN7_WM_DISPATCH_ENABLE |
246 GEN7_WM_KILL_ENABLE;
247 }
248
249 dw1 |= GEN7_WM_POSITION_ZW_PIXEL;
250
251 /* same value as in 3DSTATE_SF */
252 if (rasterizer->line_smooth)
253 dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_1_0;
254
255 if (rasterizer->poly_stipple_enable)
256 dw1 |= GEN7_WM_POLYGON_STIPPLE_ENABLE;
257 if (rasterizer->line_stipple_enable)
258 dw1 |= GEN7_WM_LINE_STIPPLE_ENABLE;
259
260 if (rasterizer->bottom_edge_rule)
261 dw1 |= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
262
263 if (num_samples > 1) {
264 if (rasterizer->multisample)
265 dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
266 else
267 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
268
269 dw2 = GEN7_WM_MSDISPMODE_PERPIXEL;
270 }
271 else {
272 dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
273
274 dw2 = GEN7_WM_MSDISPMODE_PERSAMPLE;
275 }
276
277 ilo_cp_begin(cp, cmd_len);
278 ilo_cp_write(cp, cmd | (cmd_len - 2));
279 ilo_cp_write(cp, dw1);
280 ilo_cp_write(cp, dw2);
281 ilo_cp_end(cp);
282 }
283
/**
 * Emit one of the 3DSTATE_CONSTANT_{VS,HS,DS,GS,PS} commands.
 *
 * The payload is six dwords: dw[0] and dw[1] pack the four 16-bit read
 * lengths, and dw[2] through dw[5] hold the four buffer addresses.
 *
 * \param subop     sub-opcode selecting the shader stage
 * \param bufs      buffer addresses; each must be a multiple of 32
 * \param sizes     buffer sizes in bytes; a zero size ends the enabled list
 * \param num_bufs  number of valid entries in bufs and sizes, at most 4
 */
static void
gen7_emit_3dstate_constant(const struct ilo_dev_info *dev,
                           int subop,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
   const uint8_t cmd_len = 7;
   uint32_t dw[6];
   int total_read_length = 0;
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /* VS, HS, DS, GS, and PS variants */
   assert(subop >= 0x15 && subop <= 0x1a && subop != 0x18);

   assert(num_bufs <= 4);

   dw[0] = 0;
   dw[1] = 0;

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 112:
    *
    *     "Constant buffers must be enabled in order from Constant Buffer 0
    *      to Constant Buffer 3 within this command.  For example, it is
    *      not allowed to enable Constant Buffer 1 by programming a
    *      non-zero value in the VS Constant Buffer 1 Read Length without a
    *      non-zero value in VS Constant Buffer 0 Read Length."
    *
    * Hence only the leading run of non-empty buffers is enabled.
    */
   for (i = 0; i < 4 && i < num_bufs && sizes[i]; i++) {
      /* read lengths are in 256-bit units */
      const int read_len = (sizes[i] + 31) / 32;
      /* even slots use the low half of a dword, odd slots the high half */
      const int shift = (i & 1) ? 16 : 0;

      /* the lower 5 bits are used for memory object control state */
      assert(bufs[i] % 32 == 0);

      dw[i / 2] |= read_len << shift;
      dw[2 + i] = bufs[i];

      total_read_length += read_len;
   }

   /* every remaining slot must be disabled */
   for (; i < 4; i++) {
      assert(i >= num_bufs || !sizes[i]);
      dw[2 + i] = 0;
   }

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 113:
    *
    *     "The sum of all four read length fields must be less than or equal
    *      to the size of 64"
    */
   assert(total_read_length <= 64);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write_multi(cp, dw, 6);
   ilo_cp_end(cp);
}
351
/**
 * Emit 3DSTATE_CONSTANT_VS (sub-opcode 0x15); the arguments are forwarded
 * to gen7_emit_3dstate_constant().
 */
static void
gen7_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
360
/**
 * Emit 3DSTATE_CONSTANT_GS (sub-opcode 0x16); the arguments are forwarded
 * to gen7_emit_3dstate_constant().
 */
static void
gen7_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
369
/**
 * Emit 3DSTATE_CONSTANT_PS (sub-opcode 0x17); the arguments are forwarded
 * to gen7_emit_3dstate_constant().
 */
static void
gen7_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x17;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
378
/**
 * Emit 3DSTATE_SAMPLE_MASK.
 *
 * \param sample_mask  requested mask; bits at or above num_samples are
 *                     cleared before the mask is written
 * \param num_samples  number of samples; bit 0 of the mask is always
 *                     allowed through regardless of this value
 */
static void
gen7_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              int num_samples,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1;
   unsigned masked;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   /*
    * From the Ivy Bridge PRM, volume 2 part 1, page 294:
    *
    *     "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field
    *      (Sample Mask) must be zero.
    *
    *      If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field
    *      must be zero."
    */
   masked = sample_mask & valid_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, masked);
   ilo_cp_end(cp);
}
407
/**
 * Emit 3DSTATE_CONSTANT_HS (sub-opcode 0x19); the arguments are forwarded
 * to gen7_emit_3dstate_constant().
 */
static void
gen7_emit_3DSTATE_CONSTANT_HS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x19;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
416
/**
 * Emit 3DSTATE_CONSTANT_DS (sub-opcode 0x1a); the arguments are forwarded
 * to gen7_emit_3dstate_constant().
 */
static void
gen7_emit_3DSTATE_CONSTANT_DS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const int subop = 0x1a;

   gen7_emit_3dstate_constant(dev, subop, bufs, sizes, num_bufs, cp);
}
425
426 static void
427 gen7_emit_3DSTATE_HS(const struct ilo_dev_info *dev,
428 const struct ilo_shader *hs,
429 int max_threads, int num_samplers,
430 struct ilo_cp *cp)
431 {
432 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1b);
433 const uint8_t cmd_len = 7;
434 uint32_t dw1, dw2, dw5;
435
436 ILO_GPE_VALID_GEN(dev, 7, 7);
437
438 if (!hs) {
439 ilo_cp_begin(cp, cmd_len);
440 ilo_cp_write(cp, cmd | (cmd_len - 2));
441 ilo_cp_write(cp, 0);
442 ilo_cp_write(cp, 0);
443 ilo_cp_write(cp, 0);
444 ilo_cp_write(cp, 0);
445 ilo_cp_write(cp, 0);
446 ilo_cp_write(cp, 0);
447 ilo_cp_end(cp);
448
449 return;
450 }
451
452 dw1 = (num_samplers + 3) / 4 << 27 |
453 0 << 18 |
454 (max_threads - 1);
455 if (false)
456 dw1 |= 1 << 16;
457
458 dw2 = 1 << 31 | /* HS Enable */
459 1 << 29 | /* HS Statistics Enable */
460 0; /* Instance Count */
461
462 dw5 = hs->in.start_grf << 19 |
463 0 << 11 |
464 0 << 4;
465
466 ilo_cp_begin(cp, cmd_len);
467 ilo_cp_write(cp, cmd | (cmd_len - 2));
468 ilo_cp_write(cp, dw1);
469 ilo_cp_write(cp, dw2);
470 ilo_cp_write(cp, hs->cache_offset);
471 ilo_cp_write(cp, 0);
472 ilo_cp_write(cp, dw5);
473 ilo_cp_write(cp, 0);
474 ilo_cp_end(cp);
475 }
476
/**
 * Emit 3DSTATE_TE with an all-zero payload (header plus three zero dwords).
 */
static void
gen7_emit_3DSTATE_TE(const struct ilo_dev_info *dev,
                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1c) | (cmd_len - 2);
   int i;

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* the remaining dwords are all zero */
   for (i = 1; i < cmd_len; i++)
      ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
493
494 static void
495 gen7_emit_3DSTATE_DS(const struct ilo_dev_info *dev,
496 const struct ilo_shader *ds,
497 int max_threads, int num_samplers,
498 struct ilo_cp *cp)
499 {
500 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1d);
501 const uint8_t cmd_len = 6;
502 uint32_t dw2, dw4, dw5;
503
504 ILO_GPE_VALID_GEN(dev, 7, 7);
505
506 if (!ds) {
507 ilo_cp_begin(cp, cmd_len);
508 ilo_cp_write(cp, cmd | (cmd_len - 2));
509 ilo_cp_write(cp, 0);
510 ilo_cp_write(cp, 0);
511 ilo_cp_write(cp, 0);
512 ilo_cp_write(cp, 0);
513 ilo_cp_write(cp, 0);
514 ilo_cp_end(cp);
515
516 return;
517 }
518
519 dw2 = (num_samplers + 3) / 4 << 27 |
520 0 << 18 |
521 (max_threads - 1);
522 if (false)
523 dw2 |= 1 << 16;
524
525 dw4 = ds->in.start_grf << 20 |
526 0 << 11 |
527 0 << 4;
528
529 dw5 = (max_threads - 1) << 25 |
530 1 << 10 |
531 1;
532
533 ilo_cp_begin(cp, cmd_len);
534 ilo_cp_write(cp, cmd | (cmd_len - 2));
535 ilo_cp_write(cp, ds->cache_offset);
536 ilo_cp_write(cp, dw2);
537 ilo_cp_write(cp, 0);
538 ilo_cp_write(cp, dw4);
539 ilo_cp_write(cp, dw5);
540 ilo_cp_end(cp);
541 }
542
543 static void
544 gen7_emit_3DSTATE_STREAMOUT(const struct ilo_dev_info *dev,
545 bool enable,
546 bool rasterizer_discard,
547 bool flatshade_first,
548 struct ilo_cp *cp)
549 {
550 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x1e);
551 const uint8_t cmd_len = 3;
552 uint32_t dw1, dw2;
553 int i;
554
555 ILO_GPE_VALID_GEN(dev, 7, 7);
556
557 if (!enable) {
558 ilo_cp_begin(cp, cmd_len);
559 ilo_cp_write(cp, cmd | (cmd_len - 2));
560 ilo_cp_write(cp, (rasterizer_discard) ? SO_RENDERING_DISABLE : 0);
561 ilo_cp_write(cp, 0);
562 ilo_cp_end(cp);
563 return;
564 }
565
566 dw1 = SO_FUNCTION_ENABLE |
567 SO_STATISTICS_ENABLE;
568 if (rasterizer_discard)
569 dw1 |= SO_RENDERING_DISABLE;
570 if (!flatshade_first)
571 dw1 |= SO_REORDER_TRAILING;
572 for (i = 0; i < 4; i++)
573 dw1 |= SO_BUFFER_ENABLE(i);
574
575 dw2 = 0 << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT |
576 0 << SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
577
578 ilo_cp_begin(cp, cmd_len);
579 ilo_cp_write(cp, cmd | (cmd_len - 2));
580 ilo_cp_write(cp, dw1);
581 ilo_cp_write(cp, dw2);
582 ilo_cp_end(cp);
583 }
584
/**
 * Emit 3DSTATE_SBE.
 *
 * The thirteen payload dwords are produced by
 * ilo_gpe_gen6_fill_3dstate_sf_sbe() and copied out unchanged.
 */
static void
gen7_emit_3DSTATE_SBE(const struct ilo_dev_info *dev,
                      const struct pipe_rasterizer_state *rasterizer,
                      const struct ilo_shader *fs,
                      const struct ilo_shader *last_sh,
                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 14;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x1f) | (cmd_len - 2);
   uint32_t payload[13];

   ILO_GPE_VALID_GEN(dev, 7, 7);

   ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
         fs, last_sh, payload, Elements(payload));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write_multi(cp, payload, 13);
   ilo_cp_end(cp);
}
606
607 static void
608 gen7_emit_3DSTATE_PS(const struct ilo_dev_info *dev,
609 const struct ilo_shader *fs,
610 int num_samplers, bool dual_blend,
611 struct ilo_cp *cp)
612 {
613 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x20);
614 const uint8_t cmd_len = 8;
615 uint32_t dw2, dw4, dw5;
616 int max_threads;
617
618 ILO_GPE_VALID_GEN(dev, 7, 7);
619
620 /* see brwCreateContext() */
621 max_threads = (dev->gt == 2) ? 172 : 48;
622
623 if (!fs) {
624 ilo_cp_begin(cp, cmd_len);
625 ilo_cp_write(cp, cmd | (cmd_len - 2));
626 ilo_cp_write(cp, 0);
627 ilo_cp_write(cp, 0);
628 ilo_cp_write(cp, 0);
629 /* GPU hangs if none of the dispatch enable bits is set */
630 ilo_cp_write(cp, (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
631 GEN7_PS_8_DISPATCH_ENABLE);
632 ilo_cp_write(cp, 0);
633 ilo_cp_write(cp, 0);
634 ilo_cp_write(cp, 0);
635 ilo_cp_end(cp);
636
637 return;
638 }
639
640 dw2 = (num_samplers + 3) / 4 << GEN7_PS_SAMPLER_COUNT_SHIFT |
641 0 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
642 if (false)
643 dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;
644
645 dw4 = (max_threads - 1) << IVB_PS_MAX_THREADS_SHIFT |
646 GEN7_PS_POSOFFSET_NONE;
647
648 if (false)
649 dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE;
650 if (fs->in.count)
651 dw4 |= GEN7_PS_ATTRIBUTE_ENABLE;
652 if (dual_blend)
653 dw4 |= GEN7_PS_DUAL_SOURCE_BLEND_ENABLE;
654
655 if (fs->dispatch_16)
656 dw4 |= GEN7_PS_16_DISPATCH_ENABLE;
657 else
658 dw4 |= GEN7_PS_8_DISPATCH_ENABLE;
659
660 dw5 = fs->in.start_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
661 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
662 0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2;
663
664 ilo_cp_begin(cp, cmd_len);
665 ilo_cp_write(cp, cmd | (cmd_len - 2));
666 ilo_cp_write(cp, fs->cache_offset);
667 ilo_cp_write(cp, dw2);
668 ilo_cp_write(cp, 0); /* scratch */
669 ilo_cp_write(cp, dw4);
670 ilo_cp_write(cp, dw5);
671 ilo_cp_write(cp, 0); /* kernel 1 */
672 ilo_cp_write(cp, 0); /* kernel 2 */
673 ilo_cp_end(cp);
674 }
675
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP (sub-opcode 0x21) with
 * sf_clip_viewport as its pointer dword.
 */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(const struct ilo_dev_info *dev,
                                                  uint32_t sf_clip_viewport,
                                                  struct ilo_cp *cp)
{
   const int subop = 0x21;

   gen7_emit_3dstate_pointer(dev, subop, sf_clip_viewport, cp);
}
683
/**
 * Emit 3DSTATE_VIEWPORT_STATE_POINTERS_CC (sub-opcode 0x23) with
 * cc_viewport as its pointer dword.
 */
static void
gen7_emit_3DSTATE_VIEWPORT_STATE_POINTERS_CC(const struct ilo_dev_info *dev,
                                             uint32_t cc_viewport,
                                             struct ilo_cp *cp)
{
   const int subop = 0x23;

   gen7_emit_3dstate_pointer(dev, subop, cc_viewport, cp);
}
691
/**
 * Emit 3DSTATE_BLEND_STATE_POINTERS (sub-opcode 0x24) with blend_state as
 * its pointer dword.
 */
static void
gen7_emit_3DSTATE_BLEND_STATE_POINTERS(const struct ilo_dev_info *dev,
                                       uint32_t blend_state,
                                       struct ilo_cp *cp)
{
   const int subop = 0x24;

   gen7_emit_3dstate_pointer(dev, subop, blend_state, cp);
}
699
/**
 * Emit 3DSTATE_DEPTH_STENCIL_STATE_POINTERS (sub-opcode 0x25) with
 * depth_stencil_state as its pointer dword.
 */
static void
gen7_emit_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(const struct ilo_dev_info *dev,
                                               uint32_t depth_stencil_state,
                                               struct ilo_cp *cp)
{
   const int subop = 0x25;

   gen7_emit_3dstate_pointer(dev, subop, depth_stencil_state, cp);
}
707
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_VS (sub-opcode 0x26) with
 * binding_table as its pointer dword.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x26;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
715
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_HS (sub-opcode 0x27) with
 * binding_table as its pointer dword.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x27;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
723
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_DS (sub-opcode 0x28) with
 * binding_table as its pointer dword.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x28;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
731
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_GS (sub-opcode 0x29) with
 * binding_table as its pointer dword.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x29;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
739
/**
 * Emit 3DSTATE_BINDING_TABLE_POINTERS_PS (sub-opcode 0x2a) with
 * binding_table as its pointer dword.
 */
static void
gen7_emit_3DSTATE_BINDING_TABLE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t binding_table,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2a;

   gen7_emit_3dstate_pointer(dev, subop, binding_table, cp);
}
747
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_VS (sub-opcode 0x2b) with
 * sampler_state as its pointer dword.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_VS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2b;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
755
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_HS (sub-opcode 0x2c) with
 * sampler_state as its pointer dword.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_HS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2c;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
763
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_DS (sub-opcode 0x2d) with
 * sampler_state as its pointer dword.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_DS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2d;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
771
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_GS (sub-opcode 0x2e) with
 * sampler_state as its pointer dword.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_GS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2e;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
779
/**
 * Emit 3DSTATE_SAMPLER_STATE_POINTERS_PS (sub-opcode 0x2f) with
 * sampler_state as its pointer dword.
 */
static void
gen7_emit_3DSTATE_SAMPLER_STATE_POINTERS_PS(const struct ilo_dev_info *dev,
                                            uint32_t sampler_state,
                                            struct ilo_cp *cp)
{
   const int subop = 0x2f;

   gen7_emit_3dstate_pointer(dev, subop, sampler_state, cp);
}
787
788 static void
789 gen7_emit_3dstate_urb(const struct ilo_dev_info *dev,
790 int subop, int offset, int size,
791 int entry_size,
792 struct ilo_cp *cp)
793 {
794 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, subop);
795 const uint8_t cmd_len = 2;
796 const int row_size = 64; /* 512 bits */
797 int alloc_size, num_entries, min_entries, max_entries;
798
799 ILO_GPE_VALID_GEN(dev, 7, 7);
800
801 /* VS, HS, DS, and GS variants */
802 assert(subop >= 0x30 && subop <= 0x33);
803
804 /* in multiples of 8KB */
805 assert(offset % 8192 == 0);
806 offset /= 8192;
807
808 /* in multiple of 512-bit rows */
809 alloc_size = (entry_size + row_size - 1) / row_size;
810 if (!alloc_size)
811 alloc_size = 1;
812
813 /*
814 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
815 *
816 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
817 * cause performance to decrease due to banking in the URB. Element
818 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
819 */
820 if (subop == 0x30 && alloc_size == 5)
821 alloc_size = 6;
822
823 /* in multiples of 8 */
824 num_entries = (size / row_size / alloc_size) & ~7;
825
826 switch (subop) {
827 case 0x30: /* 3DSTATE_URB_VS */
828 min_entries = 32;
829 max_entries = (dev->gt == 2) ? 704 : 512;
830
831 assert(num_entries >= min_entries);
832 if (num_entries > max_entries)
833 num_entries = max_entries;
834 break;
835 case 0x31: /* 3DSTATE_URB_HS */
836 max_entries = (dev->gt == 2) ? 64 : 32;
837 if (num_entries > max_entries)
838 num_entries = max_entries;
839 break;
840 case 0x32: /* 3DSTATE_URB_DS */
841 if (num_entries)
842 assert(num_entries >= 138);
843 break;
844 case 0x33: /* 3DSTATE_URB_GS */
845 max_entries = (dev->gt == 2) ? 320 : 192;
846 if (num_entries > max_entries)
847 num_entries = max_entries;
848 break;
849 default:
850 break;
851 }
852
853 ilo_cp_begin(cp, cmd_len);
854 ilo_cp_write(cp, cmd | (cmd_len - 2));
855 ilo_cp_write(cp, offset << GEN7_URB_STARTING_ADDRESS_SHIFT |
856 (alloc_size - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
857 num_entries);
858 ilo_cp_end(cp);
859 }
860
/**
 * Emit 3DSTATE_URB_VS (sub-opcode 0x30); the arguments are forwarded to
 * gen7_emit_3dstate_urb().
 */
static void
gen7_emit_3DSTATE_URB_VS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x30;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
868
/**
 * Emit 3DSTATE_URB_HS (sub-opcode 0x31); the arguments are forwarded to
 * gen7_emit_3dstate_urb().
 */
static void
gen7_emit_3DSTATE_URB_HS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x31;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
876
/**
 * Emit 3DSTATE_URB_DS (sub-opcode 0x32); the arguments are forwarded to
 * gen7_emit_3dstate_urb().
 */
static void
gen7_emit_3DSTATE_URB_DS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x32;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
884
/**
 * Emit 3DSTATE_URB_GS (sub-opcode 0x33); the arguments are forwarded to
 * gen7_emit_3dstate_urb().
 */
static void
gen7_emit_3DSTATE_URB_GS(const struct ilo_dev_info *dev,
                         int offset, int size, int entry_size,
                         struct ilo_cp *cp)
{
   const int subop = 0x33;

   gen7_emit_3dstate_urb(dev, subop, offset, size, entry_size, cp);
}
892
893 static void
894 gen7_emit_3dstate_push_constant_alloc(const struct ilo_dev_info *dev,
895 int subop, int offset, int size,
896 struct ilo_cp *cp)
897 {
898 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, subop);
899 const uint8_t cmd_len = 2;
900 int end;
901
902 ILO_GPE_VALID_GEN(dev, 7, 7);
903
904 /* VS, HS, DS, GS, and PS variants */
905 assert(subop >= 0x12 && subop <= 0x16);
906
907 /*
908 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
909 *
910 * "(A table that says the maximum size of each constant buffer is
911 * 16KB")
912 *
913 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
914 *
915 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
916 * may not exceed the maximum value of the Constant Buffer Size."
917 *
918 * Thus, the valid range of buffer end is [0KB, 16KB].
919 */
920 end = (offset + size) / 1024;
921 if (end > 16) {
922 assert(!"invalid constant buffer end");
923 end = 16;
924 }
925
926 /* the valid range of buffer offset is [0KB, 15KB] */
927 offset = (offset + 1023) / 1024;
928 if (offset > 15) {
929 assert(!"invalid constant buffer offset");
930 offset = 15;
931 }
932
933 if (offset > end) {
934 assert(!size);
935 offset = end;
936 }
937
938 /* the valid range of buffer size is [0KB, 15KB] */
939 size = end - offset;
940 if (size > 15) {
941 assert(!"invalid constant buffer size");
942 size = 15;
943 }
944
945 ilo_cp_begin(cp, cmd_len);
946 ilo_cp_write(cp, cmd | (cmd_len - 2));
947 ilo_cp_write(cp, offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT |
948 size);
949 ilo_cp_end(cp);
950 }
951
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_VS (sub-opcode 0x12); the arguments are
 * forwarded to gen7_emit_3dstate_push_constant_alloc().
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_VS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x12;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
959
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_HS (sub-opcode 0x13); the arguments are
 * forwarded to gen7_emit_3dstate_push_constant_alloc().
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_HS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x13;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
967
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_DS (sub-opcode 0x14); the arguments are
 * forwarded to gen7_emit_3dstate_push_constant_alloc().
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_DS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x14;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
975
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_GS (sub-opcode 0x15); the arguments are
 * forwarded to gen7_emit_3dstate_push_constant_alloc().
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_GS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x15;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
983
/**
 * Emit 3DSTATE_PUSH_CONSTANT_ALLOC_PS (sub-opcode 0x16); the arguments are
 * forwarded to gen7_emit_3dstate_push_constant_alloc().
 */
static void
gen7_emit_3DSTATE_PUSH_CONSTANT_ALLOC_PS(const struct ilo_dev_info *dev,
                                         int offset, int size,
                                         struct ilo_cp *cp)
{
   const int subop = 0x16;

   gen7_emit_3dstate_push_constant_alloc(dev, subop, offset, size, cp);
}
991
992 static void
993 gen7_emit_3DSTATE_SO_DECL_LIST(const struct ilo_dev_info *dev,
994 struct ilo_cp *cp)
995 {
996 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x17);
997 uint8_t cmd_len;
998 uint16_t decls[128];
999 int num_decls, i;
1000
1001 ILO_GPE_VALID_GEN(dev, 7, 7);
1002
1003 memset(decls, 0, sizeof(decls));
1004 num_decls = 0;
1005
1006 cmd_len = 2 * num_decls + 3;
1007
1008 ilo_cp_begin(cp, cmd_len);
1009 ilo_cp_write(cp, cmd | (cmd_len - 2));
1010 ilo_cp_write(cp, 0 << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT |
1011 0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT |
1012 0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT |
1013 0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT);
1014 ilo_cp_write(cp, num_decls << SO_NUM_ENTRIES_0_SHIFT |
1015 0 << SO_NUM_ENTRIES_1_SHIFT |
1016 0 << SO_NUM_ENTRIES_2_SHIFT |
1017 0 << SO_NUM_ENTRIES_3_SHIFT);
1018
1019 for (i = 0; i < num_decls; i++) {
1020 ilo_cp_write(cp, decls[i]);
1021 ilo_cp_write(cp, 0);
1022 }
1023
1024 ilo_cp_end(cp);
1025 }
1026
1027 static void
1028 gen7_emit_3DSTATE_SO_BUFFER(const struct ilo_dev_info *dev,
1029 int index,
1030 bool enable,
1031 struct ilo_cp *cp)
1032 {
1033 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x18);
1034 const uint8_t cmd_len = 4;
1035 int start, end;
1036
1037 ILO_GPE_VALID_GEN(dev, 7, 7);
1038
1039 if (!enable) {
1040 ilo_cp_begin(cp, cmd_len);
1041 ilo_cp_write(cp, cmd | (cmd_len - 2));
1042 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
1043 ilo_cp_write(cp, 0);
1044 ilo_cp_write(cp, 0);
1045 ilo_cp_end(cp);
1046 return;
1047 }
1048
1049 start = end = 0;
1050
1051 ilo_cp_begin(cp, cmd_len);
1052 ilo_cp_write(cp, cmd | (cmd_len - 2));
1053 ilo_cp_write(cp, index << SO_BUFFER_INDEX_SHIFT);
1054 ilo_cp_write(cp, start);
1055 ilo_cp_write(cp, end);
1056 ilo_cp_end(cp);
1057 }
1058
1059 static void
1060 gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1061 const struct pipe_draw_info *info,
1062 bool rectlist,
1063 struct ilo_cp *cp)
1064 {
1065 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1066 const uint8_t cmd_len = 7;
1067 const int prim = (rectlist) ?
1068 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1069 const int vb_access = (info->indexed) ?
1070 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1071 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1072
1073 ILO_GPE_VALID_GEN(dev, 7, 7);
1074
1075 ilo_cp_begin(cp, cmd_len);
1076 ilo_cp_write(cp, cmd | (cmd_len - 2));
1077 ilo_cp_write(cp, vb_access | prim);
1078 ilo_cp_write(cp, info->count);
1079 ilo_cp_write(cp, info->start);
1080 ilo_cp_write(cp, info->instance_count);
1081 ilo_cp_write(cp, info->start_instance);
1082 ilo_cp_write(cp, info->index_bias);
1083 ilo_cp_end(cp);
1084 }
1085
1086 static uint32_t
1087 gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1088 const struct pipe_viewport_state *viewports,
1089 int num_viewports,
1090 struct ilo_cp *cp)
1091 {
1092 const int state_align = 64 / 4;
1093 const int state_len = 16 * num_viewports;
1094 uint32_t state_offset, *dw;
1095 int i;
1096
1097 ILO_GPE_VALID_GEN(dev, 7, 7);
1098
1099 /*
1100 * From the Ivy Bridge PRM, volume 2 part 1, page 270:
1101 *
1102 * "The viewport-specific state used by both the SF and CL units
1103 * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each
1104 * of which contains the DWords described below. The start of each
1105 * element is spaced 16 DWords apart. The location of first element of
1106 * the array, as specified by both Pointer to SF_VIEWPORT and Pointer
1107 * to CLIP_VIEWPORT, is aligned to a 64-byte boundary."
1108 */
1109 assert(num_viewports && num_viewports <= 16);
1110
1111 dw = ilo_cp_steal_ptr(cp, "SF_CLIP_VIEWPORT",
1112 state_len, state_align, &state_offset);
1113
1114 for (i = 0; i < num_viewports; i++) {
1115 const struct pipe_viewport_state *vp = &viewports[i];
1116
1117 ilo_gpe_gen6_fill_SF_VIEWPORT(dev, vp, 1, dw, 8);
1118
1119 ilo_gpe_gen6_fill_CLIP_VIEWPORT(dev, vp, 1, dw + 8, 4);
1120
1121 dw[12] = 0;
1122 dw[13] = 0;
1123 dw[14] = 0;
1124 dw[15] = 0;
1125
1126 dw += 16;
1127 }
1128
1129 return state_offset;
1130 }
1131
1132 static void
1133 gen7_fill_null_SURFACE_STATE(const struct ilo_dev_info *dev,
1134 unsigned width, unsigned height,
1135 unsigned depth, unsigned lod,
1136 uint32_t *dw, int num_dwords)
1137 {
1138 ILO_GPE_VALID_GEN(dev, 7, 7);
1139 assert(num_dwords == 8);
1140
1141 /*
1142 * From the Ivy Bridge PRM, volume 4 part 1, page 62:
1143 *
1144 * "A null surface is used in instances where an actual surface is not
1145 * bound. When a write message is generated to a null surface, no
1146 * actual surface is written to. When a read message (including any
1147 * sampling engine message) is generated to a null surface, the result
1148 * is all zeros. Note that a null surface type is allowed to be used
1149 * with all messages, even if it is not specificially indicated as
1150 * supported. All of the remaining fields in surface state are ignored
1151 * for null surfaces, with the following exceptions:
1152 *
1153 * * Width, Height, Depth, LOD, and Render Target View Extent fields
1154 * must match the depth buffer's corresponding state for all render
1155 * target surfaces, including null.
1156 * * All sampling engine and data port messages support null surfaces
1157 * with the above behavior, even if not mentioned as specifically
1158 * supported, except for the following:
1159 * * Data Port Media Block Read/Write messages.
1160 * * The Surface Type of a surface used as a render target (accessed
1161 * via the Data Port's Render Target Write message) must be the same
1162 * as the Surface Type of all other render targets and of the depth
1163 * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
1164 * buffer or render targets are SURFTYPE_NULL."
1165 *
1166 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
1167 *
1168 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1169 * true"
1170 */
1171
1172 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1173 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT |
1174 BRW_SURFACE_TILED << 13;
1175
1176 dw[1] = 0;
1177
1178 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1179 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1180
1181 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH);
1182
1183 dw[4] = 0;
1184 dw[5] = lod;
1185 dw[6] = 0;
1186 dw[7] = 0;
1187 }
1188
1189 static void
1190 gen7_fill_buffer_SURFACE_STATE(const struct ilo_dev_info *dev,
1191 const struct ilo_resource *res,
1192 unsigned offset, unsigned size,
1193 unsigned struct_size,
1194 enum pipe_format elem_format,
1195 bool is_rt, bool render_cache_rw,
1196 uint32_t *dw, int num_dwords)
1197 {
1198 const bool typed = (elem_format != PIPE_FORMAT_NONE);
1199 const bool structured = (!typed && struct_size > 1);
1200 const int elem_size = (typed) ?
1201 util_format_get_blocksize(elem_format) : 1;
1202 int width, height, depth, pitch;
1203 int surface_type, surface_format, num_entries;
1204
1205 ILO_GPE_VALID_GEN(dev, 7, 7);
1206 assert(num_dwords == 8);
1207
1208 surface_type = (structured) ? 5 : BRW_SURFACE_BUFFER;
1209
1210 surface_format = (typed) ?
1211 ilo_translate_color_format(elem_format) : BRW_SURFACEFORMAT_RAW;
1212
1213 num_entries = size / struct_size;
1214 /* see if there is enough space to fit another element */
1215 if (size % struct_size >= elem_size && !structured)
1216 num_entries++;
1217
1218 /*
1219 * From the Ivy Bridge PRM, volume 4 part 1, page 67:
1220 *
1221 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1222 * Address) specifies the base address of first element of the
1223 * surface. The surface is interpreted as a simple array of that
1224 * single element type. The address must be naturally-aligned to the
1225 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1226 * must be 16-byte aligned)
1227 *
1228 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1229 * the base address of the first element of the surface, computed in
1230 * software by adding the surface base address to the byte offset of
1231 * the element in the buffer."
1232 */
1233 if (is_rt)
1234 assert(offset % elem_size == 0);
1235
1236 /*
1237 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1238 *
1239 * "For typed buffer and structured buffer surfaces, the number of
1240 * entries in the buffer ranges from 1 to 2^27. For raw buffer
1241 * surfaces, the number of entries in the buffer is the number of
1242 * bytes which can range from 1 to 2^30."
1243 */
1244 assert(num_entries >= 1 &&
1245 num_entries <= 1 << ((typed || structured) ? 27 : 30));
1246
1247 /*
1248 * From the Ivy Bridge PRM, volume 4 part 1, page 69:
1249 *
1250 * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
1251 * 11 if the Surface Format is RAW (the size of the buffer must be a
1252 * multiple of 4 bytes)."
1253 *
1254 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1255 *
1256 * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
1257 * field (Surface Pitch) indicates the size of the structure."
1258 *
1259 * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
1260 * must be a multiple of 4 bytes."
1261 */
1262 if (structured)
1263 assert(struct_size % 4 == 0);
1264 else if (!typed)
1265 assert(num_entries % 4 == 0);
1266
1267 pitch = struct_size;
1268
1269 /*
1270 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
1271 *
1272 * "If Surface Type is SURFTYPE_BUFFER, this field (Tiled Surface) must
1273 * be false (because buffers are supported only in linear memory)."
1274 */
1275 assert(res->tiling == INTEL_TILING_NONE);
1276
1277 pitch--;
1278 num_entries--;
1279 /* bits [6:0] */
1280 width = (num_entries & 0x0000007f);
1281 /* bits [20:7] */
1282 height = (num_entries & 0x001fff80) >> 7;
1283 /* bits [30:21] */
1284 depth = (num_entries & 0x7fe00000) >> 21;
1285 /* limit to [26:21] */
1286 if (typed || structured)
1287 depth &= 0x3f;
1288
1289 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1290 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1291 if (render_cache_rw)
1292 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1293
1294 dw[1] = offset;
1295
1296 dw[2] = SET_FIELD(height, GEN7_SURFACE_HEIGHT) |
1297 SET_FIELD(width, GEN7_SURFACE_WIDTH);
1298
1299 dw[3] = SET_FIELD(depth, BRW_SURFACE_DEPTH) |
1300 pitch;
1301
1302 dw[4] = 0;
1303 dw[5] = 0;
1304 dw[6] = 0;
1305 dw[7] = 0;
1306 }
1307
1308 static void
1309 gen7_fill_normal_SURFACE_STATE(const struct ilo_dev_info *dev,
1310 struct ilo_resource *res,
1311 enum pipe_format format,
1312 unsigned first_level, unsigned num_levels,
1313 unsigned first_layer, unsigned num_layers,
1314 bool is_rt, bool render_cache_rw,
1315 uint32_t *dw, int num_dwords)
1316 {
1317 int surface_type, surface_format;
1318 int width, height, depth, pitch, lod;
1319 unsigned layer_offset, x_offset, y_offset;
1320
1321 ILO_GPE_VALID_GEN(dev, 7, 7);
1322 assert(num_dwords == 8);
1323
1324 surface_type = ilo_gpe_gen6_translate_texture(res->base.target);
1325 assert(surface_type != BRW_SURFACE_BUFFER);
1326
1327 if (is_rt)
1328 surface_format = ilo_translate_render_format(format);
1329 else
1330 surface_format = ilo_translate_texture_format(format);
1331 assert(surface_format >= 0);
1332
1333 width = res->base.width0;
1334 height = res->base.height0;
1335 pitch = res->bo_stride;
1336
1337 switch (res->base.target) {
1338 case PIPE_TEXTURE_3D:
1339 depth = res->base.depth0;
1340 break;
1341 case PIPE_TEXTURE_CUBE:
1342 case PIPE_TEXTURE_CUBE_ARRAY:
1343 /*
1344 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1345 *
1346 * "For SURFTYPE_CUBE: For Sampling Engine Surfaces, the range of
1347 * this field is [0,340], indicating the number of cube array
1348 * elements (equal to the number of underlying 2D array elements
1349 * divided by 6). For other surfaces, this field must be zero."
1350 *
1351 * "Errata: For SURFTYPE_CUBE sampling engine surfaces, the range of
1352 * this field is limited to [0,85]."
1353 */
1354 if (!is_rt) {
1355 assert(num_layers % 6 == 0);
1356 depth = num_layers / 6;
1357 break;
1358 }
1359 assert(num_layers == 1);
1360 /* fall through */
1361 default:
1362 depth = num_layers;
1363 break;
1364 }
1365
1366 /* sanity check the size */
1367 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1368 switch (surface_type) {
1369 case BRW_SURFACE_1D:
1370 assert(width <= 16384 && height == 1 && depth <= 2048);
1371 break;
1372 case BRW_SURFACE_2D:
1373 assert(width <= 16384 && height <= 16384 && depth <= 2048);
1374 break;
1375 case BRW_SURFACE_3D:
1376 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1377 break;
1378 case BRW_SURFACE_CUBE:
1379 assert(width <= 16384 && height <= 16384 && depth <= 86);
1380 assert(width == height);
1381 break;
1382 default:
1383 assert(!"unexpected surface type");
1384 break;
1385 }
1386
1387 /*
1388 * Compute the offset to the layer manually.
1389 *
1390 * For rendering, the hardware requires LOD to be the same for all render
1391 * targets and the depth buffer. We need to compute the offset to the
1392 * layer manually and always set LOD to 0.
1393 */
1394 if (is_rt) {
1395 /* we lose the capability for layered rendering */
1396 assert(num_levels == 1 && num_layers == 1);
1397
1398 layer_offset = ilo_resource_get_slice_offset(res,
1399 first_level, first_layer, true, &x_offset, &y_offset);
1400
1401 assert(x_offset % 4 == 0);
1402 assert(y_offset % 2 == 0);
1403 x_offset /= 4;
1404 y_offset /= 2;
1405
1406 /* derive the size for the LOD */
1407 width = u_minify(res->base.width0, first_level);
1408 height = u_minify(res->base.height0, first_level);
1409 if (surface_type == BRW_SURFACE_3D)
1410 depth = u_minify(res->base.depth0, first_level);
1411
1412 first_level = 0;
1413 first_layer = 0;
1414 lod = 0;
1415 }
1416 else {
1417 layer_offset = 0;
1418 x_offset = 0;
1419 y_offset = 0;
1420 lod = num_levels - 1;
1421 }
1422
1423 /*
1424 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1425 *
1426 * "The Base Address for linear render target surfaces and surfaces
1427 * accessed with the typed surface read/write data port messages must
1428 * be element-size aligned, for non-YUV surface formats, or a multiple
1429 * of 2 element-sizes for YUV surface formats. Other linear surfaces
1430 * have no alignment requirements (byte alignment is sufficient)."
1431 *
1432 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1433 *
1434 * "For linear render target surfaces and surfaces accessed with the
1435 * typed data port messages, the pitch must be a multiple of the
1436 * element size for non-YUV surface formats. Pitch must be a multiple
1437 * of 2 * element size for YUV surface formats. For linear surfaces
1438 * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
1439 * of 4 bytes.For other linear surfaces, the pitch can be any multiple
1440 * of bytes."
1441 *
1442 * From the Ivy Bridge PRM, volume 4 part 1, page 74:
1443 *
1444 * "For linear surfaces, this field (X Offset) must be zero."
1445 */
1446 if (res->tiling == INTEL_TILING_NONE) {
1447 if (is_rt) {
1448 const int elem_size = util_format_get_blocksize(format);
1449 assert(layer_offset % elem_size == 0);
1450 assert(pitch % elem_size == 0);
1451 }
1452
1453 assert(!x_offset);
1454 }
1455
1456 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
1457 surface_format << BRW_SURFACE_FORMAT_SHIFT |
1458 ilo_gpe_gen6_translate_winsys_tiling(res->tiling) << 13 |
1459 GEN7_SURFACE_ARYSPC_FULL;
1460
1461 if (surface_type != BRW_SURFACE_3D && depth > 1)
1462 dw[0] |= GEN7_SURFACE_IS_ARRAY;
1463
1464 if (res->valign_4)
1465 dw[0] |= GEN7_SURFACE_VALIGN_4;
1466
1467 if (res->halign_8)
1468 dw[0] |= GEN7_SURFACE_HALIGN_8;
1469
1470 if (render_cache_rw)
1471 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1472
1473 if (surface_type == BRW_SURFACE_CUBE && !is_rt)
1474 dw[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
1475
1476 dw[1] = layer_offset;
1477
1478 dw[2] = SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT) |
1479 SET_FIELD(width - 1, GEN7_SURFACE_WIDTH);
1480
1481 dw[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
1482 (pitch - 1);
1483
1484 dw[4] = first_layer << 18 |
1485 (depth - 1) << 7;
1486
1487 if (res->base.nr_samples > 4)
1488 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_8;
1489 else if (res->base.nr_samples > 2)
1490 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_4;
1491 else
1492 dw[4] |= GEN7_SURFACE_MULTISAMPLECOUNT_1;
1493
1494 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
1495 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT |
1496 SET_FIELD(first_level, GEN7_SURFACE_MIN_LOD) |
1497 lod;
1498
1499 dw[6] = 0;
1500 dw[7] = 0;
1501 }
1502
1503 static uint32_t
1504 gen7_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
1505 struct intel_bo *bo, bool for_render,
1506 const uint32_t *dw, int num_dwords,
1507 struct ilo_cp *cp)
1508 {
1509 const int state_align = 32 / 4;
1510 const int state_len = 8;
1511 uint32_t state_offset;
1512 uint32_t read_domains, write_domain;
1513
1514 ILO_GPE_VALID_GEN(dev, 7, 7);
1515 assert(num_dwords == state_len);
1516
1517 if (for_render) {
1518 read_domains = INTEL_DOMAIN_RENDER;
1519 write_domain = INTEL_DOMAIN_RENDER;
1520 }
1521 else {
1522 read_domains = INTEL_DOMAIN_SAMPLER;
1523 write_domain = 0;
1524 }
1525
1526 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
1527 ilo_cp_write(cp, dw[0]);
1528 ilo_cp_write_bo(cp, dw[1], bo, read_domains, write_domain);
1529 ilo_cp_write(cp, dw[2]);
1530 ilo_cp_write(cp, dw[3]);
1531 ilo_cp_write(cp, dw[4]);
1532 ilo_cp_write(cp, dw[5]);
1533 ilo_cp_write(cp, dw[6]);
1534 ilo_cp_write(cp, dw[7]);
1535 ilo_cp_end(cp);
1536
1537 return state_offset;
1538 }
1539
1540 static uint32_t
1541 gen7_emit_surf_SURFACE_STATE(const struct ilo_dev_info *dev,
1542 const struct pipe_surface *surface,
1543 struct ilo_cp *cp)
1544 {
1545 struct intel_bo *bo;
1546 uint32_t dw[8];
1547
1548 ILO_GPE_VALID_GEN(dev, 7, 7);
1549
1550 if (surface && surface->texture) {
1551 struct ilo_resource *res = ilo_resource(surface->texture);
1552
1553 bo = res->bo;
1554
1555 /*
1556 * classic i965 sets render_cache_rw for constant buffers and sol
1557 * surfaces but not render buffers. Why?
1558 */
1559 gen7_fill_normal_SURFACE_STATE(dev, res, surface->format,
1560 surface->u.tex.level, 1,
1561 surface->u.tex.first_layer,
1562 surface->u.tex.last_layer - surface->u.tex.first_layer + 1,
1563 true, true, dw, Elements(dw));
1564 }
1565 else {
1566 bo = NULL;
1567 gen7_fill_null_SURFACE_STATE(dev,
1568 surface->width, surface->height, 1, 0, dw, Elements(dw));
1569 }
1570
1571 return gen7_emit_SURFACE_STATE(dev, bo, true, dw, Elements(dw), cp);
1572 }
1573
1574 static uint32_t
1575 gen7_emit_view_SURFACE_STATE(const struct ilo_dev_info *dev,
1576 const struct pipe_sampler_view *view,
1577 struct ilo_cp *cp)
1578 {
1579 struct ilo_resource *res = ilo_resource(view->texture);
1580 uint32_t dw[8];
1581
1582 ILO_GPE_VALID_GEN(dev, 7, 7);
1583
1584 gen7_fill_normal_SURFACE_STATE(dev, res, view->format,
1585 view->u.tex.first_level,
1586 view->u.tex.last_level - view->u.tex.first_level + 1,
1587 view->u.tex.first_layer,
1588 view->u.tex.last_layer - view->u.tex.first_layer + 1,
1589 false, false, dw, Elements(dw));
1590
1591 return gen7_emit_SURFACE_STATE(dev, res->bo, false, dw, Elements(dw), cp);
1592 }
1593
1594 static uint32_t
1595 gen7_emit_cbuf_SURFACE_STATE(const struct ilo_dev_info *dev,
1596 const struct pipe_constant_buffer *cbuf,
1597 struct ilo_cp *cp)
1598 {
1599 const enum pipe_format elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
1600 struct ilo_resource *res = ilo_resource(cbuf->buffer);
1601 uint32_t dw[8];
1602
1603 ILO_GPE_VALID_GEN(dev, 7, 7);
1604
1605 gen7_fill_buffer_SURFACE_STATE(dev, res,
1606 cbuf->buffer_offset, cbuf->buffer_size,
1607 util_format_get_blocksize(elem_format), elem_format,
1608 false, false, dw, Elements(dw));
1609
1610 return gen7_emit_SURFACE_STATE(dev, res->bo, false, dw, Elements(dw), cp);
1611 }
1612
1613 static uint32_t
1614 gen7_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
1615 const union pipe_color_union *color,
1616 struct ilo_cp *cp)
1617 {
1618 const int state_align = 32 / 4;
1619 const int state_len = 4;
1620 uint32_t state_offset, *dw;
1621
1622 ILO_GPE_VALID_GEN(dev, 7, 7);
1623
1624 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
1625 state_len, state_align, &state_offset);
1626 memcpy(dw, color->f, 4 * 4);
1627
1628 return state_offset;
1629 }
1630
1631 static int
1632 gen7_estimate_command_size(const struct ilo_dev_info *dev,
1633 enum ilo_gpe_gen7_command cmd,
1634 int arg)
1635 {
1636 static const struct {
1637 int header;
1638 int body;
1639 } gen7_command_size_table[ILO_GPE_GEN7_COMMAND_COUNT] = {
1640 [ILO_GPE_GEN7_STATE_BASE_ADDRESS] = { 0, 10 },
1641 [ILO_GPE_GEN7_STATE_SIP] = { 0, 2 },
1642 [ILO_GPE_GEN7_3DSTATE_VF_STATISTICS] = { 0, 1 },
1643 [ILO_GPE_GEN7_PIPELINE_SELECT] = { 0, 1 },
1644 [ILO_GPE_GEN7_MEDIA_VFE_STATE] = { 0, 8 },
1645 [ILO_GPE_GEN7_MEDIA_CURBE_LOAD] = { 0, 4 },
1646 [ILO_GPE_GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
1647 [ILO_GPE_GEN7_MEDIA_STATE_FLUSH] = { 0, 2 },
1648 [ILO_GPE_GEN7_GPGPU_WALKER] = { 0, 11 },
1649 [ILO_GPE_GEN7_3DSTATE_CLEAR_PARAMS] = { 0, 3 },
1650 [ILO_GPE_GEN7_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
1651 [ILO_GPE_GEN7_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
1652 [ILO_GPE_GEN7_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
1653 [ILO_GPE_GEN7_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
1654 [ILO_GPE_GEN7_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
1655 [ILO_GPE_GEN7_3DSTATE_INDEX_BUFFER] = { 0, 3 },
1656 [ILO_GPE_GEN7_3DSTATE_CC_STATE_POINTERS] = { 0, 2 },
1657 [ILO_GPE_GEN7_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
1658 [ILO_GPE_GEN7_3DSTATE_VS] = { 0, 6 },
1659 [ILO_GPE_GEN7_3DSTATE_GS] = { 0, 7 },
1660 [ILO_GPE_GEN7_3DSTATE_CLIP] = { 0, 4 },
1661 [ILO_GPE_GEN7_3DSTATE_SF] = { 0, 7 },
1662 [ILO_GPE_GEN7_3DSTATE_WM] = { 0, 3 },
1663 [ILO_GPE_GEN7_3DSTATE_CONSTANT_VS] = { 0, 7 },
1664 [ILO_GPE_GEN7_3DSTATE_CONSTANT_GS] = { 0, 7 },
1665 [ILO_GPE_GEN7_3DSTATE_CONSTANT_PS] = { 0, 7 },
1666 [ILO_GPE_GEN7_3DSTATE_SAMPLE_MASK] = { 0, 2 },
1667 [ILO_GPE_GEN7_3DSTATE_CONSTANT_HS] = { 0, 7 },
1668 [ILO_GPE_GEN7_3DSTATE_CONSTANT_DS] = { 0, 7 },
1669 [ILO_GPE_GEN7_3DSTATE_HS] = { 0, 7 },
1670 [ILO_GPE_GEN7_3DSTATE_TE] = { 0, 4 },
1671 [ILO_GPE_GEN7_3DSTATE_DS] = { 0, 6 },
1672 [ILO_GPE_GEN7_3DSTATE_STREAMOUT] = { 0, 3 },
1673 [ILO_GPE_GEN7_3DSTATE_SBE] = { 0, 14 },
1674 [ILO_GPE_GEN7_3DSTATE_PS] = { 0, 8 },
1675 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP] = { 0, 2 },
1676 [ILO_GPE_GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC] = { 0, 2 },
1677 [ILO_GPE_GEN7_3DSTATE_BLEND_STATE_POINTERS] = { 0, 2 },
1678 [ILO_GPE_GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS] = { 0, 2 },
1679 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS] = { 0, 2 },
1680 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS] = { 0, 2 },
1681 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS] = { 0, 2 },
1682 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS] = { 0, 2 },
1683 [ILO_GPE_GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS] = { 0, 2 },
1684 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS] = { 0, 2 },
1685 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS] = { 0, 2 },
1686 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS] = { 0, 2 },
1687 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS] = { 0, 2 },
1688 [ILO_GPE_GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS] = { 0, 2 },
1689 [ILO_GPE_GEN7_3DSTATE_URB_VS] = { 0, 2 },
1690 [ILO_GPE_GEN7_3DSTATE_URB_HS] = { 0, 2 },
1691 [ILO_GPE_GEN7_3DSTATE_URB_DS] = { 0, 2 },
1692 [ILO_GPE_GEN7_3DSTATE_URB_GS] = { 0, 2 },
1693 [ILO_GPE_GEN7_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
1694 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
1695 [ILO_GPE_GEN7_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33, },
1696 [ILO_GPE_GEN7_3DSTATE_LINE_STIPPLE] = { 0, 3 },
1697 [ILO_GPE_GEN7_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
1698 [ILO_GPE_GEN7_3DSTATE_MULTISAMPLE] = { 0, 4 },
1699 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS] = { 0, 2 },
1700 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS] = { 0, 2 },
1701 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS] = { 0, 2 },
1702 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS] = { 0, 2 },
1703 [ILO_GPE_GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS] = { 0, 2 },
1704 [ILO_GPE_GEN7_3DSTATE_SO_DECL_LIST] = { 3, 2 },
1705 [ILO_GPE_GEN7_3DSTATE_SO_BUFFER] = { 0, 4 },
1706 [ILO_GPE_GEN7_PIPE_CONTROL] = { 0, 5 },
1707 [ILO_GPE_GEN7_3DPRIMITIVE] = { 0, 7 },
1708 };
1709 const int header = gen7_command_size_table[cmd].header;
1710 const int body = gen7_command_size_table[cmd].body;
1711 const int count = arg;
1712
1713 ILO_GPE_VALID_GEN(dev, 7, 7);
1714 assert(cmd < ILO_GPE_GEN7_COMMAND_COUNT);
1715
1716 return (likely(count)) ? header + body * count : 0;
1717 }
1718
1719 static int
1720 gen7_estimate_state_size(const struct ilo_dev_info *dev,
1721 enum ilo_gpe_gen7_state state,
1722 int arg)
1723 {
1724 static const struct {
1725 int alignment;
1726 int body;
1727 bool is_array;
1728 } gen7_state_size_table[ILO_GPE_GEN7_STATE_COUNT] = {
1729 [ILO_GPE_GEN7_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
1730 [ILO_GPE_GEN7_SF_CLIP_VIEWPORT] = { 16, 16, true },
1731 [ILO_GPE_GEN7_CC_VIEWPORT] = { 8, 2, true },
1732 [ILO_GPE_GEN7_COLOR_CALC_STATE] = { 16, 6, false },
1733 [ILO_GPE_GEN7_BLEND_STATE] = { 16, 2, true },
1734 [ILO_GPE_GEN7_DEPTH_STENCIL_STATE] = { 16, 3, false },
1735 [ILO_GPE_GEN7_SCISSOR_RECT] = { 8, 2, true },
1736 [ILO_GPE_GEN7_BINDING_TABLE_STATE] = { 8, 1, true },
1737 [ILO_GPE_GEN7_SURFACE_STATE] = { 8, 8, false },
1738 [ILO_GPE_GEN7_SAMPLER_STATE] = { 8, 4, true },
1739 [ILO_GPE_GEN7_SAMPLER_BORDER_COLOR_STATE] = { 8, 4, false },
1740 [ILO_GPE_GEN7_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
1741 };
1742 const int alignment = gen7_state_size_table[state].alignment;
1743 const int body = gen7_state_size_table[state].body;
1744 const bool is_array = gen7_state_size_table[state].is_array;
1745 const int count = arg;
1746 int estimate;
1747
1748 ILO_GPE_VALID_GEN(dev, 7, 7);
1749 assert(state < ILO_GPE_GEN7_STATE_COUNT);
1750
1751 if (likely(count)) {
1752 if (is_array) {
1753 estimate = (alignment - 1) + body * count;
1754 }
1755 else {
1756 estimate = (alignment - 1) + body;
1757 /* all states are aligned */
1758 if (count > 1)
1759 estimate += util_align_npot(body, alignment) * (count - 1);
1760 }
1761 }
1762 else {
1763 estimate = 0;
1764 }
1765
1766 return estimate;
1767 }
1768
1769 static void
1770 gen7_init(struct ilo_gpe_gen7 *gen7)
1771 {
1772 const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get();
1773
1774 gen7->estimate_command_size = gen7_estimate_command_size;
1775 gen7->estimate_state_size = gen7_estimate_state_size;
1776
1777 #define GEN7_USE(gen7, name, from) gen7->emit_ ## name = from->emit_ ## name
1778 #define GEN7_SET(gen7, name) gen7->emit_ ## name = gen7_emit_ ## name
1779 GEN7_USE(gen7, STATE_BASE_ADDRESS, gen6);
1780 GEN7_USE(gen7, STATE_SIP, gen6);
1781 GEN7_USE(gen7, 3DSTATE_VF_STATISTICS, gen6);
1782 GEN7_USE(gen7, PIPELINE_SELECT, gen6);
1783 GEN7_USE(gen7, MEDIA_VFE_STATE, gen6);
1784 GEN7_USE(gen7, MEDIA_CURBE_LOAD, gen6);
1785 GEN7_USE(gen7, MEDIA_INTERFACE_DESCRIPTOR_LOAD, gen6);
1786 GEN7_USE(gen7, MEDIA_STATE_FLUSH, gen6);
1787 GEN7_SET(gen7, GPGPU_WALKER);
1788 GEN7_SET(gen7, 3DSTATE_CLEAR_PARAMS);
1789 GEN7_SET(gen7, 3DSTATE_DEPTH_BUFFER);
1790 GEN7_USE(gen7, 3DSTATE_STENCIL_BUFFER, gen6);
1791 GEN7_USE(gen7, 3DSTATE_HIER_DEPTH_BUFFER, gen6);
1792 GEN7_USE(gen7, 3DSTATE_VERTEX_BUFFERS, gen6);
1793 GEN7_USE(gen7, 3DSTATE_VERTEX_ELEMENTS, gen6);
1794 GEN7_USE(gen7, 3DSTATE_INDEX_BUFFER, gen6);
1795 GEN7_SET(gen7, 3DSTATE_CC_STATE_POINTERS);
1796 GEN7_USE(gen7, 3DSTATE_SCISSOR_STATE_POINTERS, gen6);
1797 GEN7_USE(gen7, 3DSTATE_VS, gen6);
1798 GEN7_SET(gen7, 3DSTATE_GS);
1799 GEN7_USE(gen7, 3DSTATE_CLIP, gen6);
1800 GEN7_SET(gen7, 3DSTATE_SF);
1801 GEN7_SET(gen7, 3DSTATE_WM);
1802 GEN7_SET(gen7, 3DSTATE_CONSTANT_VS);
1803 GEN7_SET(gen7, 3DSTATE_CONSTANT_GS);
1804 GEN7_SET(gen7, 3DSTATE_CONSTANT_PS);
1805 GEN7_SET(gen7, 3DSTATE_SAMPLE_MASK);
1806 GEN7_SET(gen7, 3DSTATE_CONSTANT_HS);
1807 GEN7_SET(gen7, 3DSTATE_CONSTANT_DS);
1808 GEN7_SET(gen7, 3DSTATE_HS);
1809 GEN7_SET(gen7, 3DSTATE_TE);
1810 GEN7_SET(gen7, 3DSTATE_DS);
1811 GEN7_SET(gen7, 3DSTATE_STREAMOUT);
1812 GEN7_SET(gen7, 3DSTATE_SBE);
1813 GEN7_SET(gen7, 3DSTATE_PS);
1814 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
1815 GEN7_SET(gen7, 3DSTATE_VIEWPORT_STATE_POINTERS_CC);
1816 GEN7_SET(gen7, 3DSTATE_BLEND_STATE_POINTERS);
1817 GEN7_SET(gen7, 3DSTATE_DEPTH_STENCIL_STATE_POINTERS);
1818 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_VS);
1819 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_HS);
1820 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_DS);
1821 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_GS);
1822 GEN7_SET(gen7, 3DSTATE_BINDING_TABLE_POINTERS_PS);
1823 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_VS);
1824 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_HS);
1825 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_DS);
1826 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_GS);
1827 GEN7_SET(gen7, 3DSTATE_SAMPLER_STATE_POINTERS_PS);
1828 GEN7_SET(gen7, 3DSTATE_URB_VS);
1829 GEN7_SET(gen7, 3DSTATE_URB_HS);
1830 GEN7_SET(gen7, 3DSTATE_URB_DS);
1831 GEN7_SET(gen7, 3DSTATE_URB_GS);
1832 GEN7_USE(gen7, 3DSTATE_DRAWING_RECTANGLE, gen6);
1833 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_OFFSET, gen6);
1834 GEN7_USE(gen7, 3DSTATE_POLY_STIPPLE_PATTERN, gen6);
1835 GEN7_USE(gen7, 3DSTATE_LINE_STIPPLE, gen6);
1836 GEN7_USE(gen7, 3DSTATE_AA_LINE_PARAMETERS, gen6);
1837 GEN7_USE(gen7, 3DSTATE_MULTISAMPLE, gen6);
1838 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_VS);
1839 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_HS);
1840 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_DS);
1841 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_GS);
1842 GEN7_SET(gen7, 3DSTATE_PUSH_CONSTANT_ALLOC_PS);
1843 GEN7_SET(gen7, 3DSTATE_SO_DECL_LIST);
1844 GEN7_SET(gen7, 3DSTATE_SO_BUFFER);
1845 GEN7_USE(gen7, PIPE_CONTROL, gen6);
1846 GEN7_SET(gen7, 3DPRIMITIVE);
1847 GEN7_USE(gen7, INTERFACE_DESCRIPTOR_DATA, gen6);
1848 GEN7_SET(gen7, SF_CLIP_VIEWPORT);
1849 GEN7_USE(gen7, CC_VIEWPORT, gen6);
1850 GEN7_USE(gen7, COLOR_CALC_STATE, gen6);
1851 GEN7_USE(gen7, BLEND_STATE, gen6);
1852 GEN7_USE(gen7, DEPTH_STENCIL_STATE, gen6);
1853 GEN7_USE(gen7, SCISSOR_RECT, gen6);
1854 GEN7_USE(gen7, BINDING_TABLE_STATE, gen6);
1855 GEN7_SET(gen7, surf_SURFACE_STATE);
1856 GEN7_SET(gen7, view_SURFACE_STATE);
1857 GEN7_SET(gen7, cbuf_SURFACE_STATE);
1858 GEN7_USE(gen7, SAMPLER_STATE, gen6);
1859 GEN7_SET(gen7, SAMPLER_BORDER_COLOR_STATE);
1860 GEN7_USE(gen7, push_constant_buffer, gen6);
1861 #undef GEN7_USE
1862 #undef GEN7_SET
1863 }
1864
1865 static struct ilo_gpe_gen7 gen7_gpe;
1866
1867 const struct ilo_gpe_gen7 *
1868 ilo_gpe_gen7_get(void)
1869 {
1870 if (!gen7_gpe.estimate_command_size)
1871 gen7_init(&gen7_gpe);
1872
1873 return &gen7_gpe;
1874 }