aec4fd6d8a684660b8431c18b6d8034190f55621
[mesa.git] / src / gallium / drivers / ilo / core / ilo_state_shader.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2015 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_debug.h"
29 #include "ilo_state_shader.h"
30
/*
 * Shader stages served by the shared vertex_ff helpers in this file.  The
 * stage selects the per-stage limits checked by the validators and the
 * read-length fix-ups applied by vertex_get_gen6_ff().
 */
enum vertex_stage {
   STAGE_VS = 0,
   STAGE_HS = 1,
   STAGE_DS = 2,
   STAGE_GS = 3,
};
37
/*
 * Hardware-ready values common to the VS/HS/DS/GS state commands, as
 * computed by vertex_get_gen6_ff().
 */
struct vertex_ff {
   /* copied from the kernel info */
   uint8_t grf_start;

   /*
    * Encoded per-thread scratch space (0 means 1KB, each increment doubles
    * it) and the matching size in bytes.  Both stay zero when no scratch
    * space is requested.
    */
   uint8_t per_thread_scratch_space;
   uint32_t per_thread_scratch_size;

   /* sampler count in groups of four, capped at 4 */
   uint8_t sampler_count;
   /* copied from the resource info */
   uint8_t surface_count;
   bool has_uav;

   /* URB read offset and length, in pairs of attributes */
   uint8_t vue_read_offset;
   uint8_t vue_read_len;

   /* copied from the URB info */
   uint8_t user_clip_enables;
};
53
54 static bool
55 vertex_validate_gen6_kernel(const struct ilo_dev *dev,
56 enum vertex_stage stage,
57 const struct ilo_state_shader_kernel_info *kernel)
58 {
59 /*
60 * "Dispatch GRF Start Register for URB Data" is U4 for GS and U5 for
61 * others.
62 */
63 const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
64
65 ILO_DEV_ASSERT(dev, 6, 8);
66
67 /* we do not want to save it */
68 assert(!kernel->offset);
69
70 assert(kernel->grf_start < max_grf_start);
71
72 return true;
73 }
74
75 static bool
76 vertex_validate_gen6_urb(const struct ilo_dev *dev,
77 enum vertex_stage stage,
78 const struct ilo_state_shader_urb_info *urb)
79 {
80 /* "Vertex/Patch URB Entry Read Offset" is U6, in pairs */
81 const uint8_t max_read_base = 63 * 2;
82 /*
83 * "Vertex/Patch URB Entry Read Length" is limited to 64 for DS and U6 for
84 * others, in pairs
85 */
86 const uint8_t max_read_count = ((stage == STAGE_DS) ? 64 : 63) * 2;
87
88 ILO_DEV_ASSERT(dev, 6, 8);
89
90 assert(urb->read_base + urb->read_count <= urb->cv_input_attr_count);
91
92 assert(urb->read_base % 2 == 0 && urb->read_base <= max_read_base);
93
94 /*
95 * There is no need to worry about reading past entries, as URB entries are
96 * aligned to 1024-bits (Gen6) or 512-bits (Gen7+).
97 */
98 assert(urb->read_count <= max_read_count);
99
100 return true;
101 }
102
103 static bool
104 vertex_get_gen6_ff(const struct ilo_dev *dev,
105 enum vertex_stage stage,
106 const struct ilo_state_shader_kernel_info *kernel,
107 const struct ilo_state_shader_resource_info *resource,
108 const struct ilo_state_shader_urb_info *urb,
109 uint32_t per_thread_scratch_size,
110 struct vertex_ff *ff)
111 {
112 ILO_DEV_ASSERT(dev, 6, 8);
113
114 memset(ff, 0, sizeof(*ff));
115
116 if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
117 !vertex_validate_gen6_urb(dev, stage, urb))
118 return false;
119
120 ff->grf_start = kernel->grf_start;
121
122 if (per_thread_scratch_size) {
123 /*
124 * From the Sandy Bridge PRM, volume 2 part 1, page 134:
125 *
126 * "(Per-Thread Scratch Space)
127 * Range [0,11] indicating [1K Bytes, 2M Bytes]"
128 */
129 assert(per_thread_scratch_size <= 2 * 1024 * 1024);
130
131 /* next power of two, starting from 1KB */
132 ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
133 (util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
134 ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
135 }
136
137 ff->sampler_count = (resource->sampler_count <= 12) ?
138 (resource->sampler_count + 3) / 4 : 4;
139 ff->surface_count = resource->surface_count;
140 ff->has_uav = resource->has_uav;
141
142 ff->vue_read_offset = urb->read_base / 2;
143 ff->vue_read_len = (urb->read_count + 1) / 2;
144
145 /* need to read something unless VUE handles are included */
146 switch (stage) {
147 case STAGE_VS:
148 if (!ff->vue_read_len)
149 ff->vue_read_len = 1;
150
151 /* one GRF per attribute */
152 assert(kernel->grf_start + urb->read_count * 2 <= 128);
153 break;
154 case STAGE_GS:
155 if (ilo_dev_gen(dev) == ILO_GEN(6) && !ff->vue_read_len)
156 ff->vue_read_len = 1;
157 break;
158 default:
159 break;
160 }
161
162 ff->user_clip_enables = urb->user_clip_enables;
163
164 return true;
165 }
166
167 static uint16_t
168 vs_get_gen6_thread_count(const struct ilo_dev *dev,
169 const struct ilo_state_vs_info *info)
170 {
171 uint16_t thread_count;
172
173 ILO_DEV_ASSERT(dev, 6, 8);
174
175 /* Maximum Number of Threads of 3DSTATE_VS */
176 switch (ilo_dev_gen(dev)) {
177 case ILO_GEN(8):
178 thread_count = 504;
179 break;
180 case ILO_GEN(7.5):
181 thread_count = (dev->gt >= 2) ? 280 : 70;
182 break;
183 case ILO_GEN(7):
184 case ILO_GEN(6):
185 default:
186 thread_count = dev->thread_count;
187 break;
188 }
189
190 return thread_count - 1;
191 }
192
193 static bool
194 vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
195 const struct ilo_dev *dev,
196 const struct ilo_state_vs_info *info)
197 {
198 struct vertex_ff ff;
199 uint16_t thread_count;
200 uint32_t dw2, dw3, dw4, dw5;
201
202 ILO_DEV_ASSERT(dev, 6, 8);
203
204 if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
205 &info->urb, info->per_thread_scratch_size, &ff))
206 return false;
207
208 thread_count = vs_get_gen6_thread_count(dev, info);
209
210 dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
211 ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
212
213 if (false)
214 dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
215
216 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
217 dw2 |= GEN75_THREADDISP_ACCESS_UAV;
218
219 dw3 = ff.per_thread_scratch_space <<
220 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
221
222 dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
223 ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
224 ff.vue_read_offset << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
225
226 dw5 = 0;
227
228 if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
229 dw5 |= thread_count << GEN75_VS_DW5_MAX_THREADS__SHIFT;
230 else
231 dw5 |= thread_count << GEN6_VS_DW5_MAX_THREADS__SHIFT;
232
233 if (info->stats_enable)
234 dw5 |= GEN6_VS_DW5_STATISTICS;
235 if (info->dispatch_enable)
236 dw5 |= GEN6_VS_DW5_VS_ENABLE;
237
238 STATIC_ASSERT(ARRAY_SIZE(vs->vs) >= 5);
239 vs->vs[0] = dw2;
240 vs->vs[1] = dw3;
241 vs->vs[2] = dw4;
242 vs->vs[3] = dw5;
243
244 if (ilo_dev_gen(dev) >= ILO_GEN(8))
245 vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
246
247 vs->scratch_size = ff.per_thread_scratch_size * thread_count;
248
249 return true;
250 }
251
252 static uint16_t
253 hs_get_gen7_thread_count(const struct ilo_dev *dev,
254 const struct ilo_state_hs_info *info)
255 {
256 uint16_t thread_count;
257
258 ILO_DEV_ASSERT(dev, 7, 8);
259
260 /* Maximum Number of Threads of 3DSTATE_HS */
261 switch (ilo_dev_gen(dev)) {
262 case ILO_GEN(8):
263 thread_count = 504;
264 break;
265 case ILO_GEN(7.5):
266 thread_count = (dev->gt >= 2) ? 256 : 70;
267 break;
268 case ILO_GEN(7):
269 default:
270 thread_count = dev->thread_count;
271 break;
272 }
273
274 return thread_count - 1;
275 }
276
277 static bool
278 hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
279 const struct ilo_dev *dev,
280 const struct ilo_state_hs_info *info)
281 {
282 struct vertex_ff ff;
283 uint16_t thread_count;
284 uint32_t dw1, dw2, dw4, dw5;
285
286 ILO_DEV_ASSERT(dev, 7, 8);
287
288 if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
289 &info->urb, info->per_thread_scratch_size, &ff))
290 return false;
291
292 thread_count = hs_get_gen7_thread_count(dev, info);
293
294 dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
295 ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
296
297 dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
298
299 if (ilo_dev_gen(dev) >= ILO_GEN(8))
300 dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT;
301 else if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
302 dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
303 else
304 dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
305
306 if (info->dispatch_enable)
307 dw2 |= GEN7_HS_DW2_HS_ENABLE;
308 if (info->stats_enable)
309 dw2 |= GEN7_HS_DW2_STATISTICS;
310
311 dw4 = ff.per_thread_scratch_space <<
312 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
313
314 dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
315 ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
316 ff.vue_read_len << GEN7_HS_DW5_URB_READ_LEN__SHIFT |
317 ff.vue_read_offset << GEN7_HS_DW5_URB_READ_OFFSET__SHIFT;
318
319 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
320 dw5 |= GEN75_HS_DW5_ACCESS_UAV;
321
322 STATIC_ASSERT(ARRAY_SIZE(hs->hs) >= 4);
323 hs->hs[0] = dw1;
324 hs->hs[1] = dw2;
325 hs->hs[2] = dw4;
326 hs->hs[3] = dw5;
327
328 hs->scratch_size = ff.per_thread_scratch_size * thread_count;
329
330 return true;
331 }
332
333 static bool
334 ds_set_gen7_3DSTATE_TE(struct ilo_state_ds *ds,
335 const struct ilo_dev *dev,
336 const struct ilo_state_ds_info *info)
337 {
338 uint32_t dw1;
339
340 ILO_DEV_ASSERT(dev, 7, 8);
341
342 dw1 = 0;
343
344 if (info->dispatch_enable) {
345 dw1 |= GEN7_TE_DW1_MODE_HW |
346 GEN7_TE_DW1_TE_ENABLE;
347 }
348
349 STATIC_ASSERT(ARRAY_SIZE(ds->te) >= 3);
350 ds->te[0] = dw1;
351 ds->te[1] = fui(63.0f);
352 ds->te[2] = fui(64.0f);
353
354 return true;
355 }
356
357 static uint16_t
358 ds_get_gen7_thread_count(const struct ilo_dev *dev,
359 const struct ilo_state_ds_info *info)
360 {
361 uint16_t thread_count;
362
363 ILO_DEV_ASSERT(dev, 7, 8);
364
365 /* Maximum Number of Threads of 3DSTATE_DS */
366 switch (ilo_dev_gen(dev)) {
367 case ILO_GEN(8):
368 thread_count = 504;
369 break;
370 case ILO_GEN(7.5):
371 thread_count = (dev->gt >= 2) ? 280 : 70;
372 break;
373 case ILO_GEN(7):
374 default:
375 thread_count = dev->thread_count;
376 break;
377 }
378
379 return thread_count - 1;
380 }
381
382 static bool
383 ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
384 const struct ilo_dev *dev,
385 const struct ilo_state_ds_info *info)
386 {
387 struct vertex_ff ff;
388 uint16_t thread_count;
389 uint32_t dw2, dw3, dw4, dw5;
390
391 ILO_DEV_ASSERT(dev, 7, 8);
392
393 if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
394 &info->urb, info->per_thread_scratch_size, &ff))
395 return false;
396
397 thread_count = ds_get_gen7_thread_count(dev, info);
398
399 dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
400 ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
401
402 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
403 dw2 |= GEN75_THREADDISP_ACCESS_UAV;
404
405 dw3 = ff.per_thread_scratch_space <<
406 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
407
408 dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
409 ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
410 ff.vue_read_offset << GEN7_DS_DW4_URB_READ_OFFSET__SHIFT;
411
412 dw5 = 0;
413
414 if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
415 dw5 |= thread_count << GEN75_DS_DW5_MAX_THREADS__SHIFT;
416 else
417 dw5 |= thread_count << GEN7_DS_DW5_MAX_THREADS__SHIFT;
418
419 if (info->stats_enable)
420 dw5 |= GEN7_DS_DW5_STATISTICS;
421 if (info->dispatch_enable)
422 dw5 |= GEN7_DS_DW5_DS_ENABLE;
423
424 STATIC_ASSERT(ARRAY_SIZE(ds->ds) >= 5);
425 ds->ds[0] = dw2;
426 ds->ds[1] = dw3;
427 ds->ds[2] = dw4;
428 ds->ds[3] = dw5;
429
430 if (ilo_dev_gen(dev) >= ILO_GEN(8))
431 ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
432
433 ds->scratch_size = ff.per_thread_scratch_size * thread_count;
434
435 return true;
436 }
437
438 static bool
439 gs_get_gen6_ff(const struct ilo_dev *dev,
440 const struct ilo_state_gs_info *info,
441 struct vertex_ff *ff)
442 {
443 const struct ilo_state_shader_urb_info *urb = &info->urb;
444 const struct ilo_state_gs_sol_info *sol = &info->sol;
445
446 ILO_DEV_ASSERT(dev, 6, 8);
447
448 if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
449 &info->urb, info->per_thread_scratch_size, ff))
450 return false;
451
452 /*
453 * From the Ivy Bridge PRM, volume 2 part 1, page 168-169:
454 *
455 * "[0,62] indicating [1,63] 16B units"
456 *
457 * "Programming Restrictions: The vertex size must be programmed as a
458 * multiple of 32B units with the following exception: Rendering is
459 * disabled (as per SOL stage state) and the vertex size output by the
460 * GS thread is 16B.
461 *
462 * If rendering is enabled (as per SOL state) the vertex size must be
463 * programmed as a multiple of 32B units. In other words, the only
464 * time software can program a vertex size with an odd number of 16B
465 * units is when rendering is disabled."
466 */
467 assert(urb->output_attr_count <= 63);
468 if (!sol->render_disable)
469 assert(urb->output_attr_count % 2 == 0);
470
471 return true;
472 }
473
474 static uint16_t
475 gs_get_gen6_thread_count(const struct ilo_dev *dev,
476 const struct ilo_state_gs_info *info)
477 {
478 const struct ilo_state_gs_sol_info *sol = &info->sol;
479 uint16_t thread_count;
480
481 ILO_DEV_ASSERT(dev, 6, 8);
482
483 /* Maximum Number of Threads of 3DSTATE_GS */
484 switch (ilo_dev_gen(dev)) {
485 case ILO_GEN(8):
486 thread_count = 504;
487 break;
488 case ILO_GEN(7.5):
489 thread_count = (dev->gt >= 2) ? 256 : 70;
490 break;
491 case ILO_GEN(7):
492 case ILO_GEN(6):
493 default:
494 thread_count = dev->thread_count;
495
496 /*
497 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
498 *
499 * "Maximum Number of Threads valid range is [0,27] when Rendering
500 * Enabled bit is set."
501 *
502 * According to the classic driver, [0, 20] for GT1.
503 */
504 if (!sol->render_disable)
505 thread_count = (dev->gt == 2) ? 27 : 20;
506 break;
507 }
508
509 return thread_count - 1;
510 }
511
512 static bool
513 gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
514 const struct ilo_dev *dev,
515 const struct ilo_state_gs_info *info)
516 {
517 const struct ilo_state_gs_sol_info *sol = &info->sol;
518 struct vertex_ff ff;
519 uint16_t thread_count;
520 uint32_t dw2, dw3, dw4, dw5, dw6;
521
522 ILO_DEV_ASSERT(dev, 6, 6);
523
524 if (!gs_get_gen6_ff(dev, info, &ff))
525 return false;
526
527 thread_count = gs_get_gen6_thread_count(dev, info);
528
529 dw2 = GEN6_THREADDISP_SPF |
530 ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
531 ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
532
533 dw3 = ff.per_thread_scratch_space <<
534 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
535
536 dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
537 ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
538 ff.grf_start << GEN6_GS_DW4_URB_GRF_START__SHIFT;
539
540 dw5 = thread_count << GEN6_GS_DW5_MAX_THREADS__SHIFT;
541
542 if (info->stats_enable)
543 dw5 |= GEN6_GS_DW5_STATISTICS;
544 if (sol->stats_enable)
545 dw5 |= GEN6_GS_DW5_SO_STATISTICS;
546 if (!sol->render_disable)
547 dw5 |= GEN6_GS_DW5_RENDER_ENABLE;
548
549 dw6 = 0;
550
551 /* GEN7_REORDER_TRAILING is handled by the kernel */
552 if (sol->tristrip_reorder == GEN7_REORDER_LEADING)
553 dw6 |= GEN6_GS_DW6_REORDER_LEADING_ENABLE;
554
555 if (sol->sol_enable) {
556 dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
557
558 if (sol->svbi_post_inc) {
559 dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
560 sol->svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
561 }
562 }
563
564 if (info->dispatch_enable)
565 dw6 |= GEN6_GS_DW6_GS_ENABLE;
566
567 STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
568 gs->gs[0] = dw2;
569 gs->gs[1] = dw3;
570 gs->gs[2] = dw4;
571 gs->gs[3] = dw5;
572 gs->gs[4] = dw6;
573
574 gs->scratch_size = ff.per_thread_scratch_size * thread_count;
575
576 return true;
577 }
578
579 static uint8_t
580 gs_get_gen7_vertex_size(const struct ilo_dev *dev,
581 const struct ilo_state_gs_info *info)
582 {
583 const struct ilo_state_shader_urb_info *urb = &info->urb;
584
585 ILO_DEV_ASSERT(dev, 7, 8);
586
587 return (urb->output_attr_count) ? urb->output_attr_count - 1 : 0;
588 }
589
590 static bool
591 gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
592 const struct ilo_dev *dev,
593 const struct ilo_state_gs_info *info)
594 {
595 struct vertex_ff ff;
596 uint16_t thread_count;
597 uint8_t vertex_size;
598 uint32_t dw2, dw3, dw4, dw5;
599
600 ILO_DEV_ASSERT(dev, 7, 8);
601
602 if (!gs_get_gen6_ff(dev, info, &ff))
603 return false;
604
605 thread_count = gs_get_gen6_thread_count(dev, info);
606 vertex_size = gs_get_gen7_vertex_size(dev, info);
607
608 dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
609 ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
610
611 if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
612 dw2 |= GEN75_THREADDISP_ACCESS_UAV;
613
614 dw3 = ff.per_thread_scratch_space <<
615 GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
616
617 dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
618 0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
619 ff.vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
620 GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
621 ff.vue_read_offset << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
622 ff.grf_start << GEN7_GS_DW4_URB_GRF_START__SHIFT;
623
624 dw5 = 0;
625
626 if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
627 dw5 = thread_count << GEN75_GS_DW5_MAX_THREADS__SHIFT;
628 else
629 dw5 = thread_count << GEN7_GS_DW5_MAX_THREADS__SHIFT;
630
631 if (info->stats_enable)
632 dw5 |= GEN7_GS_DW5_STATISTICS;
633 if (info->dispatch_enable)
634 dw5 |= GEN7_GS_DW5_GS_ENABLE;
635
636 STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
637 gs->gs[0] = dw2;
638 gs->gs[1] = dw3;
639 gs->gs[2] = dw4;
640 gs->gs[3] = dw5;
641
642 if (ilo_dev_gen(dev) >= ILO_GEN(8))
643 gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
644
645 gs->scratch_size = ff.per_thread_scratch_size * thread_count;
646
647 return true;
648 }
649
650 bool
651 ilo_state_vs_init(struct ilo_state_vs *vs,
652 const struct ilo_dev *dev,
653 const struct ilo_state_vs_info *info)
654 {
655 bool ret = true;
656
657 assert(ilo_is_zeroed(vs, sizeof(*vs)));
658
659 ret &= vs_set_gen6_3DSTATE_VS(vs, dev, info);
660
661 assert(ret);
662
663 return ret;
664 }
665
666 bool
667 ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
668 const struct ilo_dev *dev)
669 {
670 struct ilo_state_vs_info info;
671
672 memset(&info, 0, sizeof(info));
673
674 return ilo_state_vs_init(vs, dev, &info);
675 }
676
677 bool
678 ilo_state_hs_init(struct ilo_state_hs *hs,
679 const struct ilo_dev *dev,
680 const struct ilo_state_hs_info *info)
681 {
682 bool ret = true;
683
684 assert(ilo_is_zeroed(hs, sizeof(*hs)));
685
686 if (ilo_dev_gen(dev) >= ILO_GEN(7))
687 ret &= hs_set_gen7_3DSTATE_HS(hs, dev, info);
688
689 assert(ret);
690
691 return ret;
692 }
693
694 bool
695 ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
696 const struct ilo_dev *dev)
697 {
698 struct ilo_state_hs_info info;
699
700 memset(&info, 0, sizeof(info));
701
702 return ilo_state_hs_init(hs, dev, &info);
703 }
704
705 bool
706 ilo_state_ds_init(struct ilo_state_ds *ds,
707 const struct ilo_dev *dev,
708 const struct ilo_state_ds_info *info)
709 {
710 bool ret = true;
711
712 assert(ilo_is_zeroed(ds, sizeof(*ds)));
713
714 if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
715 ret &= ds_set_gen7_3DSTATE_TE(ds, dev, info);
716 ret &= ds_set_gen7_3DSTATE_DS(ds, dev, info);
717 }
718
719 assert(ret);
720
721 return ret;
722 }
723
724 bool
725 ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
726 const struct ilo_dev *dev)
727 {
728 struct ilo_state_ds_info info;
729
730 memset(&info, 0, sizeof(info));
731
732 return ilo_state_ds_init(ds, dev, &info);
733 }
734
735 bool
736 ilo_state_gs_init(struct ilo_state_gs *gs,
737 const struct ilo_dev *dev,
738 const struct ilo_state_gs_info *info)
739 {
740 bool ret = true;
741
742 assert(ilo_is_zeroed(gs, sizeof(*gs)));
743
744 if (ilo_dev_gen(dev) >= ILO_GEN(7))
745 ret &= gs_set_gen7_3DSTATE_GS(gs, dev, info);
746 else
747 ret &= gs_set_gen6_3DSTATE_GS(gs, dev, info);
748
749 assert(ret);
750
751 return ret;
752 }
753
754 bool
755 ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
756 const struct ilo_dev *dev)
757 {
758 struct ilo_state_gs_info info;
759
760 memset(&info, 0, sizeof(info));
761
762 return ilo_state_gs_init(gs, dev, &info);
763 }