intel: Split gen_device_info out into libintel_dev
[mesa.git] / src / intel / dev / gen_device_info.c
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 #include "gen_device_info.h"
30 #include "compiler/shader_enums.h"
31 #include "util/macros.h"
32
/**
 * Get the PCI ID for the device name.
 *
 * The name is compared case-sensitively against the short names in the
 * table below (for example "skl").
 *
 * Returns -1 if the device is not known or if name is NULL.
 */
int
gen_device_name_to_pci_device_id(const char *name)
{
   static const struct {
      const char *name;
      int pci_id;
   } name_map[] = {
      { "brw", 0x2a02 },
      { "g4x", 0x2a42 },
      { "ilk", 0x0042 },
      { "snb", 0x0126 },
      { "ivb", 0x016a },
      { "hsw", 0x0d2e },
      { "byt", 0x0f33 },
      { "bdw", 0x162e },
      { "chv", 0x22B3 },
      { "skl", 0x1912 },
      { "bxt", 0x5A85 },
      { "kbl", 0x5912 },
      { "glk", 0x3185 },
      { "cnl", 0x5a52 },
      { "icl", 0x8a52 },
   };

   /* strcmp() on a NULL pointer is undefined; treat it as an unknown name. */
   if (name == NULL)
      return -1;

   for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
      if (strcmp(name_map[i].name, name) == 0)
         return name_map[i].pci_id;
   }

   return -1;
}
69
/**
 * Get the overridden PCI ID for the device. This is set with the
 * INTEL_DEVID_OVERRIDE environment variable, which may hold either a name
 * known to gen_device_name_to_pci_device_id() or a number parsed by
 * strtol() with base 0.
 *
 * The variable is only consulted when the effective and real user IDs match.
 *
 * Returns -1 if the override is not set.
 */
int
gen_get_pci_device_id_override(void)
{
   /* Do not honour the environment when effective and real user differ. */
   if (geteuid() != getuid())
      return -1;

   const char *override = getenv("INTEL_DEVID_OVERRIDE");
   if (override == NULL)
      return -1;

   const int named_id = gen_device_name_to_pci_device_id(override);
   if (named_id >= 0)
      return named_id;

   /* Not a known name: interpret the string as a number. */
   return strtol(override, NULL, 0);
}
89
90 static const struct gen_device_info gen_device_info_i965 = {
91 .gen = 4,
92 .has_negative_rhw_bug = true,
93 .num_slices = 1,
94 .num_subslices = { 1, },
95 .num_thread_per_eu = 4,
96 .max_vs_threads = 16,
97 .max_gs_threads = 2,
98 .max_wm_threads = 8 * 4,
99 .urb = {
100 .size = 256,
101 },
102 .timestamp_frequency = 12500000,
103 };
104
105 static const struct gen_device_info gen_device_info_g4x = {
106 .gen = 4,
107 .has_pln = true,
108 .has_compr4 = true,
109 .has_surface_tile_offset = true,
110 .is_g4x = true,
111 .num_slices = 1,
112 .num_subslices = { 1, },
113 .num_thread_per_eu = 5,
114 .max_vs_threads = 32,
115 .max_gs_threads = 2,
116 .max_wm_threads = 10 * 5,
117 .urb = {
118 .size = 384,
119 },
120 .timestamp_frequency = 12500000,
121 };
122
123 static const struct gen_device_info gen_device_info_ilk = {
124 .gen = 5,
125 .has_pln = true,
126 .has_compr4 = true,
127 .has_surface_tile_offset = true,
128 .num_slices = 1,
129 .num_subslices = { 1, },
130 .num_thread_per_eu = 6,
131 .max_vs_threads = 72,
132 .max_gs_threads = 32,
133 .max_wm_threads = 12 * 6,
134 .urb = {
135 .size = 1024,
136 },
137 .timestamp_frequency = 12500000,
138 };
139
140 static const struct gen_device_info gen_device_info_snb_gt1 = {
141 .gen = 6,
142 .gt = 1,
143 .has_hiz_and_separate_stencil = true,
144 .has_llc = true,
145 .has_pln = true,
146 .has_surface_tile_offset = true,
147 .needs_unlit_centroid_workaround = true,
148 .num_slices = 1,
149 .num_subslices = { 1, },
150 .num_thread_per_eu = 6, /* Not confirmed */
151 .max_vs_threads = 24,
152 .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
153 .max_wm_threads = 40,
154 .urb = {
155 .size = 32,
156 .min_entries = {
157 [MESA_SHADER_VERTEX] = 24,
158 },
159 .max_entries = {
160 [MESA_SHADER_VERTEX] = 256,
161 [MESA_SHADER_GEOMETRY] = 256,
162 },
163 },
164 .timestamp_frequency = 12500000,
165 };
166
167 static const struct gen_device_info gen_device_info_snb_gt2 = {
168 .gen = 6,
169 .gt = 2,
170 .has_hiz_and_separate_stencil = true,
171 .has_llc = true,
172 .has_pln = true,
173 .has_surface_tile_offset = true,
174 .needs_unlit_centroid_workaround = true,
175 .num_slices = 1,
176 .num_subslices = { 1, },
177 .num_thread_per_eu = 6, /* Not confirmed */
178 .max_vs_threads = 60,
179 .max_gs_threads = 60,
180 .max_wm_threads = 80,
181 .urb = {
182 .size = 64,
183 .min_entries = {
184 [MESA_SHADER_VERTEX] = 24,
185 },
186 .max_entries = {
187 [MESA_SHADER_VERTEX] = 256,
188 [MESA_SHADER_GEOMETRY] = 256,
189 },
190 },
191 .timestamp_frequency = 12500000,
192 };
193
/* Designated initializers common to the gen 7 tables in this file.
 *
 * Expands to a comma-separated list with no trailing comma, so it is used as
 * "GEN7_FEATURES, ...".  Fields listed after the macro in a table override
 * the values given here.
 */
#define GEN7_FEATURES                              \
   .gen = 7,                                       \
   .timestamp_frequency = 12500000,                \
   .has_llc = true,                                \
   .has_pln = true,                                \
   .has_64bit_types = true,                        \
   .has_surface_tile_offset = true,                \
   .has_hiz_and_separate_stencil = true,           \
   .must_use_separate_stencil = true
203
204 static const struct gen_device_info gen_device_info_ivb_gt1 = {
205 GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
206 .num_slices = 1,
207 .num_subslices = { 1, },
208 .num_thread_per_eu = 6,
209 .l3_banks = 2,
210 .max_vs_threads = 36,
211 .max_tcs_threads = 36,
212 .max_tes_threads = 36,
213 .max_gs_threads = 36,
214 .max_wm_threads = 48,
215 .max_cs_threads = 36,
216 .urb = {
217 .size = 128,
218 .min_entries = {
219 [MESA_SHADER_VERTEX] = 32,
220 [MESA_SHADER_TESS_EVAL] = 10,
221 },
222 .max_entries = {
223 [MESA_SHADER_VERTEX] = 512,
224 [MESA_SHADER_TESS_CTRL] = 32,
225 [MESA_SHADER_TESS_EVAL] = 288,
226 [MESA_SHADER_GEOMETRY] = 192,
227 },
228 },
229 };
230
231 static const struct gen_device_info gen_device_info_ivb_gt2 = {
232 GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
233 .num_slices = 1,
234 .num_subslices = { 1, },
235 .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
236 * @max_wm_threads ... */
237 .l3_banks = 4,
238 .max_vs_threads = 128,
239 .max_tcs_threads = 128,
240 .max_tes_threads = 128,
241 .max_gs_threads = 128,
242 .max_wm_threads = 172,
243 .max_cs_threads = 64,
244 .urb = {
245 .size = 256,
246 .min_entries = {
247 [MESA_SHADER_VERTEX] = 32,
248 [MESA_SHADER_TESS_EVAL] = 10,
249 },
250 .max_entries = {
251 [MESA_SHADER_VERTEX] = 704,
252 [MESA_SHADER_TESS_CTRL] = 64,
253 [MESA_SHADER_TESS_EVAL] = 448,
254 [MESA_SHADER_GEOMETRY] = 320,
255 },
256 },
257 };
258
259 static const struct gen_device_info gen_device_info_byt = {
260 GEN7_FEATURES, .is_baytrail = true, .gt = 1,
261 .num_slices = 1,
262 .num_subslices = { 1, },
263 .num_thread_per_eu = 8,
264 .l3_banks = 1,
265 .has_llc = false,
266 .max_vs_threads = 36,
267 .max_tcs_threads = 36,
268 .max_tes_threads = 36,
269 .max_gs_threads = 36,
270 .max_wm_threads = 48,
271 .max_cs_threads = 32,
272 .urb = {
273 .size = 128,
274 .min_entries = {
275 [MESA_SHADER_VERTEX] = 32,
276 [MESA_SHADER_TESS_EVAL] = 10,
277 },
278 .max_entries = {
279 [MESA_SHADER_VERTEX] = 512,
280 [MESA_SHADER_TESS_CTRL] = 32,
281 [MESA_SHADER_TESS_EVAL] = 288,
282 [MESA_SHADER_GEOMETRY] = 192,
283 },
284 },
285 };
286
/* Initializers shared by the hsw_gt* tables: everything in GEN7_FEATURES
 * plus three additional flags.  No trailing comma, like GEN7_FEATURES.
 */
#define HSW_FEATURES                    \
   GEN7_FEATURES,                       \
   .is_haswell = true,                  \
   .has_resource_streamer = true,       \
   .supports_simd16_3src = true
292
293 static const struct gen_device_info gen_device_info_hsw_gt1 = {
294 HSW_FEATURES, .gt = 1,
295 .num_slices = 1,
296 .num_subslices = { 1, },
297 .num_thread_per_eu = 7,
298 .l3_banks = 2,
299 .max_vs_threads = 70,
300 .max_tcs_threads = 70,
301 .max_tes_threads = 70,
302 .max_gs_threads = 70,
303 .max_wm_threads = 102,
304 .max_cs_threads = 70,
305 .urb = {
306 .size = 128,
307 .min_entries = {
308 [MESA_SHADER_VERTEX] = 32,
309 [MESA_SHADER_TESS_EVAL] = 10,
310 },
311 .max_entries = {
312 [MESA_SHADER_VERTEX] = 640,
313 [MESA_SHADER_TESS_CTRL] = 64,
314 [MESA_SHADER_TESS_EVAL] = 384,
315 [MESA_SHADER_GEOMETRY] = 256,
316 },
317 },
318 };
319
320 static const struct gen_device_info gen_device_info_hsw_gt2 = {
321 HSW_FEATURES, .gt = 2,
322 .num_slices = 1,
323 .num_subslices = { 2, },
324 .num_thread_per_eu = 7,
325 .l3_banks = 4,
326 .max_vs_threads = 280,
327 .max_tcs_threads = 256,
328 .max_tes_threads = 280,
329 .max_gs_threads = 256,
330 .max_wm_threads = 204,
331 .max_cs_threads = 70,
332 .urb = {
333 .size = 256,
334 .min_entries = {
335 [MESA_SHADER_VERTEX] = 64,
336 [MESA_SHADER_TESS_EVAL] = 10,
337 },
338 .max_entries = {
339 [MESA_SHADER_VERTEX] = 1664,
340 [MESA_SHADER_TESS_CTRL] = 128,
341 [MESA_SHADER_TESS_EVAL] = 960,
342 [MESA_SHADER_GEOMETRY] = 640,
343 },
344 },
345 };
346
347 static const struct gen_device_info gen_device_info_hsw_gt3 = {
348 HSW_FEATURES, .gt = 3,
349 .num_slices = 2,
350 .num_subslices = { 2, },
351 .num_thread_per_eu = 7,
352 .l3_banks = 8,
353 .max_vs_threads = 280,
354 .max_tcs_threads = 256,
355 .max_tes_threads = 280,
356 .max_gs_threads = 256,
357 .max_wm_threads = 408,
358 .max_cs_threads = 70,
359 .urb = {
360 .size = 512,
361 .min_entries = {
362 [MESA_SHADER_VERTEX] = 64,
363 [MESA_SHADER_TESS_EVAL] = 10,
364 },
365 .max_entries = {
366 [MESA_SHADER_VERTEX] = 1664,
367 [MESA_SHADER_TESS_CTRL] = 128,
368 [MESA_SHADER_TESS_EVAL] = 960,
369 [MESA_SHADER_GEOMETRY] = 640,
370 },
371 },
372 };
373
/* Designated initializers used as the base of the gen 8 and later tables.
 *
 * It's unclear how well supported sampling from the hiz buffer is on GEN8,
 * so keep things conservative for now and set has_sample_with_hiz = false.
 *
 * Expands to a comma-separated list with no trailing comma; fields listed
 * after the macro override the values given here.
 */
#define GEN8_FEATURES                              \
   .gen = 8,                                       \
   .timestamp_frequency = 12500000,                \
   .has_llc = true,                                \
   .has_pln = true,                                \
   .has_64bit_types = true,                        \
   .has_integer_dword_mul = true,                  \
   .has_resource_streamer = true,                  \
   .has_surface_tile_offset = true,                \
   .has_hiz_and_separate_stencil = true,           \
   .must_use_separate_stencil = true,              \
   .supports_simd16_3src = true,                   \
   .has_sample_with_hiz = false,                   \
   .max_vs_threads = 504,                          \
   .max_tcs_threads = 504,                         \
   .max_tes_threads = 504,                         \
   .max_gs_threads = 504,                          \
   .max_wm_threads = 384
395
396 static const struct gen_device_info gen_device_info_bdw_gt1 = {
397 GEN8_FEATURES, .gt = 1,
398 .is_broadwell = true,
399 .num_slices = 1,
400 .num_subslices = { 2, },
401 .num_thread_per_eu = 7,
402 .l3_banks = 2,
403 .max_cs_threads = 42,
404 .urb = {
405 .size = 192,
406 .min_entries = {
407 [MESA_SHADER_VERTEX] = 64,
408 [MESA_SHADER_TESS_EVAL] = 34,
409 },
410 .max_entries = {
411 [MESA_SHADER_VERTEX] = 2560,
412 [MESA_SHADER_TESS_CTRL] = 504,
413 [MESA_SHADER_TESS_EVAL] = 1536,
414 [MESA_SHADER_GEOMETRY] = 960,
415 },
416 }
417 };
418
419 static const struct gen_device_info gen_device_info_bdw_gt2 = {
420 GEN8_FEATURES, .gt = 2,
421 .is_broadwell = true,
422 .num_slices = 1,
423 .num_subslices = { 3, },
424 .num_thread_per_eu = 7,
425 .l3_banks = 4,
426 .max_cs_threads = 56,
427 .urb = {
428 .size = 384,
429 .min_entries = {
430 [MESA_SHADER_VERTEX] = 64,
431 [MESA_SHADER_TESS_EVAL] = 34,
432 },
433 .max_entries = {
434 [MESA_SHADER_VERTEX] = 2560,
435 [MESA_SHADER_TESS_CTRL] = 504,
436 [MESA_SHADER_TESS_EVAL] = 1536,
437 [MESA_SHADER_GEOMETRY] = 960,
438 },
439 }
440 };
441
442 static const struct gen_device_info gen_device_info_bdw_gt3 = {
443 GEN8_FEATURES, .gt = 3,
444 .is_broadwell = true,
445 .num_slices = 2,
446 .num_subslices = { 3, 3, },
447 .num_thread_per_eu = 7,
448 .l3_banks = 8,
449 .max_cs_threads = 56,
450 .urb = {
451 .size = 384,
452 .min_entries = {
453 [MESA_SHADER_VERTEX] = 64,
454 [MESA_SHADER_TESS_EVAL] = 34,
455 },
456 .max_entries = {
457 [MESA_SHADER_VERTEX] = 2560,
458 [MESA_SHADER_TESS_CTRL] = 504,
459 [MESA_SHADER_TESS_EVAL] = 1536,
460 [MESA_SHADER_GEOMETRY] = 960,
461 },
462 }
463 };
464
465 static const struct gen_device_info gen_device_info_chv = {
466 GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
467 .has_llc = false,
468 .has_integer_dword_mul = false,
469 .num_slices = 1,
470 .num_subslices = { 2, },
471 .num_thread_per_eu = 7,
472 .l3_banks = 2,
473 .max_vs_threads = 80,
474 .max_tcs_threads = 80,
475 .max_tes_threads = 80,
476 .max_gs_threads = 80,
477 .max_wm_threads = 128,
478 .max_cs_threads = 6 * 7,
479 .urb = {
480 .size = 192,
481 .min_entries = {
482 [MESA_SHADER_VERTEX] = 34,
483 [MESA_SHADER_TESS_EVAL] = 34,
484 },
485 .max_entries = {
486 [MESA_SHADER_VERTEX] = 640,
487 [MESA_SHADER_TESS_CTRL] = 80,
488 [MESA_SHADER_TESS_EVAL] = 384,
489 [MESA_SHADER_GEOMETRY] = 256,
490 },
491 }
492 };
493
/* Gen 9 thread limits, timestamp frequency and URB configuration.
 *
 * Used after GEN8_FEATURES (see GEN9_FEATURES and GEN9_LP_FEATURES), so the
 * fields listed here replace the gen 8 values.  No trailing comma.
 */
#define GEN9_HW_INFO                               \
   .gen = 9,                                       \
   .timestamp_frequency = 12000000,                \
   .max_vs_threads = 336,                          \
   .max_tcs_threads = 336,                         \
   .max_tes_threads = 336,                         \
   .max_gs_threads = 336,                          \
   .max_cs_threads = 56,                           \
   .urb = {                                        \
      .size = 384,                                 \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 64,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 1856,           \
         [MESA_SHADER_TESS_CTRL] = 672,            \
         [MESA_SHADER_TESS_EVAL] = 1120,           \
         [MESA_SHADER_GEOMETRY]  = 640,            \
      },                                           \
   }
515
/* Base initializers for the GEN9_LP_FEATURES_3X6 / _2X6 variants.
 *
 * Starts from GEN8_FEATURES and GEN9_HW_INFO (in that order) and then
 * overrides a number of their fields, so the two base macros must stay
 * first.  No trailing comma.
 */
#define GEN9_LP_FEATURES                           \
   GEN8_FEATURES,                                  \
   GEN9_HW_INFO,                                   \
   .gt = 1,                                        \
   .has_llc = false,                               \
   .has_integer_dword_mul = false,                 \
   .has_sample_with_hiz = true,                    \
   .timestamp_frequency = 19200000,                \
   .num_slices = 1,                                \
   .num_thread_per_eu = 6,                         \
   .max_vs_threads = 112,                          \
   .max_tcs_threads = 112,                         \
   .max_tes_threads = 112,                         \
   .max_gs_threads = 112,                          \
   .max_cs_threads = 6 * 6,                        \
   .urb = {                                        \
      .size = 192,                                 \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 34,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 704,            \
         [MESA_SHADER_TESS_CTRL] = 256,            \
         [MESA_SHADER_TESS_EVAL] = 416,            \
         [MESA_SHADER_GEOMETRY]  = 256,            \
      },                                           \
   }
544
/* GEN9_LP_FEATURES with three subslices in the single slice. */
#define GEN9_LP_FEATURES_3X6                       \
   GEN9_LP_FEATURES,                               \
   .num_subslices = { 3 }
548
/* GEN9_LP_FEATURES with two subslices in the single slice, plus lower
 * thread limits and a smaller URB that override the GEN9_LP_FEATURES values.
 * GEN9_LP_FEATURES must therefore stay first.  No trailing comma.
 */
#define GEN9_LP_FEATURES_2X6                       \
   GEN9_LP_FEATURES,                               \
   .num_subslices = { 2 },                         \
   .max_vs_threads = 56,                           \
   .max_tcs_threads = 56,                          \
   .max_tes_threads = 56,                          \
   .max_gs_threads = 56,                           \
   .max_cs_threads = 6 * 6,                        \
   .urb = {                                        \
      .size = 128,                                 \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 34,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 352,            \
         [MESA_SHADER_TESS_CTRL] = 128,            \
         [MESA_SHADER_TESS_EVAL] = 208,            \
         [MESA_SHADER_GEOMETRY]  = 128,            \
      },                                           \
   }
570
/* Base initializers for the skl_*, kbl_* and cfl_* tables: GEN8_FEATURES,
 * then GEN9_HW_INFO, then two further fields.  has_sample_with_hiz overrides
 * the false set by GEN8_FEATURES, so the base macros must stay first.
 */
#define GEN9_FEATURES                              \
   GEN8_FEATURES,                                  \
   GEN9_HW_INFO,                                   \
   .num_thread_per_eu = 7,                         \
   .has_sample_with_hiz = true
576
577 static const struct gen_device_info gen_device_info_skl_gt1 = {
578 GEN9_FEATURES, .gt = 1,
579 .is_skylake = true,
580 .num_slices = 1,
581 .num_subslices = { 2, },
582 .l3_banks = 2,
583 .urb.size = 192,
584 };
585
586 static const struct gen_device_info gen_device_info_skl_gt2 = {
587 GEN9_FEATURES, .gt = 2,
588 .is_skylake = true,
589 .num_slices = 1,
590 .num_subslices = { 3, },
591 .l3_banks = 4,
592 };
593
594 static const struct gen_device_info gen_device_info_skl_gt3 = {
595 GEN9_FEATURES, .gt = 3,
596 .is_skylake = true,
597 .num_slices = 2,
598 .num_subslices = { 3, 3, },
599 .l3_banks = 8,
600 };
601
602 static const struct gen_device_info gen_device_info_skl_gt4 = {
603 GEN9_FEATURES, .gt = 4,
604 .is_skylake = true,
605 .num_slices = 3,
606 .num_subslices = { 3, 3, 3, },
607 .l3_banks = 12,
608 /* From the "L3 Allocation and Programming" documentation:
609 *
610 * "URB is limited to 1008KB due to programming restrictions. This is not a
611 * restriction of the L3 implementation, but of the FF and other clients.
612 * Therefore, in a GT4 implementation it is possible for the programmed
613 * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
614 * only 1008KB of this will be used."
615 */
616 .urb.size = 1008 / 3,
617 };
618
619 static const struct gen_device_info gen_device_info_bxt = {
620 GEN9_LP_FEATURES_3X6,
621 .is_broxton = true,
622 .l3_banks = 2,
623 };
624
625 static const struct gen_device_info gen_device_info_bxt_2x6 = {
626 GEN9_LP_FEATURES_2X6,
627 .is_broxton = true,
628 .l3_banks = 1,
629 };
630 /*
631 * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
632 * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
633 */
634
635 static const struct gen_device_info gen_device_info_kbl_gt1 = {
636 GEN9_FEATURES,
637 .is_kabylake = true,
638 .gt = 1,
639
640 .max_cs_threads = 7 * 6,
641 .urb.size = 192,
642 .num_slices = 1,
643 .num_subslices = { 2, },
644 .l3_banks = 2,
645 };
646
647 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
648 GEN9_FEATURES,
649 .is_kabylake = true,
650 .gt = 1,
651
652 .max_cs_threads = 7 * 6,
653 .num_slices = 1,
654 .num_subslices = { 3, },
655 .l3_banks = 4,
656 };
657
658 static const struct gen_device_info gen_device_info_kbl_gt2 = {
659 GEN9_FEATURES,
660 .is_kabylake = true,
661 .gt = 2,
662
663 .num_slices = 1,
664 .num_subslices = { 3, },
665 .l3_banks = 4,
666 };
667
668 static const struct gen_device_info gen_device_info_kbl_gt3 = {
669 GEN9_FEATURES,
670 .is_kabylake = true,
671 .gt = 3,
672
673 .num_slices = 2,
674 .num_subslices = { 3, 3, },
675 .l3_banks = 8,
676 };
677
678 static const struct gen_device_info gen_device_info_kbl_gt4 = {
679 GEN9_FEATURES,
680 .is_kabylake = true,
681 .gt = 4,
682
683 /*
684 * From the "L3 Allocation and Programming" documentation:
685 *
686 * "URB is limited to 1008KB due to programming restrictions. This
687 * is not a restriction of the L3 implementation, but of the FF and
688 * other clients. Therefore, in a GT4 implementation it is
689 * possible for the programmed allocation of the L3 data array to
690 * provide 3*384KB=1152KB for URB, but only 1008KB of this
691 * will be used."
692 */
693 .urb.size = 1008 / 3,
694 .num_slices = 3,
695 .num_subslices = { 3, 3, 3, },
696 .l3_banks = 12,
697 };
698
699 static const struct gen_device_info gen_device_info_glk = {
700 GEN9_LP_FEATURES_3X6,
701 .is_geminilake = true,
702 .l3_banks = 2,
703 };
704
705 /*TODO: Initialize l3_banks when we know the number. */
706 static const struct gen_device_info gen_device_info_glk_2x6 = {
707 GEN9_LP_FEATURES_2X6,
708 .is_geminilake = true,
709 };
710
711 static const struct gen_device_info gen_device_info_cfl_gt1 = {
712 GEN9_FEATURES,
713 .is_coffeelake = true,
714 .gt = 1,
715
716 .num_slices = 1,
717 .num_subslices = { 2, },
718 .l3_banks = 2,
719 };
720 static const struct gen_device_info gen_device_info_cfl_gt2 = {
721 GEN9_FEATURES,
722 .is_coffeelake = true,
723 .gt = 2,
724
725 .num_slices = 1,
726 .num_subslices = { 3, },
727 .l3_banks = 4,
728 };
729
730 static const struct gen_device_info gen_device_info_cfl_gt3 = {
731 GEN9_FEATURES,
732 .is_coffeelake = true,
733 .gt = 3,
734
735 .num_slices = 2,
736 .num_subslices = { 3, 3, },
737 .l3_banks = 8,
738 };
739
/* Gen 10 thread counts, timestamp frequency and URB configuration.
 *
 * Used after GEN8_FEATURES by GEN10_FEATURES(), so the fields listed here
 * replace the gen 8 values.  No trailing comma.
 */
#define GEN10_HW_INFO                              \
   .gen = 10,                                      \
   .timestamp_frequency = 19200000,                \
   .num_thread_per_eu = 7,                         \
   .max_vs_threads = 728,                          \
   .max_tcs_threads = 432,                         \
   .max_tes_threads = 624,                         \
   .max_gs_threads = 432,                          \
   .max_cs_threads = 56,                           \
   .urb = {                                        \
      .size = 256,                                 \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 64,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 3936,           \
         [MESA_SHADER_TESS_CTRL] = 896,            \
         [MESA_SHADER_TESS_EVAL] = 2064,           \
         [MESA_SHADER_GEOMETRY]  = 832,            \
      },                                           \
   }
762
/* Wrap a comma-separated list of per-slice subslice counts in braces so the
 * list can be passed as ONE macro argument (a bare "{ 3, 2 }" would be split
 * at its comma).  Uses the standard C99 variadic form rather than the GNU
 * named-variadic extension; the expansion is "{ <args>, }" either way.
 */
#define subslices(...) { __VA_ARGS__, }
764
/* Complete initializer list for a gen 10 table.
 *
 *   gt_level       value for .gt
 *   slice_count    value for .num_slices
 *   subslice_list  braced initializer for .num_subslices; build it with
 *                  subslices() so it survives as a single macro argument
 *   l3_bank_count  value for .l3_banks
 *
 * GEN8_FEATURES must come first because GEN10_HW_INFO and
 * has_sample_with_hiz override some of its fields.  No trailing comma.
 */
#define GEN10_FEATURES(gt_level, slice_count, subslice_list, l3_bank_count) \
   GEN8_FEATURES,                                  \
   GEN10_HW_INFO,                                  \
   .has_sample_with_hiz = true,                    \
   .gt = gt_level,                                 \
   .l3_banks = l3_bank_count,                      \
   .num_slices = slice_count,                      \
   .num_subslices = subslice_list
773
774 static const struct gen_device_info gen_device_info_cnl_2x8 = {
775 /* GT0.5 */
776 GEN10_FEATURES(1, 1, subslices(2), 2),
777 .is_cannonlake = true,
778 };
779
780 static const struct gen_device_info gen_device_info_cnl_3x8 = {
781 /* GT1 */
782 GEN10_FEATURES(1, 1, subslices(3), 3),
783 .is_cannonlake = true,
784 };
785
786 static const struct gen_device_info gen_device_info_cnl_4x8 = {
787 /* GT 1.5 */
788 GEN10_FEATURES(1, 2, subslices(2, 2), 6),
789 .is_cannonlake = true,
790 };
791
792 static const struct gen_device_info gen_device_info_cnl_5x8 = {
793 /* GT2 */
794 GEN10_FEATURES(2, 2, subslices(3, 2), 6),
795 .is_cannonlake = true,
796 };
797
/* Gen 11 thread limits and URB configuration.
 *
 * Used after GEN8_FEATURES by GEN11_FEATURES(), so the fields listed here
 * (including has_pln = false) replace the gen 8 values.  No trailing comma.
 */
#define GEN11_HW_INFO                              \
   .gen = 11,                                      \
   .has_pln = false,                               \
   .max_vs_threads = 364,                          \
   .max_tcs_threads = 224,                         \
   .max_tes_threads = 364,                         \
   .max_gs_threads = 224,                          \
   .max_cs_threads = 56,                           \
   .urb = {                                        \
      .size = 1024,                                \
      .min_entries = {                             \
         [MESA_SHADER_VERTEX]    = 64,             \
         [MESA_SHADER_TESS_EVAL] = 34,             \
      },                                           \
      .max_entries = {                             \
         [MESA_SHADER_VERTEX]    = 2384,           \
         [MESA_SHADER_TESS_CTRL] = 1032,           \
         [MESA_SHADER_TESS_EVAL] = 2384,           \
         [MESA_SHADER_GEOMETRY]  = 1032,           \
      },                                           \
   }
819
/* Complete initializer list for a gen 11 table.
 *
 *   gt_level       value for .gt
 *   slice_count    value for .num_slices
 *   subslice_list  braced initializer for .num_subslices; build it with
 *                  subslices() so it survives as a single macro argument
 *   l3_bank_count  value for .l3_banks
 *
 * GEN8_FEATURES must come first because GEN11_HW_INFO and the two "false"
 * flags below override some of its fields.  No trailing comma.
 */
#define GEN11_FEATURES(gt_level, slice_count, subslice_list, l3_bank_count) \
   GEN8_FEATURES,                                  \
   GEN11_HW_INFO,                                  \
   .has_64bit_types = false,                       \
   .has_integer_dword_mul = false,                 \
   .gt = gt_level,                                 \
   .l3_banks = l3_bank_count,                      \
   .num_slices = slice_count,                      \
   .num_subslices = subslice_list
827
828 static const struct gen_device_info gen_device_info_icl_8x8 = {
829 GEN11_FEATURES(2, 1, subslices(8), 8),
830 };
831
832 static const struct gen_device_info gen_device_info_icl_6x8 = {
833 GEN11_FEATURES(1, 1, subslices(6), 6),
834 };
835
836 static const struct gen_device_info gen_device_info_icl_4x8 = {
837 GEN11_FEATURES(1, 1, subslices(4), 6),
838 };
839
840 static const struct gen_device_info gen_device_info_icl_1x8 = {
841 GEN11_FEATURES(1, 1, subslices(1), 6),
842 };
843
844 bool
845 gen_get_device_info(int devid, struct gen_device_info *devinfo)
846 {
847 switch (devid) {
848 #undef CHIPSET
849 #define CHIPSET(id, family, name) \
850 case id: *devinfo = gen_device_info_##family; break;
851 #include "pci_ids/i965_pci_ids.h"
852 default:
853 fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid);
854 return false;
855 }
856
857 /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
858 *
859 * "Scratch Space per slice is computed based on 4 sub-slices. SW must
860 * allocate scratch space enough so that each slice has 4 slices allowed."
861 *
862 * The equivalent internal documentation says that this programming note
863 * applies to all Gen9+ platforms.
864 *
865 * The hardware typically calculates the scratch space pointer by taking
866 * the base address, and adding per-thread-scratch-space * thread ID.
867 * Extra padding can be necessary depending how the thread IDs are
868 * calculated for a particular shader stage.
869 */
870
871 switch(devinfo->gen) {
872 case 9:
873 case 10:
874 devinfo->max_wm_threads = 64 /* threads-per-PSD */
875 * devinfo->num_slices
876 * 4; /* effective subslices per slice */
877 break;
878 case 11:
879 devinfo->max_wm_threads = 128 /* threads-per-PSD */
880 * devinfo->num_slices
881 * 8; /* subslices per slice */
882 break;
883 default:
884 break;
885 }
886
887 assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
888
889 return true;
890 }
891
892 const char *
893 gen_get_device_name(int devid)
894 {
895 switch (devid) {
896 #undef CHIPSET
897 #define CHIPSET(id, family, name) case id: return name;
898 #include "pci_ids/i965_pci_ids.h"
899 default:
900 return NULL;
901 }
902 }