intel/dev: print out error when platform is not found by name
[mesa.git] / src / intel / dev / gen_device_info.c
1 /*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <unistd.h>
30 #include "gen_device_info.h"
31 #include "compiler/shader_enums.h"
32 #include "intel/common/gen_gem.h"
33 #include "util/bitscan.h"
34 #include "util/macros.h"
35
36 #include "drm-uapi/i915_drm.h"
37
/**
 * Get the PCI ID for the device name.
 *
 * The name is compared exactly (case-sensitively) against a fixed table of
 * short platform names.  When the name is not in the table, an error listing
 * every supported name is printed to stderr.
 *
 * \param name  Short platform name such as "skl"; NULL is tolerated.
 *
 * Returns -1 if the device is not known or if \p name is NULL.
 */
int
gen_device_name_to_pci_device_id(const char *name)
{
   static const struct {
      const char *name;
      int pci_id;
   } name_map[] = {
      { "brw", 0x2a02 },
      { "g4x", 0x2a42 },
      { "ilk", 0x0042 },
      { "snb", 0x0126 },
      { "ivb", 0x016a },
      { "hsw", 0x0d2e },
      { "byt", 0x0f33 },
      { "bdw", 0x162e },
      { "chv", 0x22B3 },
      { "skl", 0x1912 },
      { "bxt", 0x5A85 },
      { "kbl", 0x5912 },
      { "aml", 0x591C },
      { "glk", 0x3185 },
      { "cfl", 0x3E9B },
      { "whl", 0x3EA1 },
      { "cml", 0x9b41 },
      { "cnl", 0x5a52 },
      { "icl", 0x8a52 },
      { "ehl", 0x4500 },
      { "jsl", 0x4E71 },
      { "tgl", 0x9a49 },
   };

   /* Both strcmp() and the "%s" conversion below are undefined for NULL. */
   if (name == NULL)
      return -1;

   for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
      if (!strcmp(name_map[i].name, name))
         return name_map[i].pci_id;
   }

   /* Not found: print the full list of names that would have matched. */
   fprintf(stderr, "Unknown platform '%s'. Supported names: %s",
           name, name_map[0].name);
   for (unsigned i = 1; i < ARRAY_SIZE(name_map); i++)
      fprintf(stderr, ", %s", name_map[i].name);
   fprintf(stderr, "\n");

   return -1;
}
87
/**
 * Get the overridden PCI ID for the device. This is set with the
 * INTEL_DEVID_OVERRIDE environment variable, which may hold either a number
 * (any base accepted by strtol() with base 0) or a platform name known to
 * gen_device_name_to_pci_device_id().
 *
 * The variable is only honoured when the effective and real user IDs match.
 *
 * Returns -1 if the override is not set.
 */
static int
get_pci_device_id_override(void)
{
   if (geteuid() != getuid())
      return -1;

   const char *devid_override = getenv("INTEL_DEVID_OVERRIDE");
   if (!devid_override)
      return -1;

   /* Try the numeric form first.  The name lookup prints an error for any
    * string it does not know, so a plain PCI ID must not reach it.
    */
   char *end;
   const long numeric_id = strtol(devid_override, &end, 0);
   if (end != devid_override && *end == '\0')
      return numeric_id;

   /* Not fully numeric: treat it as a platform name.  An unknown name still
    * falls back to whatever strtol() managed to parse.
    */
   const int id = gen_device_name_to_pci_device_id(devid_override);
   return id >= 0 ? id : numeric_id;
}
107
108 static const struct gen_device_info gen_device_info_i965 = {
109 .gen = 4,
110 .has_negative_rhw_bug = true,
111 .num_slices = 1,
112 .num_subslices = { 1, },
113 .num_eu_per_subslice = 8,
114 .num_thread_per_eu = 4,
115 .max_vs_threads = 16,
116 .max_gs_threads = 2,
117 .max_wm_threads = 8 * 4,
118 .urb = {
119 .size = 256,
120 },
121 .timestamp_frequency = 12500000,
122 .simulator_id = -1,
123 };
124
125 static const struct gen_device_info gen_device_info_g4x = {
126 .gen = 4,
127 .has_pln = true,
128 .has_compr4 = true,
129 .has_surface_tile_offset = true,
130 .is_g4x = true,
131 .num_slices = 1,
132 .num_subslices = { 1, },
133 .num_eu_per_subslice = 10,
134 .num_thread_per_eu = 5,
135 .max_vs_threads = 32,
136 .max_gs_threads = 2,
137 .max_wm_threads = 10 * 5,
138 .urb = {
139 .size = 384,
140 },
141 .timestamp_frequency = 12500000,
142 .simulator_id = -1,
143 };
144
145 static const struct gen_device_info gen_device_info_ilk = {
146 .gen = 5,
147 .has_pln = true,
148 .has_compr4 = true,
149 .has_surface_tile_offset = true,
150 .num_slices = 1,
151 .num_subslices = { 1, },
152 .num_eu_per_subslice = 12,
153 .num_thread_per_eu = 6,
154 .max_vs_threads = 72,
155 .max_gs_threads = 32,
156 .max_wm_threads = 12 * 6,
157 .urb = {
158 .size = 1024,
159 },
160 .timestamp_frequency = 12500000,
161 .simulator_id = -1,
162 };
163
164 static const struct gen_device_info gen_device_info_snb_gt1 = {
165 .gen = 6,
166 .gt = 1,
167 .has_hiz_and_separate_stencil = true,
168 .has_llc = true,
169 .has_pln = true,
170 .has_surface_tile_offset = true,
171 .needs_unlit_centroid_workaround = true,
172 .num_slices = 1,
173 .num_subslices = { 1, },
174 .num_eu_per_subslice = 6,
175 .num_thread_per_eu = 6, /* Not confirmed */
176 .max_vs_threads = 24,
177 .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
178 .max_wm_threads = 40,
179 .urb = {
180 .size = 32,
181 .min_entries = {
182 [MESA_SHADER_VERTEX] = 24,
183 },
184 .max_entries = {
185 [MESA_SHADER_VERTEX] = 256,
186 [MESA_SHADER_GEOMETRY] = 256,
187 },
188 },
189 .timestamp_frequency = 12500000,
190 .simulator_id = -1,
191 };
192
193 static const struct gen_device_info gen_device_info_snb_gt2 = {
194 .gen = 6,
195 .gt = 2,
196 .has_hiz_and_separate_stencil = true,
197 .has_llc = true,
198 .has_pln = true,
199 .has_surface_tile_offset = true,
200 .needs_unlit_centroid_workaround = true,
201 .num_slices = 1,
202 .num_subslices = { 1, },
203 .num_eu_per_subslice = 12,
204 .num_thread_per_eu = 6, /* Not confirmed */
205 .max_vs_threads = 60,
206 .max_gs_threads = 60,
207 .max_wm_threads = 80,
208 .urb = {
209 .size = 64,
210 .min_entries = {
211 [MESA_SHADER_VERTEX] = 24,
212 },
213 .max_entries = {
214 [MESA_SHADER_VERTEX] = 256,
215 [MESA_SHADER_GEOMETRY] = 256,
216 },
217 },
218 .timestamp_frequency = 12500000,
219 .simulator_id = -1,
220 };
221
/* Designated initializers shared by every Gen7 table entry.  Entries may
 * override individual fields by listing them after this macro.
 */
#define GEN7_FEATURES                                 \
   .gen = 7,                                          \
   .timestamp_frequency = 12500000,                   \
   .has_llc = true,                                   \
   .has_pln = true,                                   \
   .has_64bit_float = true,                           \
   .has_surface_tile_offset = true,                   \
   .has_hiz_and_separate_stencil = true,              \
   .must_use_separate_stencil = true
231
232 static const struct gen_device_info gen_device_info_ivb_gt1 = {
233 GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
234 .num_slices = 1,
235 .num_subslices = { 1, },
236 .num_eu_per_subslice = 6,
237 .num_thread_per_eu = 6,
238 .l3_banks = 2,
239 .max_vs_threads = 36,
240 .max_tcs_threads = 36,
241 .max_tes_threads = 36,
242 .max_gs_threads = 36,
243 .max_wm_threads = 48,
244 .max_cs_threads = 36,
245 .urb = {
246 .size = 128,
247 .min_entries = {
248 [MESA_SHADER_VERTEX] = 32,
249 [MESA_SHADER_TESS_EVAL] = 10,
250 },
251 .max_entries = {
252 [MESA_SHADER_VERTEX] = 512,
253 [MESA_SHADER_TESS_CTRL] = 32,
254 [MESA_SHADER_TESS_EVAL] = 288,
255 [MESA_SHADER_GEOMETRY] = 192,
256 },
257 },
258 .simulator_id = 7,
259 };
260
261 static const struct gen_device_info gen_device_info_ivb_gt2 = {
262 GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
263 .num_slices = 1,
264 .num_subslices = { 1, },
265 .num_eu_per_subslice = 12,
266 .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
267 * @max_wm_threads ... */
268 .l3_banks = 4,
269 .max_vs_threads = 128,
270 .max_tcs_threads = 128,
271 .max_tes_threads = 128,
272 .max_gs_threads = 128,
273 .max_wm_threads = 172,
274 .max_cs_threads = 64,
275 .urb = {
276 .size = 256,
277 .min_entries = {
278 [MESA_SHADER_VERTEX] = 32,
279 [MESA_SHADER_TESS_EVAL] = 10,
280 },
281 .max_entries = {
282 [MESA_SHADER_VERTEX] = 704,
283 [MESA_SHADER_TESS_CTRL] = 64,
284 [MESA_SHADER_TESS_EVAL] = 448,
285 [MESA_SHADER_GEOMETRY] = 320,
286 },
287 },
288 .simulator_id = 7,
289 };
290
291 static const struct gen_device_info gen_device_info_byt = {
292 GEN7_FEATURES, .is_baytrail = true, .gt = 1,
293 .num_slices = 1,
294 .num_subslices = { 1, },
295 .num_eu_per_subslice = 4,
296 .num_thread_per_eu = 8,
297 .l3_banks = 1,
298 .has_llc = false,
299 .max_vs_threads = 36,
300 .max_tcs_threads = 36,
301 .max_tes_threads = 36,
302 .max_gs_threads = 36,
303 .max_wm_threads = 48,
304 .max_cs_threads = 32,
305 .urb = {
306 .size = 128,
307 .min_entries = {
308 [MESA_SHADER_VERTEX] = 32,
309 [MESA_SHADER_TESS_EVAL] = 10,
310 },
311 .max_entries = {
312 [MESA_SHADER_VERTEX] = 512,
313 [MESA_SHADER_TESS_CTRL] = 32,
314 [MESA_SHADER_TESS_EVAL] = 288,
315 [MESA_SHADER_GEOMETRY] = 192,
316 },
317 },
318 .simulator_id = 10,
319 };
320
/* Initializers shared by every Haswell entry: the Gen7 set plus the
 * Haswell-only flags.
 */
#define HSW_FEATURES                      \
   GEN7_FEATURES,                         \
   .is_haswell = true,                    \
   .has_resource_streamer = true,         \
   .supports_simd16_3src = true
326
327 static const struct gen_device_info gen_device_info_hsw_gt1 = {
328 HSW_FEATURES, .gt = 1,
329 .num_slices = 1,
330 .num_subslices = { 1, },
331 .num_eu_per_subslice = 10,
332 .num_thread_per_eu = 7,
333 .l3_banks = 2,
334 .max_vs_threads = 70,
335 .max_tcs_threads = 70,
336 .max_tes_threads = 70,
337 .max_gs_threads = 70,
338 .max_wm_threads = 102,
339 .max_cs_threads = 70,
340 .urb = {
341 .size = 128,
342 .min_entries = {
343 [MESA_SHADER_VERTEX] = 32,
344 [MESA_SHADER_TESS_EVAL] = 10,
345 },
346 .max_entries = {
347 [MESA_SHADER_VERTEX] = 640,
348 [MESA_SHADER_TESS_CTRL] = 64,
349 [MESA_SHADER_TESS_EVAL] = 384,
350 [MESA_SHADER_GEOMETRY] = 256,
351 },
352 },
353 .simulator_id = 9,
354 };
355
356 static const struct gen_device_info gen_device_info_hsw_gt2 = {
357 HSW_FEATURES, .gt = 2,
358 .num_slices = 1,
359 .num_subslices = { 2, },
360 .num_eu_per_subslice = 10,
361 .num_thread_per_eu = 7,
362 .l3_banks = 4,
363 .max_vs_threads = 280,
364 .max_tcs_threads = 256,
365 .max_tes_threads = 280,
366 .max_gs_threads = 256,
367 .max_wm_threads = 204,
368 .max_cs_threads = 70,
369 .urb = {
370 .size = 256,
371 .min_entries = {
372 [MESA_SHADER_VERTEX] = 64,
373 [MESA_SHADER_TESS_EVAL] = 10,
374 },
375 .max_entries = {
376 [MESA_SHADER_VERTEX] = 1664,
377 [MESA_SHADER_TESS_CTRL] = 128,
378 [MESA_SHADER_TESS_EVAL] = 960,
379 [MESA_SHADER_GEOMETRY] = 640,
380 },
381 },
382 .simulator_id = 9,
383 };
384
385 static const struct gen_device_info gen_device_info_hsw_gt3 = {
386 HSW_FEATURES, .gt = 3,
387 .num_slices = 2,
388 .num_subslices = { 2, },
389 .num_eu_per_subslice = 10,
390 .num_thread_per_eu = 7,
391 .l3_banks = 8,
392 .max_vs_threads = 280,
393 .max_tcs_threads = 256,
394 .max_tes_threads = 280,
395 .max_gs_threads = 256,
396 .max_wm_threads = 408,
397 .max_cs_threads = 70,
398 .urb = {
399 .size = 512,
400 .min_entries = {
401 [MESA_SHADER_VERTEX] = 64,
402 [MESA_SHADER_TESS_EVAL] = 10,
403 },
404 .max_entries = {
405 [MESA_SHADER_VERTEX] = 1664,
406 [MESA_SHADER_TESS_CTRL] = 128,
407 [MESA_SHADER_TESS_EVAL] = 960,
408 [MESA_SHADER_GEOMETRY] = 640,
409 },
410 },
411 .simulator_id = 9,
412 };
413
/* Initializers shared by Gen8 entries; later generations also start from
 * this set and override fields after it.
 *
 * It's unclear how well supported sampling from the hiz buffer is on GEN8,
 * so keep things conservative for now and set has_sample_with_hiz = false.
 */
#define GEN8_FEATURES                                 \
   .gen = 8,                                          \
   .timestamp_frequency = 12500000,                   \
   .has_llc = true,                                   \
   .has_pln = true,                                   \
   .has_64bit_float = true,                           \
   .has_64bit_int = true,                             \
   .has_integer_dword_mul = true,                     \
   .has_resource_streamer = true,                     \
   .has_surface_tile_offset = true,                   \
   .has_hiz_and_separate_stencil = true,              \
   .must_use_separate_stencil = true,                 \
   .has_sample_with_hiz = false,                      \
   .supports_simd16_3src = true,                      \
   .num_thread_per_eu = 7,                            \
   .max_vs_threads = 504,                             \
   .max_tcs_threads = 504,                            \
   .max_tes_threads = 504,                            \
   .max_gs_threads = 504,                             \
   .max_wm_threads = 384
437
438 static const struct gen_device_info gen_device_info_bdw_gt1 = {
439 GEN8_FEATURES, .gt = 1,
440 .is_broadwell = true,
441 .num_slices = 1,
442 .num_subslices = { 2, },
443 .num_eu_per_subslice = 6,
444 .l3_banks = 2,
445 .max_cs_threads = 42,
446 .urb = {
447 .size = 192,
448 .min_entries = {
449 [MESA_SHADER_VERTEX] = 64,
450 [MESA_SHADER_TESS_EVAL] = 34,
451 },
452 .max_entries = {
453 [MESA_SHADER_VERTEX] = 2560,
454 [MESA_SHADER_TESS_CTRL] = 504,
455 [MESA_SHADER_TESS_EVAL] = 1536,
456 /* Reduced from 960, seems to be similar to the bug on Gen9 GT1. */
457 [MESA_SHADER_GEOMETRY] = 690,
458 },
459 },
460 .simulator_id = 11,
461 };
462
463 static const struct gen_device_info gen_device_info_bdw_gt2 = {
464 GEN8_FEATURES, .gt = 2,
465 .is_broadwell = true,
466 .num_slices = 1,
467 .num_subslices = { 3, },
468 .num_eu_per_subslice = 8,
469 .l3_banks = 4,
470 .max_cs_threads = 56,
471 .urb = {
472 .size = 384,
473 .min_entries = {
474 [MESA_SHADER_VERTEX] = 64,
475 [MESA_SHADER_TESS_EVAL] = 34,
476 },
477 .max_entries = {
478 [MESA_SHADER_VERTEX] = 2560,
479 [MESA_SHADER_TESS_CTRL] = 504,
480 [MESA_SHADER_TESS_EVAL] = 1536,
481 [MESA_SHADER_GEOMETRY] = 960,
482 },
483 },
484 .simulator_id = 11,
485 };
486
487 static const struct gen_device_info gen_device_info_bdw_gt3 = {
488 GEN8_FEATURES, .gt = 3,
489 .is_broadwell = true,
490 .num_slices = 2,
491 .num_subslices = { 3, 3, },
492 .num_eu_per_subslice = 8,
493 .l3_banks = 8,
494 .max_cs_threads = 56,
495 .urb = {
496 .size = 384,
497 .min_entries = {
498 [MESA_SHADER_VERTEX] = 64,
499 [MESA_SHADER_TESS_EVAL] = 34,
500 },
501 .max_entries = {
502 [MESA_SHADER_VERTEX] = 2560,
503 [MESA_SHADER_TESS_CTRL] = 504,
504 [MESA_SHADER_TESS_EVAL] = 1536,
505 [MESA_SHADER_GEOMETRY] = 960,
506 },
507 },
508 .simulator_id = 11,
509 };
510
511 static const struct gen_device_info gen_device_info_chv = {
512 GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
513 .has_llc = false,
514 .has_integer_dword_mul = false,
515 .num_slices = 1,
516 .num_subslices = { 2, },
517 .num_eu_per_subslice = 8,
518 .l3_banks = 2,
519 .max_vs_threads = 80,
520 .max_tcs_threads = 80,
521 .max_tes_threads = 80,
522 .max_gs_threads = 80,
523 .max_wm_threads = 128,
524 .max_cs_threads = 6 * 7,
525 .urb = {
526 .size = 192,
527 .min_entries = {
528 [MESA_SHADER_VERTEX] = 34,
529 [MESA_SHADER_TESS_EVAL] = 34,
530 },
531 .max_entries = {
532 [MESA_SHADER_VERTEX] = 640,
533 [MESA_SHADER_TESS_CTRL] = 80,
534 [MESA_SHADER_TESS_EVAL] = 384,
535 [MESA_SHADER_GEOMETRY] = 256,
536 },
537 },
538 .simulator_id = 13,
539 };
540
/* Gen9 thread limits, timestamp frequency and URB defaults.  Meant to be
 * listed after GEN8_FEATURES so that these values replace the Gen8 ones.
 */
#define GEN9_HW_INFO                                  \
   .gen = 9,                                          \
   .timestamp_frequency = 12000000,                   \
   .max_vs_threads = 336,                             \
   .max_tcs_threads = 336,                            \
   .max_tes_threads = 336,                            \
   .max_gs_threads = 336,                             \
   .max_cs_threads = 56,                              \
   .urb = {                                           \
      .size = 384,                                    \
      .min_entries = {                                \
         [MESA_SHADER_VERTEX]    = 64,                \
         [MESA_SHADER_TESS_EVAL] = 34,                \
      },                                              \
      .max_entries = {                                \
         [MESA_SHADER_VERTEX]    = 1856,              \
         [MESA_SHADER_TESS_CTRL] = 672,               \
         [MESA_SHADER_TESS_EVAL] = 1120,              \
         [MESA_SHADER_GEOMETRY]  = 640,               \
      },                                              \
   }
562
/* Initializers shared by the Gen9 "LP" entries.  Everything listed after
 * GEN8_FEATURES and GEN9_HW_INFO overrides what those two macros set.
 */
#define GEN9_LP_FEATURES                              \
   GEN8_FEATURES,                                     \
   GEN9_HW_INFO,                                      \
   .gt = 1,                                           \
   .timestamp_frequency = 19200000,                   \
   .has_llc = false,                                  \
   .has_integer_dword_mul = false,                    \
   .has_sample_with_hiz = true,                       \
   .num_slices = 1,                                   \
   .num_thread_per_eu = 6,                            \
   .max_vs_threads = 112,                             \
   .max_tcs_threads = 112,                            \
   .max_tes_threads = 112,                            \
   .max_gs_threads = 112,                             \
   .max_cs_threads = 6 * 6,                           \
   .urb = {                                           \
      .size = 192,                                    \
      .min_entries = {                                \
         [MESA_SHADER_VERTEX]    = 34,                \
         [MESA_SHADER_TESS_EVAL] = 34,                \
      },                                              \
      .max_entries = {                                \
         [MESA_SHADER_VERTEX]    = 704,               \
         [MESA_SHADER_TESS_CTRL] = 256,               \
         [MESA_SHADER_TESS_EVAL] = 416,               \
         [MESA_SHADER_GEOMETRY]  = 256,               \
      },                                              \
   }
591
/* Gen9 LP variant with three subslices of six EUs each. */
#define GEN9_LP_FEATURES_3X6              \
   GEN9_LP_FEATURES,                      \
   .num_eu_per_subslice = 6,              \
   .num_subslices = { 3, }
596
/* Gen9 LP variant with two subslices of six EUs each.  The thread limits
 * and URB block replace the ones set by GEN9_LP_FEATURES.
 */
#define GEN9_LP_FEATURES_2X6                          \
   GEN9_LP_FEATURES,                                  \
   .num_eu_per_subslice = 6,                          \
   .num_subslices = { 2, },                           \
   .max_vs_threads = 56,                              \
   .max_tcs_threads = 56,                             \
   .max_tes_threads = 56,                             \
   .max_gs_threads = 56,                              \
   .max_cs_threads = 6 * 6,                           \
   .urb = {                                           \
      .size = 128,                                    \
      .min_entries = {                                \
         [MESA_SHADER_VERTEX]    = 34,                \
         [MESA_SHADER_TESS_EVAL] = 34,                \
      },                                              \
      .max_entries = {                                \
         [MESA_SHADER_VERTEX]    = 352,               \
         [MESA_SHADER_TESS_CTRL] = 128,               \
         [MESA_SHADER_TESS_EVAL] = 208,               \
         [MESA_SHADER_GEOMETRY]  = 128,               \
      },                                              \
   }
619
/* Initializers shared by the non-LP Gen9 entries: the Gen8 set, the Gen9
 * hardware limits, and sampling from HiZ turned on.
 */
#define GEN9_FEATURES                     \
   GEN8_FEATURES,                         \
   GEN9_HW_INFO,                          \
   .has_sample_with_hiz = true
624
625 static const struct gen_device_info gen_device_info_skl_gt1 = {
626 GEN9_FEATURES, .gt = 1,
627 .is_skylake = true,
628 .num_slices = 1,
629 .num_subslices = { 2, },
630 .num_eu_per_subslice = 6,
631 .l3_banks = 2,
632 .urb.size = 192,
633 /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
634 * leading to some vertices to go missing if we use too much URB.
635 */
636 .urb.max_entries[MESA_SHADER_VERTEX] = 928,
637 .simulator_id = 12,
638 };
639
640 static const struct gen_device_info gen_device_info_skl_gt2 = {
641 GEN9_FEATURES, .gt = 2,
642 .is_skylake = true,
643 .num_slices = 1,
644 .num_subslices = { 3, },
645 .num_eu_per_subslice = 8,
646 .l3_banks = 4,
647 .simulator_id = 12,
648 };
649
650 static const struct gen_device_info gen_device_info_skl_gt3 = {
651 GEN9_FEATURES, .gt = 3,
652 .is_skylake = true,
653 .num_slices = 2,
654 .num_subslices = { 3, 3, },
655 .num_eu_per_subslice = 8,
656 .l3_banks = 8,
657 .simulator_id = 12,
658 };
659
660 static const struct gen_device_info gen_device_info_skl_gt4 = {
661 GEN9_FEATURES, .gt = 4,
662 .is_skylake = true,
663 .num_slices = 3,
664 .num_subslices = { 3, 3, 3, },
665 .num_eu_per_subslice = 8,
666 .l3_banks = 12,
667 /* From the "L3 Allocation and Programming" documentation:
668 *
669 * "URB is limited to 1008KB due to programming restrictions. This is not a
670 * restriction of the L3 implementation, but of the FF and other clients.
671 * Therefore, in a GT4 implementation it is possible for the programmed
672 * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
673 * only 1008KB of this will be used."
674 */
675 .urb.size = 1008 / 3,
676 .simulator_id = 12,
677 };
678
679 static const struct gen_device_info gen_device_info_bxt = {
680 GEN9_LP_FEATURES_3X6,
681 .is_broxton = true,
682 .l3_banks = 2,
683 .simulator_id = 14,
684 };
685
686 static const struct gen_device_info gen_device_info_bxt_2x6 = {
687 GEN9_LP_FEATURES_2X6,
688 .is_broxton = true,
689 .l3_banks = 1,
690 .simulator_id = 14,
691 };
692 /*
693 * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
694 * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
695 */
696
697 static const struct gen_device_info gen_device_info_kbl_gt1 = {
698 GEN9_FEATURES,
699 .is_kabylake = true,
700 .gt = 1,
701
702 .max_cs_threads = 7 * 6,
703 .urb.size = 192,
704 .num_slices = 1,
705 .num_subslices = { 2, },
706 .num_eu_per_subslice = 6,
707 .l3_banks = 2,
708 /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
709 * leading to some vertices to go missing if we use too much URB.
710 */
711 .urb.max_entries[MESA_SHADER_VERTEX] = 928,
712 .simulator_id = 16,
713 };
714
715 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
716 GEN9_FEATURES,
717 .is_kabylake = true,
718 .gt = 1,
719
720 .max_cs_threads = 7 * 6,
721 .num_slices = 1,
722 .num_subslices = { 3, },
723 .num_eu_per_subslice = 6,
724 .l3_banks = 4,
725 .simulator_id = 16,
726 };
727
728 static const struct gen_device_info gen_device_info_kbl_gt2 = {
729 GEN9_FEATURES,
730 .is_kabylake = true,
731 .gt = 2,
732
733 .num_slices = 1,
734 .num_subslices = { 3, },
735 .num_eu_per_subslice = 8,
736 .l3_banks = 4,
737 .simulator_id = 16,
738 };
739
740 static const struct gen_device_info gen_device_info_kbl_gt3 = {
741 GEN9_FEATURES,
742 .is_kabylake = true,
743 .gt = 3,
744
745 .num_slices = 2,
746 .num_subslices = { 3, 3, },
747 .num_eu_per_subslice = 8,
748 .l3_banks = 8,
749 .simulator_id = 16,
750 };
751
752 static const struct gen_device_info gen_device_info_kbl_gt4 = {
753 GEN9_FEATURES,
754 .is_kabylake = true,
755 .gt = 4,
756
757 /*
758 * From the "L3 Allocation and Programming" documentation:
759 *
760 * "URB is limited to 1008KB due to programming restrictions. This
761 * is not a restriction of the L3 implementation, but of the FF and
762 * other clients. Therefore, in a GT4 implementation it is
763 * possible for the programmed allocation of the L3 data array to
764 * provide 3*384KB=1152KB for URB, but only 1008KB of this
765 * will be used."
766 */
767 .urb.size = 1008 / 3,
768 .num_slices = 3,
769 .num_subslices = { 3, 3, 3, },
770 .num_eu_per_subslice = 8,
771 .l3_banks = 12,
772 .simulator_id = 16,
773 };
774
775 static const struct gen_device_info gen_device_info_glk = {
776 GEN9_LP_FEATURES_3X6,
777 .is_geminilake = true,
778 .l3_banks = 2,
779 .simulator_id = 17,
780 };
781
782 static const struct gen_device_info gen_device_info_glk_2x6 = {
783 GEN9_LP_FEATURES_2X6,
784 .is_geminilake = true,
785 .l3_banks = 2,
786 .simulator_id = 17,
787 };
788
789 static const struct gen_device_info gen_device_info_cfl_gt1 = {
790 GEN9_FEATURES,
791 .is_coffeelake = true,
792 .gt = 1,
793
794 .num_slices = 1,
795 .num_subslices = { 2, },
796 .num_eu_per_subslice = 6,
797 .l3_banks = 2,
798 .urb.size = 192,
799 /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
800 * leading to some vertices to go missing if we use too much URB.
801 */
802 .urb.max_entries[MESA_SHADER_VERTEX] = 928,
803 .simulator_id = 24,
804 };
805 static const struct gen_device_info gen_device_info_cfl_gt2 = {
806 GEN9_FEATURES,
807 .is_coffeelake = true,
808 .gt = 2,
809
810 .num_slices = 1,
811 .num_subslices = { 3, },
812 .num_eu_per_subslice = 8,
813 .l3_banks = 4,
814 .simulator_id = 24,
815 };
816
817 static const struct gen_device_info gen_device_info_cfl_gt3 = {
818 GEN9_FEATURES,
819 .is_coffeelake = true,
820 .gt = 3,
821
822 .num_slices = 2,
823 .num_subslices = { 3, 3, },
824 .num_eu_per_subslice = 8,
825 .l3_banks = 8,
826 .simulator_id = 24,
827 };
828
/* Gen10 thread limits, timestamp frequency and URB defaults.  Meant to be
 * listed after GEN8_FEATURES so that these values replace the Gen8 ones.
 */
#define GEN10_HW_INFO                                 \
   .gen = 10,                                         \
   .timestamp_frequency = 19200000,                   \
   .num_thread_per_eu = 7,                            \
   .max_vs_threads = 728,                             \
   .max_tcs_threads = 432,                            \
   .max_tes_threads = 624,                            \
   .max_gs_threads = 432,                             \
   .max_cs_threads = 56,                              \
   .urb = {                                           \
      .size = 256,                                    \
      .min_entries = {                                \
         [MESA_SHADER_VERTEX]    = 64,                \
         [MESA_SHADER_TESS_EVAL] = 34,                \
      },                                              \
      .max_entries = {                                \
         [MESA_SHADER_VERTEX]    = 3936,              \
         [MESA_SHADER_TESS_CTRL] = 896,               \
         [MESA_SHADER_TESS_EVAL] = 2064,              \
         [MESA_SHADER_GEOMETRY]  = 832,               \
      },                                              \
   }
851
/* Wrap a comma-separated list of per-slice subslice counts in a brace
 * initializer, so that the list can be passed as a single macro argument.
 * At least one count must be given.  Uses the standard C99 variadic form
 * rather than the GNU named-variadic extension.
 */
#define subslices(...) { __VA_ARGS__, }
853
/* Full initializer set for a Gen10 entry.
 *
 * _gt, _slices and _l3 fill .gt, .num_slices and .l3_banks; _subslices is a
 * brace initializer for .num_subslices (see subslices()).
 */
#define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \
   GEN8_FEATURES,                                     \
   GEN10_HW_INFO,                                     \
   .has_sample_with_hiz = true,                       \
   .gt = _gt,                                         \
   .l3_banks = _l3,                                   \
   .num_slices = _slices,                             \
   .num_subslices = _subslices,                       \
   .num_eu_per_subslice = 8
863
864 static const struct gen_device_info gen_device_info_cnl_gt0_5 = {
865 /* GT0.5 */
866 GEN10_FEATURES(1, 1, subslices(2), 2),
867 .is_cannonlake = true,
868 .simulator_id = 15,
869 };
870
871 static const struct gen_device_info gen_device_info_cnl_gt1 = {
872 /* GT1 */
873 GEN10_FEATURES(1, 1, subslices(3), 3),
874 .is_cannonlake = true,
875 .simulator_id = 15,
876 };
877
878 static const struct gen_device_info gen_device_info_cnl_gt1_5 = {
879 /* GT 1.5 */
880 GEN10_FEATURES(1, 2, subslices(2, 2), 6),
881 .is_cannonlake = true,
882 .simulator_id = 15,
883 };
884
885 static const struct gen_device_info gen_device_info_cnl_gt2 = {
886 /* GT2 */
887 GEN10_FEATURES(2, 2, subslices(3, 2), 6),
888 .is_cannonlake = true,
889 .simulator_id = 15,
890 };
891
/* Gen11 generation number, PLN flag and thread limits.  Meant to be listed
 * after GEN8_FEATURES so that these values replace the Gen8 ones.
 */
#define GEN11_HW_INFO                     \
   .gen = 11,                             \
   .has_pln = false,                      \
   .max_vs_threads = 364,                 \
   .max_tcs_threads = 224,                \
   .max_tes_threads = 364,                \
   .max_gs_threads = 224,                 \
   .max_cs_threads = 56
900
/* Full initializer set for a Gen11 entry (URB excluded).
 *
 * _gt, _slices and _l3 fill .gt, .num_slices and .l3_banks; _subslices is a
 * brace initializer for .num_subslices (see subslices()).
 */
#define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \
   GEN8_FEATURES,                                     \
   GEN11_HW_INFO,                                     \
   .has_64bit_float = false,                          \
   .has_64bit_int = false,                            \
   .has_integer_dword_mul = false,                    \
   .has_sample_with_hiz = false,                      \
   .gt = _gt,                                         \
   .l3_banks = _l3,                                   \
   .num_slices = _slices,                             \
   .num_subslices = _subslices,                       \
   .num_eu_per_subslice = 8
911
/* Gen11 URB entry limits, for use inside a `.urb = { ... }` initializer. */
#define GEN11_URB_MIN_MAX_ENTRIES                     \
   .min_entries = {                                   \
      [MESA_SHADER_VERTEX]    = 64,                   \
      [MESA_SHADER_TESS_EVAL] = 34,                   \
   },                                                 \
   .max_entries = {                                   \
      [MESA_SHADER_VERTEX]    = 2384,                 \
      [MESA_SHADER_TESS_CTRL] = 1032,                 \
      [MESA_SHADER_TESS_EVAL] = 2384,                 \
      [MESA_SHADER_GEOMETRY]  = 1032,                 \
   }
923
924 static const struct gen_device_info gen_device_info_icl_gt2 = {
925 GEN11_FEATURES(2, 1, subslices(8), 8),
926 .urb = {
927 .size = 1024,
928 GEN11_URB_MIN_MAX_ENTRIES,
929 },
930 .simulator_id = 19,
931 };
932
933 static const struct gen_device_info gen_device_info_icl_gt1_5 = {
934 GEN11_FEATURES(1, 1, subslices(6), 6),
935 .urb = {
936 .size = 768,
937 GEN11_URB_MIN_MAX_ENTRIES,
938 },
939 .simulator_id = 19,
940 };
941
942 static const struct gen_device_info gen_device_info_icl_gt1 = {
943 GEN11_FEATURES(1, 1, subslices(4), 6),
944 .urb = {
945 .size = 768,
946 GEN11_URB_MIN_MAX_ENTRIES,
947 },
948 .simulator_id = 19,
949 };
950
951 static const struct gen_device_info gen_device_info_icl_gt0_5 = {
952 GEN11_FEATURES(1, 1, subslices(1), 6),
953 .urb = {
954 .size = 768,
955 GEN11_URB_MIN_MAX_ENTRIES,
956 },
957 .simulator_id = 19,
958 };
959
960 static const struct gen_device_info gen_device_info_ehl_7 = {
961 GEN11_FEATURES(1, 1, subslices(4), 4),
962 .is_elkhartlake = true,
963 .urb = {
964 .size = 512,
965 .min_entries = {
966 [MESA_SHADER_VERTEX] = 64,
967 [MESA_SHADER_TESS_EVAL] = 34,
968 },
969 .max_entries = {
970 [MESA_SHADER_VERTEX] = 2384,
971 [MESA_SHADER_TESS_CTRL] = 1032,
972 [MESA_SHADER_TESS_EVAL] = 2384,
973 [MESA_SHADER_GEOMETRY] = 1032,
974 },
975 },
976 .disable_ccs_repack = true,
977 .simulator_id = 28,
978 };
979
980 static const struct gen_device_info gen_device_info_ehl_6 = {
981 GEN11_FEATURES(1, 1, subslices(4), 4),
982 .is_elkhartlake = true,
983 .urb = {
984 .size = 512,
985 .min_entries = {
986 [MESA_SHADER_VERTEX] = 64,
987 [MESA_SHADER_TESS_EVAL] = 34,
988 },
989 .max_entries = {
990 [MESA_SHADER_VERTEX] = 2384,
991 [MESA_SHADER_TESS_CTRL] = 1032,
992 [MESA_SHADER_TESS_EVAL] = 2384,
993 [MESA_SHADER_GEOMETRY] = 1032,
994 },
995 },
996 .disable_ccs_repack = true,
997 .num_eu_per_subslice = 6,
998 .simulator_id = 28,
999 };
1000
1001 static const struct gen_device_info gen_device_info_ehl_5 = {
1002 GEN11_FEATURES(1, 1, subslices(4), 4),
1003 .is_elkhartlake = true,
1004 .urb = {
1005 .size = 512,
1006 .min_entries = {
1007 [MESA_SHADER_VERTEX] = 64,
1008 [MESA_SHADER_TESS_EVAL] = 34,
1009 },
1010 .max_entries = {
1011 [MESA_SHADER_VERTEX] = 2384,
1012 [MESA_SHADER_TESS_CTRL] = 1032,
1013 [MESA_SHADER_TESS_EVAL] = 2384,
1014 [MESA_SHADER_GEOMETRY] = 1032,
1015 },
1016 },
1017 .disable_ccs_repack = true,
1018 .num_eu_per_subslice = 4,
1019 .simulator_id = 28,
1020 };
1021
1022 static const struct gen_device_info gen_device_info_ehl_4 = {
1023 GEN11_FEATURES(1, 1, subslices(2), 4),
1024 .is_elkhartlake = true,
1025 .urb = {
1026 .size = 512,
1027 .min_entries = {
1028 [MESA_SHADER_VERTEX] = 64,
1029 [MESA_SHADER_TESS_EVAL] = 34,
1030 },
1031 .max_entries = {
1032 [MESA_SHADER_VERTEX] = 2384,
1033 [MESA_SHADER_TESS_CTRL] = 1032,
1034 [MESA_SHADER_TESS_EVAL] = 2384,
1035 [MESA_SHADER_GEOMETRY] = 1032,
1036 },
1037 },
1038 .disable_ccs_repack = true,
1039 .num_eu_per_subslice =4,
1040 .simulator_id = 28,
1041 };
1042
/* Gen12 URB entry limits, for use inside a `.urb = { ... }` initializer. */
#define GEN12_URB_MIN_MAX_ENTRIES                     \
   .min_entries = {                                   \
      [MESA_SHADER_VERTEX]    = 64,                   \
      [MESA_SHADER_TESS_EVAL] = 34,                   \
   },                                                 \
   .max_entries = {                                   \
      [MESA_SHADER_VERTEX]    = 3576,                 \
      [MESA_SHADER_TESS_CTRL] = 1548,                 \
      [MESA_SHADER_TESS_EVAL] = 3576,                 \
      [MESA_SHADER_GEOMETRY]  = 1548,                 \
   }
1054
/* Gen12 feature flags, thread limits and URB entry limits.  Meant to be
 * listed after GEN8_FEATURES so that these values replace the Gen8 ones.
 */
#define GEN12_HW_INFO                                 \
   .gen = 12,                                         \
   .has_aux_map = true,                               \
   .has_pln = false,                                  \
   .has_sample_with_hiz = false,                      \
   .max_vs_threads = 546,                             \
   .max_tcs_threads = 336,                            \
   .max_tes_threads = 546,                            \
   .max_gs_threads = 336,                             \
   .max_cs_threads = 112, /* threads per DSS */       \
   .urb = {                                           \
      GEN12_URB_MIN_MAX_ENTRIES,                      \
   }
1068
/* Full initializer set for a Gen12 entry.
 *
 * _gt, _slices and _l3 fill .gt, .num_slices and .l3_banks; _dual_subslices
 * is a brace initializer for .num_subslices (see dual_subslices()).  The URB
 * size is 512 when _gt is 1 and 1024 otherwise; it is written after
 * GEN12_HW_INFO so that it adds to, rather than is wiped by, that macro's
 * `.urb` initializer.
 */
#define GEN12_FEATURES(_gt, _slices, _dual_subslices, _l3) \
   GEN8_FEATURES,                                          \
   GEN12_HW_INFO,                                          \
   .simulator_id = 22,                                     \
   .has_64bit_float = false,                               \
   .has_64bit_int = false,                                 \
   .has_integer_dword_mul = false,                         \
   .gt = _gt,                                              \
   .l3_banks = _l3,                                        \
   .num_slices = _slices,                                  \
   .num_subslices = _dual_subslices,                       \
   .num_eu_per_subslice = 16,                              \
   .urb.size = (_gt) == 1 ? 512 : 1024
1080
/* Wrap a comma-separated list of per-slice dual-subslice counts in a brace
 * initializer, so that the list can be passed as a single macro argument.
 * At least one count must be given.  Uses the standard C99 variadic form
 * rather than the GNU named-variadic extension.
 */
#define dual_subslices(...) { __VA_ARGS__, }
1082
1083 static const struct gen_device_info gen_device_info_tgl_gt1 = {
1084 GEN12_FEATURES(1, 1, dual_subslices(2), 8),
1085 };
1086
1087 static const struct gen_device_info gen_device_info_tgl_gt2 = {
1088 GEN12_FEATURES(2, 1, dual_subslices(6), 8),
1089 };
1090
1091 static void
1092 gen_device_info_set_eu_mask(struct gen_device_info *devinfo,
1093 unsigned slice,
1094 unsigned subslice,
1095 unsigned eu_mask)
1096 {
1097 unsigned subslice_offset = slice * devinfo->eu_slice_stride +
1098 subslice * devinfo->eu_subslice_stride;
1099
1100 for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) {
1101 devinfo->eu_masks[subslice_offset + b_eu] =
1102 (((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff;
1103 }
1104 }
1105
1106 /* Generate slice/subslice/eu masks from number of
1107 * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info
1108 * structure.
1109 *
1110 * These can be overridden with values reported by the kernel either from
1111 * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+
1112 * through the i915 query uapi.
1113 */
1114 static void
1115 fill_masks(struct gen_device_info *devinfo)
1116 {
1117 devinfo->slice_masks = (1U << devinfo->num_slices) - 1;
1118
1119 /* Subslice masks */
1120 unsigned max_subslices = 0;
1121 for (int s = 0; s < devinfo->num_slices; s++)
1122 max_subslices = MAX2(devinfo->num_subslices[s], max_subslices);
1123 devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
1124
1125 for (int s = 0; s < devinfo->num_slices; s++) {
1126 devinfo->subslice_masks[s * devinfo->subslice_slice_stride] =
1127 (1U << devinfo->num_subslices[s]) - 1;
1128 }
1129
1130 /* EU masks */
1131 devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);
1132 devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride;
1133
1134 for (int s = 0; s < devinfo->num_slices; s++) {
1135 for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) {
1136 gen_device_info_set_eu_mask(devinfo, s, ss,
1137 (1U << devinfo->num_eu_per_subslice) - 1);
1138 }
1139 }
1140 }
1141
1142 static void
1143 reset_masks(struct gen_device_info *devinfo)
1144 {
1145 devinfo->subslice_slice_stride = 0;
1146 devinfo->eu_subslice_stride = 0;
1147 devinfo->eu_slice_stride = 0;
1148
1149 devinfo->num_slices = 0;
1150 devinfo->num_eu_per_subslice = 0;
1151 memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1152
1153 memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1154 memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1155 memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1156 memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1157 }
1158
1159 static void
1160 update_from_topology(struct gen_device_info *devinfo,
1161 const struct drm_i915_query_topology_info *topology)
1162 {
1163 reset_masks(devinfo);
1164
1165 devinfo->subslice_slice_stride = topology->subslice_stride;
1166
1167 devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8);
1168 devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride;
1169
1170 assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));
1171 memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));
1172 devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1173
1174 uint32_t subslice_mask_len =
1175 topology->max_slices * topology->subslice_stride;
1176 assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len);
1177 memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset],
1178 subslice_mask_len);
1179
1180 uint32_t n_subslices = 0;
1181 for (int s = 0; s < topology->max_slices; s++) {
1182 if ((devinfo->slice_masks & (1 << s)) == 0)
1183 continue;
1184
1185 for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1186 devinfo->num_subslices[s] +=
1187 __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1188 }
1189 n_subslices += devinfo->num_subslices[s];
1190 }
1191 assert(n_subslices > 0);
1192
1193 if (devinfo->gen == 11) {
1194 /* On ICL we only have one slice */
1195 assert(devinfo->slice_masks == 1);
1196
1197 /* Count the number of subslices on each pixel pipe. Assume that
1198 * subslices 0-3 are on pixel pipe 0, and 4-7 are on pixel pipe 1.
1199 */
1200 unsigned subslices = devinfo->subslice_masks[0];
1201 unsigned ss = 0;
1202 while (subslices > 0) {
1203 if (subslices & 1)
1204 devinfo->ppipe_subslices[ss >= 4 ? 1 : 0] += 1;
1205 subslices >>= 1;
1206 ss++;
1207 }
1208 }
1209
1210 if (devinfo->gen == 12 && devinfo->num_slices == 1) {
1211 if (n_subslices >= 6) {
1212 assert(n_subslices == 6);
1213 devinfo->l3_banks = 8;
1214 } else if (n_subslices > 2) {
1215 devinfo->l3_banks = 6;
1216 } else {
1217 devinfo->l3_banks = 4;
1218 }
1219 }
1220
1221 uint32_t eu_mask_len =
1222 topology->eu_stride * topology->max_subslices * topology->max_slices;
1223 assert(sizeof(devinfo->eu_masks) >= eu_mask_len);
1224 memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len);
1225
1226 uint32_t n_eus = 0;
1227 for (int b = 0; b < eu_mask_len; b++)
1228 n_eus += __builtin_popcount(devinfo->eu_masks[b]);
1229
1230 devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1231 }
1232
1233 static bool
1234 update_from_masks(struct gen_device_info *devinfo, uint32_t slice_mask,
1235 uint32_t subslice_mask, uint32_t n_eus)
1236 {
1237 struct drm_i915_query_topology_info *topology;
1238
1239 assert((slice_mask & 0xff) == slice_mask);
1240
1241 size_t data_length = 100;
1242
1243 topology = calloc(1, sizeof(*topology) + data_length);
1244 if (!topology)
1245 return false;
1246
1247 topology->max_slices = util_last_bit(slice_mask);
1248 topology->max_subslices = util_last_bit(subslice_mask);
1249
1250 topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8);
1251 topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8);
1252
1253 uint32_t n_subslices = __builtin_popcount(slice_mask) *
1254 __builtin_popcount(subslice_mask);
1255 uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1256 uint32_t eu_mask = (1U << num_eu_per_subslice) - 1;
1257
1258 topology->eu_offset = topology->subslice_offset +
1259 DIV_ROUND_UP(topology->max_subslices, 8);
1260 topology->eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8);
1261
1262 /* Set slice mask in topology */
1263 for (int b = 0; b < topology->subslice_offset; b++)
1264 topology->data[b] = (slice_mask >> (b * 8)) & 0xff;
1265
1266 for (int s = 0; s < topology->max_slices; s++) {
1267
1268 /* Set subslice mask in topology */
1269 for (int b = 0; b < topology->subslice_stride; b++) {
1270 int subslice_offset = topology->subslice_offset +
1271 s * topology->subslice_stride + b;
1272
1273 topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff;
1274 }
1275
1276 /* Set eu mask in topology */
1277 for (int ss = 0; ss < topology->max_subslices; ss++) {
1278 for (int b = 0; b < topology->eu_stride; b++) {
1279 int eu_offset = topology->eu_offset +
1280 (s * topology->max_subslices + ss) * topology->eu_stride + b;
1281
1282 topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff;
1283 }
1284 }
1285 }
1286
1287 update_from_topology(devinfo, topology);
1288 free(topology);
1289
1290 return true;
1291 }
1292
1293 static bool
1294 getparam(int fd, uint32_t param, int *value)
1295 {
1296 int tmp;
1297
1298 struct drm_i915_getparam gp = {
1299 .param = param,
1300 .value = &tmp,
1301 };
1302
1303 int ret = gen_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
1304 if (ret != 0)
1305 return false;
1306
1307 *value = tmp;
1308 return true;
1309 }
1310
1311 bool
1312 gen_get_device_info_from_pci_id(int pci_id,
1313 struct gen_device_info *devinfo)
1314 {
1315 switch (pci_id) {
1316 #undef CHIPSET
1317 #define CHIPSET(id, family, fam_str, name) \
1318 case id: *devinfo = gen_device_info_##family; break;
1319 #include "pci_ids/i965_pci_ids.h"
1320 #include "pci_ids/iris_pci_ids.h"
1321 default:
1322 fprintf(stderr, "Driver does not support the 0x%x PCI ID.\n", pci_id);
1323 return false;
1324 }
1325
1326 fill_masks(devinfo);
1327
1328 /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
1329 *
1330 * "Scratch Space per slice is computed based on 4 sub-slices. SW must
1331 * allocate scratch space enough so that each slice has 4 slices allowed."
1332 *
1333 * The equivalent internal documentation says that this programming note
1334 * applies to all Gen9+ platforms.
1335 *
1336 * The hardware typically calculates the scratch space pointer by taking
1337 * the base address, and adding per-thread-scratch-space * thread ID.
1338 * Extra padding can be necessary depending how the thread IDs are
1339 * calculated for a particular shader stage.
1340 */
1341
1342 switch(devinfo->gen) {
1343 case 9:
1344 case 10:
1345 devinfo->max_wm_threads = 64 /* threads-per-PSD */
1346 * devinfo->num_slices
1347 * 4; /* effective subslices per slice */
1348 break;
1349 case 11:
1350 case 12:
1351 devinfo->max_wm_threads = 128 /* threads-per-PSD */
1352 * devinfo->num_slices
1353 * 8; /* subslices per slice */
1354 break;
1355 default:
1356 assert(devinfo->gen < 9);
1357 break;
1358 }
1359
1360 assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
1361
1362 devinfo->chipset_id = pci_id;
1363 return true;
1364 }
1365
1366 const char *
1367 gen_get_device_name(int devid)
1368 {
1369 switch (devid) {
1370 #undef CHIPSET
1371 #define CHIPSET(id, family, fam_str, name) case id: return name " (" fam_str ")"; break;
1372 #include "pci_ids/i965_pci_ids.h"
1373 #include "pci_ids/iris_pci_ids.h"
1374 default:
1375 return NULL;
1376 }
1377 }
1378
1379 /**
1380 * for gen8/gen9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology
1381 * (kernel 4.13+)
1382 */
1383 static bool
1384 getparam_topology(struct gen_device_info *devinfo, int fd)
1385 {
1386 int slice_mask = 0;
1387 if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask))
1388 return false;
1389
1390 int n_eus;
1391 if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus))
1392 return false;
1393
1394 int subslice_mask = 0;
1395 if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask))
1396 return false;
1397
1398 return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus);
1399 }
1400
1401 /**
1402 * preferred API for updating the topology in devinfo (kernel 4.17+)
1403 */
1404 static bool
1405 query_topology(struct gen_device_info *devinfo, int fd)
1406 {
1407 struct drm_i915_query_item item = {
1408 .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
1409 };
1410 struct drm_i915_query query = {
1411 .num_items = 1,
1412 .items_ptr = (uintptr_t) &item,
1413 };
1414
1415 if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query))
1416 return false;
1417
1418 if (item.length < 0)
1419 return false;
1420
1421 struct drm_i915_query_topology_info *topo_info =
1422 (struct drm_i915_query_topology_info *) calloc(1, item.length);
1423 item.data_ptr = (uintptr_t) topo_info;
1424
1425 if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query) ||
1426 item.length <= 0)
1427 return false;
1428
1429 update_from_topology(devinfo, topo_info);
1430
1431 free(topo_info);
1432
1433 return true;
1434
1435 }
1436
1437 bool
1438 gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
1439 {
1440 int devid = get_pci_device_id_override();
1441 if (devid > 0) {
1442 if (!gen_get_device_info_from_pci_id(devid, devinfo))
1443 return false;
1444 devinfo->no_hw = true;
1445 } else {
1446 /* query the device id */
1447 if (!getparam(fd, I915_PARAM_CHIPSET_ID, &devid))
1448 return false;
1449 if (!gen_get_device_info_from_pci_id(devid, devinfo))
1450 return false;
1451 devinfo->no_hw = false;
1452 }
1453
1454 /* remaining initializion queries the kernel for device info */
1455 if (devinfo->no_hw)
1456 return true;
1457
1458 int timestamp_frequency;
1459 if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY,
1460 &timestamp_frequency))
1461 devinfo->timestamp_frequency = timestamp_frequency;
1462 else if (devinfo->gen >= 10)
1463 /* gen10 and later requires the timestamp_frequency to be updated */
1464 return false;
1465
1466 if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision))
1467 devinfo->revision = 0;
1468
1469 if (!query_topology(devinfo, fd)) {
1470 if (devinfo->gen >= 10) {
1471 /* topology uAPI required for CNL+ (kernel 4.17+) */
1472 return false;
1473 }
1474
1475 /* else use the kernel 4.13+ api for gen8+. For older kernels, topology
1476 * will be wrong, affecting GPU metrics. In this case, fail silently.
1477 */
1478 getparam_topology(devinfo, fd);
1479 }
1480
1481 return true;
1482 }