src/intel/dev/gen_device_info.c

   1 /*
   2  * Copyright © 2013 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <assert.h>
  25 #include <stdbool.h>
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <unistd.h>
  30 #include "gen_device_info.h"
  31 #include "compiler/shader_enums.h"
  32 #include "intel/common/gen_gem.h"
  33 #include "util/bitscan.h"
  34 #include "util/macros.h"
  35
  36 #include "drm-uapi/i915_drm.h"
  37
  38 /**
  39  * Get the PCI ID for the device name.
  40  *
  41  * Returns -1 if the device is not known.
  42  */
  43 int
  44 gen_device_name_to_pci_device_id(const char *name)
  45 {
  46    static const struct {
  47       const char *name;
  48       int pci_id;
  49    } name_map[] = {
  50       { "brw", 0x2a02 },
  51       { "g4x", 0x2a42 },
  52       { "ilk", 0x0042 },
  53       { "snb", 0x0126 },
  54       { "ivb", 0x016a },
  55       { "hsw", 0x0d2e },
  56       { "byt", 0x0f33 },
  57       { "bdw", 0x162e },
  58       { "chv", 0x22B3 },
  59       { "skl", 0x1912 },
  60       { "bxt", 0x5A85 },
  61       { "kbl", 0x5912 },
  62       { "aml", 0x591C },
  63       { "glk", 0x3185 },
  64       { "cfl", 0x3E9B },
  65       { "whl", 0x3EA1 },
  66       { "cml", 0x9b41 },
  67       { "cnl", 0x5a52 },
  68       { "icl", 0x8a52 },
  69       { "tgl", 0x9a49 },
  70    };
  71
  72    for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
  73       if (!strcmp(name_map[i].name, name))
  74          return name_map[i].pci_id;
  75    }
  76
  77    return -1;
  78 }
  79
  80 /**
  81  * Get the overridden PCI ID for the device. This is set with the
  82  * INTEL_DEVID_OVERRIDE environment variable.
  83  *
  84  * Returns -1 if the override is not set.
  85  */
  86 static int
  87 get_pci_device_id_override(void)
  88 {
  89    if (geteuid() == getuid()) {
  90       const char *devid_override = getenv("INTEL_DEVID_OVERRIDE");
  91       if (devid_override) {
  92          const int id = gen_device_name_to_pci_device_id(devid_override);
  93          return id >= 0 ? id : strtol(devid_override, NULL, 0);
  94       }
  95    }
  96
  97    return -1;
  98 }
  99
  /* Device description for the "i965" table (.gen = 4): one slice with a
   * single subslice of 8 EUs and 4 threads per EU.  This is the only table
   * in this part of the file that sets has_negative_rhw_bug.
   */
  100 static const struct gen_device_info gen_device_info_i965 = {
  101    .gen = 4,
  102    .has_negative_rhw_bug = true,
  103    .num_slices = 1,
  104    .num_subslices = { 1, },
  105    .num_eu_per_subslice = 8,
  106    .num_thread_per_eu = 4,
  107    .max_vs_threads = 16,
  108    .max_gs_threads = 2,
  109    .max_wm_threads = 8 * 4,
  110    .urb = {
  111       .size = 256,
  112    },
  113    .timestamp_frequency = 12500000,
  114    .simulator_id = -1,
  115 };
 116
  /* Gen 4 table flagged is_g4x.  Compared with gen_device_info_i965 it
   * enables has_pln, has_compr4 and has_surface_tile_offset, does not set
   * has_negative_rhw_bug, and uses 10 EUs with 5 threads each.
   */
  117 static const struct gen_device_info gen_device_info_g4x = {
  118    .gen = 4,
  119    .has_pln = true,
  120    .has_compr4 = true,
  121    .has_surface_tile_offset = true,
  122    .is_g4x = true,
  123    .num_slices = 1,
  124    .num_subslices = { 1, },
  125    .num_eu_per_subslice = 10,
  126    .num_thread_per_eu = 5,
  127    .max_vs_threads = 32,
  128    .max_gs_threads = 2,
  129    .max_wm_threads = 10 * 5,
  130    .urb = {
  131       .size = 384,
  132    },
  133    .timestamp_frequency = 12500000,
  134    .simulator_id = -1,
  135 };
 136
  /* Table with .gen = 5: the same feature flags as the g4x table apart from
   * is_g4x, with 12 EUs of 6 threads each and a URB size value of 1024.
   */
  137 static const struct gen_device_info gen_device_info_ilk = {
  138    .gen = 5,
  139    .has_pln = true,
  140    .has_compr4 = true,
  141    .has_surface_tile_offset = true,
  142    .num_slices = 1,
  143    .num_subslices = { 1, },
  144    .num_eu_per_subslice = 12,
  145    .num_thread_per_eu = 6,
  146    .max_vs_threads = 72,
  147    .max_gs_threads = 32,
  148    .max_wm_threads = 12 * 6,
  149    .urb = {
  150       .size = 1024,
  151    },
  152    .timestamp_frequency = 12500000,
  153    .simulator_id = -1,
  154 };
 155
  /* Table with .gen = 6 and .gt = 1.  The gen 6 tables are the first from
   * the top of the file to set has_hiz_and_separate_stencil, has_llc and
   * needs_unlit_centroid_workaround, and to list per-stage URB entry limits
   * (vertex and geometry stages only).
   */
  156 static const struct gen_device_info gen_device_info_snb_gt1 = {
  157    .gen = 6,
  158    .gt = 1,
  159    .has_hiz_and_separate_stencil = true,
  160    .has_llc = true,
  161    .has_pln = true,
  162    .has_surface_tile_offset = true,
  163    .needs_unlit_centroid_workaround = true,
  164    .num_slices = 1,
  165    .num_subslices = { 1, },
  166    .num_eu_per_subslice = 6,
  167    .num_thread_per_eu = 6, /* Not confirmed */
  168    .max_vs_threads = 24,
  169    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
  170    .max_wm_threads = 40,
  171    .urb = {
  172       .size = 32,
  173       .min_entries = {
  174          [MESA_SHADER_VERTEX]   = 24,
  175       },
  176       .max_entries = {
  177          [MESA_SHADER_VERTEX]   = 256,
  178          [MESA_SHADER_GEOMETRY] = 256,
  179       },
  180    },
  181    .timestamp_frequency = 12500000,
  182    .simulator_id = -1,
  183 };
 184
  /* Table with .gen = 6 and .gt = 2: the same feature flags and URB entry
   * limits as the snb GT1 table, with twice the EU count (12) and URB size
   * value (64) and higher per-stage thread limits.
   */
  185 static const struct gen_device_info gen_device_info_snb_gt2 = {
  186    .gen = 6,
  187    .gt = 2,
  188    .has_hiz_and_separate_stencil = true,
  189    .has_llc = true,
  190    .has_pln = true,
  191    .has_surface_tile_offset = true,
  192    .needs_unlit_centroid_workaround = true,
  193    .num_slices = 1,
  194    .num_subslices = { 1, },
  195    .num_eu_per_subslice = 12,
  196    .num_thread_per_eu = 6, /* Not confirmed */
  197    .max_vs_threads = 60,
  198    .max_gs_threads = 60,
  199    .max_wm_threads = 80,
  200    .urb = {
  201       .size = 64,
  202       .min_entries = {
  203          [MESA_SHADER_VERTEX]   = 24,
  204       },
  205       .max_entries = {
  206          [MESA_SHADER_VERTEX]   = 256,
  207          [MESA_SHADER_GEOMETRY] = 256,
  208       },
  209    },
  210    .timestamp_frequency = 12500000,
  211    .simulator_id = -1,
  212 };
 213
  /* Designated-initializer fragment shared by the gen 7 tables below (ivb,
   * byt and, through HSW_FEATURES, hsw).  It expands without a trailing
   * comma, so users write "GEN7_FEATURES, ...".  A field named again after
   * the macro in a table (e.g. .has_llc in the byt table) takes the later
   * value, so the macro must stay first in each initializer.
   */
  214 #define GEN7_FEATURES                               \
  215    .gen = 7,                                        \
  216    .has_hiz_and_separate_stencil = true,            \
  217    .must_use_separate_stencil = true,               \
  218    .has_llc = true,                                 \
  219    .has_pln = true,                                 \
  220    .has_64bit_types = true,                         \
  221    .has_surface_tile_offset = true,                 \
  222    .timestamp_frequency = 12500000
 223
  /* GT1 table flagged is_ivybridge, built on GEN7_FEATURES: 1 slice with
   * 1 subslice, 6 EUs of 6 threads each and 2 L3 banks.  This is the first
   * table from the top of the file to give tessellation (TCS/TES) and
   * compute (CS) thread limits and tessellation URB entry limits.
   */
  224 static const struct gen_device_info gen_device_info_ivb_gt1 = {
  225    GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
  226    .num_slices = 1,
  227    .num_subslices = { 1, },
  228    .num_eu_per_subslice = 6,
  229    .num_thread_per_eu = 6,
  230    .l3_banks = 2,
  231    .max_vs_threads = 36,
  232    .max_tcs_threads = 36,
  233    .max_tes_threads = 36,
  234    .max_gs_threads = 36,
  235    .max_wm_threads = 48,
  236    .max_cs_threads = 36,
  237    .urb = {
  238       .size = 128,
  239       .min_entries = {
  240          [MESA_SHADER_VERTEX]    = 32,
  241          [MESA_SHADER_TESS_EVAL] = 10,
  242       },
  243       .max_entries = {
  244          [MESA_SHADER_VERTEX]    = 512,
  245          [MESA_SHADER_TESS_CTRL] = 32,
  246          [MESA_SHADER_TESS_EVAL] = 288,
  247          [MESA_SHADER_GEOMETRY]  = 192,
  248       },
  249    },
  250    .simulator_id = 7,
  251 };
 252
  /* GT2 table flagged is_ivybridge, built on GEN7_FEATURES: 1 slice with
   * 1 subslice, 12 EUs of 8 threads each and 4 L3 banks.  Shares
   * simulator_id 7 with the ivb GT1 table.
   */
  253 static const struct gen_device_info gen_device_info_ivb_gt2 = {
  254    GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
  255    .num_slices = 1,
  256    .num_subslices = { 1, },
  257    .num_eu_per_subslice = 12,
  258    .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
  259                             * @max_wm_threads ... */
  260    .l3_banks = 4,
  261    .max_vs_threads = 128,
  262    .max_tcs_threads = 128,
  263    .max_tes_threads = 128,
  264    .max_gs_threads = 128,
  265    .max_wm_threads = 172,
  266    .max_cs_threads = 64,
  267    .urb = {
  268       .size = 256,
  269       .min_entries = {
  270          [MESA_SHADER_VERTEX]    = 32,
  271          [MESA_SHADER_TESS_EVAL] = 10,
  272       },
  273       .max_entries = {
  274          [MESA_SHADER_VERTEX]    = 704,
  275          [MESA_SHADER_TESS_CTRL] = 64,
  276          [MESA_SHADER_TESS_EVAL] = 448,
  277          [MESA_SHADER_GEOMETRY]  = 320,
  278       },
  279    },
  280    .simulator_id = 7,
  281 };
 282
  /* GT1 table flagged is_baytrail, built on GEN7_FEATURES.  The later
   * ".has_llc = false" overrides the ".has_llc = true" that the macro
   * supplies, so it has to stay after GEN7_FEATURES.  Thread limits (except
   * CS) and URB values match the ivb GT1 table; EU layout is 4 EUs of
   * 8 threads with a single L3 bank.
   */
  283 static const struct gen_device_info gen_device_info_byt = {
  284    GEN7_FEATURES, .is_baytrail = true, .gt = 1,
  285    .num_slices = 1,
  286    .num_subslices = { 1, },
  287    .num_eu_per_subslice = 4,
  288    .num_thread_per_eu = 8,
  289    .l3_banks = 1,
  290    .has_llc = false,
  291    .max_vs_threads = 36,
  292    .max_tcs_threads = 36,
  293    .max_tes_threads = 36,
  294    .max_gs_threads = 36,
  295    .max_wm_threads = 48,
  296    .max_cs_threads = 32,
  297    .urb = {
  298       .size = 128,
  299       .min_entries = {
  300          [MESA_SHADER_VERTEX]    = 32,
  301          [MESA_SHADER_TESS_EVAL] = 10,
  302       },
  303       .max_entries = {
  304          [MESA_SHADER_VERTEX]    = 512,
  305          [MESA_SHADER_TESS_CTRL] = 32,
  306          [MESA_SHADER_TESS_EVAL] = 288,
  307          [MESA_SHADER_GEOMETRY]  = 192,
  308       },
  309    },
  310    .simulator_id = 10,
  311 };
 312
  /* Initializer fragment for the hsw tables: everything in GEN7_FEATURES
   * plus is_haswell, supports_simd16_3src and has_resource_streamer.  Like
   * GEN7_FEATURES it expands without a trailing comma.
   */
  313 #define HSW_FEATURES             \
  314    GEN7_FEATURES,                \
  315    .is_haswell = true,           \
  316    .supports_simd16_3src = true, \
  317    .has_resource_streamer = true
 318
  /* Haswell (HSW_FEATURES) GT1 table: 1 slice with 1 subslice, 10 EUs of
   * 7 threads each and 2 L3 banks.
   */
  319 static const struct gen_device_info gen_device_info_hsw_gt1 = {
  320    HSW_FEATURES, .gt = 1,
  321    .num_slices = 1,
  322    .num_subslices = { 1, },
  323    .num_eu_per_subslice = 10,
  324    .num_thread_per_eu = 7,
  325    .l3_banks = 2,
  326    .max_vs_threads = 70,
  327    .max_tcs_threads = 70,
  328    .max_tes_threads = 70,
  329    .max_gs_threads = 70,
  330    .max_wm_threads = 102,
  331    .max_cs_threads = 70,
  332    .urb = {
  333       .size = 128,
  334       .min_entries = {
  335          [MESA_SHADER_VERTEX]    = 32,
  336          [MESA_SHADER_TESS_EVAL] = 10,
  337       },
  338       .max_entries = {
  339          [MESA_SHADER_VERTEX]    = 640,
  340          [MESA_SHADER_TESS_CTRL] = 64,
  341          [MESA_SHADER_TESS_EVAL] = 384,
  342          [MESA_SHADER_GEOMETRY]  = 256,
  343       },
  344    },
  345    .simulator_id = 9,
  346 };
 347
  /* Haswell (HSW_FEATURES) GT2 table: 1 slice with 2 subslices, 10 EUs of
   * 7 threads each and 4 L3 banks.  max_wm_threads (204) is twice the GT1
   * value.
   */
  348 static const struct gen_device_info gen_device_info_hsw_gt2 = {
  349    HSW_FEATURES, .gt = 2,
  350    .num_slices = 1,
  351    .num_subslices = { 2, },
  352    .num_eu_per_subslice = 10,
  353    .num_thread_per_eu = 7,
  354    .l3_banks = 4,
  355    .max_vs_threads = 280,
  356    .max_tcs_threads = 256,
  357    .max_tes_threads = 280,
  358    .max_gs_threads = 256,
  359    .max_wm_threads = 204,
  360    .max_cs_threads = 70,
  361    .urb = {
  362       .size = 256,
  363       .min_entries = {
  364          [MESA_SHADER_VERTEX]    = 64,
  365          [MESA_SHADER_TESS_EVAL] = 10,
  366       },
  367       .max_entries = {
  368          [MESA_SHADER_VERTEX]    = 1664,
  369          [MESA_SHADER_TESS_CTRL] = 128,
  370          [MESA_SHADER_TESS_EVAL] = 960,
  371          [MESA_SHADER_GEOMETRY]  = 640,
  372       },
  373    },
  374    .simulator_id = 9,
  375 };
 376
  /* Haswell (HSW_FEATURES) GT3 table: 2 slices, 10 EUs of 7 threads each and
   * 8 L3 banks.  Per-stage limits match the GT2 table except
   * max_wm_threads (408) and urb.size (512), which are doubled.
   *
   * NOTE(review): num_slices is 2 but num_subslices lists a single entry
   * ({ 2, }), which leaves the second slice's count at zero.  The other
   * two-slice tables in this file (bdw_gt3, skl_gt3, kbl_gt3, cfl_gt3) give
   * one entry per slice -- confirm whether { 2, 2, } is intended here.
   */
  377 static const struct gen_device_info gen_device_info_hsw_gt3 = {
  378    HSW_FEATURES, .gt = 3,
  379    .num_slices = 2,
  380    .num_subslices = { 2, },
  381    .num_eu_per_subslice = 10,
  382    .num_thread_per_eu = 7,
  383    .l3_banks = 8,
  384    .max_vs_threads = 280,
  385    .max_tcs_threads = 256,
  386    .max_tes_threads = 280,
  387    .max_gs_threads = 256,
  388    .max_wm_threads = 408,
  389    .max_cs_threads = 70,
  390    .urb = {
  391       .size = 512,
  392       .min_entries = {
  393          [MESA_SHADER_VERTEX]    = 64,
  394          [MESA_SHADER_TESS_EVAL] = 10,
  395       },
  396       .max_entries = {
  397          [MESA_SHADER_VERTEX]    = 1664,
  398          [MESA_SHADER_TESS_CTRL] = 128,
  399          [MESA_SHADER_TESS_EVAL] = 960,
  400          [MESA_SHADER_GEOMETRY]  = 640,
  401       },
  402    },
  403    .simulator_id = 9,
  404 };
 405
  406 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
  407  * so keep things conservative for now and set has_sample_with_hiz = false.
  408  */
  /* Besides the gen 8 tables (bdw, chv), this fragment is also the first
   * element of GEN9_LP_FEATURES, GEN9_FEATURES, GEN10_FEATURES and
   * GEN11_FEATURES, which then name some of these fields again (.gen, thread
   * limits, has_sample_with_hiz, ...) to override the values given here.
   * It expands without a trailing comma.
   */
  409 #define GEN8_FEATURES                               \
  410    .gen = 8,                                        \
  411    .has_hiz_and_separate_stencil = true,            \
  412    .has_resource_streamer = true,                   \
  413    .must_use_separate_stencil = true,               \
  414    .has_llc = true,                                 \
  415    .has_sample_with_hiz = false,                    \
  416    .has_pln = true,                                 \
  417    .has_integer_dword_mul = true,                   \
  418    .has_64bit_types = true,                         \
  419    .supports_simd16_3src = true,                    \
  420    .has_surface_tile_offset = true,                 \
  421    .num_thread_per_eu = 7,                          \
  422    .max_vs_threads = 504,                           \
  423    .max_tcs_threads = 504,                          \
  424    .max_tes_threads = 504,                          \
  425    .max_gs_threads = 504,                           \
  426    .max_wm_threads = 384,                           \
  427    .timestamp_frequency = 12500000
 428
  /* GT1 table flagged is_broadwell: 1 slice with 2 subslices of 8 EUs and
   * 2 L3 banks.  Threads per EU and the VS/TCS/TES/GS/WM thread limits come
   * from GEN8_FEATURES; only max_cs_threads and the URB block are set here.
   */
  429 static const struct gen_device_info gen_device_info_bdw_gt1 = {
  430    GEN8_FEATURES, .gt = 1,
  431    .is_broadwell = true,
  432    .num_slices = 1,
  433    .num_subslices = { 2, },
  434    .num_eu_per_subslice = 8,
  435    .l3_banks = 2,
  436    .max_cs_threads = 42,
  437    .urb = {
  438       .size = 192,
  439       .min_entries = {
  440          [MESA_SHADER_VERTEX]    = 64,
  441          [MESA_SHADER_TESS_EVAL] = 34,
  442       },
  443       .max_entries = {
  444          [MESA_SHADER_VERTEX]    = 2560,
  445          [MESA_SHADER_TESS_CTRL] = 504,
  446          [MESA_SHADER_TESS_EVAL] = 1536,
  447          [MESA_SHADER_GEOMETRY]  = 960,
  448       },
  449    },
  450    .simulator_id = 11,
  451 };
 452
  /* GT2 table flagged is_broadwell: 1 slice with 3 subslices of 8 EUs and
   * 4 L3 banks.  Same URB entry limits as the bdw GT1 table with a larger
   * urb.size (384) and max_cs_threads (56).
   */
  453 static const struct gen_device_info gen_device_info_bdw_gt2 = {
  454    GEN8_FEATURES, .gt = 2,
  455    .is_broadwell = true,
  456    .num_slices = 1,
  457    .num_subslices = { 3, },
  458    .num_eu_per_subslice = 8,
  459    .l3_banks = 4,
  460    .max_cs_threads = 56,
  461    .urb = {
  462       .size = 384,
  463       .min_entries = {
  464          [MESA_SHADER_VERTEX]    = 64,
  465          [MESA_SHADER_TESS_EVAL] = 34,
  466       },
  467       .max_entries = {
  468          [MESA_SHADER_VERTEX]    = 2560,
  469          [MESA_SHADER_TESS_CTRL] = 504,
  470          [MESA_SHADER_TESS_EVAL] = 1536,
  471          [MESA_SHADER_GEOMETRY]  = 960,
  472       },
  473    },
  474    .simulator_id = 11,
  475 };
 476
  /* GT3 table flagged is_broadwell: 2 slices with 3 subslices each (one
   * num_subslices entry per slice), 8 EUs per subslice and 8 L3 banks.  The
   * URB block and max_cs_threads are identical to the bdw GT2 table.
   */
  477 static const struct gen_device_info gen_device_info_bdw_gt3 = {
  478    GEN8_FEATURES, .gt = 3,
  479    .is_broadwell = true,
  480    .num_slices = 2,
  481    .num_subslices = { 3, 3, },
  482    .num_eu_per_subslice = 8,
  483    .l3_banks = 8,
  484    .max_cs_threads = 56,
  485    .urb = {
  486       .size = 384,
  487       .min_entries = {
  488          [MESA_SHADER_VERTEX]    = 64,
  489          [MESA_SHADER_TESS_EVAL] = 34,
  490       },
  491       .max_entries = {
  492          [MESA_SHADER_VERTEX]    = 2560,
  493          [MESA_SHADER_TESS_CTRL] = 504,
  494          [MESA_SHADER_TESS_EVAL] = 1536,
  495          [MESA_SHADER_GEOMETRY]  = 960,
  496       },
  497    },
  498    .simulator_id = 11,
  499 };
 500
  /* GT1 table flagged is_cherryview, built on GEN8_FEATURES.  The fields
   * named after the macro override its defaults: has_llc and
   * has_integer_dword_mul are turned off and every per-stage thread limit
   * is replaced with a smaller value, so these lines must stay after
   * GEN8_FEATURES.
   */
  501 static const struct gen_device_info gen_device_info_chv = {
  502    GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
  503    .has_llc = false,
  504    .has_integer_dword_mul = false,
  505    .num_slices = 1,
  506    .num_subslices = { 2, },
  507    .num_eu_per_subslice = 8,
  508    .l3_banks = 2,
  509    .max_vs_threads = 80,
  510    .max_tcs_threads = 80,
  511    .max_tes_threads = 80,
  512    .max_gs_threads = 80,
  513    .max_wm_threads = 128,
  514    .max_cs_threads = 6 * 7,
  515    .urb = {
  516       .size = 192,
  517       .min_entries = {
  518          [MESA_SHADER_VERTEX]    = 34,
  519          [MESA_SHADER_TESS_EVAL] = 34,
  520       },
  521       .max_entries = {
  522          [MESA_SHADER_VERTEX]    = 640,
  523          [MESA_SHADER_TESS_CTRL] = 80,
  524          [MESA_SHADER_TESS_EVAL] = 384,
  525          [MESA_SHADER_GEOMETRY]  = 256,
  526       },
  527    },
  528    .simulator_id = 13,
  529 };
 530
  /* Gen 9 values layered on top of GEN8_FEATURES by GEN9_FEATURES and
   * GEN9_LP_FEATURES: it re-specifies .gen, the VS/GS/TCS/TES thread limits
   * and timestamp_frequency, and adds max_cs_threads and a default URB
   * block.  Expands without a trailing comma.
   */
  531 #define GEN9_HW_INFO                                \
  532    .gen = 9,                                        \
  533    .max_vs_threads = 336,                           \
  534    .max_gs_threads = 336,                           \
  535    .max_tcs_threads = 336,                          \
  536    .max_tes_threads = 336,                          \
  537    .max_cs_threads = 56,                            \
  538    .timestamp_frequency = 12000000,                 \
  539    .urb = {                                         \
  540       .size = 384,                                  \
  541       .min_entries = {                              \
  542          [MESA_SHADER_VERTEX]    = 64,              \
  543          [MESA_SHADER_TESS_EVAL] = 34,              \
  544       },                                            \
  545       .max_entries = {                              \
  546          [MESA_SHADER_VERTEX]    = 1856,            \
  547          [MESA_SHADER_TESS_CTRL] = 672,             \
  548          [MESA_SHADER_TESS_EVAL] = 1120,            \
  549          [MESA_SHADER_GEOMETRY]  = 640,             \
  550       },                                            \
  551    }
 552
  /* Common part of the GEN9_LP_FEATURES_3X6 / _2X6 fragments used by the bxt
   * and glk tables.  It starts from GEN8_FEATURES and GEN9_HW_INFO and then
   * names many of their fields again (has_integer_dword_mul, has_llc,
   * has_sample_with_hiz, num_thread_per_eu, thread limits,
   * timestamp_frequency, .urb) to replace those values; the order of the
   * entries is therefore significant.  Also fixes .gt = 1 and
   * .num_slices = 1.
   */
  553 #define GEN9_LP_FEATURES                           \
  554    GEN8_FEATURES,                                  \
  555    GEN9_HW_INFO,                                   \
  556    .has_integer_dword_mul = false,                 \
  557    .gt = 1,                                        \
  558    .has_llc = false,                               \
  559    .has_sample_with_hiz = true,                    \
  560    .num_slices = 1,                                \
  561    .num_thread_per_eu = 6,                         \
  562    .max_vs_threads = 112,                          \
  563    .max_tcs_threads = 112,                         \
  564    .max_tes_threads = 112,                         \
  565    .max_gs_threads = 112,                          \
  566    .max_cs_threads = 6 * 6,                        \
  567    .timestamp_frequency = 19200000,                \
  568    .urb = {                                        \
  569       .size = 192,                                 \
  570       .min_entries = {                             \
  571          [MESA_SHADER_VERTEX]    = 34,             \
  572          [MESA_SHADER_TESS_EVAL] = 34,             \
  573       },                                           \
  574       .max_entries = {                             \
  575          [MESA_SHADER_VERTEX]    = 704,            \
  576          [MESA_SHADER_TESS_CTRL] = 256,            \
  577          [MESA_SHADER_TESS_EVAL] = 416,            \
  578          [MESA_SHADER_GEOMETRY]  = 256,            \
  579       },                                           \
  580    }
 581
  /* GEN9_LP_FEATURES with a 3-subslice, 6-EUs-per-subslice layout; all other
   * values are taken unchanged from GEN9_LP_FEATURES.
   */
  582 #define GEN9_LP_FEATURES_3X6                       \
  583    GEN9_LP_FEATURES,                               \
  584    .num_subslices = { 3, },                        \
  585    .num_eu_per_subslice = 6
 586
  /* GEN9_LP_FEATURES with a 2-subslice, 6-EUs-per-subslice layout.  Unlike
   * the 3X6 variant it also replaces the VS/TCS/TES/GS thread limits (56)
   * and the whole .urb block with smaller values.  max_cs_threads is named
   * again with the same 6 * 6 value GEN9_LP_FEATURES already provides.
   */
  587 #define GEN9_LP_FEATURES_2X6                       \
  588    GEN9_LP_FEATURES,                               \
  589    .num_subslices = { 2, },                        \
  590    .num_eu_per_subslice = 6,                       \
  591    .max_vs_threads = 56,                           \
  592    .max_tcs_threads = 56,                          \
  593    .max_tes_threads = 56,                          \
  594    .max_gs_threads = 56,                           \
  595    .max_cs_threads = 6 * 6,                        \
  596    .urb = {                                        \
  597       .size = 128,                                 \
  598       .min_entries = {                             \
  599          [MESA_SHADER_VERTEX]    = 34,             \
  600          [MESA_SHADER_TESS_EVAL] = 34,             \
  601       },                                           \
  602       .max_entries = {                             \
  603          [MESA_SHADER_VERTEX]    = 352,            \
  604          [MESA_SHADER_TESS_CTRL] = 128,            \
  605          [MESA_SHADER_TESS_EVAL] = 208,            \
  606          [MESA_SHADER_GEOMETRY]  = 128,            \
  607       },                                           \
  608    }
 609
  /* Fragment used by the skl, kbl and cfl tables: GEN8_FEATURES, then the
   * GEN9_HW_INFO overrides, then has_sample_with_hiz switched from the
   * GEN8_FEATURES value (false) to true.
   */
  610 #define GEN9_FEATURES                               \
  611    GEN8_FEATURES,                                   \
  612    GEN9_HW_INFO,                                    \
  613    .has_sample_with_hiz = true
 614
  /* GT1 table flagged is_skylake: 1 slice with 2 subslices of 6 EUs and
   * 2 L3 banks.  The .urb.size and .urb.max_entries[...] lines are
   * member-wise overrides of the .urb block that GEN9_FEATURES supplies
   * through GEN9_HW_INFO, so they must come after the macro.
   */
  615 static const struct gen_device_info gen_device_info_skl_gt1 = {
  616    GEN9_FEATURES, .gt = 1,
  617    .is_skylake = true,
  618    .num_slices = 1,
  619    .num_subslices = { 2, },
  620    .num_eu_per_subslice = 6,
  621    .l3_banks = 2,
  622    .urb.size = 192,
  623    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
  624     * leading to some vertices to go missing if we use too much URB.
  625     */
  626    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
  627    .simulator_id = 12,
  628 };
 629
  /* GT2 table flagged is_skylake: 1 slice with 3 subslices of 8 EUs and
   * 4 L3 banks.  Thread limits and the URB block are the unmodified
   * GEN9_FEATURES defaults.
   */
  630 static const struct gen_device_info gen_device_info_skl_gt2 = {
  631    GEN9_FEATURES, .gt = 2,
  632    .is_skylake = true,
  633    .num_slices = 1,
  634    .num_subslices = { 3, },
  635    .num_eu_per_subslice = 8,
  636    .l3_banks = 4,
  637    .simulator_id = 12,
  638 };
 639
  /* GT3 table flagged is_skylake: 2 slices with 3 subslices each, 8 EUs per
   * subslice and 8 L3 banks.  Thread limits and the URB block are the
   * unmodified GEN9_FEATURES defaults.
   */
  640 static const struct gen_device_info gen_device_info_skl_gt3 = {
  641    GEN9_FEATURES, .gt = 3,
  642    .is_skylake = true,
  643    .num_slices = 2,
  644    .num_subslices = { 3, 3, },
  645    .num_eu_per_subslice = 8,
  646    .l3_banks = 8,
  647    .simulator_id = 12,
  648 };
 649
  /* GT4 table flagged is_skylake: 3 slices with 3 subslices each, 8 EUs per
   * subslice and 12 L3 banks.  urb.size overrides the GEN9_HW_INFO default
   * of 384 with 1008 / 3 (= 336), following the documentation quoted below.
   */
  650 static const struct gen_device_info gen_device_info_skl_gt4 = {
  651    GEN9_FEATURES, .gt = 4,
  652    .is_skylake = true,
  653    .num_slices = 3,
  654    .num_subslices = { 3, 3, 3, },
  655    .num_eu_per_subslice = 8,
  656    .l3_banks = 12,
  657    /* From the "L3 Allocation and Programming" documentation:
  658     *
  659     * "URB is limited to 1008KB due to programming restrictions.  This is not a
  660     * restriction of the L3 implementation, but of the FF and other clients.
  661     * Therefore, in a GT4 implementation it is possible for the programmed
  662     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
  663     * only 1008KB of this will be used."
  664     */
  665    .urb.size = 1008 / 3,
  666    .simulator_id = 12,
  667 };
 668
  /* Table flagged is_broxton using the 3-subslice GEN9_LP_FEATURES_3X6
   * layout with 2 L3 banks; .gt and .num_slices come from the macro.
   */
  669 static const struct gen_device_info gen_device_info_bxt = {
  670    GEN9_LP_FEATURES_3X6,
  671    .is_broxton = true,
  672    .l3_banks = 2,
  673    .simulator_id = 14,
  674 };
 675
  /* Table flagged is_broxton using the 2-subslice GEN9_LP_FEATURES_2X6
   * layout with 1 L3 bank; same simulator_id (14) as gen_device_info_bxt.
   */
  676 static const struct gen_device_info gen_device_info_bxt_2x6 = {
  677    GEN9_LP_FEATURES_2X6,
  678    .is_broxton = true,
  679    .l3_banks = 1,
  680    .simulator_id = 14,
  681 };
 682 /*
 683  * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
 684  * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
 685  */
 686
  /* GT1 table flagged is_kabylake: 1 slice with 2 subslices of 6 EUs and
   * 2 L3 banks.  Overrides three GEN9_FEATURES defaults: max_cs_threads
   * (7 * 6 instead of 56), urb.size (192) and the vertex max_entries value.
   */
  687 static const struct gen_device_info gen_device_info_kbl_gt1 = {
  688    GEN9_FEATURES,
  689    .is_kabylake = true,
  690    .gt = 1,
  691
  692    .max_cs_threads = 7 * 6,
  693    .urb.size = 192,
  694    .num_slices = 1,
  695    .num_subslices = { 2, },
  696    .num_eu_per_subslice = 6,
  697    .l3_banks = 2,
  698    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
  699     * leading to some vertices to go missing if we use too much URB.
  700     */
  701    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
  702    .simulator_id = 16,
  703 };
 704
  /* Table flagged is_kabylake: 1 slice with 3 subslices of 6 EUs and 4 L3
   * banks.  Overrides max_cs_threads (7 * 6) but keeps the default
   * GEN9_FEATURES URB block.
   *
   * NOTE(review): .gt is 1 here, the same value as gen_device_info_kbl_gt1,
   * even though the table is named "gt1_5" -- presumably because .gt holds
   * an integer; confirm.
   */
  705 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
  706    GEN9_FEATURES,
  707    .is_kabylake = true,
  708    .gt = 1,
  709
  710    .max_cs_threads = 7 * 6,
  711    .num_slices = 1,
  712    .num_subslices = { 3, },
  713    .num_eu_per_subslice = 6,
  714    .l3_banks = 4,
  715    .simulator_id = 16,
  716 };
 717
  /* GT2 table flagged is_kabylake: 1 slice with 3 subslices of 8 EUs and
   * 4 L3 banks, with all other values taken from GEN9_FEATURES.
   */
  718 static const struct gen_device_info gen_device_info_kbl_gt2 = {
  719    GEN9_FEATURES,
  720    .is_kabylake = true,
  721    .gt = 2,
  722
  723    .num_slices = 1,
  724    .num_subslices = { 3, },
  725    .num_eu_per_subslice = 8,
  726    .l3_banks = 4,
  727    .simulator_id = 16,
  728 };
 729
  /* GT3 table flagged is_kabylake: 2 slices with 3 subslices each, 8 EUs per
   * subslice and 8 L3 banks, with all other values taken from GEN9_FEATURES.
   */
  730 static const struct gen_device_info gen_device_info_kbl_gt3 = {
  731    GEN9_FEATURES,
  732    .is_kabylake = true,
  733    .gt = 3,
  734
  735    .num_slices = 2,
  736    .num_subslices = { 3, 3, },
  737    .num_eu_per_subslice = 8,
  738    .l3_banks = 8,
  739    .simulator_id = 16,
  740 };
 741
  /* GT4 table flagged is_kabylake: 3 slices with 3 subslices each, 8 EUs per
   * subslice and 12 L3 banks.  As in gen_device_info_skl_gt4, urb.size
   * overrides the GEN9_HW_INFO default with 1008 / 3 (= 336).
   */
  742 static const struct gen_device_info gen_device_info_kbl_gt4 = {
  743    GEN9_FEATURES,
  744    .is_kabylake = true,
  745    .gt = 4,
  746
  747    /*
  748     * From the "L3 Allocation and Programming" documentation:
  749     *
  750     * "URB is limited to 1008KB due to programming restrictions.  This
  751     *  is not a restriction of the L3 implementation, but of the FF and
  752     *  other clients.  Therefore, in a GT4 implementation it is
  753     *  possible for the programmed allocation of the L3 data array to
  754     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
  755     *  will be used."
  756     */
  757    .urb.size = 1008 / 3,
  758    .num_slices = 3,
  759    .num_subslices = { 3, 3, 3, },
  760    .num_eu_per_subslice = 8,
  761    .l3_banks = 12,
  762    .simulator_id = 16,
  763 };
 764
  /* Table flagged is_geminilake using the 3-subslice GEN9_LP_FEATURES_3X6
   * layout with 2 L3 banks; differs from gen_device_info_bxt only in the
   * platform flag and simulator_id (17).
   */
  765 static const struct gen_device_info gen_device_info_glk = {
  766    GEN9_LP_FEATURES_3X6,
  767    .is_geminilake = true,
  768    .l3_banks = 2,
  769    .simulator_id = 17,
  770 };
 771
  /* Table flagged is_geminilake using the 2-subslice GEN9_LP_FEATURES_2X6
   * layout.
   *
   * NOTE(review): l3_banks is 2 here, whereas gen_device_info_bxt_2x6 uses
   * l3_banks = 1 with the same GEN9_LP_FEATURES_2X6 layout -- confirm the
   * difference is intended.
   */
  772 static const struct gen_device_info gen_device_info_glk_2x6 = {
  773    GEN9_LP_FEATURES_2X6,
  774    .is_geminilake = true,
  775    .l3_banks = 2,
  776    .simulator_id = 17,
  777 };
 778
  /* GT1 table flagged is_coffeelake: 1 slice with 2 subslices of 6 EUs and
   * 2 L3 banks.  Applies the same urb.size (192) and vertex max_entries
   * (928) overrides as the skl and kbl GT1 tables.
   */
  779 static const struct gen_device_info gen_device_info_cfl_gt1 = {
  780    GEN9_FEATURES,
  781    .is_coffeelake = true,
  782    .gt = 1,
  783
  784    .num_slices = 1,
  785    .num_subslices = { 2, },
  786    .num_eu_per_subslice = 6,
  787    .l3_banks = 2,
  788    .urb.size = 192,
  789    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
  790     * leading to some vertices to go missing if we use too much URB.
  791     */
  792    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
  793    .simulator_id = 24,
  794 };
  /* GT2 table flagged is_coffeelake: 1 slice with 3 subslices of 8 EUs and
   * 4 L3 banks, with all other values taken from GEN9_FEATURES.
   */
  795 static const struct gen_device_info gen_device_info_cfl_gt2 = {
  796    GEN9_FEATURES,
  797    .is_coffeelake = true,
  798    .gt = 2,
  799
  800    .num_slices = 1,
  801    .num_subslices = { 3, },
  802    .num_eu_per_subslice = 8,
  803    .l3_banks = 4,
  804    .simulator_id = 24,
  805 };
 806
  /* GT3 table flagged is_coffeelake: 2 slices with 3 subslices each, 8 EUs
   * per subslice and 8 L3 banks, with all other values taken from
   * GEN9_FEATURES.
   */
  807 static const struct gen_device_info gen_device_info_cfl_gt3 = {
  808    GEN9_FEATURES,
  809    .is_coffeelake = true,
  810    .gt = 3,
  811
  812    .num_slices = 2,
  813    .num_subslices = { 3, 3, },
  814    .num_eu_per_subslice = 8,
  815    .l3_banks = 8,
  816    .simulator_id = 24,
  817 };
 818
  /* Gen 10 values that GEN10_FEATURES layers on top of GEN8_FEATURES: .gen,
   * threads per EU, per-stage thread limits, timestamp_frequency and a URB
   * block.  Expands without a trailing comma.
   */
  819 #define GEN10_HW_INFO                               \
  820    .gen = 10,                                       \
  821    .num_thread_per_eu = 7,                          \
  822    .max_vs_threads = 728,                           \
  823    .max_gs_threads = 432,                           \
  824    .max_tcs_threads = 432,                          \
  825    .max_tes_threads = 624,                          \
  826    .max_cs_threads = 56,                            \
  827    .timestamp_frequency = 19200000,                 \
  828    .urb = {                                         \
  829       .size = 256,                                  \
  830       .min_entries = {                              \
  831          [MESA_SHADER_VERTEX]    = 64,              \
  832          [MESA_SHADER_TESS_EVAL] = 34,              \
  833       },                                            \
  834       .max_entries = {                              \
  835       [MESA_SHADER_VERTEX]       = 3936,            \
  836       [MESA_SHADER_TESS_CTRL]    = 896,             \
  837       [MESA_SHADER_TESS_EVAL]    = 2064,            \
  838       [MESA_SHADER_GEOMETRY]     = 832,             \
  839       },                                            \
  840    }
 841
  /* Wraps its arguments in a brace-enclosed initializer list with a trailing
   * comma, so that a per-slice subslice list containing commas can be handed
   * to GEN10_FEATURES()/GEN11_FEATURES() as one macro argument, e.g.
   * subslices(2, 2) -> { 2, 2, }.  "args..." is the GNU named-variadic
   * macro form.
   */
  842 #define subslices(args...) { args, }
 843
  /* Initializer fragment for the cnl tables.
   *
   *   _gt        value for .gt
   *   _slices    value for .num_slices
   *   _subslices initializer for .num_subslices, built with subslices()
   *   _l3        value for .l3_banks
   *
   * Starts from GEN8_FEATURES, applies GEN10_HW_INFO on top, switches
   * has_sample_with_hiz to true and fixes num_eu_per_subslice at 8.
   */
  844 #define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \
  845    GEN8_FEATURES,                                   \
  846    GEN10_HW_INFO,                                   \
  847    .has_sample_with_hiz = true,                     \
  848    .gt = _gt,                                       \
  849    .num_slices = _slices,                           \
  850    .num_subslices = _subslices,                     \
  851    .num_eu_per_subslice = 8,                        \
  852    .l3_banks = _l3
 853
  /* Table flagged is_cannonlake: .gt = 1, 1 slice with 2 subslices and 2 L3
   * banks (arguments of GEN10_FEATURES in that order).
   */
  854 static const struct gen_device_info gen_device_info_cnl_2x8 = {
  855    /* GT0.5 */
  856    GEN10_FEATURES(1, 1, subslices(2), 2),
  857    .is_cannonlake = true,
  858    .simulator_id = 15,
  859 };
 860
  /* Table flagged is_cannonlake: .gt = 1, 1 slice with 3 subslices and 3 L3
   * banks (arguments of GEN10_FEATURES in that order).
   */
  861 static const struct gen_device_info gen_device_info_cnl_3x8 = {
  862    /* GT1 */
  863    GEN10_FEATURES(1, 1, subslices(3), 3),
  864    .is_cannonlake = true,
  865    .simulator_id = 15,
  866 };
 867
  /* Table flagged is_cannonlake: .gt = 1, 2 slices with 2 subslices each and
   * 6 L3 banks (arguments of GEN10_FEATURES in that order).
   */
  868 static const struct gen_device_info gen_device_info_cnl_4x8 = {
  869    /* GT 1.5 */
  870    GEN10_FEATURES(1, 2, subslices(2, 2), 6),
  871    .is_cannonlake = true,
  872    .simulator_id = 15,
  873 };
 874
  /* Table flagged is_cannonlake: .gt = 2, 2 slices with 3 and 2 subslices
   * respectively and 6 L3 banks (arguments of GEN10_FEATURES in that order).
   */
  875 static const struct gen_device_info gen_device_info_cnl_5x8 = {
  876    /* GT2 */
  877    GEN10_FEATURES(2, 2, subslices(3, 2), 6),
  878    .is_cannonlake = true,
  879    .simulator_id = 15,
  880 };
 881
  /* Gen 11 values that GEN11_FEATURES layers on top of GEN8_FEATURES: .gen,
   * has_pln turned off and new per-stage thread limits.  Unlike GEN9_HW_INFO
   * and GEN10_HW_INFO it sets neither timestamp_frequency nor .urb, so the
   * former keeps the GEN8_FEATURES value and each table supplies its own
   * .urb block.
   */
  882 #define GEN11_HW_INFO                               \
  883    .gen = 11,                                       \
  884    .has_pln = false,                                \
  885    .max_vs_threads = 364,                           \
  886    .max_gs_threads = 224,                           \
  887    .max_tcs_threads = 224,                          \
  888    .max_tes_threads = 364,                          \
  889    .max_cs_threads = 56
 890
  /* Initializer fragment for the icl and ehl tables.
   *
   *   _gt        value for .gt
   *   _slices    value for .num_slices
   *   _subslices initializer for .num_subslices, built with subslices()
   *   _l3        value for .l3_banks
   *
   * Starts from GEN8_FEATURES, applies GEN11_HW_INFO, then turns off
   * has_64bit_types and has_integer_dword_mul (both true in GEN8_FEATURES),
   * restates has_sample_with_hiz = false and fixes num_eu_per_subslice at 8.
   */
  891 #define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \
  892    GEN8_FEATURES,                                     \
  893    GEN11_HW_INFO,                                     \
  894    .has_64bit_types = false,                          \
  895    .has_integer_dword_mul = false,                    \
  896    .has_sample_with_hiz = false,                      \
  897    .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
  898    .num_subslices = _subslices,                       \
  899    .num_eu_per_subslice = 8
 900
  /* min_entries / max_entries initializers shared by the icl tables.  The
   * designators are relative to the urb member, so the macro is meant to be
   * expanded inside a ".urb = { ... }" block next to the table's own .size.
   * Expands without a trailing comma.
   */
  901 #define GEN11_URB_MIN_MAX_ENTRIES                     \
  902    .min_entries = {                                   \
  903       [MESA_SHADER_VERTEX]    = 64,                   \
  904       [MESA_SHADER_TESS_EVAL] = 34,                   \
  905    },                                                 \
  906    .max_entries = {                                   \
  907       [MESA_SHADER_VERTEX]    = 2384,                 \
  908       [MESA_SHADER_TESS_CTRL] = 1032,                 \
  909       [MESA_SHADER_TESS_EVAL] = 2384,                 \
  910       [MESA_SHADER_GEOMETRY]  = 1032,                 \
  911    }
 912
 913 static const struct gen_device_info gen_device_info_icl_8x8 = {
 914    GEN11_FEATURES(2, 1, subslices(8), 8),
 915    .urb = {
 916       .size = 1024,
 917       GEN11_URB_MIN_MAX_ENTRIES,
 918    },
 919    .simulator_id = 19,
 920 };
 921
 922 static const struct gen_device_info gen_device_info_icl_6x8 = {
 923    GEN11_FEATURES(1, 1, subslices(6), 6),
 924    .urb = {
 925       .size = 768,
 926       GEN11_URB_MIN_MAX_ENTRIES,
 927    },
 928    .simulator_id = 19,
 929 };
 930
 931 static const struct gen_device_info gen_device_info_icl_4x8 = {
 932    GEN11_FEATURES(1, 1, subslices(4), 6),
 933    .urb = {
 934       .size = 768,
 935       GEN11_URB_MIN_MAX_ENTRIES,
 936    },
 937    .simulator_id = 19,
 938 };
 939
 940 static const struct gen_device_info gen_device_info_icl_1x8 = {
 941    GEN11_FEATURES(1, 1, subslices(1), 6),
 942    .urb = {
 943       .size = 768,
 944       GEN11_URB_MIN_MAX_ENTRIES,
 945    },
 946    .simulator_id = 19,
 947 };
 948
 949 static const struct gen_device_info gen_device_info_ehl_4x8 = {
 950    GEN11_FEATURES(1, 1, subslices(4), 4),
 951    .urb = {
 952       .size = 512,
 953       .min_entries = {
 954          [MESA_SHADER_VERTEX]    = 64,
 955          [MESA_SHADER_TESS_EVAL] = 34,
 956       },
 957       .max_entries = {
 958          [MESA_SHADER_VERTEX]    = 2384,
 959          [MESA_SHADER_TESS_CTRL] = 1032,
 960          [MESA_SHADER_TESS_EVAL] = 2384,
 961          [MESA_SHADER_GEOMETRY]  = 1032,
 962       },
 963    },
 964    .disable_ccs_repack = true,
 965    .simulator_id = 28,
 966 };
 967
 968 /* FIXME: Verfiy below entries when more information is available for this SKU.
 969  */
 970 static const struct gen_device_info gen_device_info_ehl_4x4 = {
 971    GEN11_FEATURES(1, 1, subslices(4), 4),
 972    .urb = {
 973       .size = 512,
 974       .min_entries = {
 975          [MESA_SHADER_VERTEX]    = 64,
 976          [MESA_SHADER_TESS_EVAL] = 34,
 977       },
 978       .max_entries = {
 979          [MESA_SHADER_VERTEX]    = 2384,
 980          [MESA_SHADER_TESS_CTRL] = 1032,
 981          [MESA_SHADER_TESS_EVAL] = 2384,
 982          [MESA_SHADER_GEOMETRY]  = 1032,
 983       },
 984    },
 985    .disable_ccs_repack = true,
 986    .num_eu_per_subslice = 4,
 987    .simulator_id = 28,
 988 };
 989
 990 /* FIXME: Verfiy below entries when more information is available for this SKU.
 991  */
 992 static const struct gen_device_info gen_device_info_ehl_2x4 = {
 993    GEN11_FEATURES(1, 1, subslices(2), 4),
 994    .urb = {
 995       .size = 512,
 996       .min_entries = {
 997          [MESA_SHADER_VERTEX]    = 64,
 998          [MESA_SHADER_TESS_EVAL] = 34,
 999       },
1000       .max_entries = {
1001          [MESA_SHADER_VERTEX]    = 2384,
1002          [MESA_SHADER_TESS_CTRL] = 1032,
1003          [MESA_SHADER_TESS_EVAL] = 2384,
1004          [MESA_SHADER_GEOMETRY]  = 1032,
1005       },
1006    },
1007    .disable_ccs_repack = true,
1008    .num_eu_per_subslice =4,
1009    .simulator_id = 28,
1010 };
1011
/* Per-stage URB entry limits for gen12.  Meant to be expanded inside a
 * ".urb = { ... }" initializer.
 */
#define GEN12_URB_MIN_MAX_ENTRIES                   \
   .max_entries = {                                 \
      [MESA_SHADER_VERTEX]    = 3576,               \
      [MESA_SHADER_TESS_EVAL] = 3576,               \
      [MESA_SHADER_TESS_CTRL] = 1548,               \
      [MESA_SHADER_GEOMETRY]  = 1548,               \
   },                                               \
   .min_entries = {                                 \
      [MESA_SHADER_VERTEX]    = 64,                 \
      [MESA_SHADER_TESS_EVAL] = 34,                 \
   }
1023
/* Designated initializers shared by every gen12 device: generation number,
 * feature flags, per-stage maximum thread counts and the URB entry limits.
 *
 * This initializes .urb as a whole, so a ".urb.size" designator has to come
 * after this macro to add to it rather than be reset by it.
 */
#define GEN12_HW_INFO                               \
   .gen = 12,                                       \
   .has_aux_map = true,                             \
   .has_pln = false,                                \
   .has_sample_with_hiz = false,                    \
   .max_cs_threads = 112, /* threads per DSS */     \
   .max_vs_threads = 546,                           \
   .max_tes_threads = 546,                          \
   .max_gs_threads = 336,                           \
   .max_tcs_threads = 336,                          \
   .urb = {                                         \
      GEN12_URB_MIN_MAX_ENTRIES,                    \
   }
1037
/* Complete gen12 initializer list.
 *
 * GEN8_FEATURES and GEN12_HW_INFO are expanded first so the designators that
 * follow override them when they name the same field.  .urb.size is kept
 * after GEN12_HW_INFO because that macro initializes .urb; the size is 512
 * when _gt is 1 and 1024 otherwise.  _dual_subslices is expected to be a
 * braced list such as the one produced by dual_subslices().
 */
#define GEN12_FEATURES(_gt, _slices, _dual_subslices, _l3)      \
   GEN8_FEATURES,                                               \
   GEN12_HW_INFO,                                               \
   .has_integer_dword_mul = false,                              \
   .has_64bit_types = false,                                    \
   .simulator_id = 22,                                          \
   .gt = _gt,                                                   \
   .num_slices = _slices,                                       \
   .num_subslices = _dual_subslices,                            \
   .num_eu_per_subslice = 16,                                   \
   .l3_banks = _l3,                                             \
   .urb.size = (_gt) == 1 ? 512 : 1024
1048
/* Wrap a comma-separated list of per-slice counts in a braced initializer. */
#define dual_subslices(...) { __VA_ARGS__, }
1050
1051 static const struct gen_device_info gen_device_info_tgl_1x2x16 = {
1052    GEN12_FEATURES(1, 1, dual_subslices(2), 8),
1053 };
1054
1055 static const struct gen_device_info gen_device_info_tgl_1x6x16 = {
1056    GEN12_FEATURES(2, 1, dual_subslices(6), 8),
1057 };
1058
1059 static void
1060 gen_device_info_set_eu_mask(struct gen_device_info *devinfo,
1061                             unsigned slice,
1062                             unsigned subslice,
1063                             unsigned eu_mask)
1064 {
1065    unsigned subslice_offset = slice * devinfo->eu_slice_stride +
1066       subslice * devinfo->eu_subslice_stride;
1067
1068    for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) {
1069       devinfo->eu_masks[subslice_offset + b_eu] =
1070          (((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff;
1071    }
1072 }
1073
1074 /* Generate slice/subslice/eu masks from number of
1075  * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info
1076  * structure.
1077  *
1078  * These can be overridden with values reported by the kernel either from
1079  * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+
1080  * through the i915 query uapi.
1081  */
1082 static void
1083 fill_masks(struct gen_device_info *devinfo)
1084 {
1085    devinfo->slice_masks = (1U << devinfo->num_slices) - 1;
1086
1087    /* Subslice masks */
1088    unsigned max_subslices = 0;
1089    for (int s = 0; s < devinfo->num_slices; s++)
1090       max_subslices = MAX2(devinfo->num_subslices[s], max_subslices);
1091    devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
1092
1093    for (int s = 0; s < devinfo->num_slices; s++) {
1094       devinfo->subslice_masks[s * devinfo->subslice_slice_stride] =
1095          (1U << devinfo->num_subslices[s]) - 1;
1096    }
1097
1098    /* EU masks */
1099    devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);
1100    devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride;
1101
1102    for (int s = 0; s < devinfo->num_slices; s++) {
1103       for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) {
1104          gen_device_info_set_eu_mask(devinfo, s, ss,
1105                                      (1U << devinfo->num_eu_per_subslice) - 1);
1106       }
1107    }
1108 }
1109
1110 static void
1111 reset_masks(struct gen_device_info *devinfo)
1112 {
1113    devinfo->subslice_slice_stride = 0;
1114    devinfo->eu_subslice_stride = 0;
1115    devinfo->eu_slice_stride = 0;
1116
1117    devinfo->num_slices = 0;
1118    devinfo->num_eu_per_subslice = 0;
1119    memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1120
1121    memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1122    memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1123    memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1124    memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1125 }
1126
1127 static void
1128 update_from_topology(struct gen_device_info *devinfo,
1129                      const struct drm_i915_query_topology_info *topology)
1130 {
1131    reset_masks(devinfo);
1132
1133    devinfo->subslice_slice_stride = topology->subslice_stride;
1134
1135    devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8);
1136    devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride;
1137
1138    assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));
1139    memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));
1140    devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1141
1142    uint32_t subslice_mask_len =
1143       topology->max_slices * topology->subslice_stride;
1144    assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len);
1145    memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset],
1146           subslice_mask_len);
1147
1148    uint32_t n_subslices = 0;
1149    for (int s = 0; s < topology->max_slices; s++) {
1150       if ((devinfo->slice_masks & (1 << s)) == 0)
1151          continue;
1152
1153       for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1154          devinfo->num_subslices[s] +=
1155             __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1156       }
1157       n_subslices += devinfo->num_subslices[s];
1158    }
1159    assert(n_subslices > 0);
1160
1161    if (devinfo->gen == 11) {
1162       /* On ICL we only have one slice */
1163       assert(devinfo->slice_masks == 1);
1164
1165       /* Count the number of subslices on each pixel pipe. Assume that
1166        * subslices 0-3 are on pixel pipe 0, and 4-7 are on pixel pipe 1.
1167        */
1168       unsigned subslices = devinfo->subslice_masks[0];
1169       unsigned ss = 0;
1170       while (subslices > 0) {
1171          if (subslices & 1)
1172             devinfo->ppipe_subslices[ss >= 4 ? 1 : 0] += 1;
1173          subslices >>= 1;
1174          ss++;
1175       }
1176    }
1177
1178    uint32_t eu_mask_len =
1179       topology->eu_stride * topology->max_subslices * topology->max_slices;
1180    assert(sizeof(devinfo->eu_masks) >= eu_mask_len);
1181    memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len);
1182
1183    uint32_t n_eus = 0;
1184    for (int b = 0; b < eu_mask_len; b++)
1185       n_eus += __builtin_popcount(devinfo->eu_masks[b]);
1186
1187    devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1188 }
1189
1190 static bool
1191 update_from_masks(struct gen_device_info *devinfo, uint32_t slice_mask,
1192                   uint32_t subslice_mask, uint32_t n_eus)
1193 {
1194    struct drm_i915_query_topology_info *topology;
1195
1196    assert((slice_mask & 0xff) == slice_mask);
1197
1198    size_t data_length = 100;
1199
1200    topology = calloc(1, sizeof(*topology) + data_length);
1201    if (!topology)
1202       return false;
1203
1204    topology->max_slices = util_last_bit(slice_mask);
1205    topology->max_subslices = util_last_bit(subslice_mask);
1206
1207    topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8);
1208    topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8);
1209
1210    uint32_t n_subslices = __builtin_popcount(slice_mask) *
1211       __builtin_popcount(subslice_mask);
1212    uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1213    uint32_t eu_mask = (1U << num_eu_per_subslice) - 1;
1214
1215    topology->eu_offset = topology->subslice_offset +
1216       DIV_ROUND_UP(topology->max_subslices, 8);
1217    topology->eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8);
1218
1219    /* Set slice mask in topology */
1220    for (int b = 0; b < topology->subslice_offset; b++)
1221       topology->data[b] = (slice_mask >> (b * 8)) & 0xff;
1222
1223    for (int s = 0; s < topology->max_slices; s++) {
1224
1225       /* Set subslice mask in topology */
1226       for (int b = 0; b < topology->subslice_stride; b++) {
1227          int subslice_offset = topology->subslice_offset +
1228             s * topology->subslice_stride + b;
1229
1230          topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff;
1231       }
1232
1233       /* Set eu mask in topology */
1234       for (int ss = 0; ss < topology->max_subslices; ss++) {
1235          for (int b = 0; b < topology->eu_stride; b++) {
1236             int eu_offset = topology->eu_offset +
1237                (s * topology->max_subslices + ss) * topology->eu_stride + b;
1238
1239             topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff;
1240          }
1241       }
1242    }
1243
1244    update_from_topology(devinfo, topology);
1245    free(topology);
1246
1247    return true;
1248 }
1249
1250 static bool
1251 getparam(int fd, uint32_t param, int *value)
1252 {
1253    int tmp;
1254
1255    struct drm_i915_getparam gp = {
1256       .param = param,
1257       .value = &tmp,
1258    };
1259
1260    int ret = gen_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
1261    if (ret != 0)
1262       return false;
1263
1264    *value = tmp;
1265    return true;
1266 }
1267
1268 bool
1269 gen_get_device_info_from_pci_id(int pci_id,
1270                                 struct gen_device_info *devinfo)
1271 {
1272    switch (pci_id) {
1273 #undef CHIPSET
1274 #define CHIPSET(id, family, name) \
1275       case id: *devinfo = gen_device_info_##family; break;
1276 #include "pci_ids/i965_pci_ids.h"
1277 #include "pci_ids/iris_pci_ids.h"
1278    default:
1279       fprintf(stderr, "Driver does not support the 0x%x PCI ID.\n", pci_id);
1280       return false;
1281    }
1282
1283    fill_masks(devinfo);
1284
1285    /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
1286     *
1287     * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
1288     *  allocate scratch space enough so that each slice has 4 slices allowed."
1289     *
1290     * The equivalent internal documentation says that this programming note
1291     * applies to all Gen9+ platforms.
1292     *
1293     * The hardware typically calculates the scratch space pointer by taking
1294     * the base address, and adding per-thread-scratch-space * thread ID.
1295     * Extra padding can be necessary depending how the thread IDs are
1296     * calculated for a particular shader stage.
1297     */
1298
1299    switch(devinfo->gen) {
1300    case 9:
1301    case 10:
1302       devinfo->max_wm_threads = 64 /* threads-per-PSD */
1303                               * devinfo->num_slices
1304                               * 4; /* effective subslices per slice */
1305       break;
1306    case 11:
1307    case 12:
1308       devinfo->max_wm_threads = 128 /* threads-per-PSD */
1309                               * devinfo->num_slices
1310                               * 8; /* subslices per slice */
1311       break;
1312    default:
1313       assert(devinfo->gen < 9);
1314       break;
1315    }
1316
1317    assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
1318
1319    devinfo->chipset_id = pci_id;
1320    return true;
1321 }
1322
1323 const char *
1324 gen_get_device_name(int devid)
1325 {
1326    switch (devid) {
1327 #undef CHIPSET
1328 #define CHIPSET(id, family, name) case id: return name;
1329 #include "pci_ids/i965_pci_ids.h"
1330 #include "pci_ids/iris_pci_ids.h"
1331    default:
1332       return NULL;
1333    }
1334 }
1335
1336 /**
1337  * for gen8/gen9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology
1338  * (kernel 4.13+)
1339  */
1340 static bool
1341 getparam_topology(struct gen_device_info *devinfo, int fd)
1342 {
1343    int slice_mask = 0;
1344    if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask))
1345       return false;
1346
1347    int n_eus;
1348    if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus))
1349       return false;
1350
1351    int subslice_mask = 0;
1352    if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask))
1353       return false;
1354
1355    return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus);
1356 }
1357
1358 /**
1359  * preferred API for updating the topology in devinfo (kernel 4.17+)
1360  */
1361 static bool
1362 query_topology(struct gen_device_info *devinfo, int fd)
1363 {
1364    struct drm_i915_query_item item = {
1365       .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
1366    };
1367    struct drm_i915_query query = {
1368       .num_items = 1,
1369       .items_ptr = (uintptr_t) &item,
1370    };
1371
1372    if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query))
1373       return false;
1374
1375    if (item.length < 0)
1376       return false;
1377
1378    struct drm_i915_query_topology_info *topo_info =
1379       (struct drm_i915_query_topology_info *) calloc(1, item.length);
1380    item.data_ptr = (uintptr_t) topo_info;
1381
1382    if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query) ||
1383        item.length <= 0)
1384       return false;
1385
1386    update_from_topology(devinfo, topo_info);
1387
1388    free(topo_info);
1389
1390    return true;
1391
1392 }
1393
1394 bool
1395 gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
1396 {
1397    int devid = get_pci_device_id_override();
1398    if (devid > 0) {
1399       if (!gen_get_device_info_from_pci_id(devid, devinfo))
1400          return false;
1401       devinfo->no_hw = true;
1402    } else {
1403       /* query the device id */
1404       if (!getparam(fd, I915_PARAM_CHIPSET_ID, &devid))
1405          return false;
1406       if (!gen_get_device_info_from_pci_id(devid, devinfo))
1407          return false;
1408       devinfo->no_hw = false;
1409    }
1410
1411    /* remaining initializion queries the kernel for device info */
1412    if (devinfo->no_hw)
1413       return true;
1414
1415    int timestamp_frequency;
1416    if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY,
1417                 &timestamp_frequency))
1418       devinfo->timestamp_frequency = timestamp_frequency;
1419    else if (devinfo->gen >= 10)
1420       /* gen10 and later requires the timestamp_frequency to be updated */
1421       return false;
1422
1423    if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision))
1424        return false;
1425
1426    if (!query_topology(devinfo, fd)) {
1427       if (devinfo->gen >= 10) {
1428          /* topology uAPI required for CNL+ (kernel 4.17+) */
1429          return false;
1430       }
1431
1432       /* else use the kernel 4.13+ api for gen8+.  For older kernels, topology
1433        * will be wrong, affecting GPU metrics. In this case, fail silently.
1434        */
1435       getparam_topology(devinfo, fd);
1436    }
1437
1438    return true;
1439 }