src/intel/dev/gen_device_info.c

   1 /*
   2  * Copyright © 2013 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <assert.h>
  25 #include <stdbool.h>
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <unistd.h>
  30 #include "gen_device_info.h"
  31 #include "compiler/shader_enums.h"
  32 #include "intel/common/gen_gem.h"
  33 #include "util/bitscan.h"
  34 #include "util/macros.h"
  35
  36 #include "drm-uapi/i915_drm.h"
  37
  38 /**
  39  * Get the PCI ID for the device name.
  40  *
  41  * Returns -1 if the device is not known.
  42  */
  43 int
  44 gen_device_name_to_pci_device_id(const char *name)
  45 {
  46    static const struct {
  47       const char *name;
  48       int pci_id;
  49    } name_map[] = {
  50       { "brw", 0x2a02 },
  51       { "g4x", 0x2a42 },
  52       { "ilk", 0x0042 },
  53       { "snb", 0x0126 },
  54       { "ivb", 0x016a },
  55       { "hsw", 0x0d2e },
  56       { "byt", 0x0f33 },
  57       { "bdw", 0x162e },
  58       { "chv", 0x22B3 },
  59       { "skl", 0x1912 },
  60       { "bxt", 0x5A85 },
  61       { "kbl", 0x5912 },
  62       { "aml", 0x591C },
  63       { "glk", 0x3185 },
  64       { "cfl", 0x3E9B },
  65       { "whl", 0x3EA1 },
  66       { "cml", 0x9b41 },
  67       { "cnl", 0x5a52 },
  68       { "icl", 0x8a52 },
  69       { "ehl", 0x4500 },
  70       { "tgl", 0x9a49 },
  71    };
  72
  73    for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
  74       if (!strcmp(name_map[i].name, name))
  75          return name_map[i].pci_id;
  76    }
  77
  78    return -1;
  79 }
  80
  81 /**
  82  * Get the overridden PCI ID for the device. This is set with the
  83  * INTEL_DEVID_OVERRIDE environment variable.
  84  *
  85  * Returns -1 if the override is not set.
  86  */
  87 static int
  88 get_pci_device_id_override(void)
  89 {
  90    if (geteuid() == getuid()) {
  91       const char *devid_override = getenv("INTEL_DEVID_OVERRIDE");
  92       if (devid_override) {
  93          const int id = gen_device_name_to_pci_device_id(devid_override);
  94          return id >= 0 ? id : strtol(devid_override, NULL, 0);
  95       }
  96    }
  97
  98    return -1;
  99 }
 100
 101 static const struct gen_device_info gen_device_info_i965 = {
 102    .gen = 4,
 103    .has_negative_rhw_bug = true,
 104    .num_slices = 1,
 105    .num_subslices = { 1, },
 106    .num_eu_per_subslice = 8,
 107    .num_thread_per_eu = 4,
 108    .max_vs_threads = 16,
 109    .max_gs_threads = 2,
 110    .max_wm_threads = 8 * 4,
 111    .urb = {
 112       .size = 256,
 113    },
 114    .timestamp_frequency = 12500000,
 115    .simulator_id = -1,
 116 };
 117
 118 static const struct gen_device_info gen_device_info_g4x = {
 119    .gen = 4,
 120    .has_pln = true,
 121    .has_compr4 = true,
 122    .has_surface_tile_offset = true,
 123    .is_g4x = true,
 124    .num_slices = 1,
 125    .num_subslices = { 1, },
 126    .num_eu_per_subslice = 10,
 127    .num_thread_per_eu = 5,
 128    .max_vs_threads = 32,
 129    .max_gs_threads = 2,
 130    .max_wm_threads = 10 * 5,
 131    .urb = {
 132       .size = 384,
 133    },
 134    .timestamp_frequency = 12500000,
 135    .simulator_id = -1,
 136 };
 137
 138 static const struct gen_device_info gen_device_info_ilk = {
 139    .gen = 5,
 140    .has_pln = true,
 141    .has_compr4 = true,
 142    .has_surface_tile_offset = true,
 143    .num_slices = 1,
 144    .num_subslices = { 1, },
 145    .num_eu_per_subslice = 12,
 146    .num_thread_per_eu = 6,
 147    .max_vs_threads = 72,
 148    .max_gs_threads = 32,
 149    .max_wm_threads = 12 * 6,
 150    .urb = {
 151       .size = 1024,
 152    },
 153    .timestamp_frequency = 12500000,
 154    .simulator_id = -1,
 155 };
 156
 157 static const struct gen_device_info gen_device_info_snb_gt1 = {
 158    .gen = 6,
 159    .gt = 1,
 160    .has_hiz_and_separate_stencil = true,
 161    .has_llc = true,
 162    .has_pln = true,
 163    .has_surface_tile_offset = true,
 164    .needs_unlit_centroid_workaround = true,
 165    .num_slices = 1,
 166    .num_subslices = { 1, },
 167    .num_eu_per_subslice = 6,
 168    .num_thread_per_eu = 6, /* Not confirmed */
 169    .max_vs_threads = 24,
 170    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
 171    .max_wm_threads = 40,
 172    .urb = {
 173       .size = 32,
 174       .min_entries = {
 175          [MESA_SHADER_VERTEX]   = 24,
 176       },
 177       .max_entries = {
 178          [MESA_SHADER_VERTEX]   = 256,
 179          [MESA_SHADER_GEOMETRY] = 256,
 180       },
 181    },
 182    .timestamp_frequency = 12500000,
 183    .simulator_id = -1,
 184 };
 185
 186 static const struct gen_device_info gen_device_info_snb_gt2 = {
 187    .gen = 6,
 188    .gt = 2,
 189    .has_hiz_and_separate_stencil = true,
 190    .has_llc = true,
 191    .has_pln = true,
 192    .has_surface_tile_offset = true,
 193    .needs_unlit_centroid_workaround = true,
 194    .num_slices = 1,
 195    .num_subslices = { 1, },
 196    .num_eu_per_subslice = 12,
 197    .num_thread_per_eu = 6, /* Not confirmed */
 198    .max_vs_threads = 60,
 199    .max_gs_threads = 60,
 200    .max_wm_threads = 80,
 201    .urb = {
 202       .size = 64,
 203       .min_entries = {
 204          [MESA_SHADER_VERTEX]   = 24,
 205       },
 206       .max_entries = {
 207          [MESA_SHADER_VERTEX]   = 256,
 208          [MESA_SHADER_GEOMETRY] = 256,
 209       },
 210    },
 211    .timestamp_frequency = 12500000,
 212    .simulator_id = -1,
 213 };
 214
 215 #define GEN7_FEATURES                               \
 216    .gen = 7,                                        \
 217    .has_hiz_and_separate_stencil = true,            \
 218    .must_use_separate_stencil = true,               \
 219    .has_llc = true,                                 \
 220    .has_pln = true,                                 \
 221    .has_64bit_types = true,                         \
 222    .has_surface_tile_offset = true,                 \
 223    .timestamp_frequency = 12500000
 224
 225 static const struct gen_device_info gen_device_info_ivb_gt1 = {
 226    GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
 227    .num_slices = 1,
 228    .num_subslices = { 1, },
 229    .num_eu_per_subslice = 6,
 230    .num_thread_per_eu = 6,
 231    .l3_banks = 2,
 232    .max_vs_threads = 36,
 233    .max_tcs_threads = 36,
 234    .max_tes_threads = 36,
 235    .max_gs_threads = 36,
 236    .max_wm_threads = 48,
 237    .max_cs_threads = 36,
 238    .urb = {
 239       .size = 128,
 240       .min_entries = {
 241          [MESA_SHADER_VERTEX]    = 32,
 242          [MESA_SHADER_TESS_EVAL] = 10,
 243       },
 244       .max_entries = {
 245          [MESA_SHADER_VERTEX]    = 512,
 246          [MESA_SHADER_TESS_CTRL] = 32,
 247          [MESA_SHADER_TESS_EVAL] = 288,
 248          [MESA_SHADER_GEOMETRY]  = 192,
 249       },
 250    },
 251    .simulator_id = 7,
 252 };
 253
 254 static const struct gen_device_info gen_device_info_ivb_gt2 = {
 255    GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
 256    .num_slices = 1,
 257    .num_subslices = { 1, },
 258    .num_eu_per_subslice = 12,
 259    .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
 260                             * @max_wm_threads ... */
 261    .l3_banks = 4,
 262    .max_vs_threads = 128,
 263    .max_tcs_threads = 128,
 264    .max_tes_threads = 128,
 265    .max_gs_threads = 128,
 266    .max_wm_threads = 172,
 267    .max_cs_threads = 64,
 268    .urb = {
 269       .size = 256,
 270       .min_entries = {
 271          [MESA_SHADER_VERTEX]    = 32,
 272          [MESA_SHADER_TESS_EVAL] = 10,
 273       },
 274       .max_entries = {
 275          [MESA_SHADER_VERTEX]    = 704,
 276          [MESA_SHADER_TESS_CTRL] = 64,
 277          [MESA_SHADER_TESS_EVAL] = 448,
 278          [MESA_SHADER_GEOMETRY]  = 320,
 279       },
 280    },
 281    .simulator_id = 7,
 282 };
 283
 284 static const struct gen_device_info gen_device_info_byt = {
 285    GEN7_FEATURES, .is_baytrail = true, .gt = 1,
 286    .num_slices = 1,
 287    .num_subslices = { 1, },
 288    .num_eu_per_subslice = 4,
 289    .num_thread_per_eu = 8,
 290    .l3_banks = 1,
 291    .has_llc = false,
 292    .max_vs_threads = 36,
 293    .max_tcs_threads = 36,
 294    .max_tes_threads = 36,
 295    .max_gs_threads = 36,
 296    .max_wm_threads = 48,
 297    .max_cs_threads = 32,
 298    .urb = {
 299       .size = 128,
 300       .min_entries = {
 301          [MESA_SHADER_VERTEX]    = 32,
 302          [MESA_SHADER_TESS_EVAL] = 10,
 303       },
 304       .max_entries = {
 305          [MESA_SHADER_VERTEX]    = 512,
 306          [MESA_SHADER_TESS_CTRL] = 32,
 307          [MESA_SHADER_TESS_EVAL] = 288,
 308          [MESA_SHADER_GEOMETRY]  = 192,
 309       },
 310    },
 311    .simulator_id = 10,
 312 };
 313
 314 #define HSW_FEATURES             \
 315    GEN7_FEATURES,                \
 316    .is_haswell = true,           \
 317    .supports_simd16_3src = true, \
 318    .has_resource_streamer = true
 319
 320 static const struct gen_device_info gen_device_info_hsw_gt1 = {
 321    HSW_FEATURES, .gt = 1,
 322    .num_slices = 1,
 323    .num_subslices = { 1, },
 324    .num_eu_per_subslice = 10,
 325    .num_thread_per_eu = 7,
 326    .l3_banks = 2,
 327    .max_vs_threads = 70,
 328    .max_tcs_threads = 70,
 329    .max_tes_threads = 70,
 330    .max_gs_threads = 70,
 331    .max_wm_threads = 102,
 332    .max_cs_threads = 70,
 333    .urb = {
 334       .size = 128,
 335       .min_entries = {
 336          [MESA_SHADER_VERTEX]    = 32,
 337          [MESA_SHADER_TESS_EVAL] = 10,
 338       },
 339       .max_entries = {
 340          [MESA_SHADER_VERTEX]    = 640,
 341          [MESA_SHADER_TESS_CTRL] = 64,
 342          [MESA_SHADER_TESS_EVAL] = 384,
 343          [MESA_SHADER_GEOMETRY]  = 256,
 344       },
 345    },
 346    .simulator_id = 9,
 347 };
 348
 349 static const struct gen_device_info gen_device_info_hsw_gt2 = {
 350    HSW_FEATURES, .gt = 2,
 351    .num_slices = 1,
 352    .num_subslices = { 2, },
 353    .num_eu_per_subslice = 10,
 354    .num_thread_per_eu = 7,
 355    .l3_banks = 4,
 356    .max_vs_threads = 280,
 357    .max_tcs_threads = 256,
 358    .max_tes_threads = 280,
 359    .max_gs_threads = 256,
 360    .max_wm_threads = 204,
 361    .max_cs_threads = 70,
 362    .urb = {
 363       .size = 256,
 364       .min_entries = {
 365          [MESA_SHADER_VERTEX]    = 64,
 366          [MESA_SHADER_TESS_EVAL] = 10,
 367       },
 368       .max_entries = {
 369          [MESA_SHADER_VERTEX]    = 1664,
 370          [MESA_SHADER_TESS_CTRL] = 128,
 371          [MESA_SHADER_TESS_EVAL] = 960,
 372          [MESA_SHADER_GEOMETRY]  = 640,
 373       },
 374    },
 375    .simulator_id = 9,
 376 };
 377
 378 static const struct gen_device_info gen_device_info_hsw_gt3 = {
 379    HSW_FEATURES, .gt = 3,
 380    .num_slices = 2,
 381    .num_subslices = { 2, },
 382    .num_eu_per_subslice = 10,
 383    .num_thread_per_eu = 7,
 384    .l3_banks = 8,
 385    .max_vs_threads = 280,
 386    .max_tcs_threads = 256,
 387    .max_tes_threads = 280,
 388    .max_gs_threads = 256,
 389    .max_wm_threads = 408,
 390    .max_cs_threads = 70,
 391    .urb = {
 392       .size = 512,
 393       .min_entries = {
 394          [MESA_SHADER_VERTEX]    = 64,
 395          [MESA_SHADER_TESS_EVAL] = 10,
 396       },
 397       .max_entries = {
 398          [MESA_SHADER_VERTEX]    = 1664,
 399          [MESA_SHADER_TESS_CTRL] = 128,
 400          [MESA_SHADER_TESS_EVAL] = 960,
 401          [MESA_SHADER_GEOMETRY]  = 640,
 402       },
 403    },
 404    .simulator_id = 9,
 405 };
 406
 407 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
 408  * so keep things conservative for now and set has_sample_with_hiz = false.
 409  */
 410 #define GEN8_FEATURES                               \
 411    .gen = 8,                                        \
 412    .has_hiz_and_separate_stencil = true,            \
 413    .has_resource_streamer = true,                   \
 414    .must_use_separate_stencil = true,               \
 415    .has_llc = true,                                 \
 416    .has_sample_with_hiz = false,                    \
 417    .has_pln = true,                                 \
 418    .has_integer_dword_mul = true,                   \
 419    .has_64bit_types = true,                         \
 420    .supports_simd16_3src = true,                    \
 421    .has_surface_tile_offset = true,                 \
 422    .num_thread_per_eu = 7,                          \
 423    .max_vs_threads = 504,                           \
 424    .max_tcs_threads = 504,                          \
 425    .max_tes_threads = 504,                          \
 426    .max_gs_threads = 504,                           \
 427    .max_wm_threads = 384,                           \
 428    .timestamp_frequency = 12500000
 429
 430 static const struct gen_device_info gen_device_info_bdw_gt1 = {
 431    GEN8_FEATURES, .gt = 1,
 432    .is_broadwell = true,
 433    .num_slices = 1,
 434    .num_subslices = { 2, },
 435    .num_eu_per_subslice = 8,
 436    .l3_banks = 2,
 437    .max_cs_threads = 42,
 438    .urb = {
 439       .size = 192,
 440       .min_entries = {
 441          [MESA_SHADER_VERTEX]    = 64,
 442          [MESA_SHADER_TESS_EVAL] = 34,
 443       },
 444       .max_entries = {
 445          [MESA_SHADER_VERTEX]    = 2560,
 446          [MESA_SHADER_TESS_CTRL] = 504,
 447          [MESA_SHADER_TESS_EVAL] = 1536,
 448          [MESA_SHADER_GEOMETRY]  = 960,
 449       },
 450    },
 451    .simulator_id = 11,
 452 };
 453
 454 static const struct gen_device_info gen_device_info_bdw_gt2 = {
 455    GEN8_FEATURES, .gt = 2,
 456    .is_broadwell = true,
 457    .num_slices = 1,
 458    .num_subslices = { 3, },
 459    .num_eu_per_subslice = 8,
 460    .l3_banks = 4,
 461    .max_cs_threads = 56,
 462    .urb = {
 463       .size = 384,
 464       .min_entries = {
 465          [MESA_SHADER_VERTEX]    = 64,
 466          [MESA_SHADER_TESS_EVAL] = 34,
 467       },
 468       .max_entries = {
 469          [MESA_SHADER_VERTEX]    = 2560,
 470          [MESA_SHADER_TESS_CTRL] = 504,
 471          [MESA_SHADER_TESS_EVAL] = 1536,
 472          [MESA_SHADER_GEOMETRY]  = 960,
 473       },
 474    },
 475    .simulator_id = 11,
 476 };
 477
 478 static const struct gen_device_info gen_device_info_bdw_gt3 = {
 479    GEN8_FEATURES, .gt = 3,
 480    .is_broadwell = true,
 481    .num_slices = 2,
 482    .num_subslices = { 3, 3, },
 483    .num_eu_per_subslice = 8,
 484    .l3_banks = 8,
 485    .max_cs_threads = 56,
 486    .urb = {
 487       .size = 384,
 488       .min_entries = {
 489          [MESA_SHADER_VERTEX]    = 64,
 490          [MESA_SHADER_TESS_EVAL] = 34,
 491       },
 492       .max_entries = {
 493          [MESA_SHADER_VERTEX]    = 2560,
 494          [MESA_SHADER_TESS_CTRL] = 504,
 495          [MESA_SHADER_TESS_EVAL] = 1536,
 496          [MESA_SHADER_GEOMETRY]  = 960,
 497       },
 498    },
 499    .simulator_id = 11,
 500 };
 501
 502 static const struct gen_device_info gen_device_info_chv = {
 503    GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
 504    .has_llc = false,
 505    .has_integer_dword_mul = false,
 506    .num_slices = 1,
 507    .num_subslices = { 2, },
 508    .num_eu_per_subslice = 8,
 509    .l3_banks = 2,
 510    .max_vs_threads = 80,
 511    .max_tcs_threads = 80,
 512    .max_tes_threads = 80,
 513    .max_gs_threads = 80,
 514    .max_wm_threads = 128,
 515    .max_cs_threads = 6 * 7,
 516    .urb = {
 517       .size = 192,
 518       .min_entries = {
 519          [MESA_SHADER_VERTEX]    = 34,
 520          [MESA_SHADER_TESS_EVAL] = 34,
 521       },
 522       .max_entries = {
 523          [MESA_SHADER_VERTEX]    = 640,
 524          [MESA_SHADER_TESS_CTRL] = 80,
 525          [MESA_SHADER_TESS_EVAL] = 384,
 526          [MESA_SHADER_GEOMETRY]  = 256,
 527       },
 528    },
 529    .simulator_id = 13,
 530 };
 531
 532 #define GEN9_HW_INFO                                \
 533    .gen = 9,                                        \
 534    .max_vs_threads = 336,                           \
 535    .max_gs_threads = 336,                           \
 536    .max_tcs_threads = 336,                          \
 537    .max_tes_threads = 336,                          \
 538    .max_cs_threads = 56,                            \
 539    .timestamp_frequency = 12000000,                 \
 540    .urb = {                                         \
 541       .size = 384,                                  \
 542       .min_entries = {                              \
 543          [MESA_SHADER_VERTEX]    = 64,              \
 544          [MESA_SHADER_TESS_EVAL] = 34,              \
 545       },                                            \
 546       .max_entries = {                              \
 547          [MESA_SHADER_VERTEX]    = 1856,            \
 548          [MESA_SHADER_TESS_CTRL] = 672,             \
 549          [MESA_SHADER_TESS_EVAL] = 1120,            \
 550          [MESA_SHADER_GEOMETRY]  = 640,             \
 551       },                                            \
 552    }
 553
 554 #define GEN9_LP_FEATURES                           \
 555    GEN8_FEATURES,                                  \
 556    GEN9_HW_INFO,                                   \
 557    .has_integer_dword_mul = false,                 \
 558    .gt = 1,                                        \
 559    .has_llc = false,                               \
 560    .has_sample_with_hiz = true,                    \
 561    .num_slices = 1,                                \
 562    .num_thread_per_eu = 6,                         \
 563    .max_vs_threads = 112,                          \
 564    .max_tcs_threads = 112,                         \
 565    .max_tes_threads = 112,                         \
 566    .max_gs_threads = 112,                          \
 567    .max_cs_threads = 6 * 6,                        \
 568    .timestamp_frequency = 19200000,                \
 569    .urb = {                                        \
 570       .size = 192,                                 \
 571       .min_entries = {                             \
 572          [MESA_SHADER_VERTEX]    = 34,             \
 573          [MESA_SHADER_TESS_EVAL] = 34,             \
 574       },                                           \
 575       .max_entries = {                             \
 576          [MESA_SHADER_VERTEX]    = 704,            \
 577          [MESA_SHADER_TESS_CTRL] = 256,            \
 578          [MESA_SHADER_TESS_EVAL] = 416,            \
 579          [MESA_SHADER_GEOMETRY]  = 256,            \
 580       },                                           \
 581    }
 582
 583 #define GEN9_LP_FEATURES_3X6                       \
 584    GEN9_LP_FEATURES,                               \
 585    .num_subslices = { 3, },                        \
 586    .num_eu_per_subslice = 6
 587
 588 #define GEN9_LP_FEATURES_2X6                       \
 589    GEN9_LP_FEATURES,                               \
 590    .num_subslices = { 2, },                        \
 591    .num_eu_per_subslice = 6,                       \
 592    .max_vs_threads = 56,                           \
 593    .max_tcs_threads = 56,                          \
 594    .max_tes_threads = 56,                          \
 595    .max_gs_threads = 56,                           \
 596    .max_cs_threads = 6 * 6,                        \
 597    .urb = {                                        \
 598       .size = 128,                                 \
 599       .min_entries = {                             \
 600          [MESA_SHADER_VERTEX]    = 34,             \
 601          [MESA_SHADER_TESS_EVAL] = 34,             \
 602       },                                           \
 603       .max_entries = {                             \
 604          [MESA_SHADER_VERTEX]    = 352,            \
 605          [MESA_SHADER_TESS_CTRL] = 128,            \
 606          [MESA_SHADER_TESS_EVAL] = 208,            \
 607          [MESA_SHADER_GEOMETRY]  = 128,            \
 608       },                                           \
 609    }
 610
 611 #define GEN9_FEATURES                               \
 612    GEN8_FEATURES,                                   \
 613    GEN9_HW_INFO,                                    \
 614    .has_sample_with_hiz = true
 615
 616 static const struct gen_device_info gen_device_info_skl_gt1 = {
 617    GEN9_FEATURES, .gt = 1,
 618    .is_skylake = true,
 619    .num_slices = 1,
 620    .num_subslices = { 2, },
 621    .num_eu_per_subslice = 6,
 622    .l3_banks = 2,
 623    .urb.size = 192,
 624    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
 625     * leading to some vertices to go missing if we use too much URB.
 626     */
 627    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
 628    .simulator_id = 12,
 629 };
 630
 631 static const struct gen_device_info gen_device_info_skl_gt2 = {
 632    GEN9_FEATURES, .gt = 2,
 633    .is_skylake = true,
 634    .num_slices = 1,
 635    .num_subslices = { 3, },
 636    .num_eu_per_subslice = 8,
 637    .l3_banks = 4,
 638    .simulator_id = 12,
 639 };
 640
 641 static const struct gen_device_info gen_device_info_skl_gt3 = {
 642    GEN9_FEATURES, .gt = 3,
 643    .is_skylake = true,
 644    .num_slices = 2,
 645    .num_subslices = { 3, 3, },
 646    .num_eu_per_subslice = 8,
 647    .l3_banks = 8,
 648    .simulator_id = 12,
 649 };
 650
 651 static const struct gen_device_info gen_device_info_skl_gt4 = {
 652    GEN9_FEATURES, .gt = 4,
 653    .is_skylake = true,
 654    .num_slices = 3,
 655    .num_subslices = { 3, 3, 3, },
 656    .num_eu_per_subslice = 8,
 657    .l3_banks = 12,
 658    /* From the "L3 Allocation and Programming" documentation:
 659     *
 660     * "URB is limited to 1008KB due to programming restrictions.  This is not a
 661     * restriction of the L3 implementation, but of the FF and other clients.
 662     * Therefore, in a GT4 implementation it is possible for the programmed
 663     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
 664     * only 1008KB of this will be used."
 665     */
 666    .urb.size = 1008 / 3,
 667    .simulator_id = 12,
 668 };
 669
 670 static const struct gen_device_info gen_device_info_bxt = {
 671    GEN9_LP_FEATURES_3X6,
 672    .is_broxton = true,
 673    .l3_banks = 2,
 674    .simulator_id = 14,
 675 };
 676
 677 static const struct gen_device_info gen_device_info_bxt_2x6 = {
 678    GEN9_LP_FEATURES_2X6,
 679    .is_broxton = true,
 680    .l3_banks = 1,
 681    .simulator_id = 14,
 682 };
 683 /*
 684  * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
 685  * There's no KBL entry. Using the default SKL (GEN9) GS entries value.
 686  */
 687
 688 static const struct gen_device_info gen_device_info_kbl_gt1 = {
 689    GEN9_FEATURES,
 690    .is_kabylake = true,
 691    .gt = 1,
 692
 693    .max_cs_threads = 7 * 6,
 694    .urb.size = 192,
 695    .num_slices = 1,
 696    .num_subslices = { 2, },
 697    .num_eu_per_subslice = 6,
 698    .l3_banks = 2,
 699    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
 700     * leading to some vertices to go missing if we use too much URB.
 701     */
 702    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
 703    .simulator_id = 16,
 704 };
 705
 706 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
 707    GEN9_FEATURES,
 708    .is_kabylake = true,
 709    .gt = 1,
 710
 711    .max_cs_threads = 7 * 6,
 712    .num_slices = 1,
 713    .num_subslices = { 3, },
 714    .num_eu_per_subslice = 6,
 715    .l3_banks = 4,
 716    .simulator_id = 16,
 717 };
 718
 719 static const struct gen_device_info gen_device_info_kbl_gt2 = {
 720    GEN9_FEATURES,
 721    .is_kabylake = true,
 722    .gt = 2,
 723
 724    .num_slices = 1,
 725    .num_subslices = { 3, },
 726    .num_eu_per_subslice = 8,
 727    .l3_banks = 4,
 728    .simulator_id = 16,
 729 };
 730
 731 static const struct gen_device_info gen_device_info_kbl_gt3 = {
 732    GEN9_FEATURES,
 733    .is_kabylake = true,
 734    .gt = 3,
 735
 736    .num_slices = 2,
 737    .num_subslices = { 3, 3, },
 738    .num_eu_per_subslice = 8,
 739    .l3_banks = 8,
 740    .simulator_id = 16,
 741 };
 742
 743 static const struct gen_device_info gen_device_info_kbl_gt4 = {
 744    GEN9_FEATURES,
 745    .is_kabylake = true,
 746    .gt = 4,
 747
 748    /*
 749     * From the "L3 Allocation and Programming" documentation:
 750     *
 751     * "URB is limited to 1008KB due to programming restrictions.  This
 752     *  is not a restriction of the L3 implementation, but of the FF and
 753     *  other clients.  Therefore, in a GT4 implementation it is
 754     *  possible for the programmed allocation of the L3 data array to
 755     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
 756     *  will be used."
 757     */
 758    .urb.size = 1008 / 3,
 759    .num_slices = 3,
 760    .num_subslices = { 3, 3, 3, },
 761    .num_eu_per_subslice = 8,
 762    .l3_banks = 12,
 763    .simulator_id = 16,
 764 };
 765
 766 static const struct gen_device_info gen_device_info_glk = {
 767    GEN9_LP_FEATURES_3X6,
 768    .is_geminilake = true,
 769    .l3_banks = 2,
 770    .simulator_id = 17,
 771 };
 772
 773 static const struct gen_device_info gen_device_info_glk_2x6 = {
 774    GEN9_LP_FEATURES_2X6,
 775    .is_geminilake = true,
 776    .l3_banks = 2,
 777    .simulator_id = 17,
 778 };
 779
 780 static const struct gen_device_info gen_device_info_cfl_gt1 = {
 781    GEN9_FEATURES,
 782    .is_coffeelake = true,
 783    .gt = 1,
 784
 785    .num_slices = 1,
 786    .num_subslices = { 2, },
 787    .num_eu_per_subslice = 6,
 788    .l3_banks = 2,
 789    .urb.size = 192,
 790    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
 791     * leading to some vertices to go missing if we use too much URB.
 792     */
 793    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
 794    .simulator_id = 24,
 795 };
 796 static const struct gen_device_info gen_device_info_cfl_gt2 = {
 797    GEN9_FEATURES,
 798    .is_coffeelake = true,
 799    .gt = 2,
 800
 801    .num_slices = 1,
 802    .num_subslices = { 3, },
 803    .num_eu_per_subslice = 8,
 804    .l3_banks = 4,
 805    .simulator_id = 24,
 806 };
 807
 808 static const struct gen_device_info gen_device_info_cfl_gt3 = {
 809    GEN9_FEATURES,
 810    .is_coffeelake = true,
 811    .gt = 3,
 812
 813    .num_slices = 2,
 814    .num_subslices = { 3, 3, },
 815    .num_eu_per_subslice = 8,
 816    .l3_banks = 8,
 817    .simulator_id = 24,
 818 };
 819
 820 #define GEN10_HW_INFO                               \
 821    .gen = 10,                                       \
 822    .num_thread_per_eu = 7,                          \
 823    .max_vs_threads = 728,                           \
 824    .max_gs_threads = 432,                           \
 825    .max_tcs_threads = 432,                          \
 826    .max_tes_threads = 624,                          \
 827    .max_cs_threads = 56,                            \
 828    .timestamp_frequency = 19200000,                 \
 829    .urb = {                                         \
 830       .size = 256,                                  \
 831       .min_entries = {                              \
 832          [MESA_SHADER_VERTEX]    = 64,              \
 833          [MESA_SHADER_TESS_EVAL] = 34,              \
 834       },                                            \
 835       .max_entries = {                              \
 836       [MESA_SHADER_VERTEX]       = 3936,            \
 837       [MESA_SHADER_TESS_CTRL]    = 896,             \
 838       [MESA_SHADER_TESS_EVAL]    = 2064,            \
 839       [MESA_SHADER_GEOMETRY]     = 832,             \
 840       },                                            \
 841    }
 842
 843 #define subslices(args...) { args, }
 844
 845 #define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \
 846    GEN8_FEATURES,                                   \
 847    GEN10_HW_INFO,                                   \
 848    .has_sample_with_hiz = true,                     \
 849    .gt = _gt,                                       \
 850    .num_slices = _slices,                           \
 851    .num_subslices = _subslices,                     \
 852    .num_eu_per_subslice = 8,                        \
 853    .l3_banks = _l3
 854
 855 static const struct gen_device_info gen_device_info_cnl_2x8 = {
 856    /* GT0.5 */
 857    GEN10_FEATURES(1, 1, subslices(2), 2),
 858    .is_cannonlake = true,
 859    .simulator_id = 15,
 860 };
 861
 862 static const struct gen_device_info gen_device_info_cnl_3x8 = {
 863    /* GT1 */
 864    GEN10_FEATURES(1, 1, subslices(3), 3),
 865    .is_cannonlake = true,
 866    .simulator_id = 15,
 867 };
 868
 869 static const struct gen_device_info gen_device_info_cnl_4x8 = {
 870    /* GT 1.5 */
 871    GEN10_FEATURES(1, 2, subslices(2, 2), 6),
 872    .is_cannonlake = true,
 873    .simulator_id = 15,
 874 };
 875
 876 static const struct gen_device_info gen_device_info_cnl_5x8 = {
 877    /* GT2 */
 878    GEN10_FEATURES(2, 2, subslices(3, 2), 6),
 879    .is_cannonlake = true,
 880    .simulator_id = 15,
 881 };
 882
 883 #define GEN11_HW_INFO                               \
 884    .gen = 11,                                       \
 885    .has_pln = false,                                \
 886    .max_vs_threads = 364,                           \
 887    .max_gs_threads = 224,                           \
 888    .max_tcs_threads = 224,                          \
 889    .max_tes_threads = 364,                          \
 890    .max_cs_threads = 56
 891
 892 #define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \
 893    GEN8_FEATURES,                                     \
 894    GEN11_HW_INFO,                                     \
 895    .has_64bit_types = false,                          \
 896    .has_integer_dword_mul = false,                    \
 897    .has_sample_with_hiz = false,                      \
 898    .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
 899    .num_subslices = _subslices,                       \
 900    .num_eu_per_subslice = 8
 901
 902 #define GEN11_URB_MIN_MAX_ENTRIES                     \
 903    .min_entries = {                                   \
 904       [MESA_SHADER_VERTEX]    = 64,                   \
 905       [MESA_SHADER_TESS_EVAL] = 34,                   \
 906    },                                                 \
 907    .max_entries = {                                   \
 908       [MESA_SHADER_VERTEX]    = 2384,                 \
 909       [MESA_SHADER_TESS_CTRL] = 1032,                 \
 910       [MESA_SHADER_TESS_EVAL] = 2384,                 \
 911       [MESA_SHADER_GEOMETRY]  = 1032,                 \
 912    }
 913
 914 static const struct gen_device_info gen_device_info_icl_8x8 = {
 915    GEN11_FEATURES(2, 1, subslices(8), 8),
 916    .urb = {
 917       .size = 1024,
 918       GEN11_URB_MIN_MAX_ENTRIES,
 919    },
 920    .simulator_id = 19,
 921 };
 922
 923 static const struct gen_device_info gen_device_info_icl_6x8 = {
 924    GEN11_FEATURES(1, 1, subslices(6), 6),
 925    .urb = {
 926       .size = 768,
 927       GEN11_URB_MIN_MAX_ENTRIES,
 928    },
 929    .simulator_id = 19,
 930 };
 931
 932 static const struct gen_device_info gen_device_info_icl_4x8 = {
 933    GEN11_FEATURES(1, 1, subslices(4), 6),
 934    .urb = {
 935       .size = 768,
 936       GEN11_URB_MIN_MAX_ENTRIES,
 937    },
 938    .simulator_id = 19,
 939 };
 940
 941 static const struct gen_device_info gen_device_info_icl_1x8 = {
 942    GEN11_FEATURES(1, 1, subslices(1), 6),
 943    .urb = {
 944       .size = 768,
 945       GEN11_URB_MIN_MAX_ENTRIES,
 946    },
 947    .simulator_id = 19,
 948 };
 949
 950 static const struct gen_device_info gen_device_info_ehl_4x8 = {
 951    GEN11_FEATURES(1, 1, subslices(4), 4),
 952    .is_elkhartlake = true,
 953    .urb = {
 954       .size = 512,
 955       .min_entries = {
 956          [MESA_SHADER_VERTEX]    = 64,
 957          [MESA_SHADER_TESS_EVAL] = 34,
 958       },
 959       .max_entries = {
 960          [MESA_SHADER_VERTEX]    = 2384,
 961          [MESA_SHADER_TESS_CTRL] = 1032,
 962          [MESA_SHADER_TESS_EVAL] = 2384,
 963          [MESA_SHADER_GEOMETRY]  = 1032,
 964       },
 965    },
 966    .disable_ccs_repack = true,
 967    .simulator_id = 28,
 968 };
 969
 970 /* FIXME: Verfiy below entries when more information is available for this SKU.
 971  */
 972 static const struct gen_device_info gen_device_info_ehl_4x4 = {
 973    GEN11_FEATURES(1, 1, subslices(4), 4),
 974    .is_elkhartlake = true,
 975    .urb = {
 976       .size = 512,
 977       .min_entries = {
 978          [MESA_SHADER_VERTEX]    = 64,
 979          [MESA_SHADER_TESS_EVAL] = 34,
 980       },
 981       .max_entries = {
 982          [MESA_SHADER_VERTEX]    = 2384,
 983          [MESA_SHADER_TESS_CTRL] = 1032,
 984          [MESA_SHADER_TESS_EVAL] = 2384,
 985          [MESA_SHADER_GEOMETRY]  = 1032,
 986       },
 987    },
 988    .disable_ccs_repack = true,
 989    .num_eu_per_subslice = 4,
 990    .simulator_id = 28,
 991 };
 992
 993 /* FIXME: Verfiy below entries when more information is available for this SKU.
 994  */
 995 static const struct gen_device_info gen_device_info_ehl_2x4 = {
 996    GEN11_FEATURES(1, 1, subslices(2), 4),
 997    .is_elkhartlake = true,
 998    .urb = {
 999       .size = 512,
1000       .min_entries = {
1001          [MESA_SHADER_VERTEX]    = 64,
1002          [MESA_SHADER_TESS_EVAL] = 34,
1003       },
1004       .max_entries = {
1005          [MESA_SHADER_VERTEX]    = 2384,
1006          [MESA_SHADER_TESS_CTRL] = 1032,
1007          [MESA_SHADER_TESS_EVAL] = 2384,
1008          [MESA_SHADER_GEOMETRY]  = 1032,
1009       },
1010    },
1011    .disable_ccs_repack = true,
1012    .num_eu_per_subslice =4,
1013    .simulator_id = 28,
1014 };
1015
/* Per-stage URB entry limits for Gen12, meant to be expanded inside a
 * `.urb = { ... }` initializer.  Stages not listed stay zero-initialized.
 */
#define GEN12_URB_MIN_MAX_ENTRIES       \
   .max_entries = {                     \
      [MESA_SHADER_VERTEX]    = 3576,   \
      [MESA_SHADER_TESS_CTRL] = 1548,   \
      [MESA_SHADER_TESS_EVAL] = 3576,   \
      [MESA_SHADER_GEOMETRY]  = 1548,   \
   },                                   \
   .min_entries = {                     \
      [MESA_SHADER_VERTEX]    = 64,     \
      [MESA_SHADER_TESS_EVAL] = 34,     \
   }
1027
/* Gen12 hardware description shared by every Gen12 SKU: generation number,
 * feature flags, per-stage maximum thread counts and the URB entry limits.
 * The URB size is not set here.
 */
#define GEN12_HW_INFO                               \
   .gen = 12,                                       \
   .has_aux_map = true,                             \
   .has_pln = false,                                \
   .has_sample_with_hiz = false,                    \
   .max_cs_threads = 112, /* threads per DSS */     \
   .max_gs_threads = 336,                           \
   .max_tcs_threads = 336,                          \
   .max_tes_threads = 546,                          \
   .max_vs_threads = 546,                           \
   .urb = {                                         \
      GEN12_URB_MIN_MAX_ENTRIES,                    \
   }
1041
/* Common initializer list for Gen12 SKUs.  Starts from GEN8_FEATURES and
 * GEN12_HW_INFO, then fills in the GT level, slice count, per-slice
 * dual-subslice counts and L3 bank count given by the caller.
 *
 * .urb.size has to come after GEN12_HW_INFO: that macro initializes the whole
 * .urb member, and a later `.urb = {...}` would wipe the size again.  GT1
 * parts get a URB size of 512, everything else 1024.
 */
#define GEN12_FEATURES(_gt, _slices, _dual_subslices, _l3)      \
   GEN8_FEATURES,                                               \
   GEN12_HW_INFO,                                               \
   .urb.size = (_gt) == 1 ? 512 : 1024,                         \
   .gt = _gt,                                                   \
   .num_slices = _slices,                                       \
   .num_subslices = _dual_subslices,                            \
   .num_eu_per_subslice = 16,                                   \
   .l3_banks = _l3,                                             \
   .simulator_id = 22,                                          \
   .has_64bit_types = false,                                    \
   .has_integer_dword_mul = false
1052
/* Wraps the per-slice dual-subslice counts into a brace-enclosed initializer
 * list, so the list can be passed as a single macro argument.
 */
#define dual_subslices(...) { __VA_ARGS__, }
1054
1055 static const struct gen_device_info gen_device_info_tgl_1x2x16 = {
1056    GEN12_FEATURES(1, 1, dual_subslices(2), 8),
1057 };
1058
1059 static const struct gen_device_info gen_device_info_tgl_1x6x16 = {
1060    GEN12_FEATURES(2, 1, dual_subslices(6), 8),
1061 };
1062
1063 static void
1064 gen_device_info_set_eu_mask(struct gen_device_info *devinfo,
1065                             unsigned slice,
1066                             unsigned subslice,
1067                             unsigned eu_mask)
1068 {
1069    unsigned subslice_offset = slice * devinfo->eu_slice_stride +
1070       subslice * devinfo->eu_subslice_stride;
1071
1072    for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) {
1073       devinfo->eu_masks[subslice_offset + b_eu] =
1074          (((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff;
1075    }
1076 }
1077
1078 /* Generate slice/subslice/eu masks from number of
1079  * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info
1080  * structure.
1081  *
1082  * These can be overridden with values reported by the kernel either from
1083  * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+
1084  * through the i915 query uapi.
1085  */
1086 static void
1087 fill_masks(struct gen_device_info *devinfo)
1088 {
1089    devinfo->slice_masks = (1U << devinfo->num_slices) - 1;
1090
1091    /* Subslice masks */
1092    unsigned max_subslices = 0;
1093    for (int s = 0; s < devinfo->num_slices; s++)
1094       max_subslices = MAX2(devinfo->num_subslices[s], max_subslices);
1095    devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
1096
1097    for (int s = 0; s < devinfo->num_slices; s++) {
1098       devinfo->subslice_masks[s * devinfo->subslice_slice_stride] =
1099          (1U << devinfo->num_subslices[s]) - 1;
1100    }
1101
1102    /* EU masks */
1103    devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);
1104    devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride;
1105
1106    for (int s = 0; s < devinfo->num_slices; s++) {
1107       for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) {
1108          gen_device_info_set_eu_mask(devinfo, s, ss,
1109                                      (1U << devinfo->num_eu_per_subslice) - 1);
1110       }
1111    }
1112 }
1113
1114 static void
1115 reset_masks(struct gen_device_info *devinfo)
1116 {
1117    devinfo->subslice_slice_stride = 0;
1118    devinfo->eu_subslice_stride = 0;
1119    devinfo->eu_slice_stride = 0;
1120
1121    devinfo->num_slices = 0;
1122    devinfo->num_eu_per_subslice = 0;
1123    memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1124
1125    memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1126    memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1127    memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1128    memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1129 }
1130
1131 static void
1132 update_from_topology(struct gen_device_info *devinfo,
1133                      const struct drm_i915_query_topology_info *topology)
1134 {
1135    reset_masks(devinfo);
1136
1137    devinfo->subslice_slice_stride = topology->subslice_stride;
1138
1139    devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8);
1140    devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride;
1141
1142    assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));
1143    memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));
1144    devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1145
1146    uint32_t subslice_mask_len =
1147       topology->max_slices * topology->subslice_stride;
1148    assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len);
1149    memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset],
1150           subslice_mask_len);
1151
1152    uint32_t n_subslices = 0;
1153    for (int s = 0; s < topology->max_slices; s++) {
1154       if ((devinfo->slice_masks & (1 << s)) == 0)
1155          continue;
1156
1157       for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1158          devinfo->num_subslices[s] +=
1159             __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1160       }
1161       n_subslices += devinfo->num_subslices[s];
1162    }
1163    assert(n_subslices > 0);
1164
1165    if (devinfo->gen == 11) {
1166       /* On ICL we only have one slice */
1167       assert(devinfo->slice_masks == 1);
1168
1169       /* Count the number of subslices on each pixel pipe. Assume that
1170        * subslices 0-3 are on pixel pipe 0, and 4-7 are on pixel pipe 1.
1171        */
1172       unsigned subslices = devinfo->subslice_masks[0];
1173       unsigned ss = 0;
1174       while (subslices > 0) {
1175          if (subslices & 1)
1176             devinfo->ppipe_subslices[ss >= 4 ? 1 : 0] += 1;
1177          subslices >>= 1;
1178          ss++;
1179       }
1180    }
1181
1182    uint32_t eu_mask_len =
1183       topology->eu_stride * topology->max_subslices * topology->max_slices;
1184    assert(sizeof(devinfo->eu_masks) >= eu_mask_len);
1185    memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len);
1186
1187    uint32_t n_eus = 0;
1188    for (int b = 0; b < eu_mask_len; b++)
1189       n_eus += __builtin_popcount(devinfo->eu_masks[b]);
1190
1191    devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1192 }
1193
1194 static bool
1195 update_from_masks(struct gen_device_info *devinfo, uint32_t slice_mask,
1196                   uint32_t subslice_mask, uint32_t n_eus)
1197 {
1198    struct drm_i915_query_topology_info *topology;
1199
1200    assert((slice_mask & 0xff) == slice_mask);
1201
1202    size_t data_length = 100;
1203
1204    topology = calloc(1, sizeof(*topology) + data_length);
1205    if (!topology)
1206       return false;
1207
1208    topology->max_slices = util_last_bit(slice_mask);
1209    topology->max_subslices = util_last_bit(subslice_mask);
1210
1211    topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8);
1212    topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8);
1213
1214    uint32_t n_subslices = __builtin_popcount(slice_mask) *
1215       __builtin_popcount(subslice_mask);
1216    uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1217    uint32_t eu_mask = (1U << num_eu_per_subslice) - 1;
1218
1219    topology->eu_offset = topology->subslice_offset +
1220       DIV_ROUND_UP(topology->max_subslices, 8);
1221    topology->eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8);
1222
1223    /* Set slice mask in topology */
1224    for (int b = 0; b < topology->subslice_offset; b++)
1225       topology->data[b] = (slice_mask >> (b * 8)) & 0xff;
1226
1227    for (int s = 0; s < topology->max_slices; s++) {
1228
1229       /* Set subslice mask in topology */
1230       for (int b = 0; b < topology->subslice_stride; b++) {
1231          int subslice_offset = topology->subslice_offset +
1232             s * topology->subslice_stride + b;
1233
1234          topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff;
1235       }
1236
1237       /* Set eu mask in topology */
1238       for (int ss = 0; ss < topology->max_subslices; ss++) {
1239          for (int b = 0; b < topology->eu_stride; b++) {
1240             int eu_offset = topology->eu_offset +
1241                (s * topology->max_subslices + ss) * topology->eu_stride + b;
1242
1243             topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff;
1244          }
1245       }
1246    }
1247
1248    update_from_topology(devinfo, topology);
1249    free(topology);
1250
1251    return true;
1252 }
1253
1254 static bool
1255 getparam(int fd, uint32_t param, int *value)
1256 {
1257    int tmp;
1258
1259    struct drm_i915_getparam gp = {
1260       .param = param,
1261       .value = &tmp,
1262    };
1263
1264    int ret = gen_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
1265    if (ret != 0)
1266       return false;
1267
1268    *value = tmp;
1269    return true;
1270 }
1271
1272 bool
1273 gen_get_device_info_from_pci_id(int pci_id,
1274                                 struct gen_device_info *devinfo)
1275 {
1276    switch (pci_id) {
1277 #undef CHIPSET
1278 #define CHIPSET(id, family, name) \
1279       case id: *devinfo = gen_device_info_##family; break;
1280 #include "pci_ids/i965_pci_ids.h"
1281 #include "pci_ids/iris_pci_ids.h"
1282    default:
1283       fprintf(stderr, "Driver does not support the 0x%x PCI ID.\n", pci_id);
1284       return false;
1285    }
1286
1287    fill_masks(devinfo);
1288
1289    /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
1290     *
1291     * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
1292     *  allocate scratch space enough so that each slice has 4 slices allowed."
1293     *
1294     * The equivalent internal documentation says that this programming note
1295     * applies to all Gen9+ platforms.
1296     *
1297     * The hardware typically calculates the scratch space pointer by taking
1298     * the base address, and adding per-thread-scratch-space * thread ID.
1299     * Extra padding can be necessary depending how the thread IDs are
1300     * calculated for a particular shader stage.
1301     */
1302
1303    switch(devinfo->gen) {
1304    case 9:
1305    case 10:
1306       devinfo->max_wm_threads = 64 /* threads-per-PSD */
1307                               * devinfo->num_slices
1308                               * 4; /* effective subslices per slice */
1309       break;
1310    case 11:
1311    case 12:
1312       devinfo->max_wm_threads = 128 /* threads-per-PSD */
1313                               * devinfo->num_slices
1314                               * 8; /* subslices per slice */
1315       break;
1316    default:
1317       assert(devinfo->gen < 9);
1318       break;
1319    }
1320
1321    assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
1322
1323    devinfo->chipset_id = pci_id;
1324    return true;
1325 }
1326
1327 const char *
1328 gen_get_device_name(int devid)
1329 {
1330    switch (devid) {
1331 #undef CHIPSET
1332 #define CHIPSET(id, family, name) case id: return name;
1333 #include "pci_ids/i965_pci_ids.h"
1334 #include "pci_ids/iris_pci_ids.h"
1335    default:
1336       return NULL;
1337    }
1338 }
1339
1340 /**
1341  * for gen8/gen9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology
1342  * (kernel 4.13+)
1343  */
1344 static bool
1345 getparam_topology(struct gen_device_info *devinfo, int fd)
1346 {
1347    int slice_mask = 0;
1348    if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask))
1349       return false;
1350
1351    int n_eus;
1352    if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus))
1353       return false;
1354
1355    int subslice_mask = 0;
1356    if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask))
1357       return false;
1358
1359    return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus);
1360 }
1361
1362 /**
1363  * preferred API for updating the topology in devinfo (kernel 4.17+)
1364  */
1365 static bool
1366 query_topology(struct gen_device_info *devinfo, int fd)
1367 {
1368    struct drm_i915_query_item item = {
1369       .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
1370    };
1371    struct drm_i915_query query = {
1372       .num_items = 1,
1373       .items_ptr = (uintptr_t) &item,
1374    };
1375
1376    if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query))
1377       return false;
1378
1379    if (item.length < 0)
1380       return false;
1381
1382    struct drm_i915_query_topology_info *topo_info =
1383       (struct drm_i915_query_topology_info *) calloc(1, item.length);
1384    item.data_ptr = (uintptr_t) topo_info;
1385
1386    if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query) ||
1387        item.length <= 0)
1388       return false;
1389
1390    update_from_topology(devinfo, topo_info);
1391
1392    free(topo_info);
1393
1394    return true;
1395
1396 }
1397
1398 bool
1399 gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
1400 {
1401    int devid = get_pci_device_id_override();
1402    if (devid > 0) {
1403       if (!gen_get_device_info_from_pci_id(devid, devinfo))
1404          return false;
1405       devinfo->no_hw = true;
1406    } else {
1407       /* query the device id */
1408       if (!getparam(fd, I915_PARAM_CHIPSET_ID, &devid))
1409          return false;
1410       if (!gen_get_device_info_from_pci_id(devid, devinfo))
1411          return false;
1412       devinfo->no_hw = false;
1413    }
1414
1415    /* remaining initializion queries the kernel for device info */
1416    if (devinfo->no_hw)
1417       return true;
1418
1419    int timestamp_frequency;
1420    if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY,
1421                 &timestamp_frequency))
1422       devinfo->timestamp_frequency = timestamp_frequency;
1423    else if (devinfo->gen >= 10)
1424       /* gen10 and later requires the timestamp_frequency to be updated */
1425       return false;
1426
1427    if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision))
1428        return false;
1429
1430    if (!query_topology(devinfo, fd)) {
1431       if (devinfo->gen >= 10) {
1432          /* topology uAPI required for CNL+ (kernel 4.17+) */
1433          return false;
1434       }
1435
1436       /* else use the kernel 4.13+ api for gen8+.  For older kernels, topology
1437        * will be wrong, affecting GPU metrics. In this case, fail silently.
1438        */
1439       getparam_topology(devinfo, fd);
1440    }
1441
1442    return true;
1443 }