src/intel/dev/gen_device_info.c

   1 /*
   2  * Copyright © 2013 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include <assert.h>
  25 #include <stdbool.h>
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <unistd.h>
  30 #include "gen_device_info.h"
  31 #include "compiler/shader_enums.h"
  32 #include "intel/common/gen_gem.h"
  33 #include "util/bitscan.h"
  34 #include "util/macros.h"
  35
  36 #include "drm-uapi/i915_drm.h"
  37
  38 /**
  39  * Get the PCI ID for the device name.
  40  *
  41  * Returns -1 if the device is not known.
  42  */
  43 int
  44 gen_device_name_to_pci_device_id(const char *name)
  45 {
  46    static const struct {
  47       const char *name;
  48       int pci_id;
  49    } name_map[] = {
  50       { "brw", 0x2a02 },
  51       { "g4x", 0x2a42 },
  52       { "ilk", 0x0042 },
  53       { "snb", 0x0126 },
  54       { "ivb", 0x016a },
  55       { "hsw", 0x0d2e },
  56       { "byt", 0x0f33 },
  57       { "bdw", 0x162e },
  58       { "chv", 0x22B3 },
  59       { "skl", 0x1912 },
  60       { "bxt", 0x5A85 },
  61       { "kbl", 0x5912 },
  62       { "aml", 0x591C },
  63       { "glk", 0x3185 },
  64       { "cfl", 0x3E9B },
  65       { "whl", 0x3EA1 },
  66       { "cml", 0x9b41 },
  67       { "cnl", 0x5a52 },
  68       { "icl", 0x8a52 },
  69       { "ehl", 0x4500 },
  70       { "jsl", 0x4E71 },
  71       { "tgl", 0x9a49 },
  72    };
  73
  74    for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) {
  75       if (!strcmp(name_map[i].name, name))
  76          return name_map[i].pci_id;
  77    }
  78
  79    return -1;
  80 }
  81
  82 /**
  83  * Get the overridden PCI ID for the device. This is set with the
  84  * INTEL_DEVID_OVERRIDE environment variable.
  85  *
  86  * Returns -1 if the override is not set.
  87  */
  88 static int
  89 get_pci_device_id_override(void)
  90 {
  91    if (geteuid() == getuid()) {
  92       const char *devid_override = getenv("INTEL_DEVID_OVERRIDE");
  93       if (devid_override) {
  94          const int id = gen_device_name_to_pci_device_id(devid_override);
  95          return id >= 0 ? id : strtol(devid_override, NULL, 0);
  96       }
  97    }
  98
  99    return -1;
 100 }
 101
 102 static const struct gen_device_info gen_device_info_i965 = {
 103    .gen = 4,
 104    .has_negative_rhw_bug = true,
 105    .num_slices = 1,
 106    .num_subslices = { 1, },
 107    .num_eu_per_subslice = 8,
 108    .num_thread_per_eu = 4,
 109    .max_vs_threads = 16,
 110    .max_gs_threads = 2,
 111    .max_wm_threads = 8 * 4,
 112    .urb = {
 113       .size = 256,
 114    },
 115    .timestamp_frequency = 12500000,
 116    .simulator_id = -1,
 117 };
 118
 119 static const struct gen_device_info gen_device_info_g4x = {
 120    .gen = 4,
 121    .has_pln = true,
 122    .has_compr4 = true,
 123    .has_surface_tile_offset = true,
 124    .is_g4x = true,
 125    .num_slices = 1,
 126    .num_subslices = { 1, },
 127    .num_eu_per_subslice = 10,
 128    .num_thread_per_eu = 5,
 129    .max_vs_threads = 32,
 130    .max_gs_threads = 2,
 131    .max_wm_threads = 10 * 5,
 132    .urb = {
 133       .size = 384,
 134    },
 135    .timestamp_frequency = 12500000,
 136    .simulator_id = -1,
 137 };
 138
 139 static const struct gen_device_info gen_device_info_ilk = {
 140    .gen = 5,
 141    .has_pln = true,
 142    .has_compr4 = true,
 143    .has_surface_tile_offset = true,
 144    .num_slices = 1,
 145    .num_subslices = { 1, },
 146    .num_eu_per_subslice = 12,
 147    .num_thread_per_eu = 6,
 148    .max_vs_threads = 72,
 149    .max_gs_threads = 32,
 150    .max_wm_threads = 12 * 6,
 151    .urb = {
 152       .size = 1024,
 153    },
 154    .timestamp_frequency = 12500000,
 155    .simulator_id = -1,
 156 };
 157
 158 static const struct gen_device_info gen_device_info_snb_gt1 = {
 159    .gen = 6,
 160    .gt = 1,
 161    .has_hiz_and_separate_stencil = true,
 162    .has_llc = true,
 163    .has_pln = true,
 164    .has_surface_tile_offset = true,
 165    .needs_unlit_centroid_workaround = true,
 166    .num_slices = 1,
 167    .num_subslices = { 1, },
 168    .num_eu_per_subslice = 6,
 169    .num_thread_per_eu = 6, /* Not confirmed */
 170    .max_vs_threads = 24,
 171    .max_gs_threads = 21, /* conservative; 24 if rendering disabled. */
 172    .max_wm_threads = 40,
 173    .urb = {
 174       .size = 32,
 175       .min_entries = {
 176          [MESA_SHADER_VERTEX]   = 24,
 177       },
 178       .max_entries = {
 179          [MESA_SHADER_VERTEX]   = 256,
 180          [MESA_SHADER_GEOMETRY] = 256,
 181       },
 182    },
 183    .timestamp_frequency = 12500000,
 184    .simulator_id = -1,
 185 };
 186
 187 static const struct gen_device_info gen_device_info_snb_gt2 = {
 188    .gen = 6,
 189    .gt = 2,
 190    .has_hiz_and_separate_stencil = true,
 191    .has_llc = true,
 192    .has_pln = true,
 193    .has_surface_tile_offset = true,
 194    .needs_unlit_centroid_workaround = true,
 195    .num_slices = 1,
 196    .num_subslices = { 1, },
 197    .num_eu_per_subslice = 12,
 198    .num_thread_per_eu = 6, /* Not confirmed */
 199    .max_vs_threads = 60,
 200    .max_gs_threads = 60,
 201    .max_wm_threads = 80,
 202    .urb = {
 203       .size = 64,
 204       .min_entries = {
 205          [MESA_SHADER_VERTEX]   = 24,
 206       },
 207       .max_entries = {
 208          [MESA_SHADER_VERTEX]   = 256,
 209          [MESA_SHADER_GEOMETRY] = 256,
 210       },
 211    },
 212    .timestamp_frequency = 12500000,
 213    .simulator_id = -1,
 214 };
 215
 216 #define GEN7_FEATURES                               \
 217    .gen = 7,                                        \
 218    .has_hiz_and_separate_stencil = true,            \
 219    .must_use_separate_stencil = true,               \
 220    .has_llc = true,                                 \
 221    .has_pln = true,                                 \
 222    .has_64bit_float = true,                         \
 223    .has_surface_tile_offset = true,                 \
 224    .timestamp_frequency = 12500000
 225
 226 static const struct gen_device_info gen_device_info_ivb_gt1 = {
 227    GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
 228    .num_slices = 1,
 229    .num_subslices = { 1, },
 230    .num_eu_per_subslice = 6,
 231    .num_thread_per_eu = 6,
 232    .l3_banks = 2,
 233    .max_vs_threads = 36,
 234    .max_tcs_threads = 36,
 235    .max_tes_threads = 36,
 236    .max_gs_threads = 36,
 237    .max_wm_threads = 48,
 238    .max_cs_threads = 36,
 239    .urb = {
 240       .size = 128,
 241       .min_entries = {
 242          [MESA_SHADER_VERTEX]    = 32,
 243          [MESA_SHADER_TESS_EVAL] = 10,
 244       },
 245       .max_entries = {
 246          [MESA_SHADER_VERTEX]    = 512,
 247          [MESA_SHADER_TESS_CTRL] = 32,
 248          [MESA_SHADER_TESS_EVAL] = 288,
 249          [MESA_SHADER_GEOMETRY]  = 192,
 250       },
 251    },
 252    .simulator_id = 7,
 253 };
 254
 255 static const struct gen_device_info gen_device_info_ivb_gt2 = {
 256    GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
 257    .num_slices = 1,
 258    .num_subslices = { 1, },
 259    .num_eu_per_subslice = 12,
 260    .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of
 261                             * @max_wm_threads ... */
 262    .l3_banks = 4,
 263    .max_vs_threads = 128,
 264    .max_tcs_threads = 128,
 265    .max_tes_threads = 128,
 266    .max_gs_threads = 128,
 267    .max_wm_threads = 172,
 268    .max_cs_threads = 64,
 269    .urb = {
 270       .size = 256,
 271       .min_entries = {
 272          [MESA_SHADER_VERTEX]    = 32,
 273          [MESA_SHADER_TESS_EVAL] = 10,
 274       },
 275       .max_entries = {
 276          [MESA_SHADER_VERTEX]    = 704,
 277          [MESA_SHADER_TESS_CTRL] = 64,
 278          [MESA_SHADER_TESS_EVAL] = 448,
 279          [MESA_SHADER_GEOMETRY]  = 320,
 280       },
 281    },
 282    .simulator_id = 7,
 283 };
 284
 285 static const struct gen_device_info gen_device_info_byt = {
 286    GEN7_FEATURES, .is_baytrail = true, .gt = 1,
 287    .num_slices = 1,
 288    .num_subslices = { 1, },
 289    .num_eu_per_subslice = 4,
 290    .num_thread_per_eu = 8,
 291    .l3_banks = 1,
 292    .has_llc = false,
 293    .max_vs_threads = 36,
 294    .max_tcs_threads = 36,
 295    .max_tes_threads = 36,
 296    .max_gs_threads = 36,
 297    .max_wm_threads = 48,
 298    .max_cs_threads = 32,
 299    .urb = {
 300       .size = 128,
 301       .min_entries = {
 302          [MESA_SHADER_VERTEX]    = 32,
 303          [MESA_SHADER_TESS_EVAL] = 10,
 304       },
 305       .max_entries = {
 306          [MESA_SHADER_VERTEX]    = 512,
 307          [MESA_SHADER_TESS_CTRL] = 32,
 308          [MESA_SHADER_TESS_EVAL] = 288,
 309          [MESA_SHADER_GEOMETRY]  = 192,
 310       },
 311    },
 312    .simulator_id = 10,
 313 };
 314
 315 #define HSW_FEATURES             \
 316    GEN7_FEATURES,                \
 317    .is_haswell = true,           \
 318    .supports_simd16_3src = true, \
 319    .has_resource_streamer = true
 320
 321 static const struct gen_device_info gen_device_info_hsw_gt1 = {
 322    HSW_FEATURES, .gt = 1,
 323    .num_slices = 1,
 324    .num_subslices = { 1, },
 325    .num_eu_per_subslice = 10,
 326    .num_thread_per_eu = 7,
 327    .l3_banks = 2,
 328    .max_vs_threads = 70,
 329    .max_tcs_threads = 70,
 330    .max_tes_threads = 70,
 331    .max_gs_threads = 70,
 332    .max_wm_threads = 102,
 333    .max_cs_threads = 70,
 334    .urb = {
 335       .size = 128,
 336       .min_entries = {
 337          [MESA_SHADER_VERTEX]    = 32,
 338          [MESA_SHADER_TESS_EVAL] = 10,
 339       },
 340       .max_entries = {
 341          [MESA_SHADER_VERTEX]    = 640,
 342          [MESA_SHADER_TESS_CTRL] = 64,
 343          [MESA_SHADER_TESS_EVAL] = 384,
 344          [MESA_SHADER_GEOMETRY]  = 256,
 345       },
 346    },
 347    .simulator_id = 9,
 348 };
 349
 350 static const struct gen_device_info gen_device_info_hsw_gt2 = {
 351    HSW_FEATURES, .gt = 2,
 352    .num_slices = 1,
 353    .num_subslices = { 2, },
 354    .num_eu_per_subslice = 10,
 355    .num_thread_per_eu = 7,
 356    .l3_banks = 4,
 357    .max_vs_threads = 280,
 358    .max_tcs_threads = 256,
 359    .max_tes_threads = 280,
 360    .max_gs_threads = 256,
 361    .max_wm_threads = 204,
 362    .max_cs_threads = 70,
 363    .urb = {
 364       .size = 256,
 365       .min_entries = {
 366          [MESA_SHADER_VERTEX]    = 64,
 367          [MESA_SHADER_TESS_EVAL] = 10,
 368       },
 369       .max_entries = {
 370          [MESA_SHADER_VERTEX]    = 1664,
 371          [MESA_SHADER_TESS_CTRL] = 128,
 372          [MESA_SHADER_TESS_EVAL] = 960,
 373          [MESA_SHADER_GEOMETRY]  = 640,
 374       },
 375    },
 376    .simulator_id = 9,
 377 };
 378
 379 static const struct gen_device_info gen_device_info_hsw_gt3 = {
 380    HSW_FEATURES, .gt = 3,
 381    .num_slices = 2,
 382    .num_subslices = { 2, },
 383    .num_eu_per_subslice = 10,
 384    .num_thread_per_eu = 7,
 385    .l3_banks = 8,
 386    .max_vs_threads = 280,
 387    .max_tcs_threads = 256,
 388    .max_tes_threads = 280,
 389    .max_gs_threads = 256,
 390    .max_wm_threads = 408,
 391    .max_cs_threads = 70,
 392    .urb = {
 393       .size = 512,
 394       .min_entries = {
 395          [MESA_SHADER_VERTEX]    = 64,
 396          [MESA_SHADER_TESS_EVAL] = 10,
 397       },
 398       .max_entries = {
 399          [MESA_SHADER_VERTEX]    = 1664,
 400          [MESA_SHADER_TESS_CTRL] = 128,
 401          [MESA_SHADER_TESS_EVAL] = 960,
 402          [MESA_SHADER_GEOMETRY]  = 640,
 403       },
 404    },
 405    .simulator_id = 9,
 406 };
 407
 408 /* It's unclear how well supported sampling from the hiz buffer is on GEN8,
 409  * so keep things conservative for now and set has_sample_with_hiz = false.
 410  */
 411 #define GEN8_FEATURES                               \
 412    .gen = 8,                                        \
 413    .has_hiz_and_separate_stencil = true,            \
 414    .has_resource_streamer = true,                   \
 415    .must_use_separate_stencil = true,               \
 416    .has_llc = true,                                 \
 417    .has_sample_with_hiz = false,                    \
 418    .has_pln = true,                                 \
 419    .has_integer_dword_mul = true,                   \
 420    .has_64bit_float = true,                         \
 421    .has_64bit_int = true,                           \
 422    .supports_simd16_3src = true,                    \
 423    .has_surface_tile_offset = true,                 \
 424    .num_thread_per_eu = 7,                          \
 425    .max_vs_threads = 504,                           \
 426    .max_tcs_threads = 504,                          \
 427    .max_tes_threads = 504,                          \
 428    .max_gs_threads = 504,                           \
 429    .max_wm_threads = 384,                           \
 430    .timestamp_frequency = 12500000
 431
 432 static const struct gen_device_info gen_device_info_bdw_gt1 = {
 433    GEN8_FEATURES, .gt = 1,
 434    .is_broadwell = true,
 435    .num_slices = 1,
 436    .num_subslices = { 2, },
 437    .num_eu_per_subslice = 8,
 438    .l3_banks = 2,
 439    .max_cs_threads = 42,
 440    .urb = {
 441       .size = 192,
 442       .min_entries = {
 443          [MESA_SHADER_VERTEX]    = 64,
 444          [MESA_SHADER_TESS_EVAL] = 34,
 445       },
 446       .max_entries = {
 447          [MESA_SHADER_VERTEX]    = 2560,
 448          [MESA_SHADER_TESS_CTRL] = 504,
 449          [MESA_SHADER_TESS_EVAL] = 1536,
 450          /* Reduced from 960, seems to be similar to the bug on Gen9 GT1. */
 451          [MESA_SHADER_GEOMETRY]  = 690,
 452       },
 453    },
 454    .simulator_id = 11,
 455 };
 456
 457 static const struct gen_device_info gen_device_info_bdw_gt2 = {
 458    GEN8_FEATURES, .gt = 2,
 459    .is_broadwell = true,
 460    .num_slices = 1,
 461    .num_subslices = { 3, },
 462    .num_eu_per_subslice = 8,
 463    .l3_banks = 4,
 464    .max_cs_threads = 56,
 465    .urb = {
 466       .size = 384,
 467       .min_entries = {
 468          [MESA_SHADER_VERTEX]    = 64,
 469          [MESA_SHADER_TESS_EVAL] = 34,
 470       },
 471       .max_entries = {
 472          [MESA_SHADER_VERTEX]    = 2560,
 473          [MESA_SHADER_TESS_CTRL] = 504,
 474          [MESA_SHADER_TESS_EVAL] = 1536,
 475          [MESA_SHADER_GEOMETRY]  = 960,
 476       },
 477    },
 478    .simulator_id = 11,
 479 };
 480
 481 static const struct gen_device_info gen_device_info_bdw_gt3 = {
 482    GEN8_FEATURES, .gt = 3,
 483    .is_broadwell = true,
 484    .num_slices = 2,
 485    .num_subslices = { 3, 3, },
 486    .num_eu_per_subslice = 8,
 487    .l3_banks = 8,
 488    .max_cs_threads = 56,
 489    .urb = {
 490       .size = 384,
 491       .min_entries = {
 492          [MESA_SHADER_VERTEX]    = 64,
 493          [MESA_SHADER_TESS_EVAL] = 34,
 494       },
 495       .max_entries = {
 496          [MESA_SHADER_VERTEX]    = 2560,
 497          [MESA_SHADER_TESS_CTRL] = 504,
 498          [MESA_SHADER_TESS_EVAL] = 1536,
 499          [MESA_SHADER_GEOMETRY]  = 960,
 500       },
 501    },
 502    .simulator_id = 11,
 503 };
 504
 505 static const struct gen_device_info gen_device_info_chv = {
 506    GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
 507    .has_llc = false,
 508    .has_integer_dword_mul = false,
 509    .num_slices = 1,
 510    .num_subslices = { 2, },
 511    .num_eu_per_subslice = 8,
 512    .l3_banks = 2,
 513    .max_vs_threads = 80,
 514    .max_tcs_threads = 80,
 515    .max_tes_threads = 80,
 516    .max_gs_threads = 80,
 517    .max_wm_threads = 128,
 518    .max_cs_threads = 6 * 7,
 519    .urb = {
 520       .size = 192,
 521       .min_entries = {
 522          [MESA_SHADER_VERTEX]    = 34,
 523          [MESA_SHADER_TESS_EVAL] = 34,
 524       },
 525       .max_entries = {
 526          [MESA_SHADER_VERTEX]    = 640,
 527          [MESA_SHADER_TESS_CTRL] = 80,
 528          [MESA_SHADER_TESS_EVAL] = 384,
 529          [MESA_SHADER_GEOMETRY]  = 256,
 530       },
 531    },
 532    .simulator_id = 13,
 533 };
 534
 535 #define GEN9_HW_INFO                                \
 536    .gen = 9,                                        \
 537    .max_vs_threads = 336,                           \
 538    .max_gs_threads = 336,                           \
 539    .max_tcs_threads = 336,                          \
 540    .max_tes_threads = 336,                          \
 541    .max_cs_threads = 56,                            \
 542    .timestamp_frequency = 12000000,                 \
 543    .urb = {                                         \
 544       .size = 384,                                  \
 545       .min_entries = {                              \
 546          [MESA_SHADER_VERTEX]    = 64,              \
 547          [MESA_SHADER_TESS_EVAL] = 34,              \
 548       },                                            \
 549       .max_entries = {                              \
 550          [MESA_SHADER_VERTEX]    = 1856,            \
 551          [MESA_SHADER_TESS_CTRL] = 672,             \
 552          [MESA_SHADER_TESS_EVAL] = 1120,            \
 553          [MESA_SHADER_GEOMETRY]  = 640,             \
 554       },                                            \
 555    }
 556
 557 #define GEN9_LP_FEATURES                           \
 558    GEN8_FEATURES,                                  \
 559    GEN9_HW_INFO,                                   \
 560    .has_integer_dword_mul = false,                 \
 561    .gt = 1,                                        \
 562    .has_llc = false,                               \
 563    .has_sample_with_hiz = true,                    \
 564    .num_slices = 1,                                \
 565    .num_thread_per_eu = 6,                         \
 566    .max_vs_threads = 112,                          \
 567    .max_tcs_threads = 112,                         \
 568    .max_tes_threads = 112,                         \
 569    .max_gs_threads = 112,                          \
 570    .max_cs_threads = 6 * 6,                        \
 571    .timestamp_frequency = 19200000,                \
 572    .urb = {                                        \
 573       .size = 192,                                 \
 574       .min_entries = {                             \
 575          [MESA_SHADER_VERTEX]    = 34,             \
 576          [MESA_SHADER_TESS_EVAL] = 34,             \
 577       },                                           \
 578       .max_entries = {                             \
 579          [MESA_SHADER_VERTEX]    = 704,            \
 580          [MESA_SHADER_TESS_CTRL] = 256,            \
 581          [MESA_SHADER_TESS_EVAL] = 416,            \
 582          [MESA_SHADER_GEOMETRY]  = 256,            \
 583       },                                           \
 584    }
 585
 586 #define GEN9_LP_FEATURES_3X6                       \
 587    GEN9_LP_FEATURES,                               \
 588    .num_subslices = { 3, },                        \
 589    .num_eu_per_subslice = 6
 590
 591 #define GEN9_LP_FEATURES_2X6                       \
 592    GEN9_LP_FEATURES,                               \
 593    .num_subslices = { 2, },                        \
 594    .num_eu_per_subslice = 6,                       \
 595    .max_vs_threads = 56,                           \
 596    .max_tcs_threads = 56,                          \
 597    .max_tes_threads = 56,                          \
 598    .max_gs_threads = 56,                           \
 599    .max_cs_threads = 6 * 6,                        \
 600    .urb = {                                        \
 601       .size = 128,                                 \
 602       .min_entries = {                             \
 603          [MESA_SHADER_VERTEX]    = 34,             \
 604          [MESA_SHADER_TESS_EVAL] = 34,             \
 605       },                                           \
 606       .max_entries = {                             \
 607          [MESA_SHADER_VERTEX]    = 352,            \
 608          [MESA_SHADER_TESS_CTRL] = 128,            \
 609          [MESA_SHADER_TESS_EVAL] = 208,            \
 610          [MESA_SHADER_GEOMETRY]  = 128,            \
 611       },                                           \
 612    }
 613
 614 #define GEN9_FEATURES                               \
 615    GEN8_FEATURES,                                   \
 616    GEN9_HW_INFO,                                    \
 617    .has_sample_with_hiz = true
 618
 619 static const struct gen_device_info gen_device_info_skl_gt1 = {
 620    GEN9_FEATURES, .gt = 1,
 621    .is_skylake = true,
 622    .num_slices = 1,
 623    .num_subslices = { 2, },
 624    .num_eu_per_subslice = 6,
 625    .l3_banks = 2,
 626    .urb.size = 192,
 627    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
 628     * leading to some vertices to go missing if we use too much URB.
 629     */
 630    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
 631    .simulator_id = 12,
 632 };
 633
 634 static const struct gen_device_info gen_device_info_skl_gt2 = {
 635    GEN9_FEATURES, .gt = 2,
 636    .is_skylake = true,
 637    .num_slices = 1,
 638    .num_subslices = { 3, },
 639    .num_eu_per_subslice = 8,
 640    .l3_banks = 4,
 641    .simulator_id = 12,
 642 };
 643
 644 static const struct gen_device_info gen_device_info_skl_gt3 = {
 645    GEN9_FEATURES, .gt = 3,
 646    .is_skylake = true,
 647    .num_slices = 2,
 648    .num_subslices = { 3, 3, },
 649    .num_eu_per_subslice = 8,
 650    .l3_banks = 8,
 651    .simulator_id = 12,
 652 };
 653
 654 static const struct gen_device_info gen_device_info_skl_gt4 = {
 655    GEN9_FEATURES, .gt = 4,
 656    .is_skylake = true,
 657    .num_slices = 3,
 658    .num_subslices = { 3, 3, 3, },
 659    .num_eu_per_subslice = 8,
 660    .l3_banks = 12,
 661    /* From the "L3 Allocation and Programming" documentation:
 662     *
 663     * "URB is limited to 1008KB due to programming restrictions.  This is not a
 664     * restriction of the L3 implementation, but of the FF and other clients.
 665     * Therefore, in a GT4 implementation it is possible for the programmed
 666     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
 667     * only 1008KB of this will be used."
 668     */
 669    .urb.size = 1008 / 3,
 670    .simulator_id = 12,
 671 };
 672
 673 static const struct gen_device_info gen_device_info_bxt = {
 674    GEN9_LP_FEATURES_3X6,
 675    .is_broxton = true,
 676    .l3_banks = 2,
 677    .simulator_id = 14,
 678 };
 679
 680 static const struct gen_device_info gen_device_info_bxt_2x6 = {
 681    GEN9_LP_FEATURES_2X6,
 682    .is_broxton = true,
 683    .l3_banks = 1,
 684    .simulator_id = 14,
 685 };
/*
 * Note: for all KBL SKUs, the PRM lists the GS entries under SKL rather than
 * SKL+, and there is no KBL-specific entry, so the default SKL (GEN9) GS
 * entries value is used.
 */
 690
 691 static const struct gen_device_info gen_device_info_kbl_gt1 = {
 692    GEN9_FEATURES,
 693    .is_kabylake = true,
 694    .gt = 1,
 695
 696    .max_cs_threads = 7 * 6,
 697    .urb.size = 192,
 698    .num_slices = 1,
 699    .num_subslices = { 2, },
 700    .num_eu_per_subslice = 6,
 701    .l3_banks = 2,
 702    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
 703     * leading to some vertices to go missing if we use too much URB.
 704     */
 705    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
 706    .simulator_id = 16,
 707 };
 708
 709 static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
 710    GEN9_FEATURES,
 711    .is_kabylake = true,
 712    .gt = 1,
 713
 714    .max_cs_threads = 7 * 6,
 715    .num_slices = 1,
 716    .num_subslices = { 3, },
 717    .num_eu_per_subslice = 6,
 718    .l3_banks = 4,
 719    .simulator_id = 16,
 720 };
 721
 722 static const struct gen_device_info gen_device_info_kbl_gt2 = {
 723    GEN9_FEATURES,
 724    .is_kabylake = true,
 725    .gt = 2,
 726
 727    .num_slices = 1,
 728    .num_subslices = { 3, },
 729    .num_eu_per_subslice = 8,
 730    .l3_banks = 4,
 731    .simulator_id = 16,
 732 };
 733
 734 static const struct gen_device_info gen_device_info_kbl_gt3 = {
 735    GEN9_FEATURES,
 736    .is_kabylake = true,
 737    .gt = 3,
 738
 739    .num_slices = 2,
 740    .num_subslices = { 3, 3, },
 741    .num_eu_per_subslice = 8,
 742    .l3_banks = 8,
 743    .simulator_id = 16,
 744 };
 745
 746 static const struct gen_device_info gen_device_info_kbl_gt4 = {
 747    GEN9_FEATURES,
 748    .is_kabylake = true,
 749    .gt = 4,
 750
 751    /*
 752     * From the "L3 Allocation and Programming" documentation:
 753     *
 754     * "URB is limited to 1008KB due to programming restrictions.  This
 755     *  is not a restriction of the L3 implementation, but of the FF and
 756     *  other clients.  Therefore, in a GT4 implementation it is
 757     *  possible for the programmed allocation of the L3 data array to
 758     *  provide 3*384KB=1152KB for URB, but only 1008KB of this
 759     *  will be used."
 760     */
 761    .urb.size = 1008 / 3,
 762    .num_slices = 3,
 763    .num_subslices = { 3, 3, 3, },
 764    .num_eu_per_subslice = 8,
 765    .l3_banks = 12,
 766    .simulator_id = 16,
 767 };
 768
 769 static const struct gen_device_info gen_device_info_glk = {
 770    GEN9_LP_FEATURES_3X6,
 771    .is_geminilake = true,
 772    .l3_banks = 2,
 773    .simulator_id = 17,
 774 };
 775
 776 static const struct gen_device_info gen_device_info_glk_2x6 = {
 777    GEN9_LP_FEATURES_2X6,
 778    .is_geminilake = true,
 779    .l3_banks = 2,
 780    .simulator_id = 17,
 781 };
 782
 783 static const struct gen_device_info gen_device_info_cfl_gt1 = {
 784    GEN9_FEATURES,
 785    .is_coffeelake = true,
 786    .gt = 1,
 787
 788    .num_slices = 1,
 789    .num_subslices = { 2, },
 790    .num_eu_per_subslice = 6,
 791    .l3_banks = 2,
 792    .urb.size = 192,
 793    /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions
 794     * leading to some vertices to go missing if we use too much URB.
 795     */
 796    .urb.max_entries[MESA_SHADER_VERTEX] = 928,
 797    .simulator_id = 24,
 798 };
 799 static const struct gen_device_info gen_device_info_cfl_gt2 = {
 800    GEN9_FEATURES,
 801    .is_coffeelake = true,
 802    .gt = 2,
 803
 804    .num_slices = 1,
 805    .num_subslices = { 3, },
 806    .num_eu_per_subslice = 8,
 807    .l3_banks = 4,
 808    .simulator_id = 24,
 809 };
 810
 811 static const struct gen_device_info gen_device_info_cfl_gt3 = {
 812    GEN9_FEATURES,
 813    .is_coffeelake = true,
 814    .gt = 3,
 815
 816    .num_slices = 2,
 817    .num_subslices = { 3, 3, },
 818    .num_eu_per_subslice = 8,
 819    .l3_banks = 8,
 820    .simulator_id = 24,
 821 };
 822
 823 #define GEN10_HW_INFO                               \
 824    .gen = 10,                                       \
 825    .num_thread_per_eu = 7,                          \
 826    .max_vs_threads = 728,                           \
 827    .max_gs_threads = 432,                           \
 828    .max_tcs_threads = 432,                          \
 829    .max_tes_threads = 624,                          \
 830    .max_cs_threads = 56,                            \
 831    .timestamp_frequency = 19200000,                 \
 832    .urb = {                                         \
 833       .size = 256,                                  \
 834       .min_entries = {                              \
 835          [MESA_SHADER_VERTEX]    = 64,              \
 836          [MESA_SHADER_TESS_EVAL] = 34,              \
 837       },                                            \
 838       .max_entries = {                              \
 839       [MESA_SHADER_VERTEX]       = 3936,            \
 840       [MESA_SHADER_TESS_CTRL]    = 896,             \
 841       [MESA_SHADER_TESS_EVAL]    = 2064,            \
 842       [MESA_SHADER_GEOMETRY]     = 832,             \
 843       },                                            \
 844    }
 845
 846 #define subslices(args...) { args, }
 847
 848 #define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \
 849    GEN8_FEATURES,                                   \
 850    GEN10_HW_INFO,                                   \
 851    .has_sample_with_hiz = true,                     \
 852    .gt = _gt,                                       \
 853    .num_slices = _slices,                           \
 854    .num_subslices = _subslices,                     \
 855    .num_eu_per_subslice = 8,                        \
 856    .l3_banks = _l3
 857
 858 static const struct gen_device_info gen_device_info_cnl_gt0_5 = {
 859    /* GT0.5 */
 860    GEN10_FEATURES(1, 1, subslices(2), 2),
 861    .is_cannonlake = true,
 862    .simulator_id = 15,
 863 };
 864
 865 static const struct gen_device_info gen_device_info_cnl_gt1 = {
 866    /* GT1 */
 867    GEN10_FEATURES(1, 1, subslices(3), 3),
 868    .is_cannonlake = true,
 869    .simulator_id = 15,
 870 };
 871
 872 static const struct gen_device_info gen_device_info_cnl_gt1_5 = {
 873    /* GT 1.5 */
 874    GEN10_FEATURES(1, 2, subslices(2, 2), 6),
 875    .is_cannonlake = true,
 876    .simulator_id = 15,
 877 };
 878
 879 static const struct gen_device_info gen_device_info_cnl_gt2 = {
 880    /* GT2 */
 881    GEN10_FEATURES(2, 2, subslices(3, 2), 6),
 882    .is_cannonlake = true,
 883    .simulator_id = 15,
 884 };
 885
 886 #define GEN11_HW_INFO                               \
 887    .gen = 11,                                       \
 888    .has_pln = false,                                \
 889    .max_vs_threads = 364,                           \
 890    .max_gs_threads = 224,                           \
 891    .max_tcs_threads = 224,                          \
 892    .max_tes_threads = 364,                          \
 893    .max_cs_threads = 56
 894
 895 #define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \
 896    GEN8_FEATURES,                                     \
 897    GEN11_HW_INFO,                                     \
 898    .has_64bit_float = false,                          \
 899    .has_64bit_int = false,                            \
 900    .has_integer_dword_mul = false,                    \
 901    .has_sample_with_hiz = false,                      \
 902    .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
 903    .num_subslices = _subslices,                       \
 904    .num_eu_per_subslice = 8
 905
 906 #define GEN11_URB_MIN_MAX_ENTRIES                     \
 907    .min_entries = {                                   \
 908       [MESA_SHADER_VERTEX]    = 64,                   \
 909       [MESA_SHADER_TESS_EVAL] = 34,                   \
 910    },                                                 \
 911    .max_entries = {                                   \
 912       [MESA_SHADER_VERTEX]    = 2384,                 \
 913       [MESA_SHADER_TESS_CTRL] = 1032,                 \
 914       [MESA_SHADER_TESS_EVAL] = 2384,                 \
 915       [MESA_SHADER_GEOMETRY]  = 1032,                 \
 916    }
 917
 918 static const struct gen_device_info gen_device_info_icl_gt2 = {
 919    GEN11_FEATURES(2, 1, subslices(8), 8),
 920    .urb = {
 921       .size = 1024,
 922       GEN11_URB_MIN_MAX_ENTRIES,
 923    },
 924    .simulator_id = 19,
 925 };
 926
 927 static const struct gen_device_info gen_device_info_icl_gt1_5 = {
 928    GEN11_FEATURES(1, 1, subslices(6), 6),
 929    .urb = {
 930       .size = 768,
 931       GEN11_URB_MIN_MAX_ENTRIES,
 932    },
 933    .simulator_id = 19,
 934 };
 935
 936 static const struct gen_device_info gen_device_info_icl_gt1 = {
 937    GEN11_FEATURES(1, 1, subslices(4), 6),
 938    .urb = {
 939       .size = 768,
 940       GEN11_URB_MIN_MAX_ENTRIES,
 941    },
 942    .simulator_id = 19,
 943 };
 944
 945 static const struct gen_device_info gen_device_info_icl_gt0_5 = {
 946    GEN11_FEATURES(1, 1, subslices(1), 6),
 947    .urb = {
 948       .size = 768,
 949       GEN11_URB_MIN_MAX_ENTRIES,
 950    },
 951    .simulator_id = 19,
 952 };
 953
 954 static const struct gen_device_info gen_device_info_ehl_7 = {
 955    GEN11_FEATURES(1, 1, subslices(4), 4),
 956    .is_elkhartlake = true,
 957    .urb = {
 958       .size = 512,
 959       .min_entries = {
 960          [MESA_SHADER_VERTEX]    = 64,
 961          [MESA_SHADER_TESS_EVAL] = 34,
 962       },
 963       .max_entries = {
 964          [MESA_SHADER_VERTEX]    = 2384,
 965          [MESA_SHADER_TESS_CTRL] = 1032,
 966          [MESA_SHADER_TESS_EVAL] = 2384,
 967          [MESA_SHADER_GEOMETRY]  = 1032,
 968       },
 969    },
 970    .disable_ccs_repack = true,
 971    .simulator_id = 28,
 972 };
 973
 974 static const struct gen_device_info gen_device_info_ehl_6 = {
 975    GEN11_FEATURES(1, 1, subslices(4), 4),
 976    .is_elkhartlake = true,
 977    .urb = {
 978       .size = 512,
 979       .min_entries = {
 980          [MESA_SHADER_VERTEX]    = 64,
 981          [MESA_SHADER_TESS_EVAL] = 34,
 982       },
 983       .max_entries = {
 984          [MESA_SHADER_VERTEX]    = 2384,
 985          [MESA_SHADER_TESS_CTRL] = 1032,
 986          [MESA_SHADER_TESS_EVAL] = 2384,
 987          [MESA_SHADER_GEOMETRY]  = 1032,
 988       },
 989    },
 990    .disable_ccs_repack = true,
 991    .num_eu_per_subslice = 6,
 992    .simulator_id = 28,
 993 };
 994
 995 static const struct gen_device_info gen_device_info_ehl_5 = {
 996    GEN11_FEATURES(1, 1, subslices(4), 4),
 997    .is_elkhartlake = true,
 998    .urb = {
 999       .size = 512,
1000       .min_entries = {
1001          [MESA_SHADER_VERTEX]    = 64,
1002          [MESA_SHADER_TESS_EVAL] = 34,
1003       },
1004       .max_entries = {
1005          [MESA_SHADER_VERTEX]    = 2384,
1006          [MESA_SHADER_TESS_CTRL] = 1032,
1007          [MESA_SHADER_TESS_EVAL] = 2384,
1008          [MESA_SHADER_GEOMETRY]  = 1032,
1009       },
1010    },
1011    .disable_ccs_repack = true,
1012    .num_eu_per_subslice = 4,
1013    .simulator_id = 28,
1014 };
1015
1016 static const struct gen_device_info gen_device_info_ehl_4 = {
1017    GEN11_FEATURES(1, 1, subslices(2), 4),
1018    .is_elkhartlake = true,
1019    .urb = {
1020       .size = 512,
1021       .min_entries = {
1022          [MESA_SHADER_VERTEX]    = 64,
1023          [MESA_SHADER_TESS_EVAL] = 34,
1024       },
1025       .max_entries = {
1026          [MESA_SHADER_VERTEX]    = 2384,
1027          [MESA_SHADER_TESS_CTRL] = 1032,
1028          [MESA_SHADER_TESS_EVAL] = 2384,
1029          [MESA_SHADER_GEOMETRY]  = 1032,
1030       },
1031    },
1032    .disable_ccs_repack = true,
1033    .num_eu_per_subslice =4,
1034    .simulator_id = 28,
1035 };
1036
/* Minimum and maximum URB entry counts per shader stage on gen12.  Meant to
 * be expanded inside a ".urb = { ... }" initializer.
 */
#define GEN12_URB_MIN_MAX_ENTRIES                   \
   .min_entries = {                                 \
      [MESA_SHADER_VERTEX]    = 64,                 \
      [MESA_SHADER_TESS_EVAL] = 34,                 \
   },                                               \
   .max_entries = {                                 \
      [MESA_SHADER_VERTEX]    = 3576,               \
      [MESA_SHADER_TESS_CTRL] = 1548,               \
      [MESA_SHADER_TESS_EVAL] = 3576,               \
      [MESA_SHADER_GEOMETRY]  = 1548,               \
   }

/* Gen12 base hardware description: generation number, feature flags, the
 * maximum thread count of every programmable stage and the URB entry limits.
 * The URB size is not set here; GEN12_FEATURES() adds it afterwards.
 */
#define GEN12_HW_INFO                               \
   .gen = 12,                                       \
   .has_pln = false,                                \
   .has_sample_with_hiz = false,                    \
   .has_aux_map = true,                             \
   .max_vs_threads = 546,                           \
   .max_tes_threads = 546,                          \
   .max_tcs_threads = 336,                          \
   .max_gs_threads = 336,                           \
   .max_cs_threads = 112, /* threads per DSS */     \
   .urb = {                                         \
      GEN12_URB_MIN_MAX_ENTRIES,                    \
   }

/* Complete gen12 initializer list.
 *
 * Starts from GEN8_FEATURES, applies GEN12_HW_INFO, switches off the features
 * listed below and fills in the topology of the specific part.  The
 * per-slice dual-subslice counts are stored in num_subslices, each with 16
 * EUs.  The URB size is 512 for GT1 and 1024 otherwise; .urb.size must stay
 * after GEN12_HW_INFO, which initializes the rest of .urb.
 */
#define GEN12_FEATURES(_gt, _slices, _dual_subslices, _l3)      \
   GEN8_FEATURES,                                               \
   GEN12_HW_INFO,                                               \
   .has_64bit_float = false,                                    \
   .has_64bit_int = false,                                      \
   .has_integer_dword_mul = false,                              \
   .gt = _gt,                                                   \
   .num_slices = _slices,                                       \
   .l3_banks = _l3,                                             \
   .simulator_id = 22,                                          \
   .urb.size = (_gt) == 1 ? 512 : 1024,                         \
   .num_subslices = _dual_subslices,                            \
   .num_eu_per_subslice = 16

/* Brace-enclosed list of per-slice dual-subslice counts. */
#define dual_subslices(...) { __VA_ARGS__, }
1076
1077 static const struct gen_device_info gen_device_info_tgl_gt1 = {
1078    GEN12_FEATURES(1, 1, dual_subslices(2), 8),
1079 };
1080
1081 static const struct gen_device_info gen_device_info_tgl_gt2 = {
1082    GEN12_FEATURES(2, 1, dual_subslices(6), 8),
1083 };
1084
1085 static void
1086 gen_device_info_set_eu_mask(struct gen_device_info *devinfo,
1087                             unsigned slice,
1088                             unsigned subslice,
1089                             unsigned eu_mask)
1090 {
1091    unsigned subslice_offset = slice * devinfo->eu_slice_stride +
1092       subslice * devinfo->eu_subslice_stride;
1093
1094    for (unsigned b_eu = 0; b_eu < devinfo->eu_subslice_stride; b_eu++) {
1095       devinfo->eu_masks[subslice_offset + b_eu] =
1096          (((1U << devinfo->num_eu_per_subslice) - 1) >> (b_eu * 8)) & 0xff;
1097    }
1098 }
1099
1100 /* Generate slice/subslice/eu masks from number of
1101  * slices/subslices/eu_per_subslices in the per generation/gt gen_device_info
1102  * structure.
1103  *
1104  * These can be overridden with values reported by the kernel either from
1105  * getparam SLICE_MASK/SUBSLICE_MASK values or from the kernel version 4.17+
1106  * through the i915 query uapi.
1107  */
1108 static void
1109 fill_masks(struct gen_device_info *devinfo)
1110 {
1111    devinfo->slice_masks = (1U << devinfo->num_slices) - 1;
1112
1113    /* Subslice masks */
1114    unsigned max_subslices = 0;
1115    for (int s = 0; s < devinfo->num_slices; s++)
1116       max_subslices = MAX2(devinfo->num_subslices[s], max_subslices);
1117    devinfo->subslice_slice_stride = DIV_ROUND_UP(max_subslices, 8);
1118
1119    for (int s = 0; s < devinfo->num_slices; s++) {
1120       devinfo->subslice_masks[s * devinfo->subslice_slice_stride] =
1121          (1U << devinfo->num_subslices[s]) - 1;
1122    }
1123
1124    /* EU masks */
1125    devinfo->eu_subslice_stride = DIV_ROUND_UP(devinfo->num_eu_per_subslice, 8);
1126    devinfo->eu_slice_stride = max_subslices * devinfo->eu_subslice_stride;
1127
1128    for (int s = 0; s < devinfo->num_slices; s++) {
1129       for (int ss = 0; ss < devinfo->num_subslices[s]; ss++) {
1130          gen_device_info_set_eu_mask(devinfo, s, ss,
1131                                      (1U << devinfo->num_eu_per_subslice) - 1);
1132       }
1133    }
1134 }
1135
1136 static void
1137 reset_masks(struct gen_device_info *devinfo)
1138 {
1139    devinfo->subslice_slice_stride = 0;
1140    devinfo->eu_subslice_stride = 0;
1141    devinfo->eu_slice_stride = 0;
1142
1143    devinfo->num_slices = 0;
1144    devinfo->num_eu_per_subslice = 0;
1145    memset(devinfo->num_subslices, 0, sizeof(devinfo->num_subslices));
1146
1147    memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks));
1148    memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks));
1149    memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks));
1150    memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices));
1151 }
1152
1153 static void
1154 update_from_topology(struct gen_device_info *devinfo,
1155                      const struct drm_i915_query_topology_info *topology)
1156 {
1157    reset_masks(devinfo);
1158
1159    devinfo->subslice_slice_stride = topology->subslice_stride;
1160
1161    devinfo->eu_subslice_stride = DIV_ROUND_UP(topology->max_eus_per_subslice, 8);
1162    devinfo->eu_slice_stride = topology->max_subslices * devinfo->eu_subslice_stride;
1163
1164    assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));
1165    memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));
1166    devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
1167
1168    uint32_t subslice_mask_len =
1169       topology->max_slices * topology->subslice_stride;
1170    assert(sizeof(devinfo->subslice_masks) >= subslice_mask_len);
1171    memcpy(devinfo->subslice_masks, &topology->data[topology->subslice_offset],
1172           subslice_mask_len);
1173
1174    uint32_t n_subslices = 0;
1175    for (int s = 0; s < topology->max_slices; s++) {
1176       if ((devinfo->slice_masks & (1 << s)) == 0)
1177          continue;
1178
1179       for (int b = 0; b < devinfo->subslice_slice_stride; b++) {
1180          devinfo->num_subslices[s] +=
1181             __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]);
1182       }
1183       n_subslices += devinfo->num_subslices[s];
1184    }
1185    assert(n_subslices > 0);
1186
1187    if (devinfo->gen == 11) {
1188       /* On ICL we only have one slice */
1189       assert(devinfo->slice_masks == 1);
1190
1191       /* Count the number of subslices on each pixel pipe. Assume that
1192        * subslices 0-3 are on pixel pipe 0, and 4-7 are on pixel pipe 1.
1193        */
1194       unsigned subslices = devinfo->subslice_masks[0];
1195       unsigned ss = 0;
1196       while (subslices > 0) {
1197          if (subslices & 1)
1198             devinfo->ppipe_subslices[ss >= 4 ? 1 : 0] += 1;
1199          subslices >>= 1;
1200          ss++;
1201       }
1202    }
1203
1204    uint32_t eu_mask_len =
1205       topology->eu_stride * topology->max_subslices * topology->max_slices;
1206    assert(sizeof(devinfo->eu_masks) >= eu_mask_len);
1207    memcpy(devinfo->eu_masks, &topology->data[topology->eu_offset], eu_mask_len);
1208
1209    uint32_t n_eus = 0;
1210    for (int b = 0; b < eu_mask_len; b++)
1211       n_eus += __builtin_popcount(devinfo->eu_masks[b]);
1212
1213    devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1214 }
1215
1216 static bool
1217 update_from_masks(struct gen_device_info *devinfo, uint32_t slice_mask,
1218                   uint32_t subslice_mask, uint32_t n_eus)
1219 {
1220    struct drm_i915_query_topology_info *topology;
1221
1222    assert((slice_mask & 0xff) == slice_mask);
1223
1224    size_t data_length = 100;
1225
1226    topology = calloc(1, sizeof(*topology) + data_length);
1227    if (!topology)
1228       return false;
1229
1230    topology->max_slices = util_last_bit(slice_mask);
1231    topology->max_subslices = util_last_bit(subslice_mask);
1232
1233    topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8);
1234    topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8);
1235
1236    uint32_t n_subslices = __builtin_popcount(slice_mask) *
1237       __builtin_popcount(subslice_mask);
1238    uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices);
1239    uint32_t eu_mask = (1U << num_eu_per_subslice) - 1;
1240
1241    topology->eu_offset = topology->subslice_offset +
1242       DIV_ROUND_UP(topology->max_subslices, 8);
1243    topology->eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8);
1244
1245    /* Set slice mask in topology */
1246    for (int b = 0; b < topology->subslice_offset; b++)
1247       topology->data[b] = (slice_mask >> (b * 8)) & 0xff;
1248
1249    for (int s = 0; s < topology->max_slices; s++) {
1250
1251       /* Set subslice mask in topology */
1252       for (int b = 0; b < topology->subslice_stride; b++) {
1253          int subslice_offset = topology->subslice_offset +
1254             s * topology->subslice_stride + b;
1255
1256          topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff;
1257       }
1258
1259       /* Set eu mask in topology */
1260       for (int ss = 0; ss < topology->max_subslices; ss++) {
1261          for (int b = 0; b < topology->eu_stride; b++) {
1262             int eu_offset = topology->eu_offset +
1263                (s * topology->max_subslices + ss) * topology->eu_stride + b;
1264
1265             topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff;
1266          }
1267       }
1268    }
1269
1270    update_from_topology(devinfo, topology);
1271    free(topology);
1272
1273    return true;
1274 }
1275
1276 static bool
1277 getparam(int fd, uint32_t param, int *value)
1278 {
1279    int tmp;
1280
1281    struct drm_i915_getparam gp = {
1282       .param = param,
1283       .value = &tmp,
1284    };
1285
1286    int ret = gen_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
1287    if (ret != 0)
1288       return false;
1289
1290    *value = tmp;
1291    return true;
1292 }
1293
1294 bool
1295 gen_get_device_info_from_pci_id(int pci_id,
1296                                 struct gen_device_info *devinfo)
1297 {
1298    switch (pci_id) {
1299 #undef CHIPSET
1300 #define CHIPSET(id, family, fam_str, name) \
1301       case id: *devinfo = gen_device_info_##family; break;
1302 #include "pci_ids/i965_pci_ids.h"
1303 #include "pci_ids/iris_pci_ids.h"
1304    default:
1305       fprintf(stderr, "Driver does not support the 0x%x PCI ID.\n", pci_id);
1306       return false;
1307    }
1308
1309    fill_masks(devinfo);
1310
1311    /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
1312     *
1313     * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
1314     *  allocate scratch space enough so that each slice has 4 slices allowed."
1315     *
1316     * The equivalent internal documentation says that this programming note
1317     * applies to all Gen9+ platforms.
1318     *
1319     * The hardware typically calculates the scratch space pointer by taking
1320     * the base address, and adding per-thread-scratch-space * thread ID.
1321     * Extra padding can be necessary depending how the thread IDs are
1322     * calculated for a particular shader stage.
1323     */
1324
1325    switch(devinfo->gen) {
1326    case 9:
1327    case 10:
1328       devinfo->max_wm_threads = 64 /* threads-per-PSD */
1329                               * devinfo->num_slices
1330                               * 4; /* effective subslices per slice */
1331       break;
1332    case 11:
1333    case 12:
1334       devinfo->max_wm_threads = 128 /* threads-per-PSD */
1335                               * devinfo->num_slices
1336                               * 8; /* subslices per slice */
1337       break;
1338    default:
1339       assert(devinfo->gen < 9);
1340       break;
1341    }
1342
1343    assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices));
1344
1345    devinfo->chipset_id = pci_id;
1346    return true;
1347 }
1348
1349 const char *
1350 gen_get_device_name(int devid)
1351 {
1352    switch (devid) {
1353 #undef CHIPSET
1354 #define CHIPSET(id, family, fam_str, name) case id: return name " (" fam_str ")"; break;
1355 #include "pci_ids/i965_pci_ids.h"
1356 #include "pci_ids/iris_pci_ids.h"
1357    default:
1358       return NULL;
1359    }
1360 }
1361
1362 /**
1363  * for gen8/gen9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology
1364  * (kernel 4.13+)
1365  */
1366 static bool
1367 getparam_topology(struct gen_device_info *devinfo, int fd)
1368 {
1369    int slice_mask = 0;
1370    if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask))
1371       return false;
1372
1373    int n_eus;
1374    if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus))
1375       return false;
1376
1377    int subslice_mask = 0;
1378    if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask))
1379       return false;
1380
1381    return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus);
1382 }
1383
1384 /**
1385  * preferred API for updating the topology in devinfo (kernel 4.17+)
1386  */
1387 static bool
1388 query_topology(struct gen_device_info *devinfo, int fd)
1389 {
1390    struct drm_i915_query_item item = {
1391       .query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
1392    };
1393    struct drm_i915_query query = {
1394       .num_items = 1,
1395       .items_ptr = (uintptr_t) &item,
1396    };
1397
1398    if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query))
1399       return false;
1400
1401    if (item.length < 0)
1402       return false;
1403
1404    struct drm_i915_query_topology_info *topo_info =
1405       (struct drm_i915_query_topology_info *) calloc(1, item.length);
1406    item.data_ptr = (uintptr_t) topo_info;
1407
1408    if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query) ||
1409        item.length <= 0)
1410       return false;
1411
1412    update_from_topology(devinfo, topo_info);
1413
1414    free(topo_info);
1415
1416    return true;
1417
1418 }
1419
1420 bool
1421 gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
1422 {
1423    int devid = get_pci_device_id_override();
1424    if (devid > 0) {
1425       if (!gen_get_device_info_from_pci_id(devid, devinfo))
1426          return false;
1427       devinfo->no_hw = true;
1428    } else {
1429       /* query the device id */
1430       if (!getparam(fd, I915_PARAM_CHIPSET_ID, &devid))
1431          return false;
1432       if (!gen_get_device_info_from_pci_id(devid, devinfo))
1433          return false;
1434       devinfo->no_hw = false;
1435    }
1436
1437    /* remaining initializion queries the kernel for device info */
1438    if (devinfo->no_hw)
1439       return true;
1440
1441    int timestamp_frequency;
1442    if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY,
1443                 &timestamp_frequency))
1444       devinfo->timestamp_frequency = timestamp_frequency;
1445    else if (devinfo->gen >= 10)
1446       /* gen10 and later requires the timestamp_frequency to be updated */
1447       return false;
1448
1449    if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision))
1450       devinfo->revision = 0;
1451
1452    if (!query_topology(devinfo, fd)) {
1453       if (devinfo->gen >= 10) {
1454          /* topology uAPI required for CNL+ (kernel 4.17+) */
1455          return false;
1456       }
1457
1458       /* else use the kernel 4.13+ api for gen8+.  For older kernels, topology
1459        * will be wrong, affecting GPU metrics. In this case, fail silently.
1460        */
1461       getparam_topology(devinfo, fd);
1462    }
1463
1464    return true;
1465 }