X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fdev%2Fgen_device_info.c;h=61eedf68ff7c7e4554d544304414dccfb0bcc001;hb=11fdd5f52c3db070f33f7ef82d41acf14b1a2670;hp=e2c6cbc7101a78f3ff1769a9b92f7c7d9659d2b4;hpb=a0baedb6387aea78ffb0eb654cb837421e15d9fe;p=mesa.git diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c index e2c6cbc7101..61eedf68ff7 100644 --- a/src/intel/dev/gen_device_info.c +++ b/src/intel/dev/gen_device_info.c @@ -22,16 +22,18 @@ */ #include +#include #include #include #include #include #include "gen_device_info.h" #include "compiler/shader_enums.h" +#include "intel/common/gen_gem.h" #include "util/bitscan.h" #include "util/macros.h" -#include +#include "drm-uapi/i915_drm.h" /** * Get the PCI ID for the device name. @@ -60,8 +62,12 @@ gen_device_name_to_pci_device_id(const char *name) { "aml", 0x591C }, { "glk", 0x3185 }, { "cfl", 0x3E9B }, + { "whl", 0x3EA1 }, + { "cml", 0x9b41 }, { "cnl", 0x5a52 }, { "icl", 0x8a52 }, + { "ehl", 0x4500 }, + { "tgl", 0x9a49 }, }; for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { @@ -78,8 +84,8 @@ gen_device_name_to_pci_device_id(const char *name) * * Returns -1 if the override is not set. */ -int -gen_get_pci_device_id_override(void) +static int +get_pci_device_id_override(void) { if (geteuid() == getuid()) { const char *devid_override = getenv("INTEL_DEVID_OVERRIDE"); @@ -413,6 +419,7 @@ static const struct gen_device_info gen_device_info_hsw_gt3 = { .has_64bit_types = true, \ .supports_simd16_3src = true, \ .has_surface_tile_offset = true, \ + .num_thread_per_eu = 7, \ .max_vs_threads = 504, \ .max_tcs_threads = 504, \ .max_tes_threads = 504, \ @@ -426,7 +433,6 @@ static const struct gen_device_info gen_device_info_bdw_gt1 = { .num_slices = 1, .num_subslices = { 2, }, .num_eu_per_subslice = 8, - .num_thread_per_eu = 7, .l3_banks = 2, .max_cs_threads = 42, .urb = { @@ -451,7 +457,6 @@ static const struct gen_device_info gen_device_info_bdw_gt2 = { .num_slices = 1, .num_subslices = { 3, }, .num_eu_per_subslice = 8, - .num_thread_per_eu = 7, .l3_banks = 4, .max_cs_threads = 56, .urb = { @@ -476,7 +481,6 @@ static const struct gen_device_info gen_device_info_bdw_gt3 = { .num_slices = 2, .num_subslices = { 3, 3, }, .num_eu_per_subslice = 8, - .num_thread_per_eu = 7, .l3_banks = 8, .max_cs_threads = 56, .urb = { @@ -502,7 +506,6 @@ static const struct gen_device_info gen_device_info_chv = { .num_slices = 1, .num_subslices = { 2, }, .num_eu_per_subslice = 8, - .num_thread_per_eu = 7, .l3_banks = 2, .max_vs_threads = 80, .max_tcs_threads = 80, @@ -608,8 +611,7 @@ static const struct gen_device_info gen_device_info_chv = { #define GEN9_FEATURES \ GEN8_FEATURES, \ GEN9_HW_INFO, \ - .has_sample_with_hiz = true, \ - .num_thread_per_eu = 7 + .has_sample_with_hiz = true static const struct gen_device_info gen_device_info_skl_gt1 = { GEN9_FEATURES, .gt = 1, @@ -619,6 +621,10 @@ static const struct gen_device_info gen_device_info_skl_gt1 = { .num_eu_per_subslice = 6, .l3_banks = 2, .urb.size = 192, + /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions + * leading to some vertices to go missing if we use too much URB. + */ + .urb.max_entries[MESA_SHADER_VERTEX] = 928, .simulator_id = 12, }; @@ -690,6 +696,10 @@ static const struct gen_device_info gen_device_info_kbl_gt1 = { .num_subslices = { 2, }, .num_eu_per_subslice = 6, .l3_banks = 2, + /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions + * leading to some vertices to go missing if we use too much URB. + */ + .urb.max_entries[MESA_SHADER_VERTEX] = 928, .simulator_id = 16, }; @@ -776,6 +786,11 @@ static const struct gen_device_info gen_device_info_cfl_gt1 = { .num_subslices = { 2, }, .num_eu_per_subslice = 6, .l3_banks = 2, + .urb.size = 192, + /* GT1 seems to have a bug in the top of the pipe (VF/VS?) fixed functions + * leading to some vertices to go missing if we use too much URB. + */ + .urb.max_entries[MESA_SHADER_VERTEX] = 928, .simulator_id = 24, }; static const struct gen_device_info gen_device_info_cfl_gt2 = { @@ -932,6 +947,136 @@ static const struct gen_device_info gen_device_info_icl_1x8 = { .simulator_id = 19, }; +static const struct gen_device_info gen_device_info_ehl_4x8 = { + GEN11_FEATURES(1, 1, subslices(4), 4), + .is_elkhartlake = true, + .urb = { + .size = 512, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 34, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 2384, + [MESA_SHADER_TESS_CTRL] = 1032, + [MESA_SHADER_TESS_EVAL] = 2384, + [MESA_SHADER_GEOMETRY] = 1032, + }, + }, + .disable_ccs_repack = true, + .simulator_id = 28, +}; + +static const struct gen_device_info gen_device_info_ehl_4x6 = { + GEN11_FEATURES(1, 1, subslices(4), 4), + .is_elkhartlake = true, + .urb = { + .size = 512, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 34, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 2384, + [MESA_SHADER_TESS_CTRL] = 1032, + [MESA_SHADER_TESS_EVAL] = 2384, + [MESA_SHADER_GEOMETRY] = 1032, + }, + }, + .disable_ccs_repack = true, + .num_eu_per_subslice = 6, + .simulator_id = 28, +}; + +static const struct gen_device_info gen_device_info_ehl_4x4 = { + GEN11_FEATURES(1, 1, subslices(4), 4), + .is_elkhartlake = true, + .urb = { + .size = 512, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 34, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 2384, + [MESA_SHADER_TESS_CTRL] = 1032, + [MESA_SHADER_TESS_EVAL] = 2384, + [MESA_SHADER_GEOMETRY] = 1032, + }, + }, + .disable_ccs_repack = true, + .num_eu_per_subslice = 4, + .simulator_id = 28, +}; + +static const struct gen_device_info gen_device_info_ehl_2x4 = { + GEN11_FEATURES(1, 1, subslices(2), 4), + .is_elkhartlake = true, + .urb = { + .size = 512, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 34, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 2384, + [MESA_SHADER_TESS_CTRL] = 1032, + [MESA_SHADER_TESS_EVAL] = 2384, + [MESA_SHADER_GEOMETRY] = 1032, + }, + }, + .disable_ccs_repack = true, + .num_eu_per_subslice =4, + .simulator_id = 28, +}; + +#define GEN12_URB_MIN_MAX_ENTRIES \ + .min_entries = { \ + [MESA_SHADER_VERTEX] = 64, \ + [MESA_SHADER_TESS_EVAL] = 34, \ + }, \ + .max_entries = { \ + [MESA_SHADER_VERTEX] = 3576, \ + [MESA_SHADER_TESS_CTRL] = 1548, \ + [MESA_SHADER_TESS_EVAL] = 3576, \ + [MESA_SHADER_GEOMETRY] = 1548, \ + } + +#define GEN12_HW_INFO \ + .gen = 12, \ + .has_pln = false, \ + .has_sample_with_hiz = false, \ + .has_aux_map = true, \ + .max_vs_threads = 546, \ + .max_gs_threads = 336, \ + .max_tcs_threads = 336, \ + .max_tes_threads = 546, \ + .max_cs_threads = 112, /* threads per DSS */ \ + .urb = { \ + GEN12_URB_MIN_MAX_ENTRIES, \ + } + +#define GEN12_FEATURES(_gt, _slices, _dual_subslices, _l3) \ + GEN8_FEATURES, \ + GEN12_HW_INFO, \ + .has_64bit_types = false, \ + .has_integer_dword_mul = false, \ + .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ + .simulator_id = 22, \ + .urb.size = (_gt) == 1 ? 512 : 1024, \ + .num_subslices = _dual_subslices, \ + .num_eu_per_subslice = 16 + +#define dual_subslices(args...) { args, } + +static const struct gen_device_info gen_device_info_tgl_1x2x16 = { + GEN12_FEATURES(1, 1, dual_subslices(2), 8), +}; + +static const struct gen_device_info gen_device_info_tgl_1x6x16 = { + GEN12_FEATURES(2, 1, dual_subslices(6), 8), +}; + static void gen_device_info_set_eu_mask(struct gen_device_info *devinfo, unsigned slice, @@ -983,64 +1128,6 @@ fill_masks(struct gen_device_info *devinfo) } } -void -gen_device_info_update_from_masks(struct gen_device_info *devinfo, - uint32_t slice_mask, - uint32_t subslice_mask, - uint32_t n_eus) -{ - struct { - struct drm_i915_query_topology_info base; - uint8_t data[100]; - } topology; - - assert((slice_mask & 0xff) == slice_mask); - - memset(&topology, 0, sizeof(topology)); - - topology.base.max_slices = util_last_bit(slice_mask); - topology.base.max_subslices = util_last_bit(subslice_mask); - - topology.base.subslice_offset = DIV_ROUND_UP(topology.base.max_slices, 8); - topology.base.subslice_stride = DIV_ROUND_UP(topology.base.max_subslices, 8); - - uint32_t n_subslices = __builtin_popcount(slice_mask) * - __builtin_popcount(subslice_mask); - uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); - uint32_t eu_mask = (1U << num_eu_per_subslice) - 1; - - topology.base.eu_offset = topology.base.subslice_offset + - DIV_ROUND_UP(topology.base.max_subslices, 8); - topology.base.eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8); - - /* Set slice mask in topology */ - for (int b = 0; b < topology.base.subslice_offset; b++) - topology.base.data[b] = (slice_mask >> (b * 8)) & 0xff; - - for (int s = 0; s < topology.base.max_slices; s++) { - - /* Set subslice mask in topology */ - for (int b = 0; b < topology.base.subslice_stride; b++) { - int subslice_offset = topology.base.subslice_offset + - s * topology.base.subslice_stride + b; - - topology.base.data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff; - } - - /* Set eu mask in topology */ - for (int ss = 0; ss < topology.base.max_subslices; ss++) { - for (int b = 0; b < topology.base.eu_stride; b++) { - int eu_offset = topology.base.eu_offset + - (s * topology.base.max_subslices + ss) * topology.base.eu_stride + b; - - topology.base.data[eu_offset] = (eu_mask >> (b * 8)) & 0xff; - } - } - } - - gen_device_info_update_from_topology(devinfo, &topology.base); -} - static void reset_masks(struct gen_device_info *devinfo) { @@ -1055,11 +1142,12 @@ reset_masks(struct gen_device_info *devinfo) memset(&devinfo->slice_masks, 0, sizeof(devinfo->slice_masks)); memset(devinfo->subslice_masks, 0, sizeof(devinfo->subslice_masks)); memset(devinfo->eu_masks, 0, sizeof(devinfo->eu_masks)); + memset(devinfo->ppipe_subslices, 0, sizeof(devinfo->ppipe_subslices)); } -void -gen_device_info_update_from_topology(struct gen_device_info *devinfo, - const struct drm_i915_query_topology_info *topology) +static void +update_from_topology(struct gen_device_info *devinfo, + const struct drm_i915_query_topology_info *topology) { reset_masks(devinfo); @@ -1080,17 +1168,34 @@ gen_device_info_update_from_topology(struct gen_device_info *devinfo, uint32_t n_subslices = 0; for (int s = 0; s < topology->max_slices; s++) { - if ((devinfo->slice_masks & (1UL << s)) == 0) + if ((devinfo->slice_masks & (1 << s)) == 0) continue; for (int b = 0; b < devinfo->subslice_slice_stride; b++) { devinfo->num_subslices[s] += - __builtin_popcount(devinfo->subslice_masks[b]); + __builtin_popcount(devinfo->subslice_masks[s * devinfo->subslice_slice_stride + b]); } n_subslices += devinfo->num_subslices[s]; } assert(n_subslices > 0); + if (devinfo->gen == 11) { + /* On ICL we only have one slice */ + assert(devinfo->slice_masks == 1); + + /* Count the number of subslices on each pixel pipe. Assume that + * subslices 0-3 are on pixel pipe 0, and 4-7 are on pixel pipe 1. + */ + unsigned subslices = devinfo->subslice_masks[0]; + unsigned ss = 0; + while (subslices > 0) { + if (subslices & 1) + devinfo->ppipe_subslices[ss >= 4 ? 1 : 0] += 1; + subslices >>= 1; + ss++; + } + } + uint32_t eu_mask_len = topology->eu_stride * topology->max_subslices * topology->max_slices; assert(sizeof(devinfo->eu_masks) >= eu_mask_len); @@ -1103,16 +1208,96 @@ gen_device_info_update_from_topology(struct gen_device_info *devinfo, devinfo->num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); } +static bool +update_from_masks(struct gen_device_info *devinfo, uint32_t slice_mask, + uint32_t subslice_mask, uint32_t n_eus) +{ + struct drm_i915_query_topology_info *topology; + + assert((slice_mask & 0xff) == slice_mask); + + size_t data_length = 100; + + topology = calloc(1, sizeof(*topology) + data_length); + if (!topology) + return false; + + topology->max_slices = util_last_bit(slice_mask); + topology->max_subslices = util_last_bit(subslice_mask); + + topology->subslice_offset = DIV_ROUND_UP(topology->max_slices, 8); + topology->subslice_stride = DIV_ROUND_UP(topology->max_subslices, 8); + + uint32_t n_subslices = __builtin_popcount(slice_mask) * + __builtin_popcount(subslice_mask); + uint32_t num_eu_per_subslice = DIV_ROUND_UP(n_eus, n_subslices); + uint32_t eu_mask = (1U << num_eu_per_subslice) - 1; + + topology->eu_offset = topology->subslice_offset + + DIV_ROUND_UP(topology->max_subslices, 8); + topology->eu_stride = DIV_ROUND_UP(num_eu_per_subslice, 8); + + /* Set slice mask in topology */ + for (int b = 0; b < topology->subslice_offset; b++) + topology->data[b] = (slice_mask >> (b * 8)) & 0xff; + + for (int s = 0; s < topology->max_slices; s++) { + + /* Set subslice mask in topology */ + for (int b = 0; b < topology->subslice_stride; b++) { + int subslice_offset = topology->subslice_offset + + s * topology->subslice_stride + b; + + topology->data[subslice_offset] = (subslice_mask >> (b * 8)) & 0xff; + } + + /* Set eu mask in topology */ + for (int ss = 0; ss < topology->max_subslices; ss++) { + for (int b = 0; b < topology->eu_stride; b++) { + int eu_offset = topology->eu_offset + + (s * topology->max_subslices + ss) * topology->eu_stride + b; + + topology->data[eu_offset] = (eu_mask >> (b * 8)) & 0xff; + } + } + } + + update_from_topology(devinfo, topology); + free(topology); + + return true; +} + +static bool +getparam(int fd, uint32_t param, int *value) +{ + int tmp; + + struct drm_i915_getparam gp = { + .param = param, + .value = &tmp, + }; + + int ret = gen_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret != 0) + return false; + + *value = tmp; + return true; +} + bool -gen_get_device_info(int devid, struct gen_device_info *devinfo) +gen_get_device_info_from_pci_id(int pci_id, + struct gen_device_info *devinfo) { - switch (devid) { + switch (pci_id) { #undef CHIPSET #define CHIPSET(id, family, name) \ case id: *devinfo = gen_device_info_##family; break; #include "pci_ids/i965_pci_ids.h" +#include "pci_ids/iris_pci_ids.h" default: - fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid); + fprintf(stderr, "Driver does not support the 0x%x PCI ID.\n", pci_id); return false; } @@ -1140,16 +1325,19 @@ gen_get_device_info(int devid, struct gen_device_info *devinfo) * 4; /* effective subslices per slice */ break; case 11: + case 12: devinfo->max_wm_threads = 128 /* threads-per-PSD */ * devinfo->num_slices * 8; /* subslices per slice */ break; default: + assert(devinfo->gen < 9); break; } assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices)); + devinfo->chipset_id = pci_id; return true; } @@ -1160,7 +1348,113 @@ gen_get_device_name(int devid) #undef CHIPSET #define CHIPSET(id, family, name) case id: return name; #include "pci_ids/i965_pci_ids.h" +#include "pci_ids/iris_pci_ids.h" default: return NULL; } } + +/** + * for gen8/gen9, SLICE_MASK/SUBSLICE_MASK can be used to compute the topology + * (kernel 4.13+) + */ +static bool +getparam_topology(struct gen_device_info *devinfo, int fd) +{ + int slice_mask = 0; + if (!getparam(fd, I915_PARAM_SLICE_MASK, &slice_mask)) + return false; + + int n_eus; + if (!getparam(fd, I915_PARAM_EU_TOTAL, &n_eus)) + return false; + + int subslice_mask = 0; + if (!getparam(fd, I915_PARAM_SUBSLICE_MASK, &subslice_mask)) + return false; + + return update_from_masks(devinfo, slice_mask, subslice_mask, n_eus); +} + +/** + * preferred API for updating the topology in devinfo (kernel 4.17+) + */ +static bool +query_topology(struct gen_device_info *devinfo, int fd) +{ + struct drm_i915_query_item item = { + .query_id = DRM_I915_QUERY_TOPOLOGY_INFO, + }; + struct drm_i915_query query = { + .num_items = 1, + .items_ptr = (uintptr_t) &item, + }; + + if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query)) + return false; + + if (item.length < 0) + return false; + + struct drm_i915_query_topology_info *topo_info = + (struct drm_i915_query_topology_info *) calloc(1, item.length); + item.data_ptr = (uintptr_t) topo_info; + + if (gen_ioctl(fd, DRM_IOCTL_I915_QUERY, &query) || + item.length <= 0) + return false; + + update_from_topology(devinfo, topo_info); + + free(topo_info); + + return true; + +} + +bool +gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo) +{ + int devid = get_pci_device_id_override(); + if (devid > 0) { + if (!gen_get_device_info_from_pci_id(devid, devinfo)) + return false; + devinfo->no_hw = true; + } else { + /* query the device id */ + if (!getparam(fd, I915_PARAM_CHIPSET_ID, &devid)) + return false; + if (!gen_get_device_info_from_pci_id(devid, devinfo)) + return false; + devinfo->no_hw = false; + } + + /* remaining initializion queries the kernel for device info */ + if (devinfo->no_hw) + return true; + + int timestamp_frequency; + if (getparam(fd, I915_PARAM_CS_TIMESTAMP_FREQUENCY, + ×tamp_frequency)) + devinfo->timestamp_frequency = timestamp_frequency; + else if (devinfo->gen >= 10) + /* gen10 and later requires the timestamp_frequency to be updated */ + return false; + + if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision)) + return false; + + if (!query_topology(devinfo, fd)) { + if (devinfo->gen >= 10) { + /* topology uAPI required for CNL+ (kernel 4.17+) */ + return false; + } + + /* else use the kernel 4.13+ api for gen8+. For older kernels, topology + * will be wrong, affecting GPU metrics. In this case, fail silently. + */ + getparam_topology(devinfo, fd); + } + + return true; +}