From: Jordan Justen Date: Mon, 26 Feb 2018 23:39:59 +0000 (-0800) Subject: intel: Split gen_device_info out into libintel_dev X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=272bef0601a1bdb5292771aefc8d62fcbdf4c47f;p=mesa.git intel: Split gen_device_info out into libintel_dev Split out the device info so isl doesn't depend on intel/common. Now it will depend on the new intel/dev device info lib. This will allow the decoder in intel/common to use isl, allowing us to apply Ken's patch that removes the genxml duplication of surface formats. Signed-off-by: Jordan Justen Reviewed-by: Samuel Iglesias Gonsálvez --- diff --git a/src/intel/Android.dev.mk b/src/intel/Android.dev.mk new file mode 100644 index 00000000000..956f32c119f --- /dev/null +++ b/src/intel/Android.dev.mk @@ -0,0 +1,35 @@ +# Copyright © 2016 Intel Corporation +# Copyright © 2016 Mauro Rossi +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# --------------------------------------- +# Build libmesa_intel_dev +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_MODULE := libmesa_intel_dev + +LOCAL_MODULE_CLASS := STATIC_LIBRARIES + +LOCAL_SRC_FILES := $(DEV_FILES) + +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/intel/Makefile.am b/src/intel/Makefile.am index cde4a70fbc6..bfb7f5b9f08 100644 --- a/src/intel/Makefile.am +++ b/src/intel/Makefile.am @@ -75,6 +75,7 @@ EXTRA_DIST = \ include Makefile.blorp.am include Makefile.common.am include Makefile.compiler.am +include Makefile.dev.am include Makefile.genxml.am include Makefile.isl.am include Makefile.tools.am diff --git a/src/intel/Makefile.dev.am b/src/intel/Makefile.dev.am new file mode 100644 index 00000000000..2bf5b4e232a --- /dev/null +++ b/src/intel/Makefile.dev.am @@ -0,0 +1,31 @@ +# Copyright © 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +noinst_LTLIBRARIES += dev/libintel_dev.la + +dev_libintel_dev_la_CFLAGS = $(AM_CFLAGS) $(LIBDRM_CFLAGS) $(EXPAT_CFLAGS) +dev_libintel_dev_la_SOURCES = $(DEV_FILES) +dev_libintel_dev_la_LIBADD = $(EXPAT_LIBS) + +if HAVE_PLATFORM_ANDROID +dev_libintel_dev_la_CFLAGS += $(ANDROID_CFLAGS) +dev_libintel_dev_la_LIBADD += $(ANDROID_LIBS) +endif diff --git a/src/intel/Makefile.isl.am b/src/intel/Makefile.isl.am index 9525f9e9905..52a71cffd4b 100644 --- a/src/intel/Makefile.isl.am +++ b/src/intel/Makefile.isl.am @@ -80,7 +80,7 @@ check_PROGRAMS += isl/tests/isl_surf_get_image_offset_test TESTS += $(check_PROGRAMS) isl_tests_isl_surf_get_image_offset_test_LDADD = \ - common/libintel_common.la \ + dev/libintel_dev.la \ isl/libisl.la \ -lm diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index 0a16e2398c9..72901db901c 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -14,8 +14,6 @@ COMMON_FILES = \ common/gen_decoder.c \ common/gen_decoder.h \ common/gen_defines.h \ - common/gen_device_info.c \ - common/gen_device_info.h \ common/gen_l3_config.c \ common/gen_l3_config.h \ common/gen_urb_config.c \ @@ -124,6 +122,10 @@ COMPILER_FILES = \ COMPILER_GENERATED_FILES = \ compiler/brw_nir_trig_workarounds.c +DEV_FILES = \ + dev/gen_device_info.c \ + dev/gen_device_info.h + GENXML_XML_FILES = \ genxml/gen4.xml \ genxml/gen45.xml \ diff --git a/src/intel/Makefile.tools.am b/src/intel/Makefile.tools.am index 944ee19805c..a8685c24e1c 100644 --- a/src/intel/Makefile.tools.am +++ b/src/intel/Makefile.tools.am @@ -37,6 +37,8 @@ tools_aubinator_CFLAGS = \ tools_aubinator_LDADD = \ common/libintel_common.la \ compiler/libintel_compiler.la \ + dev/libintel_dev.la \ + isl/libisl.la 
\ $(top_builddir)/src/util/libmesautil.la \ $(PER_GEN_LIBS) \ $(PTHREAD_LIBS) \ @@ -54,6 +56,8 @@ tools_aubinator_error_decode_SOURCES = \ tools_aubinator_error_decode_LDADD = \ common/libintel_common.la \ compiler/libintel_compiler.la \ + dev/libintel_dev.la \ + isl/libisl.la \ $(top_builddir)/src/util/libmesautil.la \ $(PTHREAD_LIBS) \ $(ZLIB_LIBS) diff --git a/src/intel/Makefile.vulkan.am b/src/intel/Makefile.vulkan.am index 891ee698a02..6b71df6319a 100644 --- a/src/intel/Makefile.vulkan.am +++ b/src/intel/Makefile.vulkan.am @@ -152,6 +152,7 @@ VULKAN_LIB_DEPS = \ $(VULKAN_PER_GEN_LIBS) \ compiler/libintel_compiler.la \ common/libintel_common.la \ + dev/libintel_dev.la \ isl/libisl.la \ blorp/libblorp.la \ $(top_builddir)/src/vulkan/libvulkan_util.la \ diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 6dd92d236af..7182399c9c2 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -25,7 +25,7 @@ #define BLORP_GENX_EXEC_H #include "blorp_priv.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "common/gen_sample_positions.h" #include "genxml/gen_macros.h" diff --git a/src/intel/common/gen_decoder.h b/src/intel/common/gen_decoder.h index ff388700287..7ae80cd23ed 100644 --- a/src/intel/common/gen_decoder.h +++ b/src/intel/common/gen_decoder.h @@ -28,7 +28,7 @@ #include #include -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "util/hash_table.h" #ifdef __cplusplus diff --git a/src/intel/common/gen_device_info.c b/src/intel/common/gen_device_info.c deleted file mode 100644 index 1773009d33c..00000000000 --- a/src/intel/common/gen_device_info.c +++ /dev/null @@ -1,902 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including 
without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include "gen_device_info.h" -#include "compiler/shader_enums.h" -#include "util/macros.h" - -/** - * Get the PCI ID for the device name. - * - * Returns -1 if the device is not known. - */ -int -gen_device_name_to_pci_device_id(const char *name) -{ - static const struct { - const char *name; - int pci_id; - } name_map[] = { - { "brw", 0x2a02 }, - { "g4x", 0x2a42 }, - { "ilk", 0x0042 }, - { "snb", 0x0126 }, - { "ivb", 0x016a }, - { "hsw", 0x0d2e }, - { "byt", 0x0f33 }, - { "bdw", 0x162e }, - { "chv", 0x22B3 }, - { "skl", 0x1912 }, - { "bxt", 0x5A85 }, - { "kbl", 0x5912 }, - { "glk", 0x3185 }, - { "cnl", 0x5a52 }, - { "icl", 0x8a52 }, - }; - - for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { - if (!strcmp(name_map[i].name, name)) - return name_map[i].pci_id; - } - - return -1; -} - -/** - * Get the overridden PCI ID for the device. This is set with the - * INTEL_DEVID_OVERRIDE environment variable. - * - * Returns -1 if the override is not set. 
- */ -int -gen_get_pci_device_id_override(void) -{ - if (geteuid() == getuid()) { - const char *devid_override = getenv("INTEL_DEVID_OVERRIDE"); - if (devid_override) { - const int id = gen_device_name_to_pci_device_id(devid_override); - return id >= 0 ? id : strtol(devid_override, NULL, 0); - } - } - - return -1; -} - -static const struct gen_device_info gen_device_info_i965 = { - .gen = 4, - .has_negative_rhw_bug = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 4, - .max_vs_threads = 16, - .max_gs_threads = 2, - .max_wm_threads = 8 * 4, - .urb = { - .size = 256, - }, - .timestamp_frequency = 12500000, -}; - -static const struct gen_device_info gen_device_info_g4x = { - .gen = 4, - .has_pln = true, - .has_compr4 = true, - .has_surface_tile_offset = true, - .is_g4x = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 5, - .max_vs_threads = 32, - .max_gs_threads = 2, - .max_wm_threads = 10 * 5, - .urb = { - .size = 384, - }, - .timestamp_frequency = 12500000, -}; - -static const struct gen_device_info gen_device_info_ilk = { - .gen = 5, - .has_pln = true, - .has_compr4 = true, - .has_surface_tile_offset = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 6, - .max_vs_threads = 72, - .max_gs_threads = 32, - .max_wm_threads = 12 * 6, - .urb = { - .size = 1024, - }, - .timestamp_frequency = 12500000, -}; - -static const struct gen_device_info gen_device_info_snb_gt1 = { - .gen = 6, - .gt = 1, - .has_hiz_and_separate_stencil = true, - .has_llc = true, - .has_pln = true, - .has_surface_tile_offset = true, - .needs_unlit_centroid_workaround = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 6, /* Not confirmed */ - .max_vs_threads = 24, - .max_gs_threads = 21, /* conservative; 24 if rendering disabled. 
*/ - .max_wm_threads = 40, - .urb = { - .size = 32, - .min_entries = { - [MESA_SHADER_VERTEX] = 24, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 256, - [MESA_SHADER_GEOMETRY] = 256, - }, - }, - .timestamp_frequency = 12500000, -}; - -static const struct gen_device_info gen_device_info_snb_gt2 = { - .gen = 6, - .gt = 2, - .has_hiz_and_separate_stencil = true, - .has_llc = true, - .has_pln = true, - .has_surface_tile_offset = true, - .needs_unlit_centroid_workaround = true, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 6, /* Not confirmed */ - .max_vs_threads = 60, - .max_gs_threads = 60, - .max_wm_threads = 80, - .urb = { - .size = 64, - .min_entries = { - [MESA_SHADER_VERTEX] = 24, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 256, - [MESA_SHADER_GEOMETRY] = 256, - }, - }, - .timestamp_frequency = 12500000, -}; - -#define GEN7_FEATURES \ - .gen = 7, \ - .has_hiz_and_separate_stencil = true, \ - .must_use_separate_stencil = true, \ - .has_llc = true, \ - .has_pln = true, \ - .has_64bit_types = true, \ - .has_surface_tile_offset = true, \ - .timestamp_frequency = 12500000 - -static const struct gen_device_info gen_device_info_ivb_gt1 = { - GEN7_FEATURES, .is_ivybridge = true, .gt = 1, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 6, - .l3_banks = 2, - .max_vs_threads = 36, - .max_tcs_threads = 36, - .max_tes_threads = 36, - .max_gs_threads = 36, - .max_wm_threads = 48, - .max_cs_threads = 36, - .urb = { - .size = 128, - .min_entries = { - [MESA_SHADER_VERTEX] = 32, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 512, - [MESA_SHADER_TESS_CTRL] = 32, - [MESA_SHADER_TESS_EVAL] = 288, - [MESA_SHADER_GEOMETRY] = 192, - }, - }, -}; - -static const struct gen_device_info gen_device_info_ivb_gt2 = { - GEN7_FEATURES, .is_ivybridge = true, .gt = 2, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of - * @max_wm_threads ... 
*/ - .l3_banks = 4, - .max_vs_threads = 128, - .max_tcs_threads = 128, - .max_tes_threads = 128, - .max_gs_threads = 128, - .max_wm_threads = 172, - .max_cs_threads = 64, - .urb = { - .size = 256, - .min_entries = { - [MESA_SHADER_VERTEX] = 32, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 704, - [MESA_SHADER_TESS_CTRL] = 64, - [MESA_SHADER_TESS_EVAL] = 448, - [MESA_SHADER_GEOMETRY] = 320, - }, - }, -}; - -static const struct gen_device_info gen_device_info_byt = { - GEN7_FEATURES, .is_baytrail = true, .gt = 1, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 8, - .l3_banks = 1, - .has_llc = false, - .max_vs_threads = 36, - .max_tcs_threads = 36, - .max_tes_threads = 36, - .max_gs_threads = 36, - .max_wm_threads = 48, - .max_cs_threads = 32, - .urb = { - .size = 128, - .min_entries = { - [MESA_SHADER_VERTEX] = 32, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 512, - [MESA_SHADER_TESS_CTRL] = 32, - [MESA_SHADER_TESS_EVAL] = 288, - [MESA_SHADER_GEOMETRY] = 192, - }, - }, -}; - -#define HSW_FEATURES \ - GEN7_FEATURES, \ - .is_haswell = true, \ - .supports_simd16_3src = true, \ - .has_resource_streamer = true - -static const struct gen_device_info gen_device_info_hsw_gt1 = { - HSW_FEATURES, .gt = 1, - .num_slices = 1, - .num_subslices = { 1, }, - .num_thread_per_eu = 7, - .l3_banks = 2, - .max_vs_threads = 70, - .max_tcs_threads = 70, - .max_tes_threads = 70, - .max_gs_threads = 70, - .max_wm_threads = 102, - .max_cs_threads = 70, - .urb = { - .size = 128, - .min_entries = { - [MESA_SHADER_VERTEX] = 32, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 640, - [MESA_SHADER_TESS_CTRL] = 64, - [MESA_SHADER_TESS_EVAL] = 384, - [MESA_SHADER_GEOMETRY] = 256, - }, - }, -}; - -static const struct gen_device_info gen_device_info_hsw_gt2 = { - HSW_FEATURES, .gt = 2, - .num_slices = 1, - .num_subslices = { 2, }, - .num_thread_per_eu = 7, - .l3_banks = 4, - 
.max_vs_threads = 280, - .max_tcs_threads = 256, - .max_tes_threads = 280, - .max_gs_threads = 256, - .max_wm_threads = 204, - .max_cs_threads = 70, - .urb = { - .size = 256, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 1664, - [MESA_SHADER_TESS_CTRL] = 128, - [MESA_SHADER_TESS_EVAL] = 960, - [MESA_SHADER_GEOMETRY] = 640, - }, - }, -}; - -static const struct gen_device_info gen_device_info_hsw_gt3 = { - HSW_FEATURES, .gt = 3, - .num_slices = 2, - .num_subslices = { 2, }, - .num_thread_per_eu = 7, - .l3_banks = 8, - .max_vs_threads = 280, - .max_tcs_threads = 256, - .max_tes_threads = 280, - .max_gs_threads = 256, - .max_wm_threads = 408, - .max_cs_threads = 70, - .urb = { - .size = 512, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 10, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 1664, - [MESA_SHADER_TESS_CTRL] = 128, - [MESA_SHADER_TESS_EVAL] = 960, - [MESA_SHADER_GEOMETRY] = 640, - }, - }, -}; - -/* It's unclear how well supported sampling from the hiz buffer is on GEN8, - * so keep things conservative for now and set has_sample_with_hiz = false. 
- */ -#define GEN8_FEATURES \ - .gen = 8, \ - .has_hiz_and_separate_stencil = true, \ - .has_resource_streamer = true, \ - .must_use_separate_stencil = true, \ - .has_llc = true, \ - .has_sample_with_hiz = false, \ - .has_pln = true, \ - .has_integer_dword_mul = true, \ - .has_64bit_types = true, \ - .supports_simd16_3src = true, \ - .has_surface_tile_offset = true, \ - .max_vs_threads = 504, \ - .max_tcs_threads = 504, \ - .max_tes_threads = 504, \ - .max_gs_threads = 504, \ - .max_wm_threads = 384, \ - .timestamp_frequency = 12500000 - -static const struct gen_device_info gen_device_info_bdw_gt1 = { - GEN8_FEATURES, .gt = 1, - .is_broadwell = true, - .num_slices = 1, - .num_subslices = { 2, }, - .num_thread_per_eu = 7, - .l3_banks = 2, - .max_cs_threads = 42, - .urb = { - .size = 192, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 34, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 2560, - [MESA_SHADER_TESS_CTRL] = 504, - [MESA_SHADER_TESS_EVAL] = 1536, - [MESA_SHADER_GEOMETRY] = 960, - }, - } -}; - -static const struct gen_device_info gen_device_info_bdw_gt2 = { - GEN8_FEATURES, .gt = 2, - .is_broadwell = true, - .num_slices = 1, - .num_subslices = { 3, }, - .num_thread_per_eu = 7, - .l3_banks = 4, - .max_cs_threads = 56, - .urb = { - .size = 384, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 34, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 2560, - [MESA_SHADER_TESS_CTRL] = 504, - [MESA_SHADER_TESS_EVAL] = 1536, - [MESA_SHADER_GEOMETRY] = 960, - }, - } -}; - -static const struct gen_device_info gen_device_info_bdw_gt3 = { - GEN8_FEATURES, .gt = 3, - .is_broadwell = true, - .num_slices = 2, - .num_subslices = { 3, 3, }, - .num_thread_per_eu = 7, - .l3_banks = 8, - .max_cs_threads = 56, - .urb = { - .size = 384, - .min_entries = { - [MESA_SHADER_VERTEX] = 64, - [MESA_SHADER_TESS_EVAL] = 34, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 2560, - [MESA_SHADER_TESS_CTRL] = 504, - 
[MESA_SHADER_TESS_EVAL] = 1536, - [MESA_SHADER_GEOMETRY] = 960, - }, - } -}; - -static const struct gen_device_info gen_device_info_chv = { - GEN8_FEATURES, .is_cherryview = 1, .gt = 1, - .has_llc = false, - .has_integer_dword_mul = false, - .num_slices = 1, - .num_subslices = { 2, }, - .num_thread_per_eu = 7, - .l3_banks = 2, - .max_vs_threads = 80, - .max_tcs_threads = 80, - .max_tes_threads = 80, - .max_gs_threads = 80, - .max_wm_threads = 128, - .max_cs_threads = 6 * 7, - .urb = { - .size = 192, - .min_entries = { - [MESA_SHADER_VERTEX] = 34, - [MESA_SHADER_TESS_EVAL] = 34, - }, - .max_entries = { - [MESA_SHADER_VERTEX] = 640, - [MESA_SHADER_TESS_CTRL] = 80, - [MESA_SHADER_TESS_EVAL] = 384, - [MESA_SHADER_GEOMETRY] = 256, - }, - } -}; - -#define GEN9_HW_INFO \ - .gen = 9, \ - .max_vs_threads = 336, \ - .max_gs_threads = 336, \ - .max_tcs_threads = 336, \ - .max_tes_threads = 336, \ - .max_cs_threads = 56, \ - .timestamp_frequency = 12000000, \ - .urb = { \ - .size = 384, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 64, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 1856, \ - [MESA_SHADER_TESS_CTRL] = 672, \ - [MESA_SHADER_TESS_EVAL] = 1120, \ - [MESA_SHADER_GEOMETRY] = 640, \ - }, \ - } - -#define GEN9_LP_FEATURES \ - GEN8_FEATURES, \ - GEN9_HW_INFO, \ - .has_integer_dword_mul = false, \ - .gt = 1, \ - .has_llc = false, \ - .has_sample_with_hiz = true, \ - .num_slices = 1, \ - .num_thread_per_eu = 6, \ - .max_vs_threads = 112, \ - .max_tcs_threads = 112, \ - .max_tes_threads = 112, \ - .max_gs_threads = 112, \ - .max_cs_threads = 6 * 6, \ - .timestamp_frequency = 19200000, \ - .urb = { \ - .size = 192, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 34, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 704, \ - [MESA_SHADER_TESS_CTRL] = 256, \ - [MESA_SHADER_TESS_EVAL] = 416, \ - [MESA_SHADER_GEOMETRY] = 256, \ - }, \ - } - -#define GEN9_LP_FEATURES_3X6 \ - GEN9_LP_FEATURES, \ - 
.num_subslices = { 3, } - -#define GEN9_LP_FEATURES_2X6 \ - GEN9_LP_FEATURES, \ - .num_subslices = { 2, }, \ - .max_vs_threads = 56, \ - .max_tcs_threads = 56, \ - .max_tes_threads = 56, \ - .max_gs_threads = 56, \ - .max_cs_threads = 6 * 6, \ - .urb = { \ - .size = 128, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 34, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 352, \ - [MESA_SHADER_TESS_CTRL] = 128, \ - [MESA_SHADER_TESS_EVAL] = 208, \ - [MESA_SHADER_GEOMETRY] = 128, \ - }, \ - } - -#define GEN9_FEATURES \ - GEN8_FEATURES, \ - GEN9_HW_INFO, \ - .has_sample_with_hiz = true, \ - .num_thread_per_eu = 7 - -static const struct gen_device_info gen_device_info_skl_gt1 = { - GEN9_FEATURES, .gt = 1, - .is_skylake = true, - .num_slices = 1, - .num_subslices = { 2, }, - .l3_banks = 2, - .urb.size = 192, -}; - -static const struct gen_device_info gen_device_info_skl_gt2 = { - GEN9_FEATURES, .gt = 2, - .is_skylake = true, - .num_slices = 1, - .num_subslices = { 3, }, - .l3_banks = 4, -}; - -static const struct gen_device_info gen_device_info_skl_gt3 = { - GEN9_FEATURES, .gt = 3, - .is_skylake = true, - .num_slices = 2, - .num_subslices = { 3, 3, }, - .l3_banks = 8, -}; - -static const struct gen_device_info gen_device_info_skl_gt4 = { - GEN9_FEATURES, .gt = 4, - .is_skylake = true, - .num_slices = 3, - .num_subslices = { 3, 3, 3, }, - .l3_banks = 12, - /* From the "L3 Allocation and Programming" documentation: - * - * "URB is limited to 1008KB due to programming restrictions. This is not a - * restriction of the L3 implementation, but of the FF and other clients. - * Therefore, in a GT4 implementation it is possible for the programmed - * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but - * only 1008KB of this will be used." 
- */ - .urb.size = 1008 / 3, -}; - -static const struct gen_device_info gen_device_info_bxt = { - GEN9_LP_FEATURES_3X6, - .is_broxton = true, - .l3_banks = 2, -}; - -static const struct gen_device_info gen_device_info_bxt_2x6 = { - GEN9_LP_FEATURES_2X6, - .is_broxton = true, - .l3_banks = 1, -}; -/* - * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. - * There's no KBL entry. Using the default SKL (GEN9) GS entries value. - */ - -static const struct gen_device_info gen_device_info_kbl_gt1 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 1, - - .max_cs_threads = 7 * 6, - .urb.size = 192, - .num_slices = 1, - .num_subslices = { 2, }, - .l3_banks = 2, -}; - -static const struct gen_device_info gen_device_info_kbl_gt1_5 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 1, - - .max_cs_threads = 7 * 6, - .num_slices = 1, - .num_subslices = { 3, }, - .l3_banks = 4, -}; - -static const struct gen_device_info gen_device_info_kbl_gt2 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 2, - - .num_slices = 1, - .num_subslices = { 3, }, - .l3_banks = 4, -}; - -static const struct gen_device_info gen_device_info_kbl_gt3 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 3, - - .num_slices = 2, - .num_subslices = { 3, 3, }, - .l3_banks = 8, -}; - -static const struct gen_device_info gen_device_info_kbl_gt4 = { - GEN9_FEATURES, - .is_kabylake = true, - .gt = 4, - - /* - * From the "L3 Allocation and Programming" documentation: - * - * "URB is limited to 1008KB due to programming restrictions. This - * is not a restriction of the L3 implementation, but of the FF and - * other clients. Therefore, in a GT4 implementation it is - * possible for the programmed allocation of the L3 data array to - * provide 3*384KB=1152KB for URB, but only 1008KB of this - * will be used." 
- */ - .urb.size = 1008 / 3, - .num_slices = 3, - .num_subslices = { 3, 3, 3, }, - .l3_banks = 12, -}; - -static const struct gen_device_info gen_device_info_glk = { - GEN9_LP_FEATURES_3X6, - .is_geminilake = true, - .l3_banks = 2, -}; - -/*TODO: Initialize l3_banks when we know the number. */ -static const struct gen_device_info gen_device_info_glk_2x6 = { - GEN9_LP_FEATURES_2X6, - .is_geminilake = true, -}; - -static const struct gen_device_info gen_device_info_cfl_gt1 = { - GEN9_FEATURES, - .is_coffeelake = true, - .gt = 1, - - .num_slices = 1, - .num_subslices = { 2, }, - .l3_banks = 2, -}; -static const struct gen_device_info gen_device_info_cfl_gt2 = { - GEN9_FEATURES, - .is_coffeelake = true, - .gt = 2, - - .num_slices = 1, - .num_subslices = { 3, }, - .l3_banks = 4, -}; - -static const struct gen_device_info gen_device_info_cfl_gt3 = { - GEN9_FEATURES, - .is_coffeelake = true, - .gt = 3, - - .num_slices = 2, - .num_subslices = { 3, 3, }, - .l3_banks = 8, -}; - -#define GEN10_HW_INFO \ - .gen = 10, \ - .num_thread_per_eu = 7, \ - .max_vs_threads = 728, \ - .max_gs_threads = 432, \ - .max_tcs_threads = 432, \ - .max_tes_threads = 624, \ - .max_cs_threads = 56, \ - .timestamp_frequency = 19200000, \ - .urb = { \ - .size = 256, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 64, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 3936, \ - [MESA_SHADER_TESS_CTRL] = 896, \ - [MESA_SHADER_TESS_EVAL] = 2064, \ - [MESA_SHADER_GEOMETRY] = 832, \ - }, \ - } - -#define subslices(args...) 
{ args, } - -#define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \ - GEN8_FEATURES, \ - GEN10_HW_INFO, \ - .has_sample_with_hiz = true, \ - .gt = _gt, \ - .num_slices = _slices, \ - .num_subslices = _subslices, \ - .l3_banks = _l3 - -static const struct gen_device_info gen_device_info_cnl_2x8 = { - /* GT0.5 */ - GEN10_FEATURES(1, 1, subslices(2), 2), - .is_cannonlake = true, -}; - -static const struct gen_device_info gen_device_info_cnl_3x8 = { - /* GT1 */ - GEN10_FEATURES(1, 1, subslices(3), 3), - .is_cannonlake = true, -}; - -static const struct gen_device_info gen_device_info_cnl_4x8 = { - /* GT 1.5 */ - GEN10_FEATURES(1, 2, subslices(2, 2), 6), - .is_cannonlake = true, -}; - -static const struct gen_device_info gen_device_info_cnl_5x8 = { - /* GT2 */ - GEN10_FEATURES(2, 2, subslices(3, 2), 6), - .is_cannonlake = true, -}; - -#define GEN11_HW_INFO \ - .gen = 11, \ - .has_pln = false, \ - .max_vs_threads = 364, \ - .max_gs_threads = 224, \ - .max_tcs_threads = 224, \ - .max_tes_threads = 364, \ - .max_cs_threads = 56, \ - .urb = { \ - .size = 1024, \ - .min_entries = { \ - [MESA_SHADER_VERTEX] = 64, \ - [MESA_SHADER_TESS_EVAL] = 34, \ - }, \ - .max_entries = { \ - [MESA_SHADER_VERTEX] = 2384, \ - [MESA_SHADER_TESS_CTRL] = 1032, \ - [MESA_SHADER_TESS_EVAL] = 2384, \ - [MESA_SHADER_GEOMETRY] = 1032, \ - }, \ - } - -#define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \ - GEN8_FEATURES, \ - GEN11_HW_INFO, \ - .has_64bit_types = false, \ - .has_integer_dword_mul = false, \ - .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ - .num_subslices = _subslices - -static const struct gen_device_info gen_device_info_icl_8x8 = { - GEN11_FEATURES(2, 1, subslices(8), 8), -}; - -static const struct gen_device_info gen_device_info_icl_6x8 = { - GEN11_FEATURES(1, 1, subslices(6), 6), -}; - -static const struct gen_device_info gen_device_info_icl_4x8 = { - GEN11_FEATURES(1, 1, subslices(4), 6), -}; - -static const struct gen_device_info gen_device_info_icl_1x8 = { - 
GEN11_FEATURES(1, 1, subslices(1), 6), -}; - -bool -gen_get_device_info(int devid, struct gen_device_info *devinfo) -{ - switch (devid) { -#undef CHIPSET -#define CHIPSET(id, family, name) \ - case id: *devinfo = gen_device_info_##family; break; -#include "pci_ids/i965_pci_ids.h" - default: - fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid); - return false; - } - - /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: - * - * "Scratch Space per slice is computed based on 4 sub-slices. SW must - * allocate scratch space enough so that each slice has 4 slices allowed." - * - * The equivalent internal documentation says that this programming note - * applies to all Gen9+ platforms. - * - * The hardware typically calculates the scratch space pointer by taking - * the base address, and adding per-thread-scratch-space * thread ID. - * Extra padding can be necessary depending how the thread IDs are - * calculated for a particular shader stage. - */ - - switch(devinfo->gen) { - case 9: - case 10: - devinfo->max_wm_threads = 64 /* threads-per-PSD */ - * devinfo->num_slices - * 4; /* effective subslices per slice */ - break; - case 11: - devinfo->max_wm_threads = 128 /* threads-per-PSD */ - * devinfo->num_slices - * 8; /* subslices per slice */ - break; - default: - break; - } - - assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices)); - - return true; -} - -const char * -gen_get_device_name(int devid) -{ - switch (devid) { -#undef CHIPSET -#define CHIPSET(id, family, name) case id: return name; -#include "pci_ids/i965_pci_ids.h" - default: - return NULL; - } -} diff --git a/src/intel/common/gen_device_info.h b/src/intel/common/gen_device_info.h deleted file mode 100644 index b8044d00032..00000000000 --- a/src/intel/common/gen_device_info.h +++ /dev/null @@ -1,213 +0,0 @@ - /* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and 
associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#ifndef GEN_DEVICE_INFO_H -#define GEN_DEVICE_INFO_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * Intel hardware information and quirks - */ -struct gen_device_info -{ - int gen; /**< Generation number: 4, 5, 6, 7, ... 
*/ - int gt; - - bool is_g4x; - bool is_ivybridge; - bool is_baytrail; - bool is_haswell; - bool is_broadwell; - bool is_cherryview; - bool is_skylake; - bool is_broxton; - bool is_kabylake; - bool is_geminilake; - bool is_coffeelake; - bool is_cannonlake; - - bool has_hiz_and_separate_stencil; - bool must_use_separate_stencil; - bool has_sample_with_hiz; - bool has_llc; - - bool has_pln; - bool has_64bit_types; - bool has_integer_dword_mul; - bool has_compr4; - bool has_surface_tile_offset; - bool supports_simd16_3src; - bool has_resource_streamer; - - /** - * \name Intel hardware quirks - * @{ - */ - bool has_negative_rhw_bug; - - /** - * Some versions of Gen hardware don't do centroid interpolation correctly - * on unlit pixels, causing incorrect values for derivatives near triangle - * edges. Enabling this flag causes the fragment shader to use - * non-centroid interpolation for unlit pixels, at the expense of two extra - * fragment shader instructions. - */ - bool needs_unlit_centroid_workaround; - /** @} */ - - /** - * \name GPU hardware limits - * - * In general, you can find shader thread maximums by looking at the "Maximum - * Number of Threads" field in the Intel PRM description of the 3DSTATE_VS, - * 3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry - * limits come from the "Number of URB Entries" field in the - * 3DSTATE_URB_VS command and friends. - * - * These fields are used to calculate the scratch space to allocate. The - * amount of scratch space can be larger without being harmful on modern - * GPUs, however, prior to Haswell, programming the maximum number of threads - * to greater than the hardware maximum would cause GPU performance to tank. - * - * @{ - */ - /** - * Total number of slices present on the device whether or not they've been - * fused off. - * - * XXX: CS thread counts are limited by the inability to do cross subslice - * communication. 
It is the effectively the number of logical threads which - * can be executed in a subslice. Fuse configurations may cause this number - * to change, so we program @max_cs_threads as the lower maximum. - */ - unsigned num_slices; - - /** - * Number of subslices for each slice (used to be uniform until CNL). - */ - unsigned num_subslices[3]; - - /** - * Number of threads per eu, varies between 4 and 8 between generations. - */ - unsigned num_thread_per_eu; - - unsigned l3_banks; - unsigned max_vs_threads; /**< Maximum Vertex Shader threads */ - unsigned max_tcs_threads; /**< Maximum Hull Shader threads */ - unsigned max_tes_threads; /**< Maximum Domain Shader threads */ - unsigned max_gs_threads; /**< Maximum Geometry Shader threads. */ - /** - * Theoretical maximum number of Pixel Shader threads. - * - * PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will - * automatically scale pixel shader thread count, based on a single value - * programmed into 3DSTATE_PS. - * - * To calculate the maximum number of threads for Gen8 beyond (which have - * multiple Pixel Shader Dispatchers): - * - * - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD" - * - Usually there's only one PSD per subslice, so use the number of - * subslices for number of PSDs. - * - For max_wm_threads, the total should be PSD threads * #PSDs. - */ - unsigned max_wm_threads; - - /** - * Maximum Compute Shader threads. - * - * Thread count * number of EUs per subslice - */ - unsigned max_cs_threads; - - struct { - /** - * Hardware default URB size. - * - * The units this is expressed in are somewhat inconsistent: 512b units - * on Gen4-5, KB on Gen6-7, and KB times the slice count on Gen8+. - * - * Look up "URB Size" in the "Device Attributes" page, and take the - * maximum. Look up the slice count for each GT SKU on the same page. - * urb.size = URB Size (kbytes) / slice count - */ - unsigned size; - - /** - * The minimum number of URB entries. See the 3DSTATE_URB_ docs. 
- */ - unsigned min_entries[4]; - - /** - * The maximum number of URB entries. See the 3DSTATE_URB_ docs. - */ - unsigned max_entries[4]; - } urb; - - /** - * For the longest time the timestamp frequency for Gen's timestamp counter - * could be assumed to be 12.5MHz, where the least significant bit neatly - * corresponded to 80 nanoseconds. - * - * Since Gen9 the numbers aren't so round, with a a frequency of 12MHz for - * SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for - * BXT. - * - * For simplicty to fit with the current code scaling by a single constant - * to map from raw timestamps to nanoseconds we now do the conversion in - * floating point instead of integer arithmetic. - * - * In general it's probably worth noting that the documented constants we - * have for the per-platform timestamp frequencies aren't perfect and - * shouldn't be trusted for scaling and comparing timestamps with a large - * delta. - * - * E.g. with crude testing on my system using the 'correct' scale factor I'm - * seeing a drift of ~2 milliseconds per second. - */ - uint64_t timestamp_frequency; - - /** @} */ -}; - -#define gen_device_info_is_9lp(devinfo) \ - ((devinfo)->is_broxton || (devinfo)->is_geminilake) - -int gen_get_pci_device_id_override(void); -int gen_device_name_to_pci_device_id(const char *name); -bool gen_get_device_info(int devid, struct gen_device_info *devinfo); -const char *gen_get_device_name(int devid); - -#ifdef __cplusplus -} -#endif - -#endif /* GEN_DEVICE_INFO_H */ diff --git a/src/intel/common/gen_l3_config.h b/src/intel/common/gen_l3_config.h index 8dc7dda0fcc..33da8bb19de 100644 --- a/src/intel/common/gen_l3_config.h +++ b/src/intel/common/gen_l3_config.h @@ -26,7 +26,7 @@ #include -#include "gen_device_info.h" +#include "dev/gen_device_info.h" /** * Chunk of L3 cache reserved for some specific purpose. 
diff --git a/src/intel/common/meson.build b/src/intel/common/meson.build index 19472e306f4..d35d5e8f78e 100644 --- a/src/intel/common/meson.build +++ b/src/intel/common/meson.build @@ -26,8 +26,6 @@ files_libintel_common = files( 'gen_debug.h', 'gen_decoder.c', 'gen_decoder.h', - 'gen_device_info.c', - 'gen_device_info.h', 'gen_l3_config.c', 'gen_l3_config.h', 'gen_urb_config.c', diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index fb68a34ab95..3f1852f582a 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -25,7 +25,7 @@ #define BRW_COMPILER_H #include -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "main/macros.h" #include "util/ralloc.h" diff --git a/src/intel/compiler/brw_inst.h b/src/intel/compiler/brw_inst.h index a67225ca6b9..e6998973b64 100644 --- a/src/intel/compiler/brw_inst.h +++ b/src/intel/compiler/brw_inst.h @@ -36,7 +36,7 @@ #include "brw_eu_defines.h" #include "brw_reg_type.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #ifdef __cplusplus extern "C" { diff --git a/src/intel/compiler/brw_reg_type.c b/src/intel/compiler/brw_reg_type.c index 3c82eb0a76f..704d6c9e1d9 100644 --- a/src/intel/compiler/brw_reg_type.c +++ b/src/intel/compiler/brw_reg_type.c @@ -23,7 +23,7 @@ #include "brw_reg.h" #include "brw_eu_defines.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #define INVALID (-1) diff --git a/src/intel/dev/gen_device_info.c b/src/intel/dev/gen_device_info.c new file mode 100644 index 00000000000..1773009d33c --- /dev/null +++ b/src/intel/dev/gen_device_info.c @@ -0,0 +1,902 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, 
publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "gen_device_info.h" +#include "compiler/shader_enums.h" +#include "util/macros.h" + +/** + * Get the PCI ID for the device name. + * + * Returns -1 if the device is not known. + */ +int +gen_device_name_to_pci_device_id(const char *name) +{ + static const struct { + const char *name; + int pci_id; + } name_map[] = { + { "brw", 0x2a02 }, + { "g4x", 0x2a42 }, + { "ilk", 0x0042 }, + { "snb", 0x0126 }, + { "ivb", 0x016a }, + { "hsw", 0x0d2e }, + { "byt", 0x0f33 }, + { "bdw", 0x162e }, + { "chv", 0x22B3 }, + { "skl", 0x1912 }, + { "bxt", 0x5A85 }, + { "kbl", 0x5912 }, + { "glk", 0x3185 }, + { "cnl", 0x5a52 }, + { "icl", 0x8a52 }, + }; + + for (unsigned i = 0; i < ARRAY_SIZE(name_map); i++) { + if (!strcmp(name_map[i].name, name)) + return name_map[i].pci_id; + } + + return -1; +} + +/** + * Get the overridden PCI ID for the device. This is set with the + * INTEL_DEVID_OVERRIDE environment variable. + * + * Returns -1 if the override is not set. 
+ */ +int +gen_get_pci_device_id_override(void) +{ + if (geteuid() == getuid()) { + const char *devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + const int id = gen_device_name_to_pci_device_id(devid_override); + return id >= 0 ? id : strtol(devid_override, NULL, 0); + } + } + + return -1; +} + +static const struct gen_device_info gen_device_info_i965 = { + .gen = 4, + .has_negative_rhw_bug = true, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 4, + .max_vs_threads = 16, + .max_gs_threads = 2, + .max_wm_threads = 8 * 4, + .urb = { + .size = 256, + }, + .timestamp_frequency = 12500000, +}; + +static const struct gen_device_info gen_device_info_g4x = { + .gen = 4, + .has_pln = true, + .has_compr4 = true, + .has_surface_tile_offset = true, + .is_g4x = true, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 5, + .max_vs_threads = 32, + .max_gs_threads = 2, + .max_wm_threads = 10 * 5, + .urb = { + .size = 384, + }, + .timestamp_frequency = 12500000, +}; + +static const struct gen_device_info gen_device_info_ilk = { + .gen = 5, + .has_pln = true, + .has_compr4 = true, + .has_surface_tile_offset = true, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 6, + .max_vs_threads = 72, + .max_gs_threads = 32, + .max_wm_threads = 12 * 6, + .urb = { + .size = 1024, + }, + .timestamp_frequency = 12500000, +}; + +static const struct gen_device_info gen_device_info_snb_gt1 = { + .gen = 6, + .gt = 1, + .has_hiz_and_separate_stencil = true, + .has_llc = true, + .has_pln = true, + .has_surface_tile_offset = true, + .needs_unlit_centroid_workaround = true, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 6, /* Not confirmed */ + .max_vs_threads = 24, + .max_gs_threads = 21, /* conservative; 24 if rendering disabled. 
*/ + .max_wm_threads = 40, + .urb = { + .size = 32, + .min_entries = { + [MESA_SHADER_VERTEX] = 24, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 256, + [MESA_SHADER_GEOMETRY] = 256, + }, + }, + .timestamp_frequency = 12500000, +}; + +static const struct gen_device_info gen_device_info_snb_gt2 = { + .gen = 6, + .gt = 2, + .has_hiz_and_separate_stencil = true, + .has_llc = true, + .has_pln = true, + .has_surface_tile_offset = true, + .needs_unlit_centroid_workaround = true, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 6, /* Not confirmed */ + .max_vs_threads = 60, + .max_gs_threads = 60, + .max_wm_threads = 80, + .urb = { + .size = 64, + .min_entries = { + [MESA_SHADER_VERTEX] = 24, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 256, + [MESA_SHADER_GEOMETRY] = 256, + }, + }, + .timestamp_frequency = 12500000, +}; + +#define GEN7_FEATURES \ + .gen = 7, \ + .has_hiz_and_separate_stencil = true, \ + .must_use_separate_stencil = true, \ + .has_llc = true, \ + .has_pln = true, \ + .has_64bit_types = true, \ + .has_surface_tile_offset = true, \ + .timestamp_frequency = 12500000 + +static const struct gen_device_info gen_device_info_ivb_gt1 = { + GEN7_FEATURES, .is_ivybridge = true, .gt = 1, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 6, + .l3_banks = 2, + .max_vs_threads = 36, + .max_tcs_threads = 36, + .max_tes_threads = 36, + .max_gs_threads = 36, + .max_wm_threads = 48, + .max_cs_threads = 36, + .urb = { + .size = 128, + .min_entries = { + [MESA_SHADER_VERTEX] = 32, + [MESA_SHADER_TESS_EVAL] = 10, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 512, + [MESA_SHADER_TESS_CTRL] = 32, + [MESA_SHADER_TESS_EVAL] = 288, + [MESA_SHADER_GEOMETRY] = 192, + }, + }, +}; + +static const struct gen_device_info gen_device_info_ivb_gt2 = { + GEN7_FEATURES, .is_ivybridge = true, .gt = 2, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 8, /* Not sure why this isn't a multiple of + * @max_wm_threads ... 
*/ + .l3_banks = 4, + .max_vs_threads = 128, + .max_tcs_threads = 128, + .max_tes_threads = 128, + .max_gs_threads = 128, + .max_wm_threads = 172, + .max_cs_threads = 64, + .urb = { + .size = 256, + .min_entries = { + [MESA_SHADER_VERTEX] = 32, + [MESA_SHADER_TESS_EVAL] = 10, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 704, + [MESA_SHADER_TESS_CTRL] = 64, + [MESA_SHADER_TESS_EVAL] = 448, + [MESA_SHADER_GEOMETRY] = 320, + }, + }, +}; + +static const struct gen_device_info gen_device_info_byt = { + GEN7_FEATURES, .is_baytrail = true, .gt = 1, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 8, + .l3_banks = 1, + .has_llc = false, + .max_vs_threads = 36, + .max_tcs_threads = 36, + .max_tes_threads = 36, + .max_gs_threads = 36, + .max_wm_threads = 48, + .max_cs_threads = 32, + .urb = { + .size = 128, + .min_entries = { + [MESA_SHADER_VERTEX] = 32, + [MESA_SHADER_TESS_EVAL] = 10, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 512, + [MESA_SHADER_TESS_CTRL] = 32, + [MESA_SHADER_TESS_EVAL] = 288, + [MESA_SHADER_GEOMETRY] = 192, + }, + }, +}; + +#define HSW_FEATURES \ + GEN7_FEATURES, \ + .is_haswell = true, \ + .supports_simd16_3src = true, \ + .has_resource_streamer = true + +static const struct gen_device_info gen_device_info_hsw_gt1 = { + HSW_FEATURES, .gt = 1, + .num_slices = 1, + .num_subslices = { 1, }, + .num_thread_per_eu = 7, + .l3_banks = 2, + .max_vs_threads = 70, + .max_tcs_threads = 70, + .max_tes_threads = 70, + .max_gs_threads = 70, + .max_wm_threads = 102, + .max_cs_threads = 70, + .urb = { + .size = 128, + .min_entries = { + [MESA_SHADER_VERTEX] = 32, + [MESA_SHADER_TESS_EVAL] = 10, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 640, + [MESA_SHADER_TESS_CTRL] = 64, + [MESA_SHADER_TESS_EVAL] = 384, + [MESA_SHADER_GEOMETRY] = 256, + }, + }, +}; + +static const struct gen_device_info gen_device_info_hsw_gt2 = { + HSW_FEATURES, .gt = 2, + .num_slices = 1, + .num_subslices = { 2, }, + .num_thread_per_eu = 7, + .l3_banks = 4, + 
.max_vs_threads = 280, + .max_tcs_threads = 256, + .max_tes_threads = 280, + .max_gs_threads = 256, + .max_wm_threads = 204, + .max_cs_threads = 70, + .urb = { + .size = 256, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 10, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 1664, + [MESA_SHADER_TESS_CTRL] = 128, + [MESA_SHADER_TESS_EVAL] = 960, + [MESA_SHADER_GEOMETRY] = 640, + }, + }, +}; + +static const struct gen_device_info gen_device_info_hsw_gt3 = { + HSW_FEATURES, .gt = 3, + .num_slices = 2, + .num_subslices = { 2, }, + .num_thread_per_eu = 7, + .l3_banks = 8, + .max_vs_threads = 280, + .max_tcs_threads = 256, + .max_tes_threads = 280, + .max_gs_threads = 256, + .max_wm_threads = 408, + .max_cs_threads = 70, + .urb = { + .size = 512, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 10, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 1664, + [MESA_SHADER_TESS_CTRL] = 128, + [MESA_SHADER_TESS_EVAL] = 960, + [MESA_SHADER_GEOMETRY] = 640, + }, + }, +}; + +/* It's unclear how well supported sampling from the hiz buffer is on GEN8, + * so keep things conservative for now and set has_sample_with_hiz = false. 
+ */ +#define GEN8_FEATURES \ + .gen = 8, \ + .has_hiz_and_separate_stencil = true, \ + .has_resource_streamer = true, \ + .must_use_separate_stencil = true, \ + .has_llc = true, \ + .has_sample_with_hiz = false, \ + .has_pln = true, \ + .has_integer_dword_mul = true, \ + .has_64bit_types = true, \ + .supports_simd16_3src = true, \ + .has_surface_tile_offset = true, \ + .max_vs_threads = 504, \ + .max_tcs_threads = 504, \ + .max_tes_threads = 504, \ + .max_gs_threads = 504, \ + .max_wm_threads = 384, \ + .timestamp_frequency = 12500000 + +static const struct gen_device_info gen_device_info_bdw_gt1 = { + GEN8_FEATURES, .gt = 1, + .is_broadwell = true, + .num_slices = 1, + .num_subslices = { 2, }, + .num_thread_per_eu = 7, + .l3_banks = 2, + .max_cs_threads = 42, + .urb = { + .size = 192, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 34, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 2560, + [MESA_SHADER_TESS_CTRL] = 504, + [MESA_SHADER_TESS_EVAL] = 1536, + [MESA_SHADER_GEOMETRY] = 960, + }, + } +}; + +static const struct gen_device_info gen_device_info_bdw_gt2 = { + GEN8_FEATURES, .gt = 2, + .is_broadwell = true, + .num_slices = 1, + .num_subslices = { 3, }, + .num_thread_per_eu = 7, + .l3_banks = 4, + .max_cs_threads = 56, + .urb = { + .size = 384, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 34, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 2560, + [MESA_SHADER_TESS_CTRL] = 504, + [MESA_SHADER_TESS_EVAL] = 1536, + [MESA_SHADER_GEOMETRY] = 960, + }, + } +}; + +static const struct gen_device_info gen_device_info_bdw_gt3 = { + GEN8_FEATURES, .gt = 3, + .is_broadwell = true, + .num_slices = 2, + .num_subslices = { 3, 3, }, + .num_thread_per_eu = 7, + .l3_banks = 8, + .max_cs_threads = 56, + .urb = { + .size = 384, + .min_entries = { + [MESA_SHADER_VERTEX] = 64, + [MESA_SHADER_TESS_EVAL] = 34, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 2560, + [MESA_SHADER_TESS_CTRL] = 504, + 
[MESA_SHADER_TESS_EVAL] = 1536, + [MESA_SHADER_GEOMETRY] = 960, + }, + } +}; + +static const struct gen_device_info gen_device_info_chv = { + GEN8_FEATURES, .is_cherryview = 1, .gt = 1, + .has_llc = false, + .has_integer_dword_mul = false, + .num_slices = 1, + .num_subslices = { 2, }, + .num_thread_per_eu = 7, + .l3_banks = 2, + .max_vs_threads = 80, + .max_tcs_threads = 80, + .max_tes_threads = 80, + .max_gs_threads = 80, + .max_wm_threads = 128, + .max_cs_threads = 6 * 7, + .urb = { + .size = 192, + .min_entries = { + [MESA_SHADER_VERTEX] = 34, + [MESA_SHADER_TESS_EVAL] = 34, + }, + .max_entries = { + [MESA_SHADER_VERTEX] = 640, + [MESA_SHADER_TESS_CTRL] = 80, + [MESA_SHADER_TESS_EVAL] = 384, + [MESA_SHADER_GEOMETRY] = 256, + }, + } +}; + +#define GEN9_HW_INFO \ + .gen = 9, \ + .max_vs_threads = 336, \ + .max_gs_threads = 336, \ + .max_tcs_threads = 336, \ + .max_tes_threads = 336, \ + .max_cs_threads = 56, \ + .timestamp_frequency = 12000000, \ + .urb = { \ + .size = 384, \ + .min_entries = { \ + [MESA_SHADER_VERTEX] = 64, \ + [MESA_SHADER_TESS_EVAL] = 34, \ + }, \ + .max_entries = { \ + [MESA_SHADER_VERTEX] = 1856, \ + [MESA_SHADER_TESS_CTRL] = 672, \ + [MESA_SHADER_TESS_EVAL] = 1120, \ + [MESA_SHADER_GEOMETRY] = 640, \ + }, \ + } + +#define GEN9_LP_FEATURES \ + GEN8_FEATURES, \ + GEN9_HW_INFO, \ + .has_integer_dword_mul = false, \ + .gt = 1, \ + .has_llc = false, \ + .has_sample_with_hiz = true, \ + .num_slices = 1, \ + .num_thread_per_eu = 6, \ + .max_vs_threads = 112, \ + .max_tcs_threads = 112, \ + .max_tes_threads = 112, \ + .max_gs_threads = 112, \ + .max_cs_threads = 6 * 6, \ + .timestamp_frequency = 19200000, \ + .urb = { \ + .size = 192, \ + .min_entries = { \ + [MESA_SHADER_VERTEX] = 34, \ + [MESA_SHADER_TESS_EVAL] = 34, \ + }, \ + .max_entries = { \ + [MESA_SHADER_VERTEX] = 704, \ + [MESA_SHADER_TESS_CTRL] = 256, \ + [MESA_SHADER_TESS_EVAL] = 416, \ + [MESA_SHADER_GEOMETRY] = 256, \ + }, \ + } + +#define GEN9_LP_FEATURES_3X6 \ + GEN9_LP_FEATURES, \ + 
.num_subslices = { 3, } + +#define GEN9_LP_FEATURES_2X6 \ + GEN9_LP_FEATURES, \ + .num_subslices = { 2, }, \ + .max_vs_threads = 56, \ + .max_tcs_threads = 56, \ + .max_tes_threads = 56, \ + .max_gs_threads = 56, \ + .max_cs_threads = 6 * 6, \ + .urb = { \ + .size = 128, \ + .min_entries = { \ + [MESA_SHADER_VERTEX] = 34, \ + [MESA_SHADER_TESS_EVAL] = 34, \ + }, \ + .max_entries = { \ + [MESA_SHADER_VERTEX] = 352, \ + [MESA_SHADER_TESS_CTRL] = 128, \ + [MESA_SHADER_TESS_EVAL] = 208, \ + [MESA_SHADER_GEOMETRY] = 128, \ + }, \ + } + +#define GEN9_FEATURES \ + GEN8_FEATURES, \ + GEN9_HW_INFO, \ + .has_sample_with_hiz = true, \ + .num_thread_per_eu = 7 + +static const struct gen_device_info gen_device_info_skl_gt1 = { + GEN9_FEATURES, .gt = 1, + .is_skylake = true, + .num_slices = 1, + .num_subslices = { 2, }, + .l3_banks = 2, + .urb.size = 192, +}; + +static const struct gen_device_info gen_device_info_skl_gt2 = { + GEN9_FEATURES, .gt = 2, + .is_skylake = true, + .num_slices = 1, + .num_subslices = { 3, }, + .l3_banks = 4, +}; + +static const struct gen_device_info gen_device_info_skl_gt3 = { + GEN9_FEATURES, .gt = 3, + .is_skylake = true, + .num_slices = 2, + .num_subslices = { 3, 3, }, + .l3_banks = 8, +}; + +static const struct gen_device_info gen_device_info_skl_gt4 = { + GEN9_FEATURES, .gt = 4, + .is_skylake = true, + .num_slices = 3, + .num_subslices = { 3, 3, 3, }, + .l3_banks = 12, + /* From the "L3 Allocation and Programming" documentation: + * + * "URB is limited to 1008KB due to programming restrictions. This is not a + * restriction of the L3 implementation, but of the FF and other clients. + * Therefore, in a GT4 implementation it is possible for the programmed + * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but + * only 1008KB of this will be used." 
+ */ + .urb.size = 1008 / 3, +}; + +static const struct gen_device_info gen_device_info_bxt = { + GEN9_LP_FEATURES_3X6, + .is_broxton = true, + .l3_banks = 2, +}; + +static const struct gen_device_info gen_device_info_bxt_2x6 = { + GEN9_LP_FEATURES_2X6, + .is_broxton = true, + .l3_banks = 1, +}; +/* + * Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+. + * There's no KBL entry. Using the default SKL (GEN9) GS entries value. + */ + +static const struct gen_device_info gen_device_info_kbl_gt1 = { + GEN9_FEATURES, + .is_kabylake = true, + .gt = 1, + + .max_cs_threads = 7 * 6, + .urb.size = 192, + .num_slices = 1, + .num_subslices = { 2, }, + .l3_banks = 2, +}; + +static const struct gen_device_info gen_device_info_kbl_gt1_5 = { + GEN9_FEATURES, + .is_kabylake = true, + .gt = 1, + + .max_cs_threads = 7 * 6, + .num_slices = 1, + .num_subslices = { 3, }, + .l3_banks = 4, +}; + +static const struct gen_device_info gen_device_info_kbl_gt2 = { + GEN9_FEATURES, + .is_kabylake = true, + .gt = 2, + + .num_slices = 1, + .num_subslices = { 3, }, + .l3_banks = 4, +}; + +static const struct gen_device_info gen_device_info_kbl_gt3 = { + GEN9_FEATURES, + .is_kabylake = true, + .gt = 3, + + .num_slices = 2, + .num_subslices = { 3, 3, }, + .l3_banks = 8, +}; + +static const struct gen_device_info gen_device_info_kbl_gt4 = { + GEN9_FEATURES, + .is_kabylake = true, + .gt = 4, + + /* + * From the "L3 Allocation and Programming" documentation: + * + * "URB is limited to 1008KB due to programming restrictions. This + * is not a restriction of the L3 implementation, but of the FF and + * other clients. Therefore, in a GT4 implementation it is + * possible for the programmed allocation of the L3 data array to + * provide 3*384KB=1152KB for URB, but only 1008KB of this + * will be used." 
+ */ + .urb.size = 1008 / 3, + .num_slices = 3, + .num_subslices = { 3, 3, 3, }, + .l3_banks = 12, +}; + +static const struct gen_device_info gen_device_info_glk = { + GEN9_LP_FEATURES_3X6, + .is_geminilake = true, + .l3_banks = 2, +}; + +/*TODO: Initialize l3_banks when we know the number. */ +static const struct gen_device_info gen_device_info_glk_2x6 = { + GEN9_LP_FEATURES_2X6, + .is_geminilake = true, +}; + +static const struct gen_device_info gen_device_info_cfl_gt1 = { + GEN9_FEATURES, + .is_coffeelake = true, + .gt = 1, + + .num_slices = 1, + .num_subslices = { 2, }, + .l3_banks = 2, +}; +static const struct gen_device_info gen_device_info_cfl_gt2 = { + GEN9_FEATURES, + .is_coffeelake = true, + .gt = 2, + + .num_slices = 1, + .num_subslices = { 3, }, + .l3_banks = 4, +}; + +static const struct gen_device_info gen_device_info_cfl_gt3 = { + GEN9_FEATURES, + .is_coffeelake = true, + .gt = 3, + + .num_slices = 2, + .num_subslices = { 3, 3, }, + .l3_banks = 8, +}; + +#define GEN10_HW_INFO \ + .gen = 10, \ + .num_thread_per_eu = 7, \ + .max_vs_threads = 728, \ + .max_gs_threads = 432, \ + .max_tcs_threads = 432, \ + .max_tes_threads = 624, \ + .max_cs_threads = 56, \ + .timestamp_frequency = 19200000, \ + .urb = { \ + .size = 256, \ + .min_entries = { \ + [MESA_SHADER_VERTEX] = 64, \ + [MESA_SHADER_TESS_EVAL] = 34, \ + }, \ + .max_entries = { \ + [MESA_SHADER_VERTEX] = 3936, \ + [MESA_SHADER_TESS_CTRL] = 896, \ + [MESA_SHADER_TESS_EVAL] = 2064, \ + [MESA_SHADER_GEOMETRY] = 832, \ + }, \ + } + +#define subslices(args...) 
{ args, } + +#define GEN10_FEATURES(_gt, _slices, _subslices, _l3) \ + GEN8_FEATURES, \ + GEN10_HW_INFO, \ + .has_sample_with_hiz = true, \ + .gt = _gt, \ + .num_slices = _slices, \ + .num_subslices = _subslices, \ + .l3_banks = _l3 + +static const struct gen_device_info gen_device_info_cnl_2x8 = { + /* GT0.5 */ + GEN10_FEATURES(1, 1, subslices(2), 2), + .is_cannonlake = true, +}; + +static const struct gen_device_info gen_device_info_cnl_3x8 = { + /* GT1 */ + GEN10_FEATURES(1, 1, subslices(3), 3), + .is_cannonlake = true, +}; + +static const struct gen_device_info gen_device_info_cnl_4x8 = { + /* GT 1.5 */ + GEN10_FEATURES(1, 2, subslices(2, 2), 6), + .is_cannonlake = true, +}; + +static const struct gen_device_info gen_device_info_cnl_5x8 = { + /* GT2 */ + GEN10_FEATURES(2, 2, subslices(3, 2), 6), + .is_cannonlake = true, +}; + +#define GEN11_HW_INFO \ + .gen = 11, \ + .has_pln = false, \ + .max_vs_threads = 364, \ + .max_gs_threads = 224, \ + .max_tcs_threads = 224, \ + .max_tes_threads = 364, \ + .max_cs_threads = 56, \ + .urb = { \ + .size = 1024, \ + .min_entries = { \ + [MESA_SHADER_VERTEX] = 64, \ + [MESA_SHADER_TESS_EVAL] = 34, \ + }, \ + .max_entries = { \ + [MESA_SHADER_VERTEX] = 2384, \ + [MESA_SHADER_TESS_CTRL] = 1032, \ + [MESA_SHADER_TESS_EVAL] = 2384, \ + [MESA_SHADER_GEOMETRY] = 1032, \ + }, \ + } + +#define GEN11_FEATURES(_gt, _slices, _subslices, _l3) \ + GEN8_FEATURES, \ + GEN11_HW_INFO, \ + .has_64bit_types = false, \ + .has_integer_dword_mul = false, \ + .gt = _gt, .num_slices = _slices, .l3_banks = _l3, \ + .num_subslices = _subslices + +static const struct gen_device_info gen_device_info_icl_8x8 = { + GEN11_FEATURES(2, 1, subslices(8), 8), +}; + +static const struct gen_device_info gen_device_info_icl_6x8 = { + GEN11_FEATURES(1, 1, subslices(6), 6), +}; + +static const struct gen_device_info gen_device_info_icl_4x8 = { + GEN11_FEATURES(1, 1, subslices(4), 6), +}; + +static const struct gen_device_info gen_device_info_icl_1x8 = { + 
GEN11_FEATURES(1, 1, subslices(1), 6), +}; + +bool +gen_get_device_info(int devid, struct gen_device_info *devinfo) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, name) \ + case id: *devinfo = gen_device_info_##family; break; +#include "pci_ids/i965_pci_ids.h" + default: + fprintf(stderr, "i965_dri.so does not support the 0x%x PCI ID.\n", devid); + return false; + } + + /* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer: + * + * "Scratch Space per slice is computed based on 4 sub-slices. SW must + * allocate scratch space enough so that each slice has 4 slices allowed." + * + * The equivalent internal documentation says that this programming note + * applies to all Gen9+ platforms. + * + * The hardware typically calculates the scratch space pointer by taking + * the base address, and adding per-thread-scratch-space * thread ID. + * Extra padding can be necessary depending how the thread IDs are + * calculated for a particular shader stage. + */ + + switch(devinfo->gen) { + case 9: + case 10: + devinfo->max_wm_threads = 64 /* threads-per-PSD */ + * devinfo->num_slices + * 4; /* effective subslices per slice */ + break; + case 11: + devinfo->max_wm_threads = 128 /* threads-per-PSD */ + * devinfo->num_slices + * 8; /* subslices per slice */ + break; + default: + break; + } + + assert(devinfo->num_slices <= ARRAY_SIZE(devinfo->num_subslices)); + + return true; +} + +const char * +gen_get_device_name(int devid) +{ + switch (devid) { +#undef CHIPSET +#define CHIPSET(id, family, name) case id: return name; +#include "pci_ids/i965_pci_ids.h" + default: + return NULL; + } +} diff --git a/src/intel/dev/gen_device_info.h b/src/intel/dev/gen_device_info.h new file mode 100644 index 00000000000..b8044d00032 --- /dev/null +++ b/src/intel/dev/gen_device_info.h @@ -0,0 +1,213 @@ + /* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef GEN_DEVICE_INFO_H +#define GEN_DEVICE_INFO_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Intel hardware information and quirks + */ +struct gen_device_info +{ + int gen; /**< Generation number: 4, 5, 6, 7, ... 
*/ + int gt; + + bool is_g4x; + bool is_ivybridge; + bool is_baytrail; + bool is_haswell; + bool is_broadwell; + bool is_cherryview; + bool is_skylake; + bool is_broxton; + bool is_kabylake; + bool is_geminilake; + bool is_coffeelake; + bool is_cannonlake; + + bool has_hiz_and_separate_stencil; + bool must_use_separate_stencil; + bool has_sample_with_hiz; + bool has_llc; + + bool has_pln; + bool has_64bit_types; + bool has_integer_dword_mul; + bool has_compr4; + bool has_surface_tile_offset; + bool supports_simd16_3src; + bool has_resource_streamer; + + /** + * \name Intel hardware quirks + * @{ + */ + bool has_negative_rhw_bug; + + /** + * Some versions of Gen hardware don't do centroid interpolation correctly + * on unlit pixels, causing incorrect values for derivatives near triangle + * edges. Enabling this flag causes the fragment shader to use + * non-centroid interpolation for unlit pixels, at the expense of two extra + * fragment shader instructions. + */ + bool needs_unlit_centroid_workaround; + /** @} */ + + /** + * \name GPU hardware limits + * + * In general, you can find shader thread maximums by looking at the "Maximum + * Number of Threads" field in the Intel PRM description of the 3DSTATE_VS, + * 3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry + * limits come from the "Number of URB Entries" field in the + * 3DSTATE_URB_VS command and friends. + * + * These fields are used to calculate the scratch space to allocate. The + * amount of scratch space can be larger without being harmful on modern + * GPUs, however, prior to Haswell, programming the maximum number of threads + * to greater than the hardware maximum would cause GPU performance to tank. + * + * @{ + */ + /** + * Total number of slices present on the device whether or not they've been + * fused off. + * + * XXX: CS thread counts are limited by the inability to do cross subslice + * communication. 
It is effectively the number of logical threads which + * can be executed in a subslice. Fuse configurations may cause this number + * to change, so we program @max_cs_threads as the lower maximum. + */ + unsigned num_slices; + + /** + * Number of subslices for each slice (used to be uniform until CNL). + */ + unsigned num_subslices[3]; + + /** + * Number of threads per eu, varies between 4 and 8 between generations. + */ + unsigned num_thread_per_eu; + + unsigned l3_banks; + unsigned max_vs_threads; /**< Maximum Vertex Shader threads */ + unsigned max_tcs_threads; /**< Maximum Hull Shader threads */ + unsigned max_tes_threads; /**< Maximum Domain Shader threads */ + unsigned max_gs_threads; /**< Maximum Geometry Shader threads. */ + /** + * Theoretical maximum number of Pixel Shader threads. + * + * PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will + * automatically scale pixel shader thread count, based on a single value + * programmed into 3DSTATE_PS. + * + * To calculate the maximum number of threads for Gen8 and beyond (which + * have multiple Pixel Shader Dispatchers): + * + * - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD" + * - Usually there's only one PSD per subslice, so use the number of + * subslices for number of PSDs. + * - For max_wm_threads, the total should be PSD threads * #PSDs. + */ + unsigned max_wm_threads; + + /** + * Maximum Compute Shader threads. + * + * Thread count * number of EUs per subslice + */ + unsigned max_cs_threads; + + struct { + /** + * Hardware default URB size. + * + * The units this is expressed in are somewhat inconsistent: 512b units + * on Gen4-5, KB on Gen6-7, and KB times the slice count on Gen8+. + * + * Look up "URB Size" in the "Device Attributes" page, and take the + * maximum. Look up the slice count for each GT SKU on the same page. + * urb.size = URB Size (kbytes) / slice count + */ + unsigned size; + + /** + * The minimum number of URB entries. See the 3DSTATE_URB_ docs. 
+ */ + unsigned min_entries[4]; + + /** + * The maximum number of URB entries. See the 3DSTATE_URB_ docs. + */ + unsigned max_entries[4]; + } urb; + + /** + * For the longest time the timestamp frequency for Gen's timestamp counter + * could be assumed to be 12.5MHz, where the least significant bit neatly + * corresponded to 80 nanoseconds. + * + * Since Gen9 the numbers aren't so round, with a frequency of 12MHz for + * SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for + * BXT. + * + * For simplicity to fit with the current code scaling by a single constant + * to map from raw timestamps to nanoseconds we now do the conversion in + * floating point instead of integer arithmetic. + * + * In general it's probably worth noting that the documented constants we + * have for the per-platform timestamp frequencies aren't perfect and + * shouldn't be trusted for scaling and comparing timestamps with a large + * delta. + * + * E.g. with crude testing on my system using the 'correct' scale factor I'm + * seeing a drift of ~2 milliseconds per second. 
+ */ + uint64_t timestamp_frequency; + + /** @} */ +}; + +#define gen_device_info_is_9lp(devinfo) \ + ((devinfo)->is_broxton || (devinfo)->is_geminilake) + +int gen_get_pci_device_id_override(void); +int gen_device_name_to_pci_device_id(const char *name); +bool gen_get_device_info(int devid, struct gen_device_info *devinfo); +const char *gen_get_device_name(int devid); + +#ifdef __cplusplus +} +#endif + +#endif /* GEN_DEVICE_INFO_H */ diff --git a/src/intel/dev/meson.build b/src/intel/dev/meson.build new file mode 100644 index 00000000000..3346fe60c07 --- /dev/null +++ b/src/intel/dev/meson.build @@ -0,0 +1,33 @@ +# Copyright © 2017 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# TODO: android? 
+ +files_libintel_dev = files( + 'gen_device_info.c', + 'gen_device_info.h', +) + +libintel_dev = static_library( + ['intel_dev'], + files_libintel_dev, + include_directories : [inc_common, inc_intel], + c_args : [c_vis_args, no_override_init_args], +) diff --git a/src/intel/genxml/gen_bits_header.py b/src/intel/genxml/gen_bits_header.py index 965a74ed893..faba79dcd34 100644 --- a/src/intel/genxml/gen_bits_header.py +++ b/src/intel/genxml/gen_bits_header.py @@ -67,7 +67,7 @@ from operator import itemgetter #include -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "util/macros.h" <%def name="emit_per_gen_prop_func(item, prop)"> diff --git a/src/intel/isl/isl_drm.c b/src/intel/isl/isl_drm.c index 31895e15e16..e16d7b63917 100644 --- a/src/intel/isl/isl_drm.c +++ b/src/intel/isl/isl_drm.c @@ -28,7 +28,7 @@ #include #include "isl.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" uint32_t isl_tiling_to_i915_tiling(enum isl_tiling tiling) diff --git a/src/intel/isl/isl_format.c b/src/intel/isl/isl_format.c index 03c591071b6..a5bbdbceb1a 100644 --- a/src/intel/isl/isl_format.c +++ b/src/intel/isl/isl_format.c @@ -24,7 +24,7 @@ #include #include "isl.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" struct surface_format_info { bool exists; diff --git a/src/intel/isl/isl_priv.h b/src/intel/isl/isl_priv.h index b86167bb3af..871518409ee 100644 --- a/src/intel/isl/isl_priv.h +++ b/src/intel/isl/isl_priv.h @@ -27,7 +27,7 @@ #include #include -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "util/macros.h" #include "isl.h" diff --git a/src/intel/isl/meson.build b/src/intel/isl/meson.build index 36b8b8ffa20..73a8837f2ad 100644 --- a/src/intel/isl/meson.build +++ b/src/intel/isl/meson.build @@ -95,7 +95,7 @@ if with_tests 'tests/isl_surf_get_image_offset_test.c', dependencies : dep_m, include_directories : [inc_common, inc_intel], - link_with : [libisl, libintel_common], + 
link_with : [libisl, libintel_dev], ) ) endif diff --git a/src/intel/isl/tests/isl_surf_get_image_offset_test.c b/src/intel/isl/tests/isl_surf_get_image_offset_test.c index 05fd79f3a94..16c80c312e9 100644 --- a/src/intel/isl/tests/isl_surf_get_image_offset_test.c +++ b/src/intel/isl/tests/isl_surf_get_image_offset_test.c @@ -26,7 +26,7 @@ #include #include -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "isl/isl.h" #include "isl/isl_priv.h" diff --git a/src/intel/meson.build b/src/intel/meson.build index 57676082c9d..28a2d793509 100644 --- a/src/intel/meson.build +++ b/src/intel/meson.build @@ -21,6 +21,7 @@ inc_intel = include_directories('.') subdir('blorp') +subdir('dev') subdir('genxml') subdir('common') subdir('isl') diff --git a/src/intel/tools/gen_disasm.h b/src/intel/tools/gen_disasm.h index 52601cb2864..c8c18b2cf03 100644 --- a/src/intel/tools/gen_disasm.h +++ b/src/intel/tools/gen_disasm.h @@ -24,7 +24,7 @@ #ifndef GEN_DISASM_H #define GEN_DISASM_H -#include "intel/common/gen_device_info.h" +#include "intel/dev/gen_device_info.h" #ifdef __cplusplus extern "C" { diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build index 82413041b13..84b44841f75 100644 --- a/src/intel/tools/meson.build +++ b/src/intel/tools/meson.build @@ -24,7 +24,7 @@ aubinator = executable( 'gen_disasm.h', 'intel_aub.h'), dependencies : [dep_expat, dep_zlib, dep_dl, dep_thread, dep_m], include_directories : [inc_common, inc_intel], - link_with : [libintel_common, libintel_compiler, libmesa_util], + link_with : [libintel_common, libintel_compiler, libintel_dev, libmesa_util], c_args : [c_vis_args, no_override_init_args], build_by_default : with_tools.contains('intel'), install : with_tools.contains('intel'), @@ -36,7 +36,7 @@ aubinator_error_decode = executable( 'gen_batch_decoder.c'), dependencies : [dep_zlib, dep_thread], include_directories : [inc_common, inc_intel], - link_with : [libintel_common, libintel_compiler, libmesa_util], + 
link_with : [libintel_common, libintel_compiler, libintel_dev, libmesa_util], c_args : [c_vis_args, no_override_init_args], build_by_default : with_tools.contains('intel'), install : with_tools.contains('intel'), diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index a6863f5532d..72ebd98a900 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -42,7 +42,7 @@ #endif #include "common/gen_clflush.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "blorp/blorp.h" #include "compiler/brw_compiler.h" #include "util/macros.h" diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index f0317236702..d45331febe0 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -192,8 +192,8 @@ libvulkan_intel = shared_library( ], link_whole : [libanv_common, libanv_gen_libs], link_with : [ - libintel_compiler, libintel_common, libisl, libblorp, libvulkan_util, - libvulkan_wsi, libmesa_util, + libintel_compiler, libintel_common, libintel_dev, libisl, libblorp, + libvulkan_util, libvulkan_wsi, libmesa_util, ], dependencies : [ dep_thread, dep_dl, dep_m, anv_deps, idep_nir, @@ -213,8 +213,8 @@ if with_tests ], link_whole : libanv_common, link_with : [ - libanv_gen_libs, libintel_compiler, libintel_common, libisl, libblorp, - libvulkan_util, libvulkan_wsi, libmesa_util, + libanv_gen_libs, libintel_compiler, libintel_common, libintel_dev, + libisl, libblorp, libvulkan_util, libvulkan_wsi, libmesa_util, ], dependencies : [ dep_thread, dep_dl, dep_m, anv_deps, idep_nir, diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index da56f67fa5f..824882389ea 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -99,6 +99,7 @@ libi965_dri_la_SOURCES = \ $(i965_oa_GENERATED_FILES) libi965_dri_la_LIBADD = \ $(top_builddir)/src/intel/common/libintel_common.la \ + 
$(top_builddir)/src/intel/dev/libintel_dev.la \ $(top_builddir)/src/intel/isl/libisl.la \ $(top_builddir)/src/intel/compiler/libintel_compiler.la \ $(top_builddir)/src/intel/blorp/libblorp.la \ diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index 2e54adb3ed2..f698123db4c 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -54,7 +54,7 @@ #endif #include "common/gen_clflush.h" #include "common/gen_debug.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "libdrm_macros.h" #include "main/macros.h" #include "util/macros.h" diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 001589a8dfe..0abe8e2d557 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -23,7 +23,7 @@ #include -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "common/gen_sample_positions.h" #include "genxml/gen_macros.h" diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index a2bce921449..8d56fcd9e7a 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -34,7 +34,7 @@ #include "isl/isl.h" #include "dri_util.h" #include "brw_bufmgr.h" -#include "common/gen_device_info.h" +#include "dev/gen_device_info.h" #include "i915_drm.h" #include "util/xmlconfig.h" diff --git a/src/mesa/drivers/dri/i965/meson.build b/src/mesa/drivers/dri/i965/meson.build index e6866147d9f..b0bf40351da 100644 --- a/src/mesa/drivers/dri/i965/meson.build +++ b/src/mesa/drivers/dri/i965/meson.build @@ -174,7 +174,8 @@ libi965 = static_library( c_args : [c_vis_args, no_override_init_args, '-msse2'], cpp_args : [cpp_vis_args, '-msse2'], link_with : [ - i965_gen_libs, libintel_common, libisl, libintel_compiler, libblorp, + i965_gen_libs, libintel_common, libintel_dev, 
libisl, libintel_compiler, + libblorp, ], dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers], )