radeonsi: move all get functions to si_get.c; disk_cache_create to si_pipe.c
authorMarek Olšák <marek.olsak@amd.com>
Sat, 25 Nov 2017 21:33:10 +0000 (22:33 +0100)
committerMarek Olšák <marek.olsak@amd.com>
Wed, 29 Nov 2017 17:21:30 +0000 (18:21 +0100)
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeonsi/Makefile.sources
src/gallium/drivers/radeonsi/meson.build
src/gallium/drivers/radeonsi/si_get.c [new file with mode: 0644]
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h

index f410b277c25d22fedb7beeb37a8b7e304a10b472..b4e76a39921604cb7fff07aa4eb87b1cbf00980e 100644 (file)
@@ -36,7 +36,6 @@
 #include "amd/common/ac_llvm_util.h"
 #include "amd/common/sid.h"
 #include <inttypes.h>
-#include <sys/utsname.h>
 
 #include <llvm-c/TargetMachine.h>
 
@@ -630,370 +629,6 @@ static const struct debug_named_value common_debug_options[] = {
        DEBUG_NAMED_VALUE_END /* must be last */
 };
 
-static const char* r600_get_vendor(struct pipe_screen* pscreen)
-{
-       return "X.Org";
-}
-
-static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
-{
-       return "AMD";
-}
-
-static const char *r600_get_marketing_name(struct radeon_winsys *ws)
-{
-       if (!ws->get_chip_name)
-               return NULL;
-       return ws->get_chip_name(ws);
-}
-
-static const char *r600_get_family_name(const struct r600_common_screen *rscreen)
-{
-       switch (rscreen->info.family) {
-       case CHIP_TAHITI: return "AMD TAHITI";
-       case CHIP_PITCAIRN: return "AMD PITCAIRN";
-       case CHIP_VERDE: return "AMD CAPE VERDE";
-       case CHIP_OLAND: return "AMD OLAND";
-       case CHIP_HAINAN: return "AMD HAINAN";
-       case CHIP_BONAIRE: return "AMD BONAIRE";
-       case CHIP_KAVERI: return "AMD KAVERI";
-       case CHIP_KABINI: return "AMD KABINI";
-       case CHIP_HAWAII: return "AMD HAWAII";
-       case CHIP_MULLINS: return "AMD MULLINS";
-       case CHIP_TONGA: return "AMD TONGA";
-       case CHIP_ICELAND: return "AMD ICELAND";
-       case CHIP_CARRIZO: return "AMD CARRIZO";
-       case CHIP_FIJI: return "AMD FIJI";
-       case CHIP_POLARIS10: return "AMD POLARIS10";
-       case CHIP_POLARIS11: return "AMD POLARIS11";
-       case CHIP_POLARIS12: return "AMD POLARIS12";
-       case CHIP_STONEY: return "AMD STONEY";
-       case CHIP_VEGA10: return "AMD VEGA10";
-       case CHIP_RAVEN: return "AMD RAVEN";
-       default: return "AMD unknown";
-       }
-}
-
-static void r600_disk_cache_create(struct r600_common_screen *rscreen)
-{
-       /* Don't use the cache if shader dumping is enabled. */
-       if (rscreen->debug_flags & DBG_ALL_SHADERS)
-               return;
-
-       /* TODO: remove this once gallium supports a nir cache */
-       if (rscreen->debug_flags & DBG(NIR))
-               return;
-
-       uint32_t mesa_timestamp;
-       if (disk_cache_get_function_timestamp(r600_disk_cache_create,
-                                             &mesa_timestamp)) {
-               char *timestamp_str;
-               int res = -1;
-               uint32_t llvm_timestamp;
-
-               if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
-                                                     &llvm_timestamp)) {
-                       res = asprintf(&timestamp_str, "%u_%u",
-                                      mesa_timestamp, llvm_timestamp);
-               }
-
-               if (res != -1) {
-                       /* These flags affect shader compilation. */
-                       uint64_t shader_debug_flags =
-                               rscreen->debug_flags &
-                               (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
-                                DBG(SI_SCHED) |
-                                DBG(UNSAFE_MATH));
-
-                       rscreen->disk_shader_cache =
-                               disk_cache_create(r600_get_family_name(rscreen),
-                                                 timestamp_str,
-                                                 shader_debug_flags);
-                       free(timestamp_str);
-               }
-       }
-}
-
-static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen)
-{
-       struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
-       return rscreen->disk_shader_cache;
-}
-
-static const char* r600_get_name(struct pipe_screen* pscreen)
-{
-       struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen;
-
-       return rscreen->renderer_string;
-}
-
-static float r600_get_paramf(struct pipe_screen* pscreen,
-                            enum pipe_capf param)
-{
-       switch (param) {
-       case PIPE_CAPF_MAX_LINE_WIDTH:
-       case PIPE_CAPF_MAX_LINE_WIDTH_AA:
-       case PIPE_CAPF_MAX_POINT_WIDTH:
-       case PIPE_CAPF_MAX_POINT_WIDTH_AA:
-               return 8192.0f;
-       case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
-               return 16.0f;
-       case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
-               return 16.0f;
-       case PIPE_CAPF_GUARD_BAND_LEFT:
-       case PIPE_CAPF_GUARD_BAND_TOP:
-       case PIPE_CAPF_GUARD_BAND_RIGHT:
-       case PIPE_CAPF_GUARD_BAND_BOTTOM:
-               return 0.0f;
-       }
-       return 0.0f;
-}
-
-static int r600_get_video_param(struct pipe_screen *screen,
-                               enum pipe_video_profile profile,
-                               enum pipe_video_entrypoint entrypoint,
-                               enum pipe_video_cap param)
-{
-       switch (param) {
-       case PIPE_VIDEO_CAP_SUPPORTED:
-               return vl_profile_supported(screen, profile, entrypoint);
-       case PIPE_VIDEO_CAP_NPOT_TEXTURES:
-               return 1;
-       case PIPE_VIDEO_CAP_MAX_WIDTH:
-       case PIPE_VIDEO_CAP_MAX_HEIGHT:
-               return vl_video_buffer_max_size(screen);
-       case PIPE_VIDEO_CAP_PREFERED_FORMAT:
-               return PIPE_FORMAT_NV12;
-       case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
-               return false;
-       case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
-               return false;
-       case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
-               return true;
-       case PIPE_VIDEO_CAP_MAX_LEVEL:
-               return vl_level_supported(screen, profile);
-       default:
-               return 0;
-       }
-}
-
-static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
-                                         enum pipe_shader_ir ir_type)
-{
-       if (ir_type != PIPE_SHADER_IR_TGSI)
-               return 256;
-
-       /* Only 16 waves per thread-group on gfx9. */
-       if (screen->chip_class >= GFX9)
-               return 1024;
-
-       /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
-        * round number.
-        */
-       return 2048;
-}
-
-static int r600_get_compute_param(struct pipe_screen *screen,
-        enum pipe_shader_ir ir_type,
-        enum pipe_compute_cap param,
-        void *ret)
-{
-       struct r600_common_screen *rscreen = (struct r600_common_screen *)screen;
-
-       //TODO: select these params by asic
-       switch (param) {
-       case PIPE_COMPUTE_CAP_IR_TARGET: {
-               const char *gpu;
-               const char *triple;
-
-               if (HAVE_LLVM < 0x0400)
-                       triple = "amdgcn--";
-               else
-                       triple = "amdgcn-mesa-mesa3d";
-
-               gpu = ac_get_llvm_processor_name(rscreen->family);
-               if (ret) {
-                       sprintf(ret, "%s-%s", gpu, triple);
-               }
-               /* +2 for dash and terminating NIL byte */
-               return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
-       }
-       case PIPE_COMPUTE_CAP_GRID_DIMENSION:
-               if (ret) {
-                       uint64_t *grid_dimension = ret;
-                       grid_dimension[0] = 3;
-               }
-               return 1 * sizeof(uint64_t);
-
-       case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
-               if (ret) {
-                       uint64_t *grid_size = ret;
-                       grid_size[0] = 65535;
-                       grid_size[1] = 65535;
-                       grid_size[2] = 65535;
-               }
-               return 3 * sizeof(uint64_t) ;
-
-       case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
-               if (ret) {
-                       uint64_t *block_size = ret;
-                       unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
-                       block_size[0] = threads_per_block;
-                       block_size[1] = threads_per_block;
-                       block_size[2] = threads_per_block;
-               }
-               return 3 * sizeof(uint64_t);
-
-       case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
-               if (ret) {
-                       uint64_t *max_threads_per_block = ret;
-                       *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
-               }
-               return sizeof(uint64_t);
-       case PIPE_COMPUTE_CAP_ADDRESS_BITS:
-               if (ret) {
-                       uint32_t *address_bits = ret;
-                       address_bits[0] = 64;
-               }
-               return 1 * sizeof(uint32_t);
-
-       case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
-               if (ret) {
-                       uint64_t *max_global_size = ret;
-                       uint64_t max_mem_alloc_size;
-
-                       r600_get_compute_param(screen, ir_type,
-                               PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
-                               &max_mem_alloc_size);
-
-                       /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
-                        * 1/4 of the MAX_GLOBAL_SIZE.  Since the
-                        * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
-                        * make sure we never report more than
-                        * 4 * MAX_MEM_ALLOC_SIZE.
-                        */
-                       *max_global_size = MIN2(4 * max_mem_alloc_size,
-                                               MAX2(rscreen->info.gart_size,
-                                                    rscreen->info.vram_size));
-               }
-               return sizeof(uint64_t);
-
-       case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
-               if (ret) {
-                       uint64_t *max_local_size = ret;
-                       /* Value reported by the closed source driver. */
-                       *max_local_size = 32768;
-               }
-               return sizeof(uint64_t);
-
-       case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
-               if (ret) {
-                       uint64_t *max_input_size = ret;
-                       /* Value reported by the closed source driver. */
-                       *max_input_size = 1024;
-               }
-               return sizeof(uint64_t);
-
-       case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
-               if (ret) {
-                       uint64_t *max_mem_alloc_size = ret;
-
-                       *max_mem_alloc_size = rscreen->info.max_alloc_size;
-               }
-               return sizeof(uint64_t);
-
-       case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
-               if (ret) {
-                       uint32_t *max_clock_frequency = ret;
-                       *max_clock_frequency = rscreen->info.max_shader_clock;
-               }
-               return sizeof(uint32_t);
-
-       case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
-               if (ret) {
-                       uint32_t *max_compute_units = ret;
-                       *max_compute_units = rscreen->info.num_good_compute_units;
-               }
-               return sizeof(uint32_t);
-
-       case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
-               if (ret) {
-                       uint32_t *images_supported = ret;
-                       *images_supported = 0;
-               }
-               return sizeof(uint32_t);
-       case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
-               break; /* unused */
-       case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
-               if (ret) {
-                       uint32_t *subgroup_size = ret;
-                       *subgroup_size = 64;
-               }
-               return sizeof(uint32_t);
-       case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
-               if (ret) {
-                       uint64_t *max_variable_threads_per_block = ret;
-                       if (ir_type == PIPE_SHADER_IR_TGSI)
-                               *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
-                       else
-                               *max_variable_threads_per_block = 0;
-               }
-               return sizeof(uint64_t);
-       }
-
-        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
-        return 0;
-}
-
-static uint64_t r600_get_timestamp(struct pipe_screen *screen)
-{
-       struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-
-       return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) /
-                       rscreen->info.clock_crystal_freq;
-}
-
-static void r600_query_memory_info(struct pipe_screen *screen,
-                                  struct pipe_memory_info *info)
-{
-       struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
-       struct radeon_winsys *ws = rscreen->ws;
-       unsigned vram_usage, gtt_usage;
-
-       info->total_device_memory = rscreen->info.vram_size / 1024;
-       info->total_staging_memory = rscreen->info.gart_size / 1024;
-
-       /* The real TTM memory usage is somewhat random, because:
-        *
-        * 1) TTM delays freeing memory, because it can only free it after
-        *    fences expire.
-        *
-        * 2) The memory usage can be really low if big VRAM evictions are
-        *    taking place, but the real usage is well above the size of VRAM.
-        *
-        * Instead, return statistics of this process.
-        */
-       vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
-       gtt_usage =  ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
-
-       info->avail_device_memory =
-               vram_usage <= info->total_device_memory ?
-                               info->total_device_memory - vram_usage : 0;
-       info->avail_staging_memory =
-               gtt_usage <= info->total_staging_memory ?
-                               info->total_staging_memory - gtt_usage : 0;
-
-       info->device_memory_evicted =
-               ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
-
-       if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4)
-               info->nr_device_memory_evictions =
-                       ws->query_value(ws, RADEON_NUM_EVICTIONS);
-       else
-               /* Just return the number of evicted 64KB pages. */
-               info->nr_device_memory_evictions = info->device_memory_evicted / 64;
-}
-
 struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
                                                const struct pipe_resource *templ)
 {
@@ -1007,65 +642,22 @@ struct pipe_resource *si_resource_create_common(struct pipe_screen *screen,
 bool si_common_screen_init(struct r600_common_screen *rscreen,
                           struct radeon_winsys *ws)
 {
-       char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
-       struct utsname uname_data;
-       const char *chip_name;
-
-       ws->query_info(ws, &rscreen->info);
-       rscreen->ws = ws;
-
-       if ((chip_name = r600_get_marketing_name(ws)))
-               snprintf(family_name, sizeof(family_name), "%s / ",
-                        r600_get_family_name(rscreen) + 4);
-       else
-               chip_name = r600_get_family_name(rscreen);
-
-       if (uname(&uname_data) == 0)
-               snprintf(kernel_version, sizeof(kernel_version),
-                        " / %s", uname_data.release);
-
-       if (HAVE_LLVM > 0) {
-               snprintf(llvm_string, sizeof(llvm_string),
-                        ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
-                        HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
-       }
-
-       snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string),
-                "%s (%sDRM %i.%i.%i%s%s)",
-                chip_name, family_name, rscreen->info.drm_major,
-                rscreen->info.drm_minor, rscreen->info.drm_patchlevel,
-                kernel_version, llvm_string);
-
-       rscreen->b.get_name = r600_get_name;
-       rscreen->b.get_vendor = r600_get_vendor;
-       rscreen->b.get_device_vendor = r600_get_device_vendor;
-       rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache;
-       rscreen->b.get_compute_param = r600_get_compute_param;
-       rscreen->b.get_paramf = r600_get_paramf;
-       rscreen->b.get_timestamp = r600_get_timestamp;
        rscreen->b.resource_destroy = u_resource_destroy_vtbl;
        rscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
-       rscreen->b.query_memory_info = r600_query_memory_info;
 
        if (rscreen->info.has_hw_decode) {
-               rscreen->b.get_video_param = si_vid_get_video_param;
                rscreen->b.is_video_format_supported = si_vid_is_format_supported;
        } else {
-               rscreen->b.get_video_param = r600_get_video_param;
                rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
        }
 
        si_init_screen_texture_functions(rscreen);
        si_init_screen_query_functions(rscreen);
 
-       rscreen->family = rscreen->info.family;
-       rscreen->chip_class = rscreen->info.chip_class;
        rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
        rscreen->has_rbplus = false;
        rscreen->rbplus_allowed = false;
 
-       r600_disk_cache_create(rscreen);
-
        slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
 
        rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
@@ -1083,8 +675,7 @@ bool si_common_screen_init(struct r600_common_screen *rscreen,
                       rscreen->info.pci_domain, rscreen->info.pci_bus,
                       rscreen->info.pci_dev, rscreen->info.pci_func);
                printf("pci_id = 0x%x\n", rscreen->info.pci_id);
-               printf("family = %i (%s)\n", rscreen->info.family,
-                      r600_get_family_name(rscreen));
+               printf("family = %i\n", rscreen->info.family);
                printf("chip_class = %i\n", rscreen->info.chip_class);
                printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size);
                printf("gart_page_size = %u\n", rscreen->info.gart_page_size);
index 10c99f6173e95c39ca00c21d80dce5e41e961188..b193a60b4d2520aa10f118396c84b6bf913bb70c 100644 (file)
@@ -14,6 +14,7 @@ C_SOURCES := \
        si_descriptors.c \
        si_dma.c \
        si_fence.c \
+       si_get.c \
        si_hw_context.c \
        si_pipe.c \
        si_pipe.h \
index 7eae9d6e19a25a66ba145785e559aa121532090d..c4915d060861b5f46ea665cc676001be6b0b0e2b 100644 (file)
@@ -30,6 +30,7 @@ files_libradeonsi = files(
   'si_descriptors.c',
   'si_dma.c',
   'si_fence.c',
+  'si_get.c',
   'si_hw_context.c',
   'si_pipe.c',
   'si_pipe.h',
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
new file mode 100644 (file)
index 0000000..349f7dd
--- /dev/null
@@ -0,0 +1,852 @@
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "si_pipe.h"
+#include "radeon/radeon_video.h"
+#include "ac_llvm_util.h"
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+#include "compiler/nir/nir.h"
+
+#include <sys/utsname.h>
+
+static const char *si_get_vendor(struct pipe_screen *pscreen)
+{
+       return "X.Org";
+}
+
+static const char *si_get_device_vendor(struct pipe_screen *pscreen)
+{
+       return "AMD";
+}
+
+static const char *si_get_marketing_name(struct radeon_winsys *ws)
+{
+       if (!ws->get_chip_name)
+               return NULL;
+       return ws->get_chip_name(ws);
+}
+
+const char *si_get_family_name(const struct si_screen *sscreen)
+{
+       switch (sscreen->b.info.family) {
+       case CHIP_TAHITI: return "AMD TAHITI";
+       case CHIP_PITCAIRN: return "AMD PITCAIRN";
+       case CHIP_VERDE: return "AMD CAPE VERDE";
+       case CHIP_OLAND: return "AMD OLAND";
+       case CHIP_HAINAN: return "AMD HAINAN";
+       case CHIP_BONAIRE: return "AMD BONAIRE";
+       case CHIP_KAVERI: return "AMD KAVERI";
+       case CHIP_KABINI: return "AMD KABINI";
+       case CHIP_HAWAII: return "AMD HAWAII";
+       case CHIP_MULLINS: return "AMD MULLINS";
+       case CHIP_TONGA: return "AMD TONGA";
+       case CHIP_ICELAND: return "AMD ICELAND";
+       case CHIP_CARRIZO: return "AMD CARRIZO";
+       case CHIP_FIJI: return "AMD FIJI";
+       case CHIP_POLARIS10: return "AMD POLARIS10";
+       case CHIP_POLARIS11: return "AMD POLARIS11";
+       case CHIP_POLARIS12: return "AMD POLARIS12";
+       case CHIP_STONEY: return "AMD STONEY";
+       case CHIP_VEGA10: return "AMD VEGA10";
+       case CHIP_RAVEN: return "AMD RAVEN";
+       default: return "AMD unknown";
+       }
+}
+
+static bool si_have_tgsi_compute(struct si_screen *sscreen)
+{
+       /* Old kernels disallowed some register writes for SI
+        * that are used for indirect dispatches. */
+       return (sscreen->b.chip_class >= CIK ||
+               sscreen->b.info.drm_major == 3 ||
+               (sscreen->b.info.drm_major == 2 &&
+                sscreen->b.info.drm_minor >= 45));
+}
+
+static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+       struct si_screen *sscreen = (struct si_screen *)pscreen;
+
+       switch (param) {
+       /* Supported features (boolean caps). */
+       case PIPE_CAP_ACCELERATED:
+       case PIPE_CAP_TWO_SIDED_STENCIL:
+       case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+       case PIPE_CAP_ANISOTROPIC_FILTER:
+       case PIPE_CAP_POINT_SPRITE:
+       case PIPE_CAP_OCCLUSION_QUERY:
+       case PIPE_CAP_TEXTURE_SHADOW_MAP:
+       case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+       case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+       case PIPE_CAP_TEXTURE_SWIZZLE:
+       case PIPE_CAP_DEPTH_CLIP_DISABLE:
+       case PIPE_CAP_SHADER_STENCIL_EXPORT:
+       case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+       case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+       case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+       case PIPE_CAP_SM3:
+       case PIPE_CAP_SEAMLESS_CUBE_MAP:
+       case PIPE_CAP_PRIMITIVE_RESTART:
+       case PIPE_CAP_CONDITIONAL_RENDER:
+       case PIPE_CAP_TEXTURE_BARRIER:
+       case PIPE_CAP_INDEP_BLEND_ENABLE:
+       case PIPE_CAP_INDEP_BLEND_FUNC:
+       case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+       case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+       case PIPE_CAP_USER_CONSTANT_BUFFERS:
+       case PIPE_CAP_START_INSTANCE:
+       case PIPE_CAP_NPOT_TEXTURES:
+       case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+       case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+       case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+       case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+       case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+       case PIPE_CAP_TGSI_INSTANCEID:
+       case PIPE_CAP_COMPUTE:
+       case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+       case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
+       case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+       case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
+       case PIPE_CAP_CUBE_MAP_ARRAY:
+       case PIPE_CAP_SAMPLE_SHADING:
+       case PIPE_CAP_DRAW_INDIRECT:
+       case PIPE_CAP_CLIP_HALFZ:
+       case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
+       case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+       case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
+       case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+       case PIPE_CAP_TGSI_TEXCOORD:
+       case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+       case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+       case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+       case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+       case PIPE_CAP_SHAREABLE_SHADERS:
+       case PIPE_CAP_DEPTH_BOUNDS_TEST:
+       case PIPE_CAP_SAMPLER_VIEW_TARGET:
+       case PIPE_CAP_TEXTURE_QUERY_LOD:
+       case PIPE_CAP_TEXTURE_GATHER_SM5:
+       case PIPE_CAP_TGSI_TXQS:
+       case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
+       case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+       case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+       case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+       case PIPE_CAP_INVALIDATE_BUFFER:
+       case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
+       case PIPE_CAP_QUERY_MEMORY_INFO:
+       case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+       case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
+       case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
+       case PIPE_CAP_GENERATE_MIPMAP:
+       case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
+       case PIPE_CAP_STRING_MARKER:
+       case PIPE_CAP_CLEAR_TEXTURE:
+       case PIPE_CAP_CULL_DISTANCE:
+       case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
+       case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
+       case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+       case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+       case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
+       case PIPE_CAP_DOUBLES:
+       case PIPE_CAP_TGSI_TEX_TXF_LZ:
+       case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
+       case PIPE_CAP_BINDLESS_TEXTURE:
+       case PIPE_CAP_QUERY_TIMESTAMP:
+       case PIPE_CAP_QUERY_TIME_ELAPSED:
+       case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
+       case PIPE_CAP_QUERY_SO_OVERFLOW:
+       case PIPE_CAP_MEMOBJ:
+       case PIPE_CAP_LOAD_CONSTBUF:
+       case PIPE_CAP_INT64:
+       case PIPE_CAP_INT64_DIVMOD:
+       case PIPE_CAP_TGSI_CLOCK:
+       case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
+       case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+       case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+       case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+               return 1;
+
+       case PIPE_CAP_TGSI_VOTE:
+               return HAVE_LLVM >= 0x0400;
+
+       case PIPE_CAP_TGSI_BALLOT:
+               return HAVE_LLVM >= 0x0500;
+
+       case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+               return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
+
+       case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+               return (sscreen->b.info.drm_major == 2 &&
+                       sscreen->b.info.drm_minor >= 43) ||
+                      sscreen->b.info.drm_major == 3;
+
+       case PIPE_CAP_TEXTURE_MULTISAMPLE:
+               /* 2D tiling on CIK is supported since DRM 2.35.0 */
+               return sscreen->b.chip_class < CIK ||
+                      (sscreen->b.info.drm_major == 2 &&
+                       sscreen->b.info.drm_minor >= 35) ||
+                      sscreen->b.info.drm_major == 3;
+
+        case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+                return R600_MAP_BUFFER_ALIGNMENT;
+
+       case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+       case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+       case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+       case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+       case PIPE_CAP_MAX_VERTEX_STREAMS:
+       case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+               return 4;
+
+       case PIPE_CAP_GLSL_FEATURE_LEVEL:
+               if (sscreen->b.debug_flags & DBG(NIR))
+                       return 140; /* no geometry and tessellation shaders yet */
+               if (si_have_tgsi_compute(sscreen))
+                       return 450;
+               return 420;
+
+       case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+               return MIN2(sscreen->b.info.max_alloc_size, INT_MAX);
+
+       case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+       case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+       case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+               /* SI doesn't support unaligned loads.
+                * CIK needs DRM 2.50.0 on radeon. */
+               return sscreen->b.chip_class == SI ||
+                      (sscreen->b.info.drm_major == 2 &&
+                       sscreen->b.info.drm_minor < 50);
+
+       case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
+               /* TODO: GFX9 hangs. */
+               if (sscreen->b.chip_class >= GFX9)
+                       return 0;
+               /* Disable on SI due to VM faults in CP DMA. Enable once these
+                * faults are mitigated in software.
+                */
+               if (sscreen->b.chip_class >= CIK &&
+                   sscreen->b.info.drm_major == 3 &&
+                   sscreen->b.info.drm_minor >= 13)
+                       return RADEON_SPARSE_PAGE_SIZE;
+               return 0;
+
+       /* Unsupported features. */
+       case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+       case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+       case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+       case PIPE_CAP_USER_VERTEX_BUFFERS:
+       case PIPE_CAP_FAKE_SW_MSAA:
+       case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+       case PIPE_CAP_VERTEXID_NOBASE:
+       case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+       case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+       case PIPE_CAP_TGSI_FS_FBFETCH:
+       case PIPE_CAP_TGSI_MUL_ZERO_WINS:
+       case PIPE_CAP_UMA:
+       case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
+       case PIPE_CAP_POST_DEPTH_COVERAGE:
+       case PIPE_CAP_TILE_RASTER_ORDER:
+       case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+               return 0;
+
+       case PIPE_CAP_NATIVE_FENCE_FD:
+               return sscreen->b.info.has_sync_file;
+
+       case PIPE_CAP_QUERY_BUFFER_OBJECT:
+               return si_have_tgsi_compute(sscreen);
+
+       case PIPE_CAP_DRAW_PARAMETERS:
+       case PIPE_CAP_MULTI_DRAW_INDIRECT:
+       case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+               return sscreen->has_draw_indirect_multi;
+
+       case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+               return 30;
+
+       case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+               return sscreen->b.chip_class <= VI ?
+                       PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
+
+       /* Stream output. */
+       case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+       case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+               return 32*4;
+
+       /* Geometry shader output. */
+       case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+               return 1024;
+       case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+               return 4095;
+
+       case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+               return 2048;
+
+       /* Texturing. */
+       case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+       case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+               return 15; /* 16384 */
+       case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+               /* textures support 8192, but layered rendering supports 2048 */
+               return 12;
+       case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+               /* textures support 8192, but layered rendering supports 2048 */
+               return 2048;
+
+       /* Viewports and render targets. */
+       case PIPE_CAP_MAX_VIEWPORTS:
+               return SI_MAX_VIEWPORTS;
+       case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+       case PIPE_CAP_MAX_RENDER_TARGETS:
+               return 8;
+
+       case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+       case PIPE_CAP_MIN_TEXEL_OFFSET:
+               return -32;
+
+       case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+       case PIPE_CAP_MAX_TEXEL_OFFSET:
+               return 31;
+
+       case PIPE_CAP_ENDIANNESS:
+               return PIPE_ENDIAN_LITTLE;
+
+       case PIPE_CAP_VENDOR_ID:
+               return ATI_VENDOR_ID;
+       case PIPE_CAP_DEVICE_ID:
+               return sscreen->b.info.pci_id;
+       case PIPE_CAP_VIDEO_MEMORY:
+               return sscreen->b.info.vram_size >> 20;
+       case PIPE_CAP_PCI_GROUP:
+               return sscreen->b.info.pci_domain;
+       case PIPE_CAP_PCI_BUS:
+               return sscreen->b.info.pci_bus;
+       case PIPE_CAP_PCI_DEVICE:
+               return sscreen->b.info.pci_dev;
+       case PIPE_CAP_PCI_FUNCTION:
+               return sscreen->b.info.pci_func;
+       }
+       return 0;
+}
+
+static float si_get_paramf(struct pipe_screen* pscreen, enum pipe_capf param)
+{
+       switch (param) {
+       case PIPE_CAPF_MAX_LINE_WIDTH:
+       case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+       case PIPE_CAPF_MAX_POINT_WIDTH:
+       case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+               return 8192.0f;
+       case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+               return 16.0f;
+       case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+               return 16.0f;
+       case PIPE_CAPF_GUARD_BAND_LEFT:
+       case PIPE_CAPF_GUARD_BAND_TOP:
+       case PIPE_CAPF_GUARD_BAND_RIGHT:
+       case PIPE_CAPF_GUARD_BAND_BOTTOM:
+               return 0.0f;
+       }
+       return 0.0f;
+}
+
+static int si_get_shader_param(struct pipe_screen* pscreen,
+                              enum pipe_shader_type shader,
+                              enum pipe_shader_cap param)
+{
+       struct si_screen *sscreen = (struct si_screen *)pscreen;
+
+       switch(shader)
+       {
+       case PIPE_SHADER_FRAGMENT:
+       case PIPE_SHADER_VERTEX:
+       case PIPE_SHADER_GEOMETRY:
+       case PIPE_SHADER_TESS_CTRL:
+       case PIPE_SHADER_TESS_EVAL:
+               break;
+       case PIPE_SHADER_COMPUTE:
+               switch (param) {
+               case PIPE_SHADER_CAP_PREFERRED_IR:
+                       return PIPE_SHADER_IR_NATIVE;
+
+               case PIPE_SHADER_CAP_SUPPORTED_IRS: {
+                       int ir = 1 << PIPE_SHADER_IR_NATIVE;
+
+                       if (si_have_tgsi_compute(sscreen))
+                               ir |= 1 << PIPE_SHADER_IR_TGSI;
+
+                       return ir;
+               }
+
+               case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
+                       uint64_t max_const_buffer_size;
+                       pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
+                               PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+                               &max_const_buffer_size);
+                       return MIN2(max_const_buffer_size, INT_MAX);
+               }
+               default:
+                       /* If compute shaders don't require a special value
+                        * for this cap, we can return the same value we
+                        * do for other shader types. */
+                       break;
+               }
+               break;
+       default:
+               return 0;
+       }
+
+       switch (param) {
+       /* Shader limits. */
+       case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+       case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+       case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+       case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+       case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+               return 16384;
+       case PIPE_SHADER_CAP_MAX_INPUTS:
+               return shader == PIPE_SHADER_VERTEX ? SI_MAX_ATTRIBS : 32;
+       case PIPE_SHADER_CAP_MAX_OUTPUTS:
+               return shader == PIPE_SHADER_FRAGMENT ? 8 : 32;
+       case PIPE_SHADER_CAP_MAX_TEMPS:
+               return 256; /* Max native temporaries. */
+       case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+               return 4096 * sizeof(float[4]); /* actually only memory limits this */
+       case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+               return SI_NUM_CONST_BUFFERS;
+       case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+       case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+               return SI_NUM_SAMPLERS;
+       case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+               return SI_NUM_SHADER_BUFFERS;
+       case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+               return SI_NUM_IMAGES;
+       case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+               return 32;
+       case PIPE_SHADER_CAP_PREFERRED_IR:
+               if (sscreen->b.debug_flags & DBG(NIR) &&
+                   (shader == PIPE_SHADER_VERTEX ||
+                    shader == PIPE_SHADER_FRAGMENT))
+                       return PIPE_SHADER_IR_NIR;
+               return PIPE_SHADER_IR_TGSI;
+       case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+               return 4;
+
+       /* Supported boolean features. */
+       case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+       case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+       case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+       case PIPE_SHADER_CAP_INTEGERS:
+       case PIPE_SHADER_CAP_INT64_ATOMICS:
+       case PIPE_SHADER_CAP_FP16:
+       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+       case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+               return 1;
+
+       case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+               /* TODO: Indirect indexing of GS inputs is unimplemented. */
+               return shader != PIPE_SHADER_GEOMETRY &&
+                      (sscreen->llvm_has_working_vgpr_indexing ||
+                       /* TCS and TES load inputs directly from LDS or
+                        * offchip memory, so indirect indexing is trivial. */
+                       shader == PIPE_SHADER_TESS_CTRL ||
+                       shader == PIPE_SHADER_TESS_EVAL);
+
+       case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+               return sscreen->llvm_has_working_vgpr_indexing ||
+                      /* TCS stores outputs directly to memory. */
+                      shader == PIPE_SHADER_TESS_CTRL;
+
+       /* Unsupported boolean features. */
+       case PIPE_SHADER_CAP_SUBROUTINES:
+       case PIPE_SHADER_CAP_SUPPORTED_IRS:
+       case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+       case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+               return 0;
+       }
+       return 0;
+}
+
+static const struct nir_shader_compiler_options nir_options = {
+       .vertex_id_zero_based = true,
+       .lower_scmp = true,
+       .lower_flrp32 = true,
+       .lower_fsat = true,
+       .lower_fdiv = true,
+       .lower_sub = true,
+       .lower_ffma = true,
+       .lower_pack_snorm_2x16 = true,
+       .lower_pack_snorm_4x8 = true,
+       .lower_pack_unorm_2x16 = true,
+       .lower_pack_unorm_4x8 = true,
+       .lower_unpack_snorm_2x16 = true,
+       .lower_unpack_snorm_4x8 = true,
+       .lower_unpack_unorm_2x16 = true,
+       .lower_unpack_unorm_4x8 = true,
+       .lower_extract_byte = true,
+       .lower_extract_word = true,
+       .max_unroll_iterations = 32,
+       .native_integers = true,
+};
+
+static const void *
+si_get_compiler_options(struct pipe_screen *screen,
+                       enum pipe_shader_ir ir,
+                       enum pipe_shader_type shader)
+{
+       assert(ir == PIPE_SHADER_IR_NIR);
+       return &nir_options;
+}
+
+static void si_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+       ac_compute_driver_uuid(uuid, PIPE_UUID_SIZE);
+}
+
+static void si_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
+{
+       struct si_screen *sscreen = (struct si_screen *)pscreen;
+
+       ac_compute_device_uuid(&sscreen->b.info, uuid, PIPE_UUID_SIZE);
+}
+
+static const char* si_get_name(struct pipe_screen *pscreen)
+{
+       struct si_screen *sscreen = (struct si_screen*)pscreen;
+
+       return sscreen->b.renderer_string;
+}
+
+static int si_get_video_param_no_decode(struct pipe_screen *screen,
+                                       enum pipe_video_profile profile,
+                                       enum pipe_video_entrypoint entrypoint,
+                                       enum pipe_video_cap param)
+{
+       switch (param) {
+       case PIPE_VIDEO_CAP_SUPPORTED:
+               return vl_profile_supported(screen, profile, entrypoint);
+       case PIPE_VIDEO_CAP_NPOT_TEXTURES:
+               return 1;
+       case PIPE_VIDEO_CAP_MAX_WIDTH:
+       case PIPE_VIDEO_CAP_MAX_HEIGHT:
+               return vl_video_buffer_max_size(screen);
+       case PIPE_VIDEO_CAP_PREFERED_FORMAT:
+               return PIPE_FORMAT_NV12;
+       case PIPE_VIDEO_CAP_PREFERS_INTERLACED:
+               return false;
+       case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED:
+               return false;
+       case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE:
+               return true;
+       case PIPE_VIDEO_CAP_MAX_LEVEL:
+               return vl_level_supported(screen, profile);
+       default:
+               return 0;
+       }
+}
+
+static unsigned get_max_threads_per_block(struct si_screen *screen,
+                                         enum pipe_shader_ir ir_type)
+{
+       if (ir_type != PIPE_SHADER_IR_TGSI)
+               return 256;
+
+       /* Only 16 waves per thread-group on gfx9. */
+       if (screen->b.chip_class >= GFX9)
+               return 1024;
+
+       /* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
+        * round number.
+        */
+       return 2048;
+}
+
+static int si_get_compute_param(struct pipe_screen *screen,
+                               enum pipe_shader_ir ir_type,
+                               enum pipe_compute_cap param,
+                               void *ret)
+{
+       struct si_screen *sscreen = (struct si_screen *)screen;
+
+       //TODO: select these params by asic
+       switch (param) {
+       case PIPE_COMPUTE_CAP_IR_TARGET: {
+               const char *gpu;
+               const char *triple;
+
+               if (HAVE_LLVM < 0x0400)
+                       triple = "amdgcn--";
+               else
+                       triple = "amdgcn-mesa-mesa3d";
+
+               gpu = ac_get_llvm_processor_name(sscreen->b.family);
+               if (ret) {
+                       sprintf(ret, "%s-%s", gpu, triple);
+               }
+               /* +2 for dash and terminating NIL byte */
+               return (strlen(triple) + strlen(gpu) + 2) * sizeof(char);
+       }
+       case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+               if (ret) {
+                       uint64_t *grid_dimension = ret;
+                       grid_dimension[0] = 3;
+               }
+               return 1 * sizeof(uint64_t);
+
+       case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+               if (ret) {
+                       uint64_t *grid_size = ret;
+                       grid_size[0] = 65535;
+                       grid_size[1] = 65535;
+                       grid_size[2] = 65535;
+               }
+               return 3 * sizeof(uint64_t) ;
+
+       case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+               if (ret) {
+                       uint64_t *block_size = ret;
+                       unsigned threads_per_block = get_max_threads_per_block(sscreen, ir_type);
+                       block_size[0] = threads_per_block;
+                       block_size[1] = threads_per_block;
+                       block_size[2] = threads_per_block;
+               }
+               return 3 * sizeof(uint64_t);
+
+       case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+               if (ret) {
+                       uint64_t *max_threads_per_block = ret;
+                       *max_threads_per_block = get_max_threads_per_block(sscreen, ir_type);
+               }
+               return sizeof(uint64_t);
+       case PIPE_COMPUTE_CAP_ADDRESS_BITS:
+               if (ret) {
+                       uint32_t *address_bits = ret;
+                       address_bits[0] = 64;
+               }
+               return 1 * sizeof(uint32_t);
+
+       case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:
+               if (ret) {
+                       uint64_t *max_global_size = ret;
+                       uint64_t max_mem_alloc_size;
+
+                       si_get_compute_param(screen, ir_type,
+                               PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
+                               &max_mem_alloc_size);
+
+                       /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
+                        * 1/4 of the MAX_GLOBAL_SIZE.  Since the
+                        * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
+                        * make sure we never report more than
+                        * 4 * MAX_MEM_ALLOC_SIZE.
+                        */
+                       *max_global_size = MIN2(4 * max_mem_alloc_size,
+                                               MAX2(sscreen->b.info.gart_size,
+                                                    sscreen->b.info.vram_size));
+               }
+               return sizeof(uint64_t);
+
+       case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:
+               if (ret) {
+                       uint64_t *max_local_size = ret;
+                       /* Value reported by the closed source driver. */
+                       *max_local_size = 32768;
+               }
+               return sizeof(uint64_t);
+
+       case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:
+               if (ret) {
+                       uint64_t *max_input_size = ret;
+                       /* Value reported by the closed source driver. */
+                       *max_input_size = 1024;
+               }
+               return sizeof(uint64_t);
+
+       case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:
+               if (ret) {
+                       uint64_t *max_mem_alloc_size = ret;
+
+                       *max_mem_alloc_size = sscreen->b.info.max_alloc_size;
+               }
+               return sizeof(uint64_t);
+
+       case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:
+               if (ret) {
+                       uint32_t *max_clock_frequency = ret;
+                       *max_clock_frequency = sscreen->b.info.max_shader_clock;
+               }
+               return sizeof(uint32_t);
+
+       case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:
+               if (ret) {
+                       uint32_t *max_compute_units = ret;
+                       *max_compute_units = sscreen->b.info.num_good_compute_units;
+               }
+               return sizeof(uint32_t);
+
+       case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:
+               if (ret) {
+                       uint32_t *images_supported = ret;
+                       *images_supported = 0;
+               }
+               return sizeof(uint32_t);
+       case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
+               break; /* unused */
+       case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:
+               if (ret) {
+                       uint32_t *subgroup_size = ret;
+                       *subgroup_size = 64;
+               }
+               return sizeof(uint32_t);
+       case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:
+               if (ret) {
+                       uint64_t *max_variable_threads_per_block = ret;
+                       if (ir_type == PIPE_SHADER_IR_TGSI)
+                               *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
+                       else
+                               *max_variable_threads_per_block = 0;
+               }
+               return sizeof(uint64_t);
+       }
+
+        fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param);
+        return 0;
+}
+
+static uint64_t si_get_timestamp(struct pipe_screen *screen)
+{
+       struct si_screen *sscreen = (struct si_screen*)screen;
+
+       return 1000000 * sscreen->b.ws->query_value(sscreen->b.ws, RADEON_TIMESTAMP) /
+                       sscreen->b.info.clock_crystal_freq;
+}
+
+static void si_query_memory_info(struct pipe_screen *screen,
+                                struct pipe_memory_info *info)
+{
+       struct si_screen *sscreen = (struct si_screen*)screen;
+       struct radeon_winsys *ws = sscreen->b.ws;
+       unsigned vram_usage, gtt_usage;
+
+       info->total_device_memory = sscreen->b.info.vram_size / 1024;
+       info->total_staging_memory = sscreen->b.info.gart_size / 1024;
+
+       /* The real TTM memory usage is somewhat random, because:
+        *
+        * 1) TTM delays freeing memory, because it can only free it after
+        *    fences expire.
+        *
+        * 2) The memory usage can be really low if big VRAM evictions are
+        *    taking place, but the real usage is well above the size of VRAM.
+        *
+        * Instead, return statistics of this process.
+        */
+       vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024;
+       gtt_usage =  ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024;
+
+       info->avail_device_memory =
+               vram_usage <= info->total_device_memory ?
+                               info->total_device_memory - vram_usage : 0;
+       info->avail_staging_memory =
+               gtt_usage <= info->total_staging_memory ?
+                               info->total_staging_memory - gtt_usage : 0;
+
+       info->device_memory_evicted =
+               ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024;
+
+       if (sscreen->b.info.drm_major == 3 && sscreen->b.info.drm_minor >= 4)
+               info->nr_device_memory_evictions =
+                       ws->query_value(ws, RADEON_NUM_EVICTIONS);
+       else
+               /* Just return the number of evicted 64KB pages. */
+               info->nr_device_memory_evictions = info->device_memory_evicted / 64;
+}
+
+static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen)
+{
+       struct si_screen *sscreen = (struct si_screen*)pscreen;
+
+       return sscreen->b.disk_shader_cache;
+}
+
+static void si_init_renderer_string(struct si_screen *sscreen)
+{
+       struct radeon_winsys *ws = sscreen->b.ws;
+       char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {};
+       struct utsname uname_data;
+
+       const char *chip_name = si_get_marketing_name(ws);
+
+       if (chip_name)
+               snprintf(family_name, sizeof(family_name), "%s / ",
+                        si_get_family_name(sscreen) + 4);
+       else
+               chip_name = si_get_family_name(sscreen);
+
+       if (uname(&uname_data) == 0)
+               snprintf(kernel_version, sizeof(kernel_version),
+                        " / %s", uname_data.release);
+
+       if (HAVE_LLVM > 0) {
+               snprintf(llvm_string, sizeof(llvm_string),
+                        ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff,
+                        HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH);
+       }
+
+       snprintf(sscreen->b.renderer_string, sizeof(sscreen->b.renderer_string),
+                "%s (%sDRM %i.%i.%i%s%s)",
+                chip_name, family_name, sscreen->b.info.drm_major,
+                sscreen->b.info.drm_minor, sscreen->b.info.drm_patchlevel,
+                kernel_version, llvm_string);
+}
+
+void si_init_screen_get_functions(struct si_screen *sscreen)
+{
+       sscreen->b.b.get_name = si_get_name;
+       sscreen->b.b.get_vendor = si_get_vendor;
+       sscreen->b.b.get_device_vendor = si_get_device_vendor;
+       sscreen->b.b.get_param = si_get_param;
+       sscreen->b.b.get_paramf = si_get_paramf;
+       sscreen->b.b.get_compute_param = si_get_compute_param;
+       sscreen->b.b.get_timestamp = si_get_timestamp;
+       sscreen->b.b.get_shader_param = si_get_shader_param;
+       sscreen->b.b.get_compiler_options = si_get_compiler_options;
+       sscreen->b.b.get_device_uuid = si_get_device_uuid;
+       sscreen->b.b.get_driver_uuid = si_get_driver_uuid;
+       sscreen->b.b.query_memory_info = si_query_memory_info;
+       sscreen->b.b.get_disk_shader_cache = si_get_disk_shader_cache;
+
+       if (sscreen->b.info.has_hw_decode) {
+               sscreen->b.b.get_video_param = si_vid_get_video_param;
+       } else {
+               sscreen->b.b.get_video_param = si_get_video_param_no_decode;
+       }
+
+       si_init_renderer_string(sscreen);
+}
index eb0283aac9a9cf01421056fe80bb802bfe14b1ec..dfb35ed82eea35b55e43662b7d1270e1734c6c1e 100644 (file)
@@ -36,8 +36,6 @@
 #include "vl/vl_decoder.h"
 #include "../ddebug/dd_util.h"
 
-#include "compiler/nir/nir.h"
-
 /*
  * pipe_context
  */
@@ -402,434 +400,6 @@ static struct pipe_context *si_pipe_create_context(struct pipe_screen *screen,
 /*
  * pipe_screen
  */
-static bool si_have_tgsi_compute(struct si_screen *sscreen)
-{
-       /* Old kernels disallowed some register writes for SI
-        * that are used for indirect dispatches. */
-       return (sscreen->b.chip_class >= CIK ||
-               sscreen->b.info.drm_major == 3 ||
-               (sscreen->b.info.drm_major == 2 &&
-                sscreen->b.info.drm_minor >= 45));
-}
-
-static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
-{
-       struct si_screen *sscreen = (struct si_screen *)pscreen;
-
-       switch (param) {
-       /* Supported features (boolean caps). */
-       case PIPE_CAP_ACCELERATED:
-       case PIPE_CAP_TWO_SIDED_STENCIL:
-       case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
-       case PIPE_CAP_ANISOTROPIC_FILTER:
-       case PIPE_CAP_POINT_SPRITE:
-       case PIPE_CAP_OCCLUSION_QUERY:
-       case PIPE_CAP_TEXTURE_SHADOW_MAP:
-       case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
-       case PIPE_CAP_BLEND_EQUATION_SEPARATE:
-       case PIPE_CAP_TEXTURE_SWIZZLE:
-       case PIPE_CAP_DEPTH_CLIP_DISABLE:
-       case PIPE_CAP_SHADER_STENCIL_EXPORT:
-       case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
-       case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
-       case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
-       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
-       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
-       case PIPE_CAP_SM3:
-       case PIPE_CAP_SEAMLESS_CUBE_MAP:
-       case PIPE_CAP_PRIMITIVE_RESTART:
-       case PIPE_CAP_CONDITIONAL_RENDER:
-       case PIPE_CAP_TEXTURE_BARRIER:
-       case PIPE_CAP_INDEP_BLEND_ENABLE:
-       case PIPE_CAP_INDEP_BLEND_FUNC:
-       case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
-       case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
-       case PIPE_CAP_USER_CONSTANT_BUFFERS:
-       case PIPE_CAP_START_INSTANCE:
-       case PIPE_CAP_NPOT_TEXTURES:
-       case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
-       case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
-       case PIPE_CAP_VERTEX_COLOR_CLAMPED:
-       case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
-        case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
-       case PIPE_CAP_TGSI_INSTANCEID:
-       case PIPE_CAP_COMPUTE:
-       case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
-        case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
-       case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
-       case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
-       case PIPE_CAP_CUBE_MAP_ARRAY:
-       case PIPE_CAP_SAMPLE_SHADING:
-       case PIPE_CAP_DRAW_INDIRECT:
-       case PIPE_CAP_CLIP_HALFZ:
-       case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
-       case PIPE_CAP_POLYGON_OFFSET_CLAMP:
-       case PIPE_CAP_MULTISAMPLE_Z_RESOLVE:
-       case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
-       case PIPE_CAP_TGSI_TEXCOORD:
-       case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
-       case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
-       case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
-       case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
-       case PIPE_CAP_SHAREABLE_SHADERS:
-       case PIPE_CAP_DEPTH_BOUNDS_TEST:
-       case PIPE_CAP_SAMPLER_VIEW_TARGET:
-       case PIPE_CAP_TEXTURE_QUERY_LOD:
-       case PIPE_CAP_TEXTURE_GATHER_SM5:
-       case PIPE_CAP_TGSI_TXQS:
-       case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
-       case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
-       case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
-       case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
-       case PIPE_CAP_INVALIDATE_BUFFER:
-       case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:
-       case PIPE_CAP_QUERY_MEMORY_INFO:
-       case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
-       case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
-       case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:
-       case PIPE_CAP_GENERATE_MIPMAP:
-       case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
-       case PIPE_CAP_STRING_MARKER:
-       case PIPE_CAP_CLEAR_TEXTURE:
-       case PIPE_CAP_CULL_DISTANCE:
-       case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
-       case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
-       case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
-       case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
-       case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
-       case PIPE_CAP_DOUBLES:
-       case PIPE_CAP_TGSI_TEX_TXF_LZ:
-       case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:
-       case PIPE_CAP_BINDLESS_TEXTURE:
-       case PIPE_CAP_QUERY_TIMESTAMP:
-       case PIPE_CAP_QUERY_TIME_ELAPSED:
-       case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:
-       case PIPE_CAP_QUERY_SO_OVERFLOW:
-       case PIPE_CAP_MEMOBJ:
-       case PIPE_CAP_LOAD_CONSTBUF:
-       case PIPE_CAP_INT64:
-       case PIPE_CAP_INT64_DIVMOD:
-       case PIPE_CAP_TGSI_CLOCK:
-       case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
-       case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
-       case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
-       case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
-               return 1;
-
-       case PIPE_CAP_TGSI_VOTE:
-               return HAVE_LLVM >= 0x0400;
-
-       case PIPE_CAP_TGSI_BALLOT:
-               return HAVE_LLVM >= 0x0500;
-
-       case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
-               return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr;
-
-       case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
-               return (sscreen->b.info.drm_major == 2 &&
-                       sscreen->b.info.drm_minor >= 43) ||
-                      sscreen->b.info.drm_major == 3;
-
-       case PIPE_CAP_TEXTURE_MULTISAMPLE:
-               /* 2D tiling on CIK is supported since DRM 2.35.0 */
-               return sscreen->b.chip_class < CIK ||
-                      (sscreen->b.info.drm_major == 2 &&
-                       sscreen->b.info.drm_minor >= 35) ||
-                      sscreen->b.info.drm_major == 3;
-
-        case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
-                return R600_MAP_BUFFER_ALIGNMENT;
-
-       case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
-       case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
-       case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
-       case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
-       case PIPE_CAP_MAX_VERTEX_STREAMS:
-       case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
-               return 4;
-
-       case PIPE_CAP_GLSL_FEATURE_LEVEL:
-               if (sscreen->b.debug_flags & DBG(NIR))
-                       return 140; /* no geometry and tessellation shaders yet */
-               if (si_have_tgsi_compute(sscreen))
-                       return 450;
-               return 420;
-
-       case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-               return MIN2(sscreen->b.info.max_alloc_size, INT_MAX);
-
-       case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
-       case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
-       case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
-               /* SI doesn't support unaligned loads.
-                * CIK needs DRM 2.50.0 on radeon. */
-               return sscreen->b.chip_class == SI ||
-                      (sscreen->b.info.drm_major == 2 &&
-                       sscreen->b.info.drm_minor < 50);
-
-       case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
-               /* TODO: GFX9 hangs. */
-               if (sscreen->b.chip_class >= GFX9)
-                       return 0;
-               /* Disable on SI due to VM faults in CP DMA. Enable once these
-                * faults are mitigated in software.
-                */
-               if (sscreen->b.chip_class >= CIK &&
-                   sscreen->b.info.drm_major == 3 &&
-                   sscreen->b.info.drm_minor >= 13)
-                       return RADEON_SPARSE_PAGE_SIZE;
-               return 0;
-
-       /* Unsupported features. */
-       case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
-       case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
-       case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
-       case PIPE_CAP_USER_VERTEX_BUFFERS:
-       case PIPE_CAP_FAKE_SW_MSAA:
-       case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
-       case PIPE_CAP_VERTEXID_NOBASE:
-       case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
-       case PIPE_CAP_MAX_WINDOW_RECTANGLES:
-       case PIPE_CAP_TGSI_FS_FBFETCH:
-       case PIPE_CAP_TGSI_MUL_ZERO_WINS:
-       case PIPE_CAP_UMA:
-       case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
-       case PIPE_CAP_POST_DEPTH_COVERAGE:
-       case PIPE_CAP_TILE_RASTER_ORDER:
-       case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
-               return 0;
-
-       case PIPE_CAP_NATIVE_FENCE_FD:
-               return sscreen->b.info.has_sync_file;
-
-       case PIPE_CAP_QUERY_BUFFER_OBJECT:
-               return si_have_tgsi_compute(sscreen);
-
-       case PIPE_CAP_DRAW_PARAMETERS:
-       case PIPE_CAP_MULTI_DRAW_INDIRECT:
-       case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
-               return sscreen->has_draw_indirect_multi;
-
-       case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
-               return 30;
-
-       case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
-               return sscreen->b.chip_class <= VI ?
-                       PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
-
-       /* Stream output. */
-       case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
-       case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
-               return 32*4;
-
-       /* Geometry shader output. */
-       case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
-               return 1024;
-       case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
-               return 4095;
-
-       case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
-               return 2048;
-
-       /* Texturing. */
-       case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
-       case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
-               return 15; /* 16384 */
-       case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-               /* textures support 8192, but layered rendering supports 2048 */
-               return 12;
-       case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
-               /* textures support 8192, but layered rendering supports 2048 */
-               return 2048;
-
-       /* Viewports and render targets. */
-       case PIPE_CAP_MAX_VIEWPORTS:
-               return SI_MAX_VIEWPORTS;
-       case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
-       case PIPE_CAP_MAX_RENDER_TARGETS:
-               return 8;
-
-       case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
-       case PIPE_CAP_MIN_TEXEL_OFFSET:
-               return -32;
-
-       case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
-       case PIPE_CAP_MAX_TEXEL_OFFSET:
-               return 31;
-
-       case PIPE_CAP_ENDIANNESS:
-               return PIPE_ENDIAN_LITTLE;
-
-       case PIPE_CAP_VENDOR_ID:
-               return ATI_VENDOR_ID;
-       case PIPE_CAP_DEVICE_ID:
-               return sscreen->b.info.pci_id;
-       case PIPE_CAP_VIDEO_MEMORY:
-               return sscreen->b.info.vram_size >> 20;
-       case PIPE_CAP_PCI_GROUP:
-               return sscreen->b.info.pci_domain;
-       case PIPE_CAP_PCI_BUS:
-               return sscreen->b.info.pci_bus;
-       case PIPE_CAP_PCI_DEVICE:
-               return sscreen->b.info.pci_dev;
-       case PIPE_CAP_PCI_FUNCTION:
-               return sscreen->b.info.pci_func;
-       }
-       return 0;
-}
-
-static int si_get_shader_param(struct pipe_screen* pscreen,
-                              enum pipe_shader_type shader,
-                              enum pipe_shader_cap param)
-{
-       struct si_screen *sscreen = (struct si_screen *)pscreen;
-
-       switch(shader)
-       {
-       case PIPE_SHADER_FRAGMENT:
-       case PIPE_SHADER_VERTEX:
-       case PIPE_SHADER_GEOMETRY:
-       case PIPE_SHADER_TESS_CTRL:
-       case PIPE_SHADER_TESS_EVAL:
-               break;
-       case PIPE_SHADER_COMPUTE:
-               switch (param) {
-               case PIPE_SHADER_CAP_PREFERRED_IR:
-                       return PIPE_SHADER_IR_NATIVE;
-
-               case PIPE_SHADER_CAP_SUPPORTED_IRS: {
-                       int ir = 1 << PIPE_SHADER_IR_NATIVE;
-
-                       if (si_have_tgsi_compute(sscreen))
-                               ir |= 1 << PIPE_SHADER_IR_TGSI;
-
-                       return ir;
-               }
-
-               case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: {
-                       uint64_t max_const_buffer_size;
-                       pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI,
-                               PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE,
-                               &max_const_buffer_size);
-                       return MIN2(max_const_buffer_size, INT_MAX);
-               }
-               default:
-                       /* If compute shaders don't require a special value
-                        * for this cap, we can return the same value we
-                        * do for other shader types. */
-                       break;
-               }
-               break;
-       default:
-               return 0;
-       }
-
-       switch (param) {
-       /* Shader limits. */
-       case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
-       case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
-       case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
-       case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
-       case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
-               return 16384;
-       case PIPE_SHADER_CAP_MAX_INPUTS:
-               return shader == PIPE_SHADER_VERTEX ? SI_MAX_ATTRIBS : 32;
-       case PIPE_SHADER_CAP_MAX_OUTPUTS:
-               return shader == PIPE_SHADER_FRAGMENT ? 8 : 32;
-       case PIPE_SHADER_CAP_MAX_TEMPS:
-               return 256; /* Max native temporaries. */
-       case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
-               return 4096 * sizeof(float[4]); /* actually only memory limits this */
-       case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-               return SI_NUM_CONST_BUFFERS;
-       case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
-       case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
-               return SI_NUM_SAMPLERS;
-       case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
-               return SI_NUM_SHADER_BUFFERS;
-       case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
-               return SI_NUM_IMAGES;
-       case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
-               return 32;
-       case PIPE_SHADER_CAP_PREFERRED_IR:
-               if (sscreen->b.debug_flags & DBG(NIR) &&
-                   (shader == PIPE_SHADER_VERTEX ||
-                    shader == PIPE_SHADER_FRAGMENT))
-                       return PIPE_SHADER_IR_NIR;
-               return PIPE_SHADER_IR_TGSI;
-       case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
-               return 4;
-
-       /* Supported boolean features. */
-       case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
-       case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
-       case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
-       case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-       case PIPE_SHADER_CAP_INTEGERS:
-       case PIPE_SHADER_CAP_INT64_ATOMICS:
-       case PIPE_SHADER_CAP_FP16:
-       case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
-       case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
-       case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
-       case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
-       case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
-       case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
-               return 1;
-
-       case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
-               /* TODO: Indirect indexing of GS inputs is unimplemented. */
-               return shader != PIPE_SHADER_GEOMETRY &&
-                      (sscreen->llvm_has_working_vgpr_indexing ||
-                       /* TCS and TES load inputs directly from LDS or
-                        * offchip memory, so indirect indexing is trivial. */
-                       shader == PIPE_SHADER_TESS_CTRL ||
-                       shader == PIPE_SHADER_TESS_EVAL);
-
-       case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
-               return sscreen->llvm_has_working_vgpr_indexing ||
-                      /* TCS stores outputs directly to memory. */
-                      shader == PIPE_SHADER_TESS_CTRL;
-
-       /* Unsupported boolean features. */
-       case PIPE_SHADER_CAP_SUBROUTINES:
-       case PIPE_SHADER_CAP_SUPPORTED_IRS:
-       case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
-       case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
-               return 0;
-       }
-       return 0;
-}
-
-static const struct nir_shader_compiler_options nir_options = {
-       .vertex_id_zero_based = true,
-       .lower_scmp = true,
-       .lower_flrp32 = true,
-       .lower_fsat = true,
-       .lower_fdiv = true,
-       .lower_sub = true,
-       .lower_ffma = true,
-       .lower_pack_snorm_2x16 = true,
-       .lower_pack_snorm_4x8 = true,
-       .lower_pack_unorm_2x16 = true,
-       .lower_pack_unorm_4x8 = true,
-       .lower_unpack_snorm_2x16 = true,
-       .lower_unpack_snorm_4x8 = true,
-       .lower_unpack_unorm_2x16 = true,
-       .lower_unpack_unorm_4x8 = true,
-       .lower_extract_byte = true,
-       .lower_extract_word = true,
-       .max_unroll_iterations = 32,
-       .native_integers = true,
-};
-
-static const void *
-si_get_compiler_options(struct pipe_screen *screen,
-                       enum pipe_shader_ir ir,
-                       enum pipe_shader_type shader)
-{
-       assert(ir == PIPE_SHADER_IR_NIR);
-       return &nir_options;
-}
 
 static void si_destroy_screen(struct pipe_screen* pscreen)
 {
@@ -969,16 +539,44 @@ static void si_test_vmfault(struct si_screen *sscreen)
        exit(0);
 }
 
-static void radeonsi_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)
+static void si_disk_cache_create(struct si_screen *sscreen)
 {
-       ac_compute_driver_uuid(uuid, PIPE_UUID_SIZE);
-}
+       /* Don't use the cache if shader dumping is enabled. */
+       if (sscreen->b.debug_flags & DBG_ALL_SHADERS)
+               return;
 
-static void radeonsi_get_device_uuid(struct pipe_screen *pscreen, char *uuid)
-{
-       struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen;
+       /* TODO: remove this once gallium supports a nir cache */
+       if (sscreen->b.debug_flags & DBG(NIR))
+               return;
 
-       ac_compute_device_uuid(&rscreen->info, uuid, PIPE_UUID_SIZE);
+       uint32_t mesa_timestamp;
+       if (disk_cache_get_function_timestamp(si_disk_cache_create,
+                                             &mesa_timestamp)) {
+               char *timestamp_str;
+               int res = -1;
+               uint32_t llvm_timestamp;
+
+               if (disk_cache_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo,
+                                                     &llvm_timestamp)) {
+                       res = asprintf(&timestamp_str, "%u_%u",
+                                      mesa_timestamp, llvm_timestamp);
+               }
+
+               if (res != -1) {
+                       /* These flags affect shader compilation. */
+                       uint64_t shader_debug_flags =
+                               sscreen->b.debug_flags &
+                               (DBG(FS_CORRECT_DERIVS_AFTER_KILL) |
+                                DBG(SI_SCHED) |
+                                DBG(UNSAFE_MATH));
+
+                       sscreen->b.disk_shader_cache =
+                               disk_cache_create(si_get_family_name(sscreen),
+                                                 timestamp_str,
+                                                 shader_debug_flags);
+                       free(timestamp_str);
+               }
+       }
 }
 
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
@@ -991,16 +589,18 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
                return NULL;
        }
 
+       sscreen->b.ws = ws;
+       ws->query_info(ws, &sscreen->b.info);
+
+       sscreen->b.family = sscreen->b.info.family;
+       sscreen->b.chip_class = sscreen->b.info.chip_class;
+
        /* Set functions first. */
        sscreen->b.b.context_create = si_pipe_create_context;
        sscreen->b.b.destroy = si_destroy_screen;
-       sscreen->b.b.get_param = si_get_param;
-       sscreen->b.b.get_shader_param = si_get_shader_param;
-       sscreen->b.b.get_compiler_options = si_get_compiler_options;
-       sscreen->b.b.get_device_uuid = radeonsi_get_device_uuid;
-       sscreen->b.b.get_driver_uuid = radeonsi_get_driver_uuid;
        sscreen->b.b.resource_create = si_resource_create_common;
 
+       si_init_screen_get_functions(sscreen);
        si_init_screen_fence_functions(sscreen);
        si_init_screen_state_functions(sscreen);
 
@@ -1020,6 +620,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
                return NULL;
        }
 
+       si_disk_cache_create(sscreen);
+
        /* Only enable as many threads as we have target machines, but at most
         * the number of CPUs - 1 if there is more than one.
         */
index bdf146ff83064ec1fdb4cd582c79e10eb764b5f8..3131a8bf24bffab2a2b20ee424a276edc14012eb 100644 (file)
@@ -641,6 +641,10 @@ void si_init_screen_fence_functions(struct si_screen *screen);
 struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
                                          struct tc_unflushed_batch_token *tc_token);
 
+/* si_get.c */
+const char *si_get_family_name(const struct si_screen *sscreen);
+void si_init_screen_get_functions(struct si_screen *sscreen);
+
 /* si_hw_context.c */
 void si_destroy_saved_cs(struct si_saved_cs *scs);
 void si_context_gfx_flush(void *context, unsigned flags,