gallium: Redefine the max texture 2d cap from _LEVELS to _SIZE.
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.h
index f58978989d4dac84ac081495ed8701973e8d910f..82c521efcb7415bc9ef2327ffdeb2b68f3b341e1 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright 2012 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
 #include <llvm-c/Core.h> /* LLVMModuleRef */
 #include <llvm-c/TargetMachine.h>
 #include "tgsi/tgsi_scan.h"
+#include "util/u_inlines.h"
 #include "util/u_queue.h"
 
 #include "ac_binary.h"
 #include "ac_llvm_build.h"
-#include "si_state.h"
+#include "ac_llvm_util.h"
+
+#include <stdio.h>
 
 struct nir_shader;
+struct si_shader;
+struct si_context;
 
+#define SI_MAX_ATTRIBS         16
 #define SI_MAX_VS_OUTPUTS      40
 
 /* Shader IO unique indices are supported for TGSI_SEMANTIC_GENERIC with an
  * index smaller than this.
  */
-#define SI_MAX_IO_GENERIC       46
+#define SI_MAX_IO_GENERIC       43
 
 /* SGPR user data indices */
 enum {
        SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
-#if !HAVE_32BIT_POINTERS
-       SI_SGPR_RW_BUFFERS_HI,
-#endif
        SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
-#if !HAVE_32BIT_POINTERS
-       SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI,
-#endif
        SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */
-#if !HAVE_32BIT_POINTERS
-       SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
-#endif
        SI_SGPR_SAMPLERS_AND_IMAGES,
-#if !HAVE_32BIT_POINTERS
-       SI_SGPR_SAMPLERS_AND_IMAGES_HI,
-#endif
        SI_NUM_RESOURCE_SGPRS,
 
+       /* API VS, TES without GS, GS copy shader */
+       SI_SGPR_VS_STATE_BITS = SI_NUM_RESOURCE_SGPRS,
+       SI_NUM_VS_STATE_RESOURCE_SGPRS,
+
        /* all VS variants */
-       SI_SGPR_BASE_VERTEX = SI_NUM_RESOURCE_SGPRS,
+       SI_SGPR_BASE_VERTEX = SI_NUM_VS_STATE_RESOURCE_SGPRS,
        SI_SGPR_START_INSTANCE,
        SI_SGPR_DRAWID,
-       SI_SGPR_VS_STATE_BITS,
        SI_VS_NUM_USER_SGPR,
 
        SI_SGPR_VS_BLIT_DATA = SI_SGPR_CONST_AND_SHADER_BUFFERS,
 
        /* TES */
-       SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
+       SI_SGPR_TES_OFFCHIP_LAYOUT = SI_NUM_VS_STATE_RESOURCE_SGPRS,
        SI_SGPR_TES_OFFCHIP_ADDR,
        SI_TES_NUM_USER_SGPR,
 
@@ -190,36 +188,21 @@ enum {
        GFX6_TCS_NUM_USER_SGPR,
 
        /* GFX9: Merged shaders. */
-#if HAVE_32BIT_POINTERS
        /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */
        /* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */
        GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
-#else
-       /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */
-       GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR,
-       GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI,
-       GFX9_MERGED_NUM_USER_SGPR,
-#endif
 
        /* GFX9: Merged LS-HS (VS-TCS) only. */
        GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
        GFX9_SGPR_TCS_OUT_OFFSETS,
        GFX9_SGPR_TCS_OUT_LAYOUT,
-#if !HAVE_32BIT_POINTERS
-       GFX9_SGPR_align_for_vb_pointer,
-#endif
        GFX9_TCS_NUM_USER_SGPR,
 
        /* GS limits */
        GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
-#if HAVE_32BIT_POINTERS
        GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
        GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR,
-#else
-       GFX9_VSGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
-       GFX9_TESGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
-#endif
-       SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS + (HAVE_32BIT_POINTERS ? 1 : 2),
+       SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS,
 
        /* PS only */
        SI_SGPR_ALPHA_REF       = SI_NUM_RESOURCE_SGPRS,
@@ -266,11 +249,21 @@ enum {
 
 /* SI-specific system values. */
 enum {
+       /* Values from set_tess_state. */
        TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
        TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
+
+       /* Up to 4 dwords in user SGPRs for compute shaders. */
+       TGSI_SEMANTIC_CS_USER_DATA,
 };
 
 enum {
+       /* Use a property enum that CS wouldn't use. */
+       TGSI_PROPERTY_CS_LOCAL_SIZE = TGSI_PROPERTY_FS_COORD_ORIGIN,
+
+       /* The number of used user data dwords in the range [1, 4]. */
+       TGSI_PROPERTY_CS_USER_DATA_DWORDS = TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
+
        /* Use a property enum that VS wouldn't use. */
        TGSI_PROPERTY_VS_BLIT_SGPRS = TGSI_PROPERTY_FS_COORD_ORIGIN,
 
@@ -280,27 +273,24 @@ enum {
        SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9,
 };
 
-/* For VS shader key fix_fetch. */
-enum {
-       SI_FIX_FETCH_NONE = 0,
-       SI_FIX_FETCH_A2_SNORM,
-       SI_FIX_FETCH_A2_SSCALED,
-       SI_FIX_FETCH_A2_SINT,
-       SI_FIX_FETCH_RGBA_32_UNORM,
-       SI_FIX_FETCH_RGBX_32_UNORM,
-       SI_FIX_FETCH_RGBA_32_SNORM,
-       SI_FIX_FETCH_RGBX_32_SNORM,
-       SI_FIX_FETCH_RGBA_32_USCALED,
-       SI_FIX_FETCH_RGBA_32_SSCALED,
-       SI_FIX_FETCH_RGBA_32_FIXED,
-       SI_FIX_FETCH_RGBX_32_FIXED,
-       SI_FIX_FETCH_RG_64_FLOAT,
-       SI_FIX_FETCH_RGB_64_FLOAT,
-       SI_FIX_FETCH_RGBA_64_FLOAT,
-       SI_FIX_FETCH_RGB_8,     /* A = 1.0 */
-       SI_FIX_FETCH_RGB_8_INT, /* A = 1 */
-       SI_FIX_FETCH_RGB_16,
-       SI_FIX_FETCH_RGB_16_INT,
+/**
+ * For VS shader keys, describe any fixups required for vertex fetch.
+ *
+ * \ref log_size, \ref format, and the number of channels are interpreted as
+ * by \ref ac_build_opencoded_load_format.
+ *
+ * Note: all bits 0 (size = 1 byte, num channels = 1, format = float) is an
+ * impossible format and indicates that no fixup is needed (just use
+ * buffer_load_format_xyzw).
+ */
+union si_vs_fix_fetch {
+       struct {
+               uint8_t log_size : 2; /* 1, 2, 4 or 8 bytes per channel */
+               uint8_t num_channels_m1 : 2; /* number of channels minus 1 */
+               uint8_t format : 3; /* AC_FETCH_FORMAT_xxx */
+               uint8_t reverse : 1; /* reverse XYZ channels */
+       } u;
+       uint8_t bits;
 };
 
 struct si_shader;
@@ -309,7 +299,7 @@ struct si_shader;
 struct si_compiler_ctx_state {
        /* Should only be used by si_init_shader_selector_async and
         * si_build_shader_variant if thread_index == -1 (non-threaded). */
-       LLVMTargetMachineRef            tm;
+       struct ac_llvm_compiler         *compiler;
 
        /* Used if thread_index == -1 or if debug.async is true. */
        struct pipe_debug_callback      debug;
@@ -355,7 +345,8 @@ struct si_shader_selector {
        ubyte           culldist_mask;
 
        /* ES parameters. */
-       unsigned        esgs_itemsize;
+       unsigned        esgs_itemsize; /* vertex stride */
+       unsigned        lshs_vertex_stride;
 
        /* GS parameters. */
        unsigned        gs_input_verts_per_prim;
@@ -375,9 +366,7 @@ struct si_shader_selector {
         */
        unsigned        colors_written_4bit;
 
-       /* CS parameters */
-       unsigned local_size;
-
+       uint64_t        outputs_written_before_ps; /* "get_unique_index" bits */
        uint64_t        outputs_written;        /* "get_unique_index" bits */
        uint32_t        patch_outputs_written;  /* "get_unique_index_patch" bits */
 
@@ -491,7 +480,7 @@ union si_shader_part_key {
                unsigned        ancillary_vgpr_index:5;
                unsigned        wqm:1;
                char            color_attr_index[2];
-               char            color_interp_vgpr_index[2]; /* -1 == constant */
+               signed char     color_interp_vgpr_index[2]; /* -1 == constant */
        } ps_prolog;
        struct {
                struct si_ps_epilog_bits states;
@@ -532,8 +521,11 @@ struct si_shader_key {
 
        /* Flags for monolithic compilation only. */
        struct {
-               /* One byte for every input: SI_FIX_FETCH_* enums. */
-               uint8_t         vs_fix_fetch[SI_MAX_ATTRIBS];
+               /* Whether fetch should be opencoded according to vs_fix_fetch.
+                * Otherwise, if vs_fix_fetch is non-zero, buffer_load_format_xyzw
+                * with minimal fixups is used. */
+               uint16_t vs_fetch_opencode;
+               union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS];
 
                union {
                        uint64_t        ff_tcs_inputs_to_copy; /* for fixed-func TCS */
@@ -541,6 +533,9 @@ struct si_shader_key {
                        unsigned        vs_export_prim_id:1;
                        struct {
                                unsigned interpolate_at_sample_force_center:1;
+                               unsigned fbfetch_msaa;
+                               unsigned fbfetch_is_1D;
+                               unsigned fbfetch_layered;
                        } ps;
                } u;
        } mono;
@@ -605,8 +600,8 @@ struct si_shader {
        struct si_shader_part           *epilog;
 
        struct si_pm4_state             *pm4;
-       struct r600_resource            *bo;
-       struct r600_resource            *scratch_bo;
+       struct si_resource              *bo;
+       struct si_resource              *scratch_bo;
        struct si_shader_key            key;
        struct util_queue_fence         ready;
        bool                            compilation_failed;
@@ -625,6 +620,49 @@ struct si_shader {
         */
        char                            *shader_log;
        size_t                          shader_log_size;
+
+       /* For saving precomputed context register values. */
+       union {
+               struct {
+                       unsigned        vgt_gsvs_ring_offset_1;
+                       unsigned        vgt_gsvs_ring_offset_2;
+                       unsigned        vgt_gsvs_ring_offset_3;
+                       unsigned        vgt_gs_out_prim_type;
+                       unsigned        vgt_gsvs_ring_itemsize;
+                       unsigned        vgt_gs_max_vert_out;
+                       unsigned        vgt_gs_vert_itemsize;
+                       unsigned        vgt_gs_vert_itemsize_1;
+                       unsigned        vgt_gs_vert_itemsize_2;
+                       unsigned        vgt_gs_vert_itemsize_3;
+                       unsigned        vgt_gs_instance_cnt;
+                       unsigned        vgt_gs_onchip_cntl;
+                       unsigned        vgt_gs_max_prims_per_subgroup;
+                       unsigned        vgt_esgs_ring_itemsize;
+               } gs;
+
+               struct {
+                       unsigned        vgt_gs_mode;
+                       unsigned        vgt_primitiveid_en;
+                       unsigned        vgt_reuse_off;
+                       unsigned        spi_vs_out_config;
+                       unsigned        spi_shader_pos_format;
+                       unsigned        pa_cl_vte_cntl;
+               } vs;
+
+               struct {
+                       unsigned        spi_ps_input_ena;
+                       unsigned        spi_ps_input_addr;
+                       unsigned        spi_baryc_cntl;
+                       unsigned        spi_ps_in_control;
+                       unsigned        spi_shader_z_format;
+                       unsigned        spi_shader_col_format;
+                       unsigned        cb_shader_mask;
+               } ps;
+       } ctx_reg;
+
+       /* For saving precomputed register values. */
+       unsigned vgt_tf_param; /* VGT_TF_PARAM */
+       unsigned vgt_vertex_reuse_block_cntl; /* VGT_VERTEX_REUSE_BLOCK_CNTL */
 };
 
 struct si_shader_part {
@@ -637,20 +675,20 @@ struct si_shader_part {
 /* si_shader.c */
 struct si_shader *
 si_generate_gs_copy_shader(struct si_screen *sscreen,
-                          LLVMTargetMachineRef tm,
+                          struct ac_llvm_compiler *compiler,
                           struct si_shader_selector *gs_selector,
                           struct pipe_debug_callback *debug);
 int si_compile_tgsi_shader(struct si_screen *sscreen,
-                          LLVMTargetMachineRef tm,
+                          struct ac_llvm_compiler *compiler,
                           struct si_shader *shader,
-                          bool is_monolithic,
                           struct pipe_debug_callback *debug);
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+int si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
                     struct si_shader *shader,
                     struct pipe_debug_callback *debug);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
-unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
+                                      unsigned is_varying);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
                    struct pipe_debug_callback *debug, unsigned processor,
@@ -670,9 +708,9 @@ const char *si_get_shader_name(const struct si_shader *shader, unsigned processo
 void si_nir_scan_shader(const struct nir_shader *nir,
                        struct tgsi_shader_info *info);
 void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
-                          const struct tgsi_shader_info *info,
                           struct tgsi_tessctrl_info *out);
 void si_lower_nir(struct si_shader_selector *sel);
+void si_nir_opts(struct nir_shader *nir);
 
 /* Inline helpers. */