radeonsi: process TGSI property NEXT_SHADER
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
index e3ba1f97252646fe19a5b3bfc1d13691bda9d26a..151615eb4e7057a982d1f82839b79851b5b624f5 100644 (file)
@@ -453,7 +453,7 @@ static void declare_input_vs(
                                            input_index);
        } else if (divisor) {
                /* Build index from instance ID, start instance and divisor */
-               ctx->shader->uses_instanceid = true;
+               ctx->shader->info.uses_instanceid = true;
                buffer_index = get_instance_index_for_fetch(&ctx->radeon_bld,
                                                            SI_PARAM_START_INSTANCE,
                                                            divisor);
@@ -1893,7 +1893,8 @@ handle_semantic:
                case TGSI_SEMANTIC_COLOR:
                case TGSI_SEMANTIC_BCOLOR:
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
-                       shader->vs_output_param_offset[i] = param_count;
+                       assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+                       shader->info.vs_output_param_offset[i] = param_count;
                        param_count++;
                        break;
                case TGSI_SEMANTIC_CLIPDIST:
@@ -1907,7 +1908,8 @@ handle_semantic:
                case TGSI_SEMANTIC_TEXCOORD:
                case TGSI_SEMANTIC_GENERIC:
                        target = V_008DFC_SQ_EXP_PARAM + param_count;
-                       shader->vs_output_param_offset[i] = param_count;
+                       assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
+                       shader->info.vs_output_param_offset[i] = param_count;
                        param_count++;
                        break;
                default:
@@ -1935,7 +1937,7 @@ handle_semantic:
                }
        }
 
-       shader->nr_param_exports = param_count;
+       shader->info.nr_param_exports = param_count;
 
        /* We need to add the position output manually if it's missing. */
        if (!pos_args[0][0]) {
@@ -1997,7 +1999,7 @@ handle_semantic:
 
        for (i = 0; i < 4; i++)
                if (pos_args[i][0])
-                       shader->nr_pos_exports++;
+                       shader->info.nr_pos_exports++;
 
        pos_idx = 0;
        for (i = 0; i < 4; i++) {
@@ -2007,7 +2009,7 @@ handle_semantic:
                /* Specify the target we are exporting */
                pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
 
-               if (pos_idx == shader->nr_pos_exports)
+               if (pos_idx == shader->info.nr_pos_exports)
                        /* Specify that this is the last export */
                        pos_args[i][2] = uint->one;
 
@@ -4059,18 +4061,18 @@ static void create_function(struct si_shader_context *ctx)
                                          S_0286D0_POS_FIXED_PT_ENA(1));
        }
 
-       shader->num_input_sgprs = 0;
-       shader->num_input_vgprs = 0;
+       shader->info.num_input_sgprs = 0;
+       shader->info.num_input_vgprs = 0;
 
        for (i = 0; i <= last_sgpr; ++i)
-               shader->num_input_sgprs += llvm_get_type_size(params[i]) / 4;
+               shader->info.num_input_sgprs += llvm_get_type_size(params[i]) / 4;
 
        /* Unused fragment shader inputs are eliminated by the compiler,
         * so we don't know yet how many there will be.
         */
        if (ctx->type != TGSI_PROCESSOR_FRAGMENT)
                for (; i < num_params; ++i)
-                       shader->num_input_vgprs += llvm_get_type_size(params[i]) / 4;
+                       shader->info.num_input_vgprs += llvm_get_type_size(params[i]) / 4;
 
        if (bld_base->info &&
            (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
@@ -4404,14 +4406,14 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 
 static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary,
                                       struct pipe_debug_callback *debug,
-                                      const char *name)
+                                      const char *name, FILE *file)
 {
        char *line, *p;
        unsigned i, count;
 
        if (binary->disasm_string) {
-               fprintf(stderr, "Shader %s disassembly:\n", name);
-               fprintf(stderr, "%s", binary->disasm_string);
+               fprintf(file, "Shader %s disassembly:\n", name);
+               fprintf(file, "%s", binary->disasm_string);
 
                if (debug && debug->debug_message) {
                        /* Very long debug messages are cut off, so send the
@@ -4441,9 +4443,9 @@ static void si_shader_dump_disassembly(const struct radeon_shader_binary *binary
                                           "Shader Disassembly End");
                }
        } else {
-               fprintf(stderr, "Shader %s binary:\n", name);
+               fprintf(file, "Shader %s binary:\n", name);
                for (i = 0; i < binary->code_size; i += 4) {
-                       fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i,
+                       fprintf(file, "@0x%x: %02x%02x%02x%02x\n", i,
                                binary->code[i + 3], binary->code[i + 2],
                                binary->code[i + 1], binary->code[i]);
                }
@@ -4455,7 +4457,8 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
                                 unsigned num_inputs,
                                 unsigned code_size,
                                 struct pipe_debug_callback *debug,
-                                unsigned processor)
+                                unsigned processor,
+                                FILE *file)
 {
        unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
        unsigned lds_per_wave = 0;
@@ -4491,15 +4494,16 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
        if (lds_per_wave)
                max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
-       if (r600_can_dump_shader(&sscreen->b, processor)) {
+       if (file != stderr ||
+           r600_can_dump_shader(&sscreen->b, processor)) {
                if (processor == TGSI_PROCESSOR_FRAGMENT) {
-                       fprintf(stderr, "*** SHADER CONFIG ***\n"
+                       fprintf(file, "*** SHADER CONFIG ***\n"
                                "SPI_PS_INPUT_ADDR = 0x%04x\n"
                                "SPI_PS_INPUT_ENA  = 0x%04x\n",
                                conf->spi_ps_input_addr, conf->spi_ps_input_ena);
                }
 
-               fprintf(stderr, "*** SHADER STATS ***\n"
+               fprintf(file, "*** SHADER STATS ***\n"
                        "SGPRS: %d\n"
                        "VGPRS: %d\n"
                        "Code Size: %d bytes\n"
@@ -4520,28 +4524,63 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
                           max_simd_waves);
 }
 
+static const char *si_get_shader_name(struct si_shader *shader,
+                                     unsigned processor)
+{ /* Returns a string-literal label for the shader, chosen from the processor type and shader key. */
+       switch (processor) {
+       case TGSI_PROCESSOR_VERTEX:
+               if (shader->key.vs.as_es)
+                       return "Vertex Shader as ES";
+               else if (shader->key.vs.as_ls)
+                       return "Vertex Shader as LS";
+               else
+                       return "Vertex Shader as VS"; /* neither as_es nor as_ls is set */
+       case TGSI_PROCESSOR_TESS_CTRL:
+               return "Tessellation Control Shader";
+       case TGSI_PROCESSOR_TESS_EVAL:
+               if (shader->key.tes.as_es)
+                       return "Tessellation Evaluation Shader as ES";
+               else
+                       return "Tessellation Evaluation Shader as VS"; /* as_es is not set */
+       case TGSI_PROCESSOR_GEOMETRY:
+               if (shader->gs_copy_shader == NULL) /* NOTE(review): presumably only a real GS owns a copy shader - confirm */
+                       return "GS Copy Shader as VS";
+               else
+                       return "Geometry Shader";
+       case TGSI_PROCESSOR_FRAGMENT:
+               return "Pixel Shader";
+       case TGSI_PROCESSOR_COMPUTE:
+               return "Compute Shader";
+       default: /* any processor value not listed above */
+               return "Unknown Shader";
+       }
+}
+
 void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
-                   struct pipe_debug_callback *debug, unsigned processor)
+                   struct pipe_debug_callback *debug, unsigned processor,
+                   FILE *file) /* stream that receives the textual dump */
 {
-       if (r600_can_dump_shader(&sscreen->b, processor) &&
-           !(sscreen->b.debug_flags & DBG_NO_ASM)) {
-               fprintf(stderr, "\n");
+       if (file != stderr || /* any non-stderr stream is dumped regardless of the debug flags below */
+           (r600_can_dump_shader(&sscreen->b, processor) &&
+            !(sscreen->b.debug_flags & DBG_NO_ASM))) {
+               fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
 
                if (shader->prolog)
                        si_shader_dump_disassembly(&shader->prolog->binary,
-                                                  debug, "prolog");
+                                                  debug, "prolog", file);
 
-               si_shader_dump_disassembly(&shader->binary, debug, "main");
+               si_shader_dump_disassembly(&shader->binary, debug, "main", file);
 
                if (shader->epilog)
                        si_shader_dump_disassembly(&shader->epilog->binary,
-                                                  debug, "epilog");
-               fprintf(stderr, "\n");
+                                                  debug, "epilog", file);
+               fprintf(file, "\n");
        }
 
        si_shader_dump_stats(sscreen, &shader->config,
                             shader->selector ? shader->selector->info.num_inputs : 0,
-                            si_get_shader_binary_size(shader), debug, processor);
+                            si_get_shader_binary_size(shader), debug, processor,
+                            file); /* called unconditionally, outside the disassembly check above */
 }
 
 int si_compile_llvm(struct si_screen *sscreen,
@@ -4689,7 +4728,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
                if (r600_can_dump_shader(&sscreen->b, TGSI_PROCESSOR_GEOMETRY))
                        fprintf(stderr, "GS Copy Shader:\n");
                si_shader_dump(sscreen, ctx->shader, debug,
-                              TGSI_PROCESSOR_GEOMETRY);
+                              TGSI_PROCESSOR_GEOMETRY, stderr);
                r = si_shader_binary_upload(sscreen, ctx->shader);
        }
 
@@ -4839,7 +4878,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
        si_init_shader_ctx(&ctx, sscreen, shader, tm);
        ctx.is_monolithic = is_monolithic;
 
-       shader->uses_instanceid = sel->info.uses_instanceid;
+       shader->info.uses_instanceid = sel->info.uses_instanceid;
 
        bld_base = &ctx.radeon_bld.soa.bld_base;
        ctx.radeon_bld.load_system_value = declare_system_value;
@@ -4933,43 +4972,43 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 
        /* Calculate the number of fragment input VGPRs. */
        if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
-               shader->num_input_vgprs = 0;
-               shader->face_vgpr_index = -1;
+               shader->info.num_input_vgprs = 0;
+               shader->info.face_vgpr_index = -1;
 
                if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 2;
+                       shader->info.num_input_vgprs += 2;
                if (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 2;
+                       shader->info.num_input_vgprs += 2;
                if (G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 2;
+                       shader->info.num_input_vgprs += 2;
                if (G_0286CC_PERSP_PULL_MODEL_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 3;
+                       shader->info.num_input_vgprs += 3;
                if (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 2;
+                       shader->info.num_input_vgprs += 2;
                if (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 2;
+                       shader->info.num_input_vgprs += 2;
                if (G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 2;
+                       shader->info.num_input_vgprs += 2;
                if (G_0286CC_LINE_STIPPLE_TEX_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 1;
+                       shader->info.num_input_vgprs += 1;
                if (G_0286CC_POS_X_FLOAT_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 1;
+                       shader->info.num_input_vgprs += 1;
                if (G_0286CC_POS_Y_FLOAT_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 1;
+                       shader->info.num_input_vgprs += 1;
                if (G_0286CC_POS_Z_FLOAT_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 1;
+                       shader->info.num_input_vgprs += 1;
                if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 1;
+                       shader->info.num_input_vgprs += 1;
                if (G_0286CC_FRONT_FACE_ENA(shader->config.spi_ps_input_addr)) {
-                       shader->face_vgpr_index = shader->num_input_vgprs;
-                       shader->num_input_vgprs += 1;
+                       shader->info.face_vgpr_index = shader->info.num_input_vgprs;
+                       shader->info.num_input_vgprs += 1;
                }
                if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 1;
+                       shader->info.num_input_vgprs += 1;
                if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 1;
+                       shader->info.num_input_vgprs += 1;
                if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
-                       shader->num_input_vgprs += 1;
+                       shader->info.num_input_vgprs += 1;
        }
 
        if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
@@ -5245,10 +5284,11 @@ static bool si_get_vs_epilog(struct si_screen *sscreen,
        /* Set up the PrimitiveID output. */
        if (shader->key.vs.epilog.export_prim_id) {
                unsigned index = shader->selector->info.num_outputs;
-               unsigned offset = shader->nr_param_exports++;
+               unsigned offset = shader->info.nr_param_exports++;
 
                epilog_key.vs_epilog.prim_id_param_offset = offset;
-               shader->vs_output_param_offset[index] = offset;
+               assert(index < ARRAY_SIZE(shader->info.vs_output_param_offset));
+               shader->info.vs_output_param_offset[index] = offset;
        }
 
        shader->epilog = si_get_shader_part(sscreen, &sscreen->vs_epilogs,
@@ -5272,7 +5312,7 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
        /* Get the prolog. */
        memset(&prolog_key, 0, sizeof(prolog_key));
        prolog_key.vs_prolog.states = shader->key.vs.prolog;
-       prolog_key.vs_prolog.num_input_sgprs = shader->num_input_sgprs;
+       prolog_key.vs_prolog.num_input_sgprs = shader->info.num_input_sgprs;
        prolog_key.vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
 
        /* The prolog is a no-op if there are no inputs. */
@@ -5294,7 +5334,7 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen,
        /* Set the instanceID flag. */
        for (i = 0; i < info->num_inputs; i++)
                if (prolog_key.vs_prolog.states.instance_divisors[i])
-                       shader->uses_instanceid = true;
+                       shader->info.uses_instanceid = true;
 
        return true;
 }
@@ -5700,8 +5740,8 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
        memset(&prolog_key, 0, sizeof(prolog_key));
        prolog_key.ps_prolog.states = shader->key.ps.prolog;
        prolog_key.ps_prolog.colors_read = info->colors_read;
-       prolog_key.ps_prolog.num_input_sgprs = shader->num_input_sgprs;
-       prolog_key.ps_prolog.num_input_vgprs = shader->num_input_vgprs;
+       prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
+       prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
 
        if (info->colors_read) {
                unsigned *color = shader->selector->color_attr_index;
@@ -5709,7 +5749,7 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
                if (shader->key.ps.prolog.color_two_side) {
                        /* BCOLORs are stored after the last input. */
                        prolog_key.ps_prolog.num_interp_inputs = info->num_inputs;
-                       prolog_key.ps_prolog.face_vgpr_index = shader->face_vgpr_index;
+                       prolog_key.ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
                        shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
                }
 
@@ -5857,12 +5897,15 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
        struct si_shader *mainp = shader->selector->main_shader_part;
        int r;
 
-       /* LS and ES are always compiled on demand. */
+       /* LS, ES, VS are compiled on demand if the main part hasn't been
+        * compiled for that stage.
+        */
        if (!mainp ||
            (shader->selector->type == PIPE_SHADER_VERTEX &&
-            (shader->key.vs.as_es || shader->key.vs.as_ls)) ||
+            (shader->key.vs.as_es != mainp->key.vs.as_es ||
+             shader->key.vs.as_ls != mainp->key.vs.as_ls)) ||
            (shader->selector->type == PIPE_SHADER_TESS_EVAL &&
-            shader->key.tes.as_es)) {
+            shader->key.tes.as_es != mainp->key.tes.as_es)) {
                /* Monolithic shader (compiled as a whole, has many variants,
                 * may take a long time to compile).
                 */
@@ -5885,15 +5928,15 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                shader->is_binary_shared = true;
                shader->binary = mainp->binary;
                shader->config = mainp->config;
-               shader->num_input_sgprs = mainp->num_input_sgprs;
-               shader->num_input_vgprs = mainp->num_input_vgprs;
-               shader->face_vgpr_index = mainp->face_vgpr_index;
-               memcpy(shader->vs_output_param_offset,
-                      mainp->vs_output_param_offset,
-                      sizeof(mainp->vs_output_param_offset));
-               shader->uses_instanceid = mainp->uses_instanceid;
-               shader->nr_pos_exports = mainp->nr_pos_exports;
-               shader->nr_param_exports = mainp->nr_param_exports;
+               shader->info.num_input_sgprs = mainp->info.num_input_sgprs;
+               shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
+               shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
+               memcpy(shader->info.vs_output_param_offset,
+                      mainp->info.vs_output_param_offset,
+                      sizeof(mainp->info.vs_output_param_offset));
+               shader->info.uses_instanceid = mainp->info.uses_instanceid;
+               shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
+               shader->info.nr_param_exports = mainp->info.nr_param_exports;
 
                /* Select prologs and/or epilogs. */
                switch (shader->selector->type) {
@@ -5917,7 +5960,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                         * are allocated inputs.
                         */
                        shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
-                                                       shader->num_input_vgprs);
+                                                       shader->info.num_input_vgprs);
                        break;
                }
 
@@ -5936,7 +5979,8 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
                }
        }
 
-       si_shader_dump(sscreen, shader, debug, shader->selector->info.processor);
+       si_shader_dump(sscreen, shader, debug, shader->selector->info.processor,
+                      stderr);
 
        /* Upload. */
        r = si_shader_binary_upload(sscreen, shader);