radeonsi: Add header and footer to shader stat dump

[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c

index fb1419ddb4d5181c5315ca81d6ba8adcf811daa0..89f02ab041097b9fb58693fd4d2792ee83fba619 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -37,6 +37,7 @@
  #include "radeon/radeon_elf_util.h"
  #include "radeon/radeon_llvm_emit.h"
  #include "util/u_memory.h"
+#include "util/u_pstipple.h"
  #include "tgsi/tgsi_parse.h"
  #include "tgsi/tgsi_util.h"
  #include "tgsi/tgsi_dump.h"
@@ -63,8 +64,6 @@ struct si_shader_output_values
  struct si_shader_context
  {
         struct radeon_llvm_context radeon_bld;
-       struct tgsi_parse_context parse;
-       struct tgsi_token * tokens;
         struct si_shader *shader;
         struct si_screen *screen;
         unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
@@ -73,6 +72,7 @@ struct si_shader_context
         int param_streamout_offset[4];
         int param_vertex_id;
         int param_instance_id;
+       LLVMTargetMachineRef tm;
         LLVMValueRef const_md;
         LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
         LLVMValueRef ddxy_lds;
@@ -192,6 +192,30 @@ static int get_param_index(unsigned semantic_name, unsigned index,
         return -1;
  }
  
+/**
+ * Get the value of a shader input parameter and extract a bitfield.
+ */
+static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
+                                unsigned param, unsigned rshift,
+                                unsigned bitwidth)
+{
+       struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+       LLVMValueRef value = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                         param);
+
+       if (rshift)
+               value = LLVMBuildLShr(gallivm->builder, value,
+                                     lp_build_const_int32(gallivm, rshift), "");
+
+       if (rshift + bitwidth < 32) {
+               unsigned mask = (1 << bitwidth) - 1;
+               value = LLVMBuildAnd(gallivm->builder, value,
+                                    lp_build_const_int32(gallivm, mask), "");
+       }
+
+       return value;
+}
+
  /**
   * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
   * It's equivalent to doing a load from &base_ptr[index].
@@ -236,14 +260,14 @@ static LLVMValueRef get_instance_index_for_fetch(
  
         LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn,
                                            si_shader_ctx->param_instance_id);
-       result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
-                       radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
  
+       /* The division must be done before START_INSTANCE is added. */
         if (divisor > 1)
                 result = LLVMBuildUDiv(gallivm->builder, result,
                                 lp_build_const_int32(gallivm, divisor), "");
  
-       return result;
+       return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
+                       radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
  }
  
  static void declare_input_vs(
@@ -562,14 +586,8 @@ static void declare_input_fs(
  
  static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
  {
-       struct gallivm_state *gallivm = &radeon_bld->gallivm;
-       LLVMValueRef value = LLVMGetParam(radeon_bld->main_fn,
-                                         SI_PARAM_ANCILLARY);
-       value = LLVMBuildLShr(gallivm->builder, value,
-                             lp_build_const_int32(gallivm, 8), "");
-       value = LLVMBuildAnd(gallivm->builder, value,
-                            lp_build_const_int32(gallivm, 0xf), "");
-       return value;
+       return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
+                           SI_PARAM_ANCILLARY, 8, 4);
  }
  
  /**
@@ -644,6 +662,15 @@ static void declare_system_value(
                 break;
         }
  
+       case TGSI_SEMANTIC_SAMPLEMASK:
+               /* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
+                * Therefore, force gl_SampleMaskIn to 1 for GL. */
+               if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
+                       value = uint_bld->one;
+               else
+                       value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
+               break;
+
         default:
                 assert(!"unknown system value");
                 return;
@@ -791,7 +818,7 @@ static void si_llvm_init_export_args_load(struct lp_build_tgsi_context *bld_base
  }
  
  static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
-                         LLVMValueRef *out_ptr)
+                         LLVMValueRef alpha_ptr)
  {
         struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -803,7 +830,7 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
                 LLVMValueRef alpha_pass =
                         lp_build_cmp(&bld_base->base,
                                      si_shader_ctx->shader->key.ps.alpha_func,
-                                    LLVMBuildLoad(gallivm->builder, out_ptr[3], ""),
+                                    LLVMBuildLoad(gallivm->builder, alpha_ptr, ""),
                                      alpha_ref);
                 LLVMValueRef arg =
                         lp_build_select(&bld_base->base,
@@ -825,6 +852,34 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
         si_shader_ctx->shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
  }
  
+static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
+                                         LLVMValueRef alpha_ptr)
+{
+       struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMValueRef coverage, alpha;
+
+       /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
+       coverage = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                               SI_PARAM_SAMPLE_COVERAGE);
+       coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
+
+       coverage = build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
+                                  bld_base->int_bld.elem_type,
+                                  &coverage, 1, LLVMReadNoneAttribute);
+
+       coverage = LLVMBuildUIToFP(gallivm->builder, coverage,
+                                  bld_base->base.elem_type, "");
+
+       coverage = LLVMBuildFMul(gallivm->builder, coverage,
+                                lp_build_const_float(gallivm,
+                                       1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
+
+       alpha = LLVMBuildLoad(gallivm->builder, alpha_ptr, "");
+       alpha = LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
+       LLVMBuildStore(gallivm->builder, alpha, alpha_ptr);
+}
+
  static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
                                     LLVMValueRef (*pos)[9], LLVMValueRef *out_elts)
  {
@@ -977,16 +1032,9 @@ static void si_llvm_emit_streamout(struct si_shader_context *shader,
  
         LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
  
-       LLVMValueRef so_param =
-               LLVMGetParam(shader->radeon_bld.main_fn,
-                            shader->param_streamout_config);
-
         /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
         LLVMValueRef so_vtx_count =
-               LLVMBuildAnd(builder,
-                            LLVMBuildLShr(builder, so_param,
-                                          LLVMConstInt(i32, 16, 0), ""),
-                            LLVMConstInt(i32, 127, 0), "");
+               unpack_param(shader, shader->param_streamout_config, 16, 7);
  
         LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
                                            NULL, 0, LLVMReadNoneAttribute);
@@ -1334,6 +1382,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
                 unsigned semantic_name = info->output_semantic_name[i];
                 unsigned semantic_index = info->output_semantic_index[i];
                 unsigned target;
+               LLVMValueRef alpha_ptr;
  
                 /* Select the correct target */
                 switch (semantic_name) {
@@ -1348,15 +1397,18 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
                         continue;
                 case TGSI_SEMANTIC_COLOR:
                         target = V_008DFC_SQ_EXP_MRT + semantic_index;
+                       alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3];
+
                         if (si_shader_ctx->shader->key.ps.alpha_to_one)
-                               LLVMBuildStore(bld_base->base.gallivm->builder,
-                                              bld_base->base.one,
-                                              si_shader_ctx->radeon_bld.soa.outputs[i][3]);
+                               LLVMBuildStore(base->gallivm->builder,
+                                              base->one, alpha_ptr);
  
                         if (semantic_index == 0 &&
                             si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
-                               si_alpha_test(bld_base,
-                                             si_shader_ctx->radeon_bld.soa.outputs[i]);
+                               si_alpha_test(bld_base, alpha_ptr);
+
+                       if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
+                               si_scale_alpha_by_sample_mask(bld_base, alpha_ptr);
                         break;
                 default:
                         target = 0;
@@ -1521,7 +1573,7 @@ static void tex_fetch_args(
         const struct tgsi_full_instruction * inst = emit_data->inst;
         unsigned opcode = inst->Instruction.Opcode;
         unsigned target = inst->Texture.Texture;
-       LLVMValueRef coords[4];
+       LLVMValueRef coords[5];
         LLVMValueRef address[16];
         int ref_pos;
         unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
@@ -1540,7 +1592,7 @@ static void tex_fetch_args(
                 /* Bitcast and truncate v8i32 to v16i8. */
                 LLVMValueRef res = si_shader_ctx->resources[sampler_index];
                 res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
-               res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.zero, "");
+               res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
                 res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
  
                 emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
@@ -2018,7 +2070,7 @@ static void txq_fetch_args(
                 LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index];
                 size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
                 size = LLVMBuildExtractElement(gallivm->builder, size,
-                                             lp_build_const_int32(gallivm, 2), "");
+                                             lp_build_const_int32(gallivm, 6), "");
                 emit_data->args[0] = size;
                 return;
         }
@@ -2547,6 +2599,7 @@ void si_shader_binary_read_config(const struct si_screen *sscreen,
                 case R_00B848_COMPUTE_PGM_RSRC1:
                         shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
                         shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
+                       shader->float_mode =  G_00B028_FLOAT_MODE(value);
                         break;
                 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
                         shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
@@ -2605,16 +2658,24 @@ int si_shader_binary_read(struct si_screen *sscreen,
         bool dump  = r600_can_dump_shader(&sscreen->b,
                 shader->selector ? shader->selector->tokens : NULL);
  
-       if (dump && !binary->disassembled) {
-               fprintf(stderr, "SI CODE:\n");
-               for (i = 0; i < binary->code_size; i+=4 ) {
-                       fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
+       si_shader_binary_read_config(sscreen, shader, 0);
+
+       if (dump) {
+               if (!binary->disassembled) {
+                       fprintf(stderr, "SI CODE:\n");
+                       for (i = 0; i < binary->code_size; i+=4 ) {
+                               fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
                                 binary->code[i + 2], binary->code[i + 1],
                                 binary->code[i]);
+                       }
                 }
-       }
  
-       si_shader_binary_read_config(sscreen, shader, 0);
+               fprintf(stderr, "*** SHADER STATS ***\n"
+                       "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
+                       "Scratch: %d bytes per wave\n********************\n",
+                       shader->num_sgprs, shader->num_vgprs, binary->code_size,
+                       shader->lds_size, shader->scratch_bytes_per_wave);
+       }
  
         /* copy new shader */
         code_size = binary->code_size + binary->rodata_size;
@@ -2639,13 +2700,13 @@ int si_shader_binary_read(struct si_screen *sscreen,
  }
  
  int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
-                                                       LLVMModuleRef mod)
+                   LLVMTargetMachineRef tm, LLVMModuleRef mod)
  {
         int r = 0;
         bool dump = r600_can_dump_shader(&sscreen->b,
                         shader->selector ? shader->selector->tokens : NULL);
         r = radeon_llvm_compile(mod, &shader->binary,
-               r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm);
+               r600_get_llvm_processor_name(sscreen->b.family), dump, tm);
  
         if (r) {
                 return r;
@@ -2733,7 +2794,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
                 fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
  
         r = si_compile_llvm(sscreen, si_shader_ctx->shader,
-                           bld_base->base.gallivm->module);
+                           si_shader_ctx->tm, bld_base->base.gallivm->module);
  
         radeon_llvm_dispose(&si_shader_ctx->radeon_bld);
  
@@ -2741,19 +2802,68 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
         return r;
  }
  
-int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
+static void si_dump_key(unsigned shader, union si_shader_key *key)
+{
+       int i;
+
+       fprintf(stderr, "SHADER KEY\n");
+
+       switch (shader) {
+       case PIPE_SHADER_VERTEX:
+               fprintf(stderr, "  instance_divisors = {");
+               for (i = 0; i < Elements(key->vs.instance_divisors); i++)
+                       fprintf(stderr, !i ? "%u" : ", %u",
+                               key->vs.instance_divisors[i]);
+               fprintf(stderr, "}\n");
+
+               if (key->vs.as_es)
+                       fprintf(stderr, "  gs_used_inputs = 0x%"PRIx64"\n",
+                               key->vs.gs_used_inputs);
+               fprintf(stderr, "  as_es = %u\n", key->vs.as_es);
+               break;
+
+       case PIPE_SHADER_GEOMETRY:
+               break;
+
+       case PIPE_SHADER_FRAGMENT:
+               fprintf(stderr, "  export_16bpc = 0x%X\n", key->ps.export_16bpc);
+               fprintf(stderr, "  last_cbuf = %u\n", key->ps.last_cbuf);
+               fprintf(stderr, "  color_two_side = %u\n", key->ps.color_two_side);
+               fprintf(stderr, "  alpha_func = %u\n", key->ps.alpha_func);
+               fprintf(stderr, "  alpha_to_one = %u\n", key->ps.alpha_to_one);
+               fprintf(stderr, "  poly_stipple = %u\n", key->ps.poly_stipple);
+               break;
+
+       default:
+               assert(0);
+       }
+}
+
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+                    struct si_shader *shader)
  {
         struct si_shader_selector *sel = shader->selector;
+       struct tgsi_token *tokens = sel->tokens;
         struct si_shader_context si_shader_ctx;
         struct lp_build_tgsi_context * bld_base;
+       struct tgsi_shader_info stipple_shader_info;
         LLVMModuleRef mod;
         int r = 0;
+       bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
+                           shader->key.ps.poly_stipple;
         bool dump = r600_can_dump_shader(&sscreen->b, sel->tokens);
  
+       if (poly_stipple) {
+               tokens = util_pstipple_create_fragment_shader(tokens, NULL,
+                                               SI_POLY_STIPPLE_SAMPLER);
+               tgsi_scan_shader(tokens, &stipple_shader_info);
+       }
+
         /* Dump TGSI code before doing TGSI->LLVM conversion in case the
          * conversion fails. */
         if (dump) {
-               tgsi_dump(sel->tokens, 0);
+               si_dump_key(sel->type, &shader->key);
+               tgsi_dump(tokens, 0);
                 si_dump_streamout(&sel->so);
         }
  
@@ -2770,7 +2880,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
                 shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
  
         shader->uses_instanceid = sel->info.uses_instanceid;
-       bld_base->info = &sel->info;
+       bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
         bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
  
         bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
@@ -2800,11 +2910,10 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
         }
  
         si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
-       si_shader_ctx.tokens = sel->tokens;
-       tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
         si_shader_ctx.shader = shader;
-       si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
+       si_shader_ctx.type = tgsi_get_processor_type(tokens);
         si_shader_ctx.screen = sscreen;
+       si_shader_ctx.tm = tm;
  
         switch (si_shader_ctx.type) {
         case TGSI_PROCESSOR_VERTEX:
@@ -2852,7 +2961,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
                                         bld_base->uint_bld.elem_type, "");
         }
  
-       if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
+       if (!lp_build_tgsi_llvm(bld_base, tokens)) {
                 fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
                 goto out;
         }
@@ -2860,7 +2969,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
         radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
  
         mod = bld_base->base.gallivm->module;
-       r = si_compile_llvm(sscreen, shader, mod);
+       r = si_compile_llvm(sscreen, shader, tm, mod);
         if (r) {
                 fprintf(stderr, "LLVM failed to compile shader\n");
                 goto out;
@@ -2881,12 +2990,11 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
                 }
         }
  
-       tgsi_parse_free(&si_shader_ctx.parse);
-
  out:
         for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
                 FREE(si_shader_ctx.constants[i]);
-
+       if (poly_stipple)
+               tgsi_free_tokens(tokens);
         return r;
  }