radeonsi: Add header and footer to shader stat dump

[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c

index 0ef58a7310e7fd5be8898d22ce7734df84689fd7..89f02ab041097b9fb58693fd4d2792ee83fba619 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -72,6 +72,7 @@ struct si_shader_context
         int param_streamout_offset[4];
         int param_vertex_id;
         int param_instance_id;
+       LLVMTargetMachineRef tm;
         LLVMValueRef const_md;
         LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
         LLVMValueRef ddxy_lds;
@@ -191,6 +192,30 @@ static int get_param_index(unsigned semantic_name, unsigned index,
         return -1;
  }
  
+/**
+ * Get shader input parameter 'param' and extract bits [rshift, rshift+bitwidth).
+ */
+static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
+                                unsigned param, unsigned rshift,
+                                unsigned bitwidth)
+{
+       struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
+       LLVMValueRef value = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                         param);
+
+       if (rshift)
+               value = LLVMBuildLShr(gallivm->builder, value,
+                                     lp_build_const_int32(gallivm, rshift), "");
+
+       /* A field that reaches bit 31 is already isolated by the shift. */
+       if (rshift + bitwidth < 32) {
+               unsigned mask = (1u << bitwidth) - 1; /* 1 << 31 on int is UB */
+               value = LLVMBuildAnd(gallivm->builder, value,
+                                    lp_build_const_int32(gallivm, mask), "");
+       }
+       return value;
+}
+
  /**
   * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
   * It's equivalent to doing a load from &base_ptr[index].
@@ -561,14 +586,8 @@ static void declare_input_fs(
  
  static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
  {
-       struct gallivm_state *gallivm = &radeon_bld->gallivm;
-       LLVMValueRef value = LLVMGetParam(radeon_bld->main_fn,
-                                         SI_PARAM_ANCILLARY);
-       value = LLVMBuildLShr(gallivm->builder, value,
-                             lp_build_const_int32(gallivm, 8), "");
-       value = LLVMBuildAnd(gallivm->builder, value,
-                            lp_build_const_int32(gallivm, 0xf), "");
-       return value;
+       return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
+                           SI_PARAM_ANCILLARY, 8, 4); /* sample ID = bits [11:8] */
  }
  
  /**
@@ -643,6 +662,15 @@ static void declare_system_value(
                 break;
         }
  
+       case TGSI_SEMANTIC_SAMPLEMASK:
+               /* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
+                * Therefore, force gl_SampleMaskIn to 1 for GL. */
+               if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
+                       value = uint_bld->one;
+               else
+                       value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
+               break;
+
         default:
                 assert(!"unknown system value");
                 return;
@@ -790,7 +818,7 @@ static void si_llvm_init_export_args_load(struct lp_build_tgsi_context *bld_base
  }
  
  static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
-                         LLVMValueRef *out_ptr)
+                         LLVMValueRef alpha_ptr)
  {
         struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
@@ -802,7 +830,7 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
                 LLVMValueRef alpha_pass =
                         lp_build_cmp(&bld_base->base,
                                      si_shader_ctx->shader->key.ps.alpha_func,
-                                    LLVMBuildLoad(gallivm->builder, out_ptr[3], ""),
+                                    LLVMBuildLoad(gallivm->builder, alpha_ptr, ""),
                                      alpha_ref);
                 LLVMValueRef arg =
                         lp_build_select(&bld_base->base,
@@ -824,6 +852,34 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
         si_shader_ctx->shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
  }
  
+/* Scale the alpha stored at alpha_ptr by the fraction of covered samples:
+ * alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
+static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
+                                         LLVMValueRef alpha_ptr)
+{
+       struct si_shader_context *ctx = si_shader_context(bld_base);
+       struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
+       LLVMValueRef mask, factor, alpha;
+
+       /* Coverage input, passed through bitcast() as TGSI_TYPE_SIGNED. */
+       mask = bitcast(bld_base, TGSI_TYPE_SIGNED,
+                      LLVMGetParam(ctx->radeon_bld.main_fn,
+                                   SI_PARAM_SAMPLE_COVERAGE));
+
+       /* factor = (float)popcount(mask) * (1.0 / SI_NUM_SMOOTH_AA_SAMPLES) */
+       factor = build_intrinsic(builder, "llvm.ctpop.i32",
+                                bld_base->int_bld.elem_type,
+                                &mask, 1, LLVMReadNoneAttribute);
+       factor = LLVMBuildUIToFP(builder, factor, bld_base->base.elem_type, "");
+       factor = LLVMBuildFMul(builder, factor, lp_build_const_float(gallivm,
+                                       1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
+
+       alpha = LLVMBuildLoad(builder, alpha_ptr, "");
+       alpha = LLVMBuildFMul(builder, alpha, factor, "");
+       LLVMBuildStore(builder, alpha, alpha_ptr);
+}
+
  static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
                                     LLVMValueRef (*pos)[9], LLVMValueRef *out_elts)
  {
@@ -976,16 +1032,9 @@ static void si_llvm_emit_streamout(struct si_shader_context *shader,
  
         LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
  
-       LLVMValueRef so_param =
-               LLVMGetParam(shader->radeon_bld.main_fn,
-                            shader->param_streamout_config);
-
         /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
         LLVMValueRef so_vtx_count =
-               LLVMBuildAnd(builder,
-                            LLVMBuildLShr(builder, so_param,
-                                          LLVMConstInt(i32, 16, 0), ""),
-                            LLVMConstInt(i32, 127, 0), "");
+               unpack_param(shader, shader->param_streamout_config, 16, 7);
  
         LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
                                            NULL, 0, LLVMReadNoneAttribute);
@@ -1333,6 +1382,7 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
                 unsigned semantic_name = info->output_semantic_name[i];
                 unsigned semantic_index = info->output_semantic_index[i];
                 unsigned target;
+               LLVMValueRef alpha_ptr;
  
                 /* Select the correct target */
                 switch (semantic_name) {
@@ -1347,15 +1397,18 @@ static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
                         continue;
                 case TGSI_SEMANTIC_COLOR:
                         target = V_008DFC_SQ_EXP_MRT + semantic_index;
+                       alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3];
+
                         if (si_shader_ctx->shader->key.ps.alpha_to_one)
-                               LLVMBuildStore(bld_base->base.gallivm->builder,
-                                              bld_base->base.one,
-                                              si_shader_ctx->radeon_bld.soa.outputs[i][3]);
+                               LLVMBuildStore(base->gallivm->builder,
+                                              base->one, alpha_ptr);
  
                         if (semantic_index == 0 &&
                             si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
-                               si_alpha_test(bld_base,
-                                             si_shader_ctx->radeon_bld.soa.outputs[i]);
+                               si_alpha_test(bld_base, alpha_ptr);
+
+                       if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
+                               si_scale_alpha_by_sample_mask(bld_base, alpha_ptr);
                         break;
                 default:
                         target = 0;
@@ -1520,7 +1573,7 @@ static void tex_fetch_args(
         const struct tgsi_full_instruction * inst = emit_data->inst;
         unsigned opcode = inst->Instruction.Opcode;
         unsigned target = inst->Texture.Texture;
-       LLVMValueRef coords[4];
+       LLVMValueRef coords[5];
         LLVMValueRef address[16];
         int ref_pos;
         unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
@@ -2616,8 +2669,10 @@ int si_shader_binary_read(struct si_screen *sscreen,
                                 binary->code[i]);
                         }
                 }
-               fprintf(stderr, "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
-                               "Scratch: %d bytes per wave\n",
+
+               fprintf(stderr, "*** SHADER STATS ***\n"
+                       "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
+                       "Scratch: %d bytes per wave\n********************\n",
                         shader->num_sgprs, shader->num_vgprs, binary->code_size,
                         shader->lds_size, shader->scratch_bytes_per_wave);
         }
@@ -2645,13 +2700,13 @@ int si_shader_binary_read(struct si_screen *sscreen,
  }
  
  int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
-                                                       LLVMModuleRef mod)
+                   LLVMTargetMachineRef tm, LLVMModuleRef mod)
  {
         int r = 0;
         bool dump = r600_can_dump_shader(&sscreen->b,
                         shader->selector ? shader->selector->tokens : NULL);
         r = radeon_llvm_compile(mod, &shader->binary,
-               r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm);
+               r600_get_llvm_processor_name(sscreen->b.family), dump, tm);
  
         if (r) {
                 return r;
@@ -2739,7 +2794,7 @@ static int si_generate_gs_copy_shader(struct si_screen *sscreen,
                 fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
  
         r = si_compile_llvm(sscreen, si_shader_ctx->shader,
-                           bld_base->base.gallivm->module);
+                           si_shader_ctx->tm, bld_base->base.gallivm->module);
  
         radeon_llvm_dispose(&si_shader_ctx->radeon_bld);
  
@@ -2784,7 +2839,8 @@ static void si_dump_key(unsigned shader, union si_shader_key *key)
         }
  }
  
-int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
+int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+                    struct si_shader *shader)
  {
         struct si_shader_selector *sel = shader->selector;
         struct tgsi_token *tokens = sel->tokens;
@@ -2857,6 +2913,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
         si_shader_ctx.shader = shader;
         si_shader_ctx.type = tgsi_get_processor_type(tokens);
         si_shader_ctx.screen = sscreen;
+       si_shader_ctx.tm = tm;
  
         switch (si_shader_ctx.type) {
         case TGSI_PROCESSOR_VERTEX:
@@ -2912,7 +2969,7 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
         radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
  
         mod = bld_base->base.gallivm->module;
-       r = si_compile_llvm(sscreen, shader, mod);
+       r = si_compile_llvm(sscreen, shader, tm, mod);
         if (r) {
                 fprintf(stderr, "LLVM failed to compile shader\n");
                 goto out;