aco: add various compiler statistics
author: Rhys Perry <pendingchaos02@gmail.com>
Wed, 4 Dec 2019 15:19:56 +0000 (15:19 +0000)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 3 Apr 2020 12:12:08 +0000 (12:12 +0000)
Adds these statistics:
- hash of code and constant data
- number of instructions
- number of copies from pseudo-instructions
- number of branches
- estimate of cycles spent not waiting in s_waitcnt
- number of vmem/smem "clauses"
- sgpr/vgpr usage before scheduling

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2965>

src/amd/compiler/aco_interface.cpp
src/amd/compiler/aco_ir.h
src/amd/compiler/aco_lower_to_hw_instr.cpp
src/amd/compiler/aco_statistics.cpp [new file with mode: 0644]
src/amd/compiler/meson.build

index 378a138d245440ac352b673b8c8b235f8c460cd9..104436d33c6b296f6382801909ebbe8109158de7 100644 (file)
@@ -54,6 +54,18 @@ static void init()
 }
 }
 
+static radv_compiler_statistic_info statistic_infos[] = {
+   [aco::statistic_hash] = {"Hash", "CRC32 hash of code and constant data"},
+   [aco::statistic_instructions] = {"Instructions", "Instruction count"},
+   [aco::statistic_copies] = {"Copies", "Copy instructions created for pseudo-instructions"},
+   [aco::statistic_branches] = {"Branches", "Branch instructions"},
+   [aco::statistic_cycles] = {"Busy Cycles", "Estimate of busy cycles"},
+   [aco::statistic_vmem_clauses] = {"VMEM Clause", "Number of VMEM clauses (includes 1-sized clauses)"},
+   [aco::statistic_smem_clauses] = {"SMEM Clause", "Number of SMEM clauses (includes 1-sized clauses)"},
+   [aco::statistic_sgpr_presched] = {"Pre-Sched SGPRs", "SGPR usage before scheduling"},
+   [aco::statistic_vgpr_presched] = {"Pre-Sched VGPRs", "VGPR usage before scheduling"},
+};
+
 void aco_compile_shader(unsigned shader_count,
                         struct nir_shader *const *shaders,
                         struct radv_shader_binary **binary,
@@ -64,6 +76,10 @@ void aco_compile_shader(unsigned shader_count,
    ac_shader_config config = {0};
    std::unique_ptr<aco::Program> program{new aco::Program};
 
+   program->collect_statistics = args->options->record_ir;
+   if (program->collect_statistics)
+      memset(program->statistics, 0, sizeof(program->statistics));
+
    /* Instruction Selection */
    if (args->is_gs_copy_shader)
       aco::select_gs_copy_shader(program.get(), shaders[0], &config, args);
@@ -94,6 +110,9 @@ void aco_compile_shader(unsigned shader_count,
    aco::live live_vars = aco::live_var_analysis(program.get(), args->options);
    aco::spill(program.get(), live_vars, args->options);
 
+   if (program->collect_statistics)
+      aco::collect_presched_stats(program.get());
+
    //std::cerr << "Before Schedule:\n";
    //aco_print_program(program.get(), stderr);
    aco::schedule_program(program.get(), live_vars);
@@ -139,10 +158,16 @@ void aco_compile_shader(unsigned shader_count,
    //std::cerr << "After Insert-Waitcnt:\n";
    //aco_print_program(program.get(), stderr);
 
+   if (program->collect_statistics)
+      aco::collect_preasm_stats(program.get());
+
    /* Assembly */
    std::vector<uint32_t> code;
    unsigned exec_size = aco::emit_program(program.get(), code);
 
+   if (program->collect_statistics)
+      aco::collect_postasm_stats(program.get(), code);
+
    bool get_disasm = args->options->dump_shader || args->options->record_ir;
 
    size_t size = llvm_ir.size();
@@ -156,6 +181,11 @@ void aco_compile_shader(unsigned shader_count,
       size += disasm.size();
    }
 
+   size_t stats_size = 0;
+   if (program->collect_statistics)
+      stats_size = sizeof(radv_compiler_statistics) + aco::num_statistics * sizeof(uint32_t);
+   size += stats_size;
+
    size += code.size() * sizeof(uint32_t) + sizeof(radv_shader_binary_legacy);
    /* We need to calloc to prevent unintialized data because this will be used
     * directly for the disk cache. Uninitialized data can appear because of
@@ -168,9 +198,15 @@ void aco_compile_shader(unsigned shader_count,
    legacy_binary->base.is_gs_copy_shader = args->is_gs_copy_shader;
    legacy_binary->base.total_size = size;
 
-   legacy_binary->stats_size = 0;
+   if (program->collect_statistics) {
+      radv_compiler_statistics *statistics = (radv_compiler_statistics *)legacy_binary->data;
+      statistics->count = aco::num_statistics;
+      statistics->infos = statistic_infos;
+      memcpy(statistics->values, program->statistics, aco::num_statistics * sizeof(uint32_t));
+   }
+   legacy_binary->stats_size = stats_size;
 
-   memcpy(legacy_binary->data, code.data(), code.size() * sizeof(uint32_t));
+   memcpy(legacy_binary->data + legacy_binary->stats_size, code.data(), code.size() * sizeof(uint32_t));
    legacy_binary->exec_size = exec_size;
    legacy_binary->code_size = code.size() * sizeof(uint32_t);
 
@@ -178,10 +214,10 @@ void aco_compile_shader(unsigned shader_count,
    legacy_binary->disasm_size = 0;
    legacy_binary->ir_size = llvm_ir.size();
 
-   llvm_ir.copy((char*) legacy_binary->data + legacy_binary->code_size, llvm_ir.size());
+   llvm_ir.copy((char*) legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size, llvm_ir.size());
 
    if (get_disasm) {
-      disasm.copy((char*) legacy_binary->data + legacy_binary->code_size + llvm_ir.size(), disasm.size());
+      disasm.copy((char*) legacy_binary->data + legacy_binary->stats_size + legacy_binary->code_size + llvm_ir.size(), disasm.size());
       legacy_binary->disasm_size = disasm.size();
    }
 
index ace84db1018c3841421a6da4a767fa71627fb783..c6213e0c04ec241785e7c5b6b76392b6e5d95211 100644 (file)
@@ -1219,6 +1219,19 @@ static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
 static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
 static constexpr Stage geometry_gs = sw_gs | hw_gs;
 
+/* Indices into Program::statistics (one uint32_t counter per enumerator). */
+enum statistic {
+   statistic_hash,          /* CRC32 hash of code and constant data */
+   statistic_instructions,  /* instruction count */
+   statistic_copies,        /* copy instructions created for pseudo-instructions */
+   statistic_branches,      /* branch instructions */
+   statistic_cycles,        /* estimate of busy cycles */
+   statistic_vmem_clauses,  /* number of VMEM clauses (includes 1-sized clauses) */
+   statistic_smem_clauses,  /* number of SMEM clauses (includes 1-sized clauses) */
+   statistic_sgpr_presched, /* SGPR usage before scheduling */
+   statistic_vgpr_presched, /* VGPR usage before scheduling */
+   num_statistics           /* number of statistics; used as an array size, so it must stay last */
+};
+
 class Program final {
 public:
    float_mode next_fp_mode;
@@ -1257,6 +1270,9 @@ public:
    bool needs_vcc = false;
    bool needs_flat_scr = false;
 
+   bool collect_statistics = false;
+   uint32_t statistics[num_statistics];
+
    uint32_t allocateId()
    {
       assert(allocationID <= 16777215);
@@ -1337,6 +1353,10 @@ void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
 #define perfwarn(program, cond, msg, ...) do {} while(0)
 #endif
 
+void collect_presched_stats(Program *program);
+void collect_preasm_stats(Program *program);
+void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
+
 void aco_print_instr(Instruction *instr, FILE *output);
 void aco_print_program(Program *program, FILE *output);
 
index 6f2f54f6992fa9044ecb2a36619c573950c227ff..606f2fde65cf8a80e20b43864bafbd19a88d748d 100644 (file)
@@ -784,6 +784,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
 
          copy_map.erase(it);
          it = copy_map.begin();
+         ctx->program->statistics[statistic_copies]++;
          continue;
       } else {
          /* the target reg is used as operand, check the next entry */
@@ -813,6 +814,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
       Definition op_as_def = Definition(swap.op.physReg(), swap.op.regClass());
       if (chip_class >= GFX9 && swap.def.getTemp().type() == RegType::vgpr) {
          bld.vop1(aco_opcode::v_swap_b32, swap.def, op_as_def, swap.op, def_as_op);
+         ctx->program->statistics[statistic_copies]++;
       } else if (swap.op.physReg() == scc || swap.def.physReg() == scc) {
          /* we need to swap scc and another sgpr */
          assert(!preserve_scc);
@@ -822,6 +824,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
          bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), Operand(scc, s1));
          bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(other, s1), Operand(0u));
          bld.sop1(aco_opcode::s_mov_b32, Definition(other, s1), Operand(pi->scratch_sgpr, s1));
+         ctx->program->statistics[statistic_copies] += 3;
       } else if (swap.def.getTemp().type() == RegType::sgpr) {
          if (preserve_scc) {
             bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), swap.op);
@@ -832,10 +835,12 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
             bld.sop2(aco_opcode::s_xor_b32, swap.def, Definition(scc, s1), swap.op, def_as_op);
             bld.sop2(aco_opcode::s_xor_b32, op_as_def, Definition(scc, s1), swap.op, def_as_op);
          }
+         ctx->program->statistics[statistic_copies] += 3;
       } else {
          bld.vop2(aco_opcode::v_xor_b32, op_as_def, swap.op, def_as_op);
          bld.vop2(aco_opcode::v_xor_b32, swap.def, swap.op, def_as_op);
          bld.vop2(aco_opcode::v_xor_b32, op_as_def, swap.op, def_as_op);
+         ctx->program->statistics[statistic_copies] += 3;
       }
 
       /* change the operand reg of the target's use */
diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp
new file mode 100644 (file)
index 0000000..2e78ab6
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright © 2020 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+#include "aco_ir.h"
+#include "util/crc32.h"
+
+namespace aco {
+
+/* sgpr_presched/vgpr_presched
+ *
+ * Folds the register demand of every block into a single value with
+ * RegisterDemand::update() (presumably a per-component maximum -- confirm
+ * against its definition) and stores the resulting SGPR and VGPR counts.
+ */
+void collect_presched_stats(Program *program)
+{
+   RegisterDemand total_demand;
+   for (Block& blk : program->blocks)
+      total_demand.update(blk.register_demand);
+
+   uint32_t *stats = program->statistics;
+   stats[statistic_sgpr_presched] = total_demand.sgpr;
+   stats[statistic_vgpr_presched] = total_demand.vgpr;
+}
+
+/* instructions/branches/vmem_clauses/smem_clauses/cycles */
+void collect_preasm_stats(Program *program)
+{
+   for (Block& block : program->blocks) {
+      std::set<Temp> vmem_clause_res;
+      std::set<Temp> smem_clause_res;
+
+      program->statistics[statistic_instructions] += block.instructions.size();
+
+      for (aco_ptr<Instruction>& instr : block.instructions) {
+         if (instr->format == Format::SOPP && static_cast<SOPP_instruction*>(instr.get())->block != -1)
+            program->statistics[statistic_branches]++;
+
+         if (instr->opcode == aco_opcode::p_constaddr)
+            program->statistics[statistic_instructions] += 2;
+
+         if (instr->isVMEM() && !instr->operands.empty()) {
+            vmem_clause_res.insert(instr->operands[0].getTemp());
+         } else {
+            program->statistics[statistic_vmem_clauses] += vmem_clause_res.size();
+            vmem_clause_res.clear();
+         }
+
+         if (instr->format == Format::SMEM && !instr->operands.empty()) {
+            if (instr->operands[0].size() == 2)
+               smem_clause_res.insert(Temp(0, s2));
+            else
+               smem_clause_res.insert(instr->operands[0].getTemp());
+         } else {
+            program->statistics[statistic_smem_clauses] += smem_clause_res.size();
+            smem_clause_res.clear();
+          }
+
+         /* TODO: this incorrectly assumes instructions always take 4 cycles */
+         /* assume loops execute 4 times (TODO: it would be nice to be able to consider loop unrolling) */
+         unsigned iter = 1 << (block.loop_nest_depth * 2);
+         program->statistics[statistic_cycles] += 4 * iter;
+      }
+
+      program->statistics[statistic_vmem_clauses] += vmem_clause_res.size();
+      program->statistics[statistic_smem_clauses] += smem_clause_res.size();
+   }
+}
+
+/* hash
+ *
+ * Stores the CRC32 of the assembled dwords in `code` as the hash statistic.
+ */
+void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code)
+{
+   const size_t code_bytes = code.size() * sizeof(uint32_t);
+   program->statistics[statistic_hash] = util_hash_crc32(code.data(), code_bytes);
+}
+
+}
index 8a0ebb0befc5f71bb1271243cb958a32aeee489d..44b56baab4c21a0283797ab956d7a101f2085589 100644 (file)
@@ -76,8 +76,9 @@ libaco_files = files(
   'aco_print_asm.cpp',
   'aco_print_ir.cpp',
   'aco_scheduler.cpp',
-  'aco_ssa_elimination.cpp',
   'aco_spill.cpp',
+  'aco_ssa_elimination.cpp',
+  'aco_statistics.cpp',
   'aco_util.h',
   'aco_validate.cpp',
 )