Added a few more stubs so that control reaches DestroyDevice().
[mesa.git] / src / intel / perf / gen_perf.py
index ef3aca64f2e6de20ad5dceba1e3b0054486aba6f..0d0aae90b5f3b089434dd98d7b3741c4d9119a3d 100644 (file)
@@ -24,7 +24,7 @@ import os
 import sys
 import textwrap
 
-import xml.etree.cElementTree as et
+import xml.etree.ElementTree as et
 
 hashed_funcs = {}
 
@@ -105,7 +105,11 @@ def emit_uadd(tmp_id, args):
 def emit_udiv(tmp_id, args):
     c("uint64_t tmp{0} = {1};".format(tmp_id, args[1]))
     c("uint64_t tmp{0} = {1};".format(tmp_id + 1, args[0]))
-    c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id + 2, tmp_id + 1, tmp_id))
+    if args[0].isdigit():
+        assert int(args[0]) > 0
+        c("uint64_t tmp{0} = tmp{2} / tmp{1};".format(tmp_id + 2, tmp_id + 1, tmp_id))
+    else:
+        c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id + 2, tmp_id + 1, tmp_id))
     return tmp_id + 3
 
 def emit_umul(tmp_id, args):
@@ -181,7 +185,10 @@ hw_vars["$EuSlicesTotalCount"] = "perf->sys_vars.n_eu_slices"
 hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
 hw_vars["$EuThreadsCount"] = "perf->sys_vars.eu_threads_count"
 hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
+# subslice_mask is interchangeable with subslice/dual-subslice since Gen12+
+# only has dual subslices which can be assimilated with 16EUs subslices.
 hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
+hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
 hw_vars["$GpuTimestampFrequency"] = "perf->sys_vars.timestamp_frequency"
 hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq"
 hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq"
@@ -278,7 +285,7 @@ def output_counter_read(gen, set, counter):
         read_eq = counter.get('equation')
 
         c("static " + ret_type)
-        c(counter.read_sym + "(UNUSED struct gen_perf *perf,\n")
+        c(counter.read_sym + "(UNUSED struct gen_perf_config *perf,\n")
         c_indent(len(counter.read_sym) + 1)
         c("const struct gen_perf_query_info *query,\n")
         c("const uint64_t *accumulator)\n")
@@ -313,7 +320,7 @@ def output_counter_max(gen, set, counter):
             ret_type = "uint64_t"
 
         c("static " + ret_type)
-        c(counter.max_sym() + "(struct gen_perf *perf)\n")
+        c(counter.max_sym() + "(struct gen_perf_config *perf)\n")
         c("{")
         c_indent(3)
         output_rpn_equation_code(set, counter, max_eq)
@@ -350,6 +357,10 @@ def output_availability(set, availability, counter_name):
         c_outdent(4)
 
 
+def output_units(unit):
+    return unit.replace(' ', '_').upper()
+
+
 def output_counter_report(set, counter, current_offset):
     data_type = counter.get('data_type')
     data_type_uc = data_type.upper()
@@ -375,8 +386,11 @@ def output_counter_report(set, counter, current_offset):
     c("counter->oa_counter_read_" + data_type + " = " + set.read_funcs[counter.get('symbol_name')] + ";\n")
     c("counter->name = \"" + counter.get('name') + "\";\n")
     c("counter->desc = \"" + counter.get('description') + "\";\n")
+    c("counter->symbol_name = \"" + counter.get('symbol_name') + "\";\n")
+    c("counter->category = \"" + counter.get('mdapi_group') + "\";\n")
     c("counter->type = GEN_PERF_COUNTER_TYPE_" + semantic_type_uc + ";\n")
     c("counter->data_type = GEN_PERF_COUNTER_DATA_TYPE_" + data_type_uc + ";\n")
+    c("counter->units = GEN_PERF_COUNTER_UNITS_" + output_units(counter.get('units')) + ";\n")
     c("counter->raw_max = " + set.max_values[counter.get('symbol_name')] + ";\n")
 
     current_offset = pot_align(current_offset, sizeof(c_type))
@@ -410,6 +424,7 @@ def compute_register_lengths(set):
 
 def generate_register_configs(set):
     register_configs = set.findall('register_config')
+
     for register_config in register_configs:
         t = register_types[register_config.get('type')]
 
@@ -418,9 +433,15 @@ def generate_register_configs(set):
             output_availability(set, availability, register_config.get('type') + ' register config')
             c_indent(3)
 
-        for register in register_config.findall('register'):
-            c("query->%s[query->n_%s++] = (struct gen_perf_query_register_prog) { .reg = %s, .val = %s };" %
-              (t, t, register.get('address'), register.get('value')))
+        registers = register_config.findall('register')
+        c("static const struct gen_perf_query_register_prog %s[] = {" % t)
+        c_indent(3)
+        for register in registers:
+            c("{ .reg = %s, .val = %s }," % (register.get('address'), register.get('value')))
+        c_outdent(3)
+        c("};")
+        c("query->config.%s = %s;" % (t, t))
+        c("query->config.n_%s = ARRAY_SIZE(%s);" % (t, t))
 
         if availability:
             c_outdent(3)
@@ -611,7 +632,7 @@ def main():
     h(textwrap.dedent("""\
         #pragma once
 
-        struct gen_perf;
+        struct gen_perf_config;
 
         """))
 
@@ -623,6 +644,7 @@ def main():
         #include <drm-uapi/i915_drm.h>
 
         #include "util/hash_table.h"
+        #include "util/ralloc.h"
 
         """))
 
@@ -652,60 +674,45 @@ def main():
             counters = set.counters
 
             c("\n")
-            register_lengths = compute_register_lengths(set);
-            for reg_type, reg_length in register_lengths.items():
-                c("static struct gen_perf_query_register_prog {0}_{1}_{2}[{3}];".format(gen.chipset,
-                                                                                        set.underscore_name,
-                                                                                        reg_type, reg_length))
-
-            c("\nstatic struct gen_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(gen.chipset, set.underscore_name, len(counters)))
-            c("static struct gen_perf_query_info " + gen.chipset + "_" + set.underscore_name + "_query = {\n")
+            c("\nstatic void\n")
+            c("{0}_register_{1}_counter_query(struct gen_perf_config *perf)\n".format(gen.chipset, set.underscore_name))
+            c("{\n")
             c_indent(3)
 
-            c(".kind = GEN_PERF_QUERY_TYPE_OA,\n")
-            c(".name = \"" + set.name + "\",\n")
-            c(".guid = \"" + set.hw_config_guid + "\",\n")
+            c("struct gen_perf_query_info *query = rzalloc(perf, struct gen_perf_query_info);\n")
+            c("\n")
+            c("query->kind = GEN_PERF_QUERY_TYPE_OA;\n")
+            c("query->name = \"" + set.name + "\";\n")
+            c("query->symbol_name = \"" + set.symbol_name + "\";\n")
+            c("query->guid = \"" + set.hw_config_guid + "\";\n")
 
-            c(".counters = {0}_{1}_query_counters,".format(gen.chipset, set.underscore_name))
-            c(".n_counters = 0,")
-            c(".oa_metrics_set_id = 0, /* determined at runtime, via sysfs */")
+            c("query->counters = rzalloc_array(query, struct gen_perf_query_counter, %u);" % len(counters))
+            c("query->n_counters = 0;")
+            c("query->oa_metrics_set_id = 0; /* determined at runtime, via sysfs */")
 
             if gen.chipset == "hsw":
                 c(textwrap.dedent("""\
-                    .oa_format = I915_OA_FORMAT_A45_B8_C8,
-
+                    query->oa_format = I915_OA_FORMAT_A45_B8_C8;
                     /* Accumulation buffer offsets... */
-                    .gpu_time_offset = 0,
-                    .a_offset = 1,
-                    .b_offset = 46,
-                    .c_offset = 54,
+                    query->gpu_time_offset = 0;
+                    query->a_offset = 1;
+                    query->b_offset = 46;
+                    query->c_offset = 54;
                 """))
             else:
                 c(textwrap.dedent("""\
-                    .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,
-
+                    query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
                     /* Accumulation buffer offsets... */
-                    .gpu_time_offset = 0,
-                    .gpu_clock_offset = 1,
-                    .a_offset = 2,
-                    .b_offset = 38,
-                    .c_offset = 46,
+                    query->gpu_time_offset = 0;
+                    query->gpu_clock_offset = 1;
+                    query->a_offset = 2;
+                    query->b_offset = 38;
+                    query->c_offset = 46;
                 """))
 
-            for reg_type, reg_length in register_lengths.items():
-                c(".{0} = {1}_{2}_{3},".format(reg_type, gen.chipset, set.underscore_name, reg_type))
-                c(".n_{0} = 0, /* Determined at runtime */".format(reg_type))
 
-            c_outdent(3)
-            c("};\n")
-
-            c("\nstatic void\n")
-            c("{0}_register_{1}_counter_query(struct gen_perf *perf)\n".format(gen.chipset, set.underscore_name))
-            c("{\n")
-            c_indent(3)
-
-            c("static struct gen_perf_query_info *query = &" + gen.chipset + "_" + set.underscore_name + "_query;\n")
-            c("struct gen_perf_query_counter *counter;\n")
+            c("\n")
+            c("struct gen_perf_query_counter *counter = query->counters;\n")
 
             c("\n")
             c("/* Note: we're assuming there can't be any variation in the definition ")
@@ -731,10 +738,10 @@ def main():
             c_outdent(3)
             c("}\n")
 
-        h("void gen_oa_register_queries_" + gen.chipset + "(struct gen_perf *perf);\n")
+        h("void gen_oa_register_queries_" + gen.chipset + "(struct gen_perf_config *perf);\n")
 
         c("\nvoid")
-        c("gen_oa_register_queries_" + gen.chipset + "(struct gen_perf *perf)")
+        c("gen_oa_register_queries_" + gen.chipset + "(struct gen_perf_config *perf)")
         c("{")
         c_indent(3)