1 # Copyright (c) 2015-2017 Intel Corporation
3 # Permission is hereby granted, free of charge, to any person obtaining a
4 # copy of this software and associated documentation files (the "Software"),
5 # to deal in the Software without restriction, including without limitation
6 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 # and/or sell copies of the Software, and to permit persons to whom the
8 # Software is furnished to do so, subject to the following conditions:
10 # The above copyright notice and this permission notice (including the next
11 # paragraph) shall be included in all copies or substantial portions of the
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 import xml
.etree
.cElementTree
as et
37 code
= ' '.join(map(str,args
))
38 for line
in code
.splitlines():
39 text
= ''.rjust(_c_indent
) + line
40 c_file
.write(text
.rstrip() + "\n")
42 # indented, but no trailing newline...
43 def c_line_start(code
):
45 c_file
.write(''.rjust(_c_indent
) + code
)
52 _c_indent
= _c_indent
+ n
55 _c_indent
= _c_indent
- n
62 code
= ' '.join(map(str,args
))
63 for line
in code
.splitlines():
64 text
= ''.rjust(_h_indent
) + line
65 header_file
.write(text
.rstrip() + "\n")
69 _h_indent
= _h_indent
+ n
72 _h_indent
= _h_indent
- n
75 def emit_fadd(tmp_id
, args
):
76 c("double tmp{0} = {1} + {2};".format(tmp_id
, args
[1], args
[0]))
79 # Be careful to check for divide by zero...
80 def emit_fdiv(tmp_id
, args
):
81 c("double tmp{0} = {1};".format(tmp_id
, args
[1]))
82 c("double tmp{0} = {1};".format(tmp_id
+ 1, args
[0]))
83 c("double tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id
+ 2, tmp_id
+ 1, tmp_id
))
86 def emit_fmax(tmp_id
, args
):
87 c("double tmp{0} = {1};".format(tmp_id
, args
[1]))
88 c("double tmp{0} = {1};".format(tmp_id
+ 1, args
[0]))
89 c("double tmp{0} = MAX(tmp{1}, tmp{2});".format(tmp_id
+ 2, tmp_id
, tmp_id
+ 1))
92 def emit_fmul(tmp_id
, args
):
93 c("double tmp{0} = {1} * {2};".format(tmp_id
, args
[1], args
[0]))
96 def emit_fsub(tmp_id
, args
):
97 c("double tmp{0} = {1} - {2};".format(tmp_id
, args
[1], args
[0]))
100 def emit_read(tmp_id
, args
):
101 type = args
[1].lower()
102 c("uint64_t tmp{0} = accumulator[query->{1}_offset + {2}];".format(tmp_id
, type, args
[0]))
105 def emit_uadd(tmp_id
, args
):
106 c("uint64_t tmp{0} = {1} + {2};".format(tmp_id
, args
[1], args
[0]))
109 # Be careful to check for divide by zero...
110 def emit_udiv(tmp_id
, args
):
111 c("uint64_t tmp{0} = {1};".format(tmp_id
, args
[1]))
112 c("uint64_t tmp{0} = {1};".format(tmp_id
+ 1, args
[0]))
113 c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(tmp_id
+ 2, tmp_id
+ 1, tmp_id
))
116 def emit_umul(tmp_id
, args
):
117 c("uint64_t tmp{0} = {1} * {2};".format(tmp_id
, args
[1], args
[0]))
120 def emit_usub(tmp_id
, args
):
121 c("uint64_t tmp{0} = {1} - {2};".format(tmp_id
, args
[1], args
[0]))
124 def emit_umin(tmp_id
, args
):
125 c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id
, args
[1], args
[0]))
# RPN function name -> (n operands, emitter)
#
# The equation code generator looks an operator token up here, pops that many
# operands and hands them to the emitter together with the current tmp id.
ops.update({
    "FADD": (2, emit_fadd),
    "FDIV": (2, emit_fdiv),
    "FMAX": (2, emit_fmax),
    "FMUL": (2, emit_fmul),
    "FSUB": (2, emit_fsub),
    "READ": (2, emit_read),
    "UADD": (2, emit_uadd),
    "UDIV": (2, emit_udiv),
    "UMUL": (2, emit_umul),
    "USUB": (2, emit_usub),
    "UMIN": (2, emit_umin),
})
144 return "(" + subexp
+ ")"
def splice_bitwise_and(args):
    """Build a C bitwise-AND expression from two operands.

    args[1] becomes the left-hand side and args[0] the right-hand side;
    each operand is wrapped by brkt() before being joined with " & ".
    """
    lhs = brkt(args[1])
    rhs = brkt(args[0])
    return " & ".join((lhs, rhs))
def splice_logical_and(args):
    """Build a C logical-AND expression from two operands.

    args[1] becomes the left-hand side and args[0] the right-hand side;
    each operand is wrapped by brkt() before being joined with " && ".
    """
    lhs = brkt(args[1])
    rhs = brkt(args[0])
    return " && ".join((lhs, rhs))
def splice_ult(args):
    """Build a C less-than comparison from two operands.

    args[1] becomes the left-hand side and args[0] the right-hand side;
    each operand is wrapped by brkt() before being joined with " < ".
    """
    lhs = brkt(args[1])
    rhs = brkt(args[0])
    return " < ".join((lhs, rhs))
def splice_ugte(args):
    """Build a C greater-than-or-equal comparison from two operands.

    args[1] becomes the left-hand side and args[0] the right-hand side;
    each operand is wrapped by brkt() before being joined with " >= ".
    """
    lhs = brkt(args[1])
    rhs = brkt(args[0])
    return " >= ".join((lhs, rhs))
# RPN expression operator -> (n operands, splicer)
#
# Unlike the emitters in ops, a splicer receives only the operand list and
# returns the spliced C sub-expression as a string.
exp_ops.update({
    "AND": (2, splice_bitwise_and),
    "UGTE": (2, splice_ugte),
    "ULT": (2, splice_ult),
    "&&": (2, splice_logical_and),
})
# "$Variable" name as written in the XML equations -> C expression that is
# substituted for it in the generated code.
hw_vars.update({
    "$EuCoresTotalCount": "brw->perfquery.sys_vars.n_eus",
    "$EuSlicesTotalCount": "brw->perfquery.sys_vars.n_eu_slices",
    "$EuSubslicesTotalCount": "brw->perfquery.sys_vars.n_eu_sub_slices",
    "$EuThreadsCount": "brw->perfquery.sys_vars.eu_threads_count",
    "$SliceMask": "brw->perfquery.sys_vars.slice_mask",
    "$SubsliceMask": "brw->perfquery.sys_vars.subslice_mask",
    "$GpuTimestampFrequency": "brw->perfquery.sys_vars.timestamp_frequency",
    "$GpuMinFrequency": "brw->perfquery.sys_vars.gt_min_freq",
    "$GpuMaxFrequency": "brw->perfquery.sys_vars.gt_max_freq",
})
179 def output_rpn_equation_code(set, counter
, equation
, counter_vars
):
180 c("/* RPN equation: " + equation
+ " */")
181 tokens
= equation
.split()
188 while stack
and stack
[-1] in ops
:
190 argc
, callback
= ops
[op
]
192 for i
in range(0, argc
):
193 operand
= stack
.pop()
194 if operand
[0] == "$":
195 if operand
in hw_vars
:
196 operand
= hw_vars
[operand
]
197 elif operand
in counter_vars
:
198 reference
= counter_vars
[operand
]
199 operand
= read_funcs
[operand
[1:]] + "(brw, query, accumulator)"
201 raise Exception("Failed to resolve variable " + operand
+ " in equation " + equation
+ " for " + set.get('name') + " :: " + counter
.get('name'));
204 tmp_id
= callback(tmp_id
, args
)
206 tmp
= "tmp{0}".format(tmp_id
- 1)
210 raise Exception("Spurious empty rpn code for " + set.get('name') + " :: " +
211 counter
.get('name') + ".\nThis is probably due to some unhandled RPN function, in the equation \"" +
217 value
= hw_vars
[value
]
218 if value
in counter_vars
:
219 value
= read_funcs
[value
[1:]] + "(brw, query, accumulator)"
221 c("\nreturn " + value
+ ";")
223 def splice_rpn_expression(set, counter
, expression
):
224 tokens
= expression
.split()
229 while stack
and stack
[-1] in exp_ops
:
231 argc
, callback
= exp_ops
[op
]
233 for i
in range(0, argc
):
234 operand
= stack
.pop()
235 if operand
[0] == "$":
236 if operand
in hw_vars
:
237 operand
= hw_vars
[operand
]
239 raise Exception("Failed to resolve variable " + operand
+ " in expression " + expression
+ " for " + set.get('name') + " :: " + counter
.get('name'));
242 subexp
= callback(args
)
247 raise Exception("Spurious empty rpn expression for " + set.get('name') + " :: " +
248 counter
.get('name') + ".\nThis is probably due to some unhandled RPN operation, in the expression \"" +
253 def output_counter_read(set, counter
, counter_vars
):
255 c("/* {0} :: {1} */".format(set.get('name'), counter
.get('name')))
256 ret_type
= counter
.get('data_type')
257 if ret_type
== "uint64":
258 ret_type
= "uint64_t"
260 c("static " + ret_type
)
261 read_sym
= "{0}__{1}__{2}__read".format(set.get('chipset').lower(), set.get('underscore_name'), counter
.get('underscore_name'))
262 c(read_sym
+ "(struct brw_context *brw,\n")
263 c_indent(len(read_sym
) + 1)
264 c("const struct brw_perf_query_info *query,\n")
265 c("uint64_t *accumulator)\n")
266 c_outdent(len(read_sym
) + 1)
271 output_rpn_equation_code(set, counter
, counter
.get('equation'), counter_vars
)
278 def output_counter_max(set, counter
, counter_vars
):
279 max_eq
= counter
.get('max_equation')
282 return "0; /* undefined */"
290 # We can only report constant maximum values via INTEL_performance_query
291 for token
in max_eq
.split():
292 if token
[0] == '$' and token
not in hw_vars
:
293 return "0; /* unsupported (varies over time) */"
296 c("/* {0} :: {1} */".format(set.get('name'), counter
.get('name')))
297 ret_type
= counter
.get('data_type')
298 if ret_type
== "uint64":
299 ret_type
= "uint64_t"
301 c("static " + ret_type
)
302 max_sym
= "{0}__{1}__{2}__max".format(set.get('chipset').lower(), set.get('underscore_name'), counter
.get('underscore_name'))
303 c(max_sym
+ "(struct brw_context *brw)\n")
308 output_rpn_equation_code(set, counter
, max_eq
, counter_vars
)
313 return max_sym
+ "(brw);"
# Size, in bytes, assumed for each C type a counter value can be stored as.
c_type_sizes = {
    "uint32_t": 4,
    "uint64_t": 8,
    "float": 4,
    "double": 8,
    "bool": 4,
}
317 return c_type_sizes
[c_type
]
def pot_align(base, pot_alignment):
    """Round *base* up to the nearest multiple of *pot_alignment*.

    *pot_alignment* must be a power of two: the result is computed by
    adding ``pot_alignment - 1`` and then masking off the low bits, which
    only yields a multiple of the alignment when it has a single bit set.
    A *base* that is already aligned is returned unchanged.
    """
    low_bits = pot_alignment - 1
    return (base + low_bits) & ~low_bits
322 semantic_type_map
= {
327 def output_counter_report(set, counter
, current_offset
):
328 data_type
= counter
.get('data_type')
329 data_type_uc
= data_type
.upper()
333 c_type
= c_type
+ "_t"
335 semantic_type
= counter
.get('semantic_type')
336 if semantic_type
in semantic_type_map
:
337 semantic_type
= semantic_type_map
[semantic_type
]
339 semantic_type_uc
= semantic_type
.upper()
343 availability
= counter
.get('availability')
345 expression
= splice_rpn_expression(set, counter
, availability
)
346 lines
= expression
.split(' && ')
349 c("if (" + lines
[0] + ") {")
351 c("if (" + lines
[0] + " &&")
353 for i
in range(1, (n_lines
- 1)):
355 c(lines
[(n_lines
- 1)] + ") {")
359 c("counter = &query->counters[query->n_counters++];\n")
360 c("counter->oa_counter_read_" + data_type
+ " = " + read_funcs
[counter
.get('symbol_name')] + ";\n")
361 c("counter->name = \"" + counter
.get('name') + "\";\n")
362 c("counter->desc = \"" + counter
.get('description') + "\";\n")
363 c("counter->type = GL_PERFQUERY_COUNTER_" + semantic_type_uc
+ "_INTEL;\n")
364 c("counter->data_type = GL_PERFQUERY_COUNTER_DATA_" + data_type_uc
+ "_INTEL;\n")
365 c("counter->raw_max = " + max_values
[counter
.get('symbol_name')] + "\n")
367 current_offset
= pot_align(current_offset
, sizeof(c_type
))
368 c("counter->offset = " + str(current_offset
) + ";\n")
369 c("counter->size = sizeof(" + c_type
+ ");\n")
375 return current_offset
+ sizeof(c_type
)
384 parser
= argparse
.ArgumentParser()
385 parser
.add_argument("xml", help="XML description of metrics")
386 parser
.add_argument("--header", help="Header file to write")
387 parser
.add_argument("--code", help="C file to write")
388 parser
.add_argument("--chipset", help="Chipset to generate code for", required
=True)
390 args
= parser
.parse_args()
392 chipset
= args
.chipset
.lower()
395 header_file
= open(args
.header
, 'w')
398 c_file
= open(args
.code
, 'w')
400 tree
= et
.parse(args
.xml
)
403 copyright
= textwrap
.dedent("""\
404 /* Autogenerated file, DO NOT EDIT manually! generated by {}
406 * Copyright (c) 2015 Intel Corporation
408 * Permission is hereby granted, free of charge, to any person obtaining a
409 * copy of this software and associated documentation files (the "Software"),
410 * to deal in the Software without restriction, including without limitation
411 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
412 * and/or sell copies of the Software, and to permit persons to whom the
413 * Software is furnished to do so, subject to the following conditions:
415 * The above copyright notice and this permission notice (including the next
416 * paragraph) shall be included in all copies or substantial portions of the
419 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
420 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
421 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
422 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
423 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
424 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
425 * DEALINGS IN THE SOFTWARE.
428 """).format(os
.path
.basename(__file__
))
431 h(textwrap
.dedent("""\
439 c(textwrap
.dedent("""\
443 #include "util/hash_table.h"
447 c("#include \"brw_oa_" + chipset
+ ".h\"")
449 c(textwrap
.dedent("""\
450 #include "brw_context.h"
451 #include "brw_performance_query.h"
454 #define MIN(a, b) ((a < b) ? (a) : (b))
455 #define MAX(a, b) ((a > b) ? (a) : (b))
459 for set in tree
.findall(".//set"):
463 counters
= set.findall("counter")
465 assert set.get('chipset').lower() == chipset
467 for counter
in counters
:
469 read_funcs
[counter
.get('symbol_name')] = output_counter_read(set, counter
, counter_vars
)
470 max_values
[counter
.get('symbol_name')] = output_counter_max(set, counter
, empty_vars
)
471 counter_vars
["$" + counter
.get('symbol_name')] = counter
474 c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(chipset
, set.get('underscore_name'), len(counters
)))
475 c("static struct brw_perf_query_info " + chipset
+ "_" + set.get('underscore_name') + "_query = {\n")
478 c(".kind = OA_COUNTERS,\n")
479 c(".name = \"" + set.get('name') + "\",\n")
480 c(".guid = \"" + set.get('hw_config_guid') + "\",\n")
482 c(".counters = {0}_{1}_query_counters,".format(chipset
, set.get('underscore_name')))
483 c(".n_counters = 0,")
484 c(".oa_metrics_set_id = 0, /* determined at runtime, via sysfs */")
487 c(textwrap
.dedent("""\
488 .oa_format = I915_OA_FORMAT_A45_B8_C8,
490 /* Accumulation buffer offsets... */
491 .gpu_time_offset = 0,
497 c(textwrap
.dedent("""\
498 .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,
500 /* Accumulation buffer offsets... */
501 .gpu_time_offset = 0,
502 .gpu_clock_offset = 1,
512 c("register_" + set.get('underscore_name') + "_counter_query(struct brw_context *brw)\n")
516 c("static struct brw_perf_query_info *query = &" + chipset
+ "_" + set.get('underscore_name') + "_query;\n")
517 c("struct brw_perf_query_counter *counter;\n")
520 c("/* Note: we're assuming there can't be any variation in the definition ")
521 c(" * of a query between contexts so it's ok to describe a query within a ")
522 c(" * global variable which only needs to be initialized once... */")
523 c("\nif (!query->data_size) {")
527 for counter
in counters
:
528 offset
= output_counter_report(set, counter
, offset
)
531 c("\nquery->data_size = counter->offset + counter->size;\n")
536 c("\n_mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);")
541 h("void brw_oa_register_queries_" + chipset
+ "(struct brw_context *brw);\n")
544 c("brw_oa_register_queries_" + chipset
+ "(struct brw_context *brw)")
548 for set in tree
.findall(".//set"):
549 c("register_" + set.get('underscore_name') + "_counter_query(brw);")
555 if __name__
== '__main__':