1 # Copyright (c) 2015-2017 Intel Corporation
3 # Permission is hereby granted, free of charge, to any person obtaining a
4 # copy of this software and associated documentation files (the "Software"),
5 # to deal in the Software without restriction, including without limitation
6 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 # and/or sell copies of the Software, and to permit persons to whom the
8 # Software is furnished to do so, subject to the following conditions:
10 # The above copyright notice and this permission notice (including the next
11 # paragraph) shall be included in all copies or substantial portions of the
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
27 import xml
.etree
.cElementTree
as et
37 code
= ' '.join(map(str,args
))
38 for line
in code
.splitlines():
39 text
= ''.rjust(_c_indent
) + line
40 c_file
.write(text
.rstrip() + "\n")
42 # indented, but no trailing newline...
def c_line_start(code):
    """Write `code` to the C output file at the current indent level.

    Unlike a full line write, nothing is appended after `code`, so the
    caller can continue the same output line.
    """
    padding = ' ' * _c_indent
    c_file.write(padding + code)
52 _c_indent
= _c_indent
+ n
55 _c_indent
= _c_indent
- n
62 code
= ' '.join(map(str,args
))
63 for line
in code
.splitlines():
64 text
= ''.rjust(_h_indent
) + line
65 header_file
.write(text
.rstrip() + "\n")
69 _h_indent
= _h_indent
+ n
72 _h_indent
= _h_indent
- n
def emit_fadd(tmp_id, args):
    """Emit a C statement storing a double-precision sum in a new temporary.

    tmp_id: numeric suffix used for the new ``tmp<N>`` C variable.
    args: operand strings; args[1] is emitted as the left-hand operand and
          args[0] as the right-hand one.

    Returns the next unused temporary id. The RPN equation compiler rebinds
    its counter to the emitter's return value and reads the result back
    from ``tmp<returned - 1>``, so returning nothing would break it.
    """
    c("double tmp{0} = {1} + {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1
79 # Be careful to check for divide by zero...
def emit_fdiv(tmp_id, args):
    """Emit C code for a double-precision division guarded against a zero divisor.

    Three consecutive temporaries are declared: the numerator (args[1]),
    the denominator (args[0]) and the quotient. When the denominator is
    zero the generated code yields 0 instead of dividing.

    tmp_id: numeric suffix of the first temporary to declare.
    args: operand strings; args[1] is the numerator, args[0] the denominator.

    Returns the next unused temporary id (tmp_id + 3). The caller reads the
    result from ``tmp<returned - 1>``, which is the quotient.
    """
    numerator = tmp_id
    denominator = tmp_id + 1
    quotient = tmp_id + 2
    c("double tmp{0} = {1};".format(numerator, args[1]))
    c("double tmp{0} = {1};".format(denominator, args[0]))
    c("double tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(quotient, denominator, numerator))
    return tmp_id + 3
def emit_fmax(tmp_id, args):
    """Emit C code computing the maximum of two double operands.

    Both operands are first stored in their own temporaries and the MAX()
    macro is applied to those temporaries, so the macro only ever sees
    plain variable names.

    tmp_id: numeric suffix of the first temporary to declare.
    args: operand strings; args[1] is stored first, args[0] second.

    Returns the next unused temporary id (tmp_id + 3). The caller reads the
    result from ``tmp<returned - 1>``, which holds the maximum.
    """
    first = tmp_id
    second = tmp_id + 1
    result = tmp_id + 2
    c("double tmp{0} = {1};".format(first, args[1]))
    c("double tmp{0} = {1};".format(second, args[0]))
    c("double tmp{0} = MAX(tmp{1}, tmp{2});".format(result, first, second))
    return tmp_id + 3
def emit_fmul(tmp_id, args):
    """Emit a C statement storing a double-precision product in a new temporary.

    tmp_id: numeric suffix used for the new ``tmp<N>`` C variable.
    args: operand strings; args[1] is emitted as the left-hand operand and
          args[0] as the right-hand one.

    Returns the next unused temporary id; the caller reads the result from
    ``tmp<returned - 1>``.
    """
    c("double tmp{0} = {1} * {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1
def emit_fsub(tmp_id, args):
    """Emit a C statement storing a double-precision difference in a new temporary.

    tmp_id: numeric suffix used for the new ``tmp<N>`` C variable.
    args: operand strings; the generated code computes args[1] - args[0].

    Returns the next unused temporary id; the caller reads the result from
    ``tmp<returned - 1>``.
    """
    c("double tmp{0} = {1} - {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1
def emit_read(tmp_id, args):
    """Emit a C statement loading a 64-bit value from the accumulator array.

    The generated index is ``query-><kind>_offset + <args[0]>``, where
    <kind> is args[1] lower-cased.

    tmp_id: numeric suffix used for the new ``tmp<N>`` C variable.
    args: args[1] selects which ``*_offset`` field of the query is used;
          args[0] is added to that offset.

    Returns the next unused temporary id; the caller reads the result from
    ``tmp<returned - 1>``.
    """
    # Named to avoid shadowing the builtin `type`.
    offset_kind = args[1].lower()
    c("uint64_t tmp{0} = accumulator[query->{1}_offset + {2}];".format(tmp_id, offset_kind, args[0]))
    return tmp_id + 1
def emit_uadd(tmp_id, args):
    """Emit a C statement storing a uint64_t sum in a new temporary.

    tmp_id: numeric suffix used for the new ``tmp<N>`` C variable.
    args: operand strings; args[1] is emitted as the left-hand operand and
          args[0] as the right-hand one.

    Returns the next unused temporary id; the caller reads the result from
    ``tmp<returned - 1>``.
    """
    c("uint64_t tmp{0} = {1} + {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1
109 # Be careful to check for divide by zero...
def emit_udiv(tmp_id, args):
    """Emit C code for a uint64_t division guarded against a zero divisor.

    Three consecutive temporaries are declared: the numerator (args[1]),
    the denominator (args[0]) and the quotient. When the denominator is
    zero the generated code yields 0 instead of dividing.

    tmp_id: numeric suffix of the first temporary to declare.
    args: operand strings; args[1] is the numerator, args[0] the denominator.

    Returns the next unused temporary id (tmp_id + 3). The caller reads the
    result from ``tmp<returned - 1>``, which is the quotient.
    """
    numerator = tmp_id
    denominator = tmp_id + 1
    quotient = tmp_id + 2
    c("uint64_t tmp{0} = {1};".format(numerator, args[1]))
    c("uint64_t tmp{0} = {1};".format(denominator, args[0]))
    c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(quotient, denominator, numerator))
    return tmp_id + 3
def emit_umul(tmp_id, args):
    """Emit a C statement storing a uint64_t product in a new temporary.

    tmp_id: numeric suffix used for the new ``tmp<N>`` C variable.
    args: operand strings; args[1] is emitted as the left-hand operand and
          args[0] as the right-hand one.

    Returns the next unused temporary id; the caller reads the result from
    ``tmp<returned - 1>``.
    """
    c("uint64_t tmp{0} = {1} * {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1
def emit_usub(tmp_id, args):
    """Emit a C statement storing a uint64_t difference in a new temporary.

    tmp_id: numeric suffix used for the new ``tmp<N>`` C variable.
    args: operand strings; the generated code computes args[1] - args[0].

    Returns the next unused temporary id; the caller reads the result from
    ``tmp<returned - 1>``.
    """
    c("uint64_t tmp{0} = {1} - {2};".format(tmp_id, args[1], args[0]))
    return tmp_id + 1
def emit_umin(tmp_id, args):
    """Emit a C statement storing the minimum of two uint64_t operands.

    The operands are passed straight to the MIN() macro in the generated
    code, args[1] first and args[0] second.

    tmp_id: numeric suffix used for the new ``tmp<N>`` C variable.
    args: operand strings.

    Returns the next unused temporary id; the caller reads the result from
    ``tmp<returned - 1>``.
    """
    c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, args[1], args[0]))
    return tmp_id + 1
129 # (n operands, emitter)
# Opcodes recognised in counter equations; every one takes two operands.
ops.update({
    "FADD": (2, emit_fadd),
    "FDIV": (2, emit_fdiv),
    "FMAX": (2, emit_fmax),
    "FMUL": (2, emit_fmul),
    "FSUB": (2, emit_fsub),
    "READ": (2, emit_read),
    "UADD": (2, emit_uadd),
    "UDIV": (2, emit_udiv),
    "UMUL": (2, emit_umul),
    "USUB": (2, emit_usub),
    "UMIN": (2, emit_umin),
})
144 return "(" + subexp
+ ")"
def splice_bitwise_and(args):
    """Return a C expression that bitwise-ANDs the two bracketed operands.

    args[1] becomes the left-hand side and args[0] the right-hand side.
    """
    lhs = brkt(args[1])
    rhs = brkt(args[0])
    return " & ".join((lhs, rhs))
def splice_logical_and(args):
    """Return a C expression that logically ANDs the two bracketed operands.

    args[1] becomes the left-hand side and args[0] the right-hand side.
    """
    lhs = brkt(args[1])
    rhs = brkt(args[0])
    return " && ".join((lhs, rhs))
def splice_ult(args):
    """Return a C expression testing that bracketed args[1] is less than args[0]."""
    lhs = brkt(args[1])
    rhs = brkt(args[0])
    return " < ".join((lhs, rhs))
def splice_ugte(args):
    """Return a C expression testing that bracketed args[1] is >= args[0]."""
    lhs = brkt(args[1])
    rhs = brkt(args[0])
    return " >= ".join((lhs, rhs))
161 # (n operands, splicer)
# Operators recognised in RPN expressions; every one takes two operands.
exp_ops.update({
    "AND": (2, splice_bitwise_and),
    "UGTE": (2, splice_ugte),
    "ULT": (2, splice_ult),
    "&&": (2, splice_logical_and),
})
# C expressions substituted for the matching $variables in equations.
hw_vars.update({
    "$EuCoresTotalCount": "brw->perfquery.sys_vars.n_eus",
    "$EuSlicesTotalCount": "brw->perfquery.sys_vars.n_eu_slices",
    "$EuSubslicesTotalCount": "brw->perfquery.sys_vars.n_eu_sub_slices",
    "$EuThreadsCount": "brw->perfquery.sys_vars.eu_threads_count",
    "$SliceMask": "brw->perfquery.sys_vars.slice_mask",
    "$SubsliceMask": "brw->perfquery.sys_vars.subslice_mask",
    "$GpuTimestampFrequency": "brw->perfquery.sys_vars.timestamp_frequency",
    "$GpuMinFrequency": "brw->perfquery.sys_vars.gt_min_freq",
    "$GpuMaxFrequency": "brw->perfquery.sys_vars.gt_max_freq",
})
179 def output_rpn_equation_code(set, counter
, equation
, counter_vars
):
180 c("/* RPN equation: " + equation
+ " */")
181 tokens
= equation
.split()
188 while stack
and stack
[-1] in ops
:
190 argc
, callback
= ops
[op
]
192 for i
in range(0, argc
):
193 operand
= stack
.pop()
194 if operand
[0] == "$":
195 if operand
in hw_vars
:
196 operand
= hw_vars
[operand
]
197 elif operand
in counter_vars
:
198 reference
= counter_vars
[operand
]
199 operand
= read_funcs
[operand
[1:]] + "(brw, query, accumulator)"
201 raise Exception("Failed to resolve variable " + operand
+ " in equation " + equation
+ " for " + set.get('name') + " :: " + counter
.get('name'));
204 tmp_id
= callback(tmp_id
, args
)
206 tmp
= "tmp{0}".format(tmp_id
- 1)
210 raise Exception("Spurious empty rpn code for " + set.get('name') + " :: " +
211 counter
.get('name') + ".\nThis is probably due to some unhandled RPN function, in the equation \"" +
217 value
= hw_vars
[value
];
219 c("\nreturn " + value
+ ";")
221 def splice_rpn_expression(set, counter
, expression
):
222 tokens
= expression
.split()
227 while stack
and stack
[-1] in exp_ops
:
229 argc
, callback
= exp_ops
[op
]
231 for i
in range(0, argc
):
232 operand
= stack
.pop()
233 if operand
[0] == "$":
234 if operand
in hw_vars
:
235 operand
= hw_vars
[operand
]
237 raise Exception("Failed to resolve variable " + operand
+ " in expression " + expression
+ " for " + set.get('name') + " :: " + counter
.get('name'));
240 subexp
= callback(args
)
245 raise Exception("Spurious empty rpn expression for " + set.get('name') + " :: " +
246 counter
.get('name') + ".\nThis is probably due to some unhandled RPN operation, in the expression \"" +
251 def output_counter_read(set, counter
, counter_vars
):
253 c("/* {0} :: {1} */".format(set.get('name'), counter
.get('name')))
254 ret_type
= counter
.get('data_type')
255 if ret_type
== "uint64":
256 ret_type
= "uint64_t"
258 c("static " + ret_type
)
259 read_sym
= "{0}__{1}__{2}__read".format(set.get('chipset').lower(), set.get('underscore_name'), counter
.get('underscore_name'))
260 c(read_sym
+ "(struct brw_context *brw,\n")
261 c_indent(len(read_sym
) + 1)
262 c("const struct brw_perf_query_info *query,\n")
263 c("uint64_t *accumulator)\n")
264 c_outdent(len(read_sym
) + 1)
269 output_rpn_equation_code(set, counter
, counter
.get('equation'), counter_vars
)
276 def output_counter_max(set, counter
, counter_vars
):
277 max_eq
= counter
.get('max_equation')
280 return "0; /* undefined */"
288 # We can only report constant maximum values via INTEL_performance_query
289 for token
in max_eq
.split():
290 if token
[0] == '$' and token
not in hw_vars
:
291 return "0; /* unsupported (varies over time) */"
294 c("/* {0} :: {1} */".format(set.get('name'), counter
.get('name')))
295 ret_type
= counter
.get('data_type')
296 if ret_type
== "uint64":
297 ret_type
= "uint64_t"
299 c("static " + ret_type
)
300 max_sym
= "{0}__{1}__{2}__max".format(set.get('chipset').lower(), set.get('underscore_name'), counter
.get('underscore_name'))
301 c(max_sym
+ "(struct brw_context *brw)\n")
306 output_rpn_equation_code(set, counter
, max_eq
, counter_vars
)
311 return max_sym
+ "(brw);"
313 c_type_sizes
= { "uint32_t": 4, "uint64_t": 8, "float": 4, "double": 8, "bool": 4 }
315 return c_type_sizes
[c_type
]
def pot_align(base, pot_alignment):
    """Round `base` up to a multiple of `pot_alignment`.

    The result is computed by masking off the low bits, so it is only a
    true multiple when `pot_alignment` is a power of two.
    """
    low_bits = pot_alignment - 1
    return (base + low_bits) & ~low_bits
320 semantic_type_map
= {
325 def output_counter_report(set, counter
, current_offset
):
326 data_type
= counter
.get('data_type')
327 data_type_uc
= data_type
.upper()
331 c_type
= c_type
+ "_t"
333 semantic_type
= counter
.get('semantic_type')
334 if semantic_type
in semantic_type_map
:
335 semantic_type
= semantic_type_map
[semantic_type
]
337 semantic_type_uc
= semantic_type
.upper()
341 availability
= counter
.get('availability')
343 expression
= splice_rpn_expression(set, counter
, availability
)
344 lines
= expression
.split(' && ')
347 c("if (" + lines
[0] + ") {")
349 c("if (" + lines
[0] + " &&")
351 for i
in range(1, (n_lines
- 1)):
353 c(lines
[(n_lines
- 1)] + ") {")
357 c("counter = &query->counters[query->n_counters++];\n")
358 c("counter->oa_counter_read_" + data_type
+ " = " + read_funcs
[counter
.get('symbol_name')] + ";\n")
359 c("counter->name = \"" + counter
.get('name') + "\";\n")
360 c("counter->desc = \"" + counter
.get('description') + "\";\n")
361 c("counter->type = GL_PERFQUERY_COUNTER_" + semantic_type_uc
+ "_INTEL;\n")
362 c("counter->data_type = GL_PERFQUERY_COUNTER_DATA_" + data_type_uc
+ "_INTEL;\n")
363 c("counter->raw_max = " + max_values
[counter
.get('symbol_name')] + "\n")
365 current_offset
= pot_align(current_offset
, sizeof(c_type
))
366 c("counter->offset = " + str(current_offset
) + ";\n")
367 c("counter->size = sizeof(" + c_type
+ ");\n")
373 return current_offset
+ sizeof(c_type
)
382 parser
= argparse
.ArgumentParser()
383 parser
.add_argument("xml", help="XML description of metrics")
384 parser
.add_argument("--header", help="Header file to write")
385 parser
.add_argument("--code", help="C file to write")
386 parser
.add_argument("--chipset", help="Chipset to generate code for", required
=True)
388 args
= parser
.parse_args()
390 chipset
= args
.chipset
.lower()
393 header_file
= open(args
.header
, 'w')
396 c_file
= open(args
.code
, 'w')
398 tree
= et
.parse(args
.xml
)
401 copyright
= textwrap
.dedent("""\
402 /* Autogenerated file, DO NOT EDIT manually! generated by {}
404 * Copyright (c) 2015 Intel Corporation
406 * Permission is hereby granted, free of charge, to any person obtaining a
407 * copy of this software and associated documentation files (the "Software"),
408 * to deal in the Software without restriction, including without limitation
409 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
410 * and/or sell copies of the Software, and to permit persons to whom the
411 * Software is furnished to do so, subject to the following conditions:
413 * The above copyright notice and this permission notice (including the next
414 * paragraph) shall be included in all copies or substantial portions of the
417 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
418 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
419 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
420 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
421 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
422 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
423 * DEALINGS IN THE SOFTWARE.
426 """).format(os
.path
.basename(__file__
))
429 h(textwrap
.dedent("""\
437 c(textwrap
.dedent("""\
441 #include "util/hash_table.h"
445 c("#include \"brw_oa_" + chipset
+ ".h\"")
447 c(textwrap
.dedent("""\
448 #include "brw_context.h"
449 #include "brw_performance_query.h"
452 #define MIN(a, b) ((a < b) ? (a) : (b))
453 #define MAX(a, b) ((a > b) ? (a) : (b))
457 for set in tree
.findall(".//set"):
461 counters
= set.findall("counter")
463 assert set.get('chipset').lower() == chipset
465 for counter
in counters
:
467 read_funcs
[counter
.get('symbol_name')] = output_counter_read(set, counter
, counter_vars
)
468 max_values
[counter
.get('symbol_name')] = output_counter_max(set, counter
, empty_vars
)
469 counter_vars
["$" + counter
.get('symbol_name')] = counter
472 c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(chipset
, set.get('underscore_name'), len(counters
)))
473 c("static struct brw_perf_query_info " + chipset
+ "_" + set.get('underscore_name') + "_query = {\n")
476 c(".kind = OA_COUNTERS,\n")
477 c(".name = \"" + set.get('name') + "\",\n")
478 c(".guid = \"" + set.get('hw_config_guid') + "\",\n")
480 c(".counters = {0}_{1}_query_counters,".format(chipset
, set.get('underscore_name')))
481 c(".n_counters = 0,")
482 c(".oa_metrics_set_id = 0, /* determined at runtime, via sysfs */")
485 c(textwrap
.dedent("""\
486 .oa_format = I915_OA_FORMAT_A45_B8_C8,
488 /* Accumulation buffer offsets... */
489 .gpu_time_offset = 0,
495 c(textwrap
.dedent("""\
496 .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,
498 /* Accumulation buffer offsets... */
499 .gpu_time_offset = 0,
500 .gpu_clock_offset = 1,
510 c("register_" + set.get('underscore_name') + "_counter_query(struct brw_context *brw)\n")
514 c("static struct brw_perf_query_info *query = &" + chipset
+ "_" + set.get('underscore_name') + "_query;\n")
515 c("struct brw_perf_query_counter *counter;\n")
518 c("/* Note: we're assuming there can't be any variation in the definition ")
519 c(" * of a query between contexts so it's ok to describe a query within a ")
520 c(" * global variable which only needs to be initialized once... */")
521 c("\nif (!query->data_size) {")
525 for counter
in counters
:
526 offset
= output_counter_report(set, counter
, offset
)
529 c("\nquery->data_size = counter->offset + counter->size;\n")
534 c("\n_mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);")
539 h("void brw_oa_register_queries_" + chipset
+ "(struct brw_context *brw);\n")
542 c("brw_oa_register_queries_" + chipset
+ "(struct brw_context *brw)")
546 for set in tree
.findall(".//set"):
547 c("register_" + set.get('underscore_name') + "_counter_query(brw);")
553 if __name__
== '__main__':