i965: perf: cleanup detection of kernel support for loadable configs
[mesa.git] / src / mesa / drivers / dri / i965 / brw_oa.py
1 # Copyright (c) 2015-2017 Intel Corporation
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining a
4 # copy of this software and associated documentation files (the "Software"),
5 # to deal in the Software without restriction, including without limitation
6 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 # and/or sell copies of the Software, and to permit persons to whom the
8 # Software is furnished to do so, subject to the following conditions:
9 #
10 # The above copyright notice and this permission notice (including the next
11 # paragraph) shall be included in all copies or substantial portions of the
12 # Software.
13 #
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
21
22 import argparse
23 import os
24 import sys
25 import textwrap
26
27 import xml.etree.cElementTree as et
28
# Per-metric-set lookup tables; main() rebinds both to fresh dicts for every
# set it processes.
#   max_values: counter symbol name -> C text assigned to counter->raw_max
#   read_funcs: counter symbol name -> name of the generated C read function
max_values = dict()
read_funcs = dict()
31
# Destination of the generated C source (None disables all C output) and the
# number of spaces currently prepended to every emitted line.
c_file = None
_c_indent = 0


def c(*args):
    """Write the space-joined arguments to the C output, one line at a time.

    Each line is prefixed with the current indentation, stripped of trailing
    whitespace and terminated with a newline.  Does nothing when no C output
    file is set.
    """
    if not c_file:
        return
    padding = ' ' * _c_indent
    for line in ' '.join(map(str, args)).splitlines():
        c_file.write((padding + line).rstrip() + "\n")


def c_line_start(code):
    """Write *code* at the current indentation, without a trailing newline."""
    if not c_file:
        return
    c_file.write(' ' * _c_indent + code)


def c_raw(code):
    """Write *code* to the C output verbatim (no indentation, no newline)."""
    if not c_file:
        return
    c_file.write(code)


def c_indent(n):
    """Increase the C indentation by *n* columns."""
    global _c_indent
    _c_indent += n


def c_outdent(n):
    """Decrease the C indentation by *n* columns."""
    global _c_indent
    _c_indent -= n
56
# Destination of the generated C header (None disables all header output) and
# the number of spaces currently prepended to every emitted header line.
header_file = None
_h_indent = 0


def h(*args):
    """Write the space-joined arguments to the header output, line by line.

    Each line is prefixed with the current header indentation, stripped of
    trailing whitespace and terminated with a newline.  Does nothing when no
    header output file is set.
    """
    if header_file:
        code = ' '.join(map(str, args))
        for line in code.splitlines():
            text = ''.rjust(_h_indent) + line
            header_file.write(text.rstrip() + "\n")


def h_indent(n):
    """Increase the header indentation by *n* columns."""
    # The name declared global must be the one that is rebound; otherwise
    # _h_indent is treated as a local and the update raises UnboundLocalError.
    global _h_indent
    _h_indent = _h_indent + n


def h_outdent(n):
    """Decrease the header indentation by *n* columns."""
    global _h_indent
    _h_indent = _h_indent - n
73
74
def emit_fadd(tmp_id, args):
    """Emit `double tmp<tmp_id> = args[1] + args[0];` and return the next free id."""
    lhs = args[1]
    rhs = args[0]
    statement = "double tmp{0} = {1} + {2};".format(tmp_id, lhs, rhs)
    c(statement)
    return tmp_id + 1
78
def emit_fdiv(tmp_id, args):
    """Emit a guarded floating-point division of args[1] by args[0].

    Three temporaries are emitted: the dividend, the divisor and the
    quotient, which is 0 when the divisor is zero.  Returns the next free id.
    """
    dividend_id = tmp_id
    divisor_id = tmp_id + 1
    quotient_id = tmp_id + 2
    c("double tmp{0} = {1};".format(dividend_id, args[1]))
    c("double tmp{0} = {1};".format(divisor_id, args[0]))
    # The generated expression tests the divisor to avoid a divide by zero.
    c("double tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(quotient_id, divisor_id, dividend_id))
    return tmp_id + 3
85
def emit_fmax(tmp_id, args):
    """Emit the floating-point MAX() of args[1] and args[0].

    Both operands are first stored in their own temporaries; the result goes
    into a third one.  Returns the next free id.
    """
    first_id = tmp_id
    second_id = tmp_id + 1
    result_id = tmp_id + 2
    c("double tmp{0} = {1};".format(first_id, args[1]))
    c("double tmp{0} = {1};".format(second_id, args[0]))
    c("double tmp{0} = MAX(tmp{1}, tmp{2});".format(result_id, first_id, second_id))
    return tmp_id + 3
91
def emit_fmul(tmp_id, args):
    """Emit `double tmp<tmp_id> = args[1] * args[0];` and return the next free id."""
    lhs = args[1]
    rhs = args[0]
    statement = "double tmp{0} = {1} * {2};".format(tmp_id, lhs, rhs)
    c(statement)
    return tmp_id + 1
95
def emit_fsub(tmp_id, args):
    """Emit `double tmp<tmp_id> = args[1] - args[0];` and return the next free id."""
    lhs = args[1]
    rhs = args[0]
    statement = "double tmp{0} = {1} - {2};".format(tmp_id, lhs, rhs)
    c(statement)
    return tmp_id + 1
99
def emit_read(tmp_id, args):
    """Emit a read of one accumulator slot into `uint64_t tmp<tmp_id>`.

    args[1] (lower-cased) selects the `query-><name>_offset` base and
    args[0] is the index added to it.  Returns the next free id.
    """
    offset_name = args[1].lower()
    index = args[0]
    c("uint64_t tmp{0} = accumulator[query->{1}_offset + {2}];".format(tmp_id, offset_name, index))
    return tmp_id + 1
104
def emit_uadd(tmp_id, args):
    """Emit `uint64_t tmp<tmp_id> = args[1] + args[0];` and return the next free id."""
    lhs = args[1]
    rhs = args[0]
    statement = "uint64_t tmp{0} = {1} + {2};".format(tmp_id, lhs, rhs)
    c(statement)
    return tmp_id + 1
108
def emit_udiv(tmp_id, args):
    """Emit a guarded unsigned division of args[1] by args[0].

    Three temporaries are emitted: the dividend, the divisor and the
    quotient, which is 0 when the divisor is zero.  Returns the next free id.
    """
    dividend_id = tmp_id
    divisor_id = tmp_id + 1
    quotient_id = tmp_id + 2
    c("uint64_t tmp{0} = {1};".format(dividend_id, args[1]))
    c("uint64_t tmp{0} = {1};".format(divisor_id, args[0]))
    # The generated expression tests the divisor to avoid a divide by zero.
    c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(quotient_id, divisor_id, dividend_id))
    return tmp_id + 3
115
def emit_umul(tmp_id, args):
    """Emit `uint64_t tmp<tmp_id> = args[1] * args[0];` and return the next free id."""
    lhs = args[1]
    rhs = args[0]
    statement = "uint64_t tmp{0} = {1} * {2};".format(tmp_id, lhs, rhs)
    c(statement)
    return tmp_id + 1
119
def emit_usub(tmp_id, args):
    """Emit `uint64_t tmp<tmp_id> = args[1] - args[0];` and return the next free id."""
    lhs = args[1]
    rhs = args[0]
    statement = "uint64_t tmp{0} = {1} - {2};".format(tmp_id, lhs, rhs)
    c(statement)
    return tmp_id + 1
123
def emit_umin(tmp_id, args):
    """Emit `uint64_t tmp<tmp_id> = MIN(args[1], args[0]);` and return the next free id."""
    first = args[1]
    second = args[0]
    statement = "uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, first, second)
    c(statement)
    return tmp_id + 1
127
# RPN equation operators: name -> (n operands, emitter)
ops = {
    "FADD": (2, emit_fadd),
    "FDIV": (2, emit_fdiv),
    "FMAX": (2, emit_fmax),
    "FMUL": (2, emit_fmul),
    "FSUB": (2, emit_fsub),
    "READ": (2, emit_read),
    "UADD": (2, emit_uadd),
    "UDIV": (2, emit_udiv),
    "UMUL": (2, emit_umul),
    "USUB": (2, emit_usub),
    "UMIN": (2, emit_umin),
}
141
def brkt(subexp):
    """Return *subexp* wrapped in parentheses if it contains a space, else unchanged."""
    return "(" + subexp + ")" if " " in subexp else subexp
147
def splice_bitwise_and(args):
    """Return the C expression `args[1] & args[0]`, bracketing operands that contain spaces."""
    return " & ".join((brkt(args[1]), brkt(args[0])))
150
def splice_logical_and(args):
    """Return the C expression `args[1] && args[0]`, bracketing operands that contain spaces."""
    return " && ".join((brkt(args[1]), brkt(args[0])))
153
def splice_ult(args):
    """Return the C expression `args[1] < args[0]`, bracketing operands that contain spaces."""
    return " < ".join((brkt(args[1]), brkt(args[0])))
156
def splice_ugte(args):
    """Return the C expression `args[1] >= args[0]`, bracketing operands that contain spaces."""
    return " >= ".join((brkt(args[1]), brkt(args[0])))
159
# RPN expression operators: name -> (n operands, splicer)
exp_ops = {
    "AND": (2, splice_bitwise_and),
    "UGTE": (2, splice_ugte),
    "ULT": (2, splice_ult),
    "&&": (2, splice_logical_and),
}
166
167
# Equation/expression variables that resolve to a C expression reading a
# field of brw->perfquery.sys_vars.
hw_vars = {
    "$EuCoresTotalCount": "brw->perfquery.sys_vars.n_eus",
    "$EuSlicesTotalCount": "brw->perfquery.sys_vars.n_eu_slices",
    "$EuSubslicesTotalCount": "brw->perfquery.sys_vars.n_eu_sub_slices",
    "$EuThreadsCount": "brw->perfquery.sys_vars.eu_threads_count",
    "$SliceMask": "brw->perfquery.sys_vars.slice_mask",
    "$SubsliceMask": "brw->perfquery.sys_vars.subslice_mask",
    "$GpuTimestampFrequency": "brw->perfquery.sys_vars.timestamp_frequency",
    "$GpuMinFrequency": "brw->perfquery.sys_vars.gt_min_freq",
    "$GpuMaxFrequency": "brw->perfquery.sys_vars.gt_max_freq",
    "$SkuRevisionId": "brw->perfquery.sys_vars.revision",
}
179
def output_rpn_equation_code(set, counter, equation, counter_vars):
    """Emit the C statements that evaluate an RPN equation and return its value.

    Tokens are pushed on a stack; whenever an operator from `ops` reaches the
    top, its operands are popped (the first one popped becomes args[0]), the
    operator's emitter writes the C code, and the resulting temporary name is
    pushed back.  Operands starting with "$" are replaced by the matching
    `hw_vars` expression or, when listed in *counter_vars*, by a call to that
    counter's generated read function.

    set, counter -- objects whose .get('name') is used in error messages
    equation     -- whitespace-separated RPN equation
    counter_vars -- mapping of "$<symbol>" names that may be referenced

    Raises Exception for an unresolvable variable, for an operator with too
    few operands, or when the equation does not reduce to a single value.
    """
    c("/* RPN equation: " + equation + " */")
    stack = []
    tmp_id = 0

    for token in equation.split():
        stack.append(token)
        while stack and stack[-1] in ops:
            op = stack.pop()
            argc, callback = ops[op]
            # Report malformed equations with context rather than letting
            # list.pop() fail with a bare IndexError.
            if len(stack) < argc:
                raise Exception("Too few operands for " + op + " in equation " + equation + " for " + set.get('name') + " :: " + counter.get('name'))
            args = []
            for _ in range(argc):
                operand = stack.pop()
                if operand[0] == "$":
                    if operand in hw_vars:
                        operand = hw_vars[operand]
                    elif operand in counter_vars:
                        operand = read_funcs[operand[1:]] + "(brw, query, accumulator)"
                    else:
                        raise Exception("Failed to resolve variable " + operand + " in equation " + equation + " for " + set.get('name') + " :: " + counter.get('name'))
                args.append(operand)

            tmp_id = callback(tmp_id, args)

            # The emitter's last temporary holds the operator's result.
            stack.append("tmp{0}".format(tmp_id - 1))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn code for " + set.get('name') + " :: " +
                        counter.get('name') + ".\nThis is probably due to some unhandled RPN function, in the equation \"" +
                        equation + "\"")

    value = stack[-1]

    # An equation made of a single variable never goes through the operand
    # resolution above, so resolve it here.
    if value in hw_vars:
        value = hw_vars[value]
    if value in counter_vars:
        value = read_funcs[value[1:]] + "(brw, query, accumulator)"

    c("\nreturn " + value + ";")
223
def splice_rpn_expression(set, counter, expression):
    """Convert an RPN expression into a single infix C expression string.

    Tokens are pushed on a stack; whenever an operator from `exp_ops` reaches
    the top, its operands are popped (the first one popped becomes args[0])
    and replaced by the spliced sub-expression.  Operands starting with "$"
    must be hardware variables from `hw_vars`.

    set        -- object whose .get('name') is used in error messages
    counter    -- either an object with .get('name') or a plain name string;
                  only used in error messages
    expression -- whitespace-separated RPN expression

    Raises Exception for an unresolvable variable, for an operator with too
    few operands, or when the expression does not reduce to a single value.
    """
    def origin():
        # Callers in this file pass a plain name string as `counter`, on
        # which .get('name') would raise AttributeError and hide the real
        # error.  Built lazily: only the error paths need it.
        name = counter.get('name') if hasattr(counter, 'get') else counter
        return "{0} :: {1}".format(set.get('name'), name)

    stack = []

    for token in expression.split():
        stack.append(token)
        while stack and stack[-1] in exp_ops:
            op = stack.pop()
            argc, callback = exp_ops[op]
            if len(stack) < argc:
                raise Exception("Too few operands for " + op + " in expression " + expression + " for " + origin())
            args = []
            for _ in range(argc):
                operand = stack.pop()
                if operand[0] == "$":
                    if operand in hw_vars:
                        operand = hw_vars[operand]
                    else:
                        raise Exception("Failed to resolve variable " + operand + " in expression " + expression + " for " + origin())
                args.append(operand)

            stack.append(callback(args))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn expression for " + origin() +
                        ".\nThis is probably due to some unhandled RPN operation, in the expression \"" +
                        expression + "\"")

    return stack[-1]
253
def output_counter_read(set, counter, counter_vars):
    """Emit the static C function that computes one counter's value.

    The function is named `<chipset>__<set>__<counter>__read`, takes
    (brw, query, accumulator) and its body is generated from the counter's
    'equation' attribute.  Returns the function name.
    """
    c("\n")
    header_comment = "/* {0} :: {1} */".format(set.get('name'), counter.get('name'))
    c(header_comment)

    # Only "uint64" is translated to its C spelling; any other data type is
    # used as the C return type verbatim.
    return_type = counter.get('data_type')
    if return_type == "uint64":
        return_type = "uint64_t"
    c("static " + return_type)

    read_sym = "{0}__{1}__{2}__read".format(set.get('chipset').lower(),
                                            set.get('underscore_name'),
                                            counter.get('underscore_name'))
    # Continuation parameters line up just past the opening parenthesis.
    param_column = len(read_sym) + 1
    c(read_sym + "(struct brw_context *brw,\n")
    c_indent(param_column)
    c("const struct brw_perf_query_info *query,\n")
    c("uint64_t *accumulator)\n")
    c_outdent(param_column)

    c("{")
    c_indent(3)

    output_rpn_equation_code(set, counter, counter.get('equation'), counter_vars)

    c_outdent(3)
    c("}")

    return read_sym
278
def output_counter_max(set, counter, counter_vars):
    """Return the C text to assign to a counter's raw_max, emitting code if needed.

    * no 'max_equation' attribute -> "0; /* undefined */"
    * a plain number               -> that number followed by ";"
    * references a "$" variable that is not in `hw_vars`
                                   -> "0; /* unsupported (varies over time) */"
    * otherwise a static `<chipset>__<set>__<counter>__max(brw)` function is
      emitted from the equation and a call to it is returned.
    """
    max_eq = counter.get('max_equation')
    if not max_eq:
        return "0; /* undefined */"

    # A numeric literal can be used directly, no function required.
    try:
        float(max_eq)
    except ValueError:
        pass
    else:
        return max_eq + ";"

    # We can only report constant maximum values via INTEL_performance_query
    if any(token[0] == '$' and token not in hw_vars for token in max_eq.split()):
        return "0; /* unsupported (varies over time) */"

    c("\n")
    header_comment = "/* {0} :: {1} */".format(set.get('name'), counter.get('name'))
    c(header_comment)

    return_type = counter.get('data_type')
    if return_type == "uint64":
        return_type = "uint64_t"
    c("static " + return_type)

    max_sym = "{0}__{1}__{2}__max".format(set.get('chipset').lower(),
                                          set.get('underscore_name'),
                                          counter.get('underscore_name'))
    c(max_sym + "(struct brw_context *brw)\n")

    c("{")
    c_indent(3)

    output_rpn_equation_code(set, counter, max_eq, counter_vars)

    c_outdent(3)
    c("}")

    return max_sym + "(brw);"
315
# Size in bytes used for each C type when laying out counter values.
c_type_sizes = {
    "uint32_t": 4,
    "uint64_t": 8,
    "float": 4,
    "double": 8,
    "bool": 4,
}


def sizeof(c_type):
    """Return the size in bytes recorded for *c_type*; raises KeyError if unknown."""
    size = c_type_sizes[c_type]
    return size
319
def pot_align(base, pot_alignment):
    """Round *base* up to the next multiple of the power-of-two *pot_alignment*."""
    mask = pot_alignment - 1
    return (base + mask) & ~mask
322
# Semantic types that are reported under a different name; any type not
# listed here is used unchanged.
semantic_type_map = dict(
    duration="raw",
    ratio="event",
)
327
def output_availability(set, availability, counter_name):
    """Emit the opening `if (...) {` line(s) for an availability expression.

    The RPN *availability* expression is spliced into C; when it contains
    " && " it is broken at each one, with one clause per line and the
    continuation lines indented by four extra columns.  The caller emits the
    guarded body and the closing brace.  *counter_name* is handed to
    splice_rpn_expression as its `counter` argument.
    """
    clauses = splice_rpn_expression(set, counter_name, availability).split(' && ')
    first_clause = clauses[0]

    if len(clauses) == 1:
        c("if (" + first_clause + ") {")
        return

    c("if (" + first_clause + " &&")
    c_indent(4)
    for clause in clauses[1:-1]:
        c(clause + " &&")
    c(clauses[-1] + ") {")
    c_outdent(4)
340 c_outdent(4)
341
342
def output_counter_report(set, counter, current_offset):
    """Emit the C statements that fill in one brw_perf_query_counter entry.

    The statements are wrapped in an `if` block when the counter has an
    'availability' attribute.  The counter's offset is *current_offset*
    rounded up to the size of its C type.

    Returns the offset just past this counter's value.
    """
    data_type = counter.get('data_type')
    data_type_uc = data_type.upper()
    # uint32/uint64 need the "_t" suffix to become C type names.
    c_type = data_type + "_t" if "uint" in data_type else data_type

    semantic_type = counter.get('semantic_type')
    semantic_type_uc = semantic_type_map.get(semantic_type, semantic_type).upper()

    c("\n")

    availability = counter.get('availability')
    if availability:
        output_availability(set, availability, counter.get('name'))
        c_indent(3)

    symbol = counter.get('symbol_name')
    c("counter = &query->counters[query->n_counters++];\n")
    c("counter->oa_counter_read_" + data_type + " = " + read_funcs[symbol] + ";\n")
    c("counter->name = \"" + counter.get('name') + "\";\n")
    c("counter->desc = \"" + counter.get('description') + "\";\n")
    c("counter->type = GL_PERFQUERY_COUNTER_" + semantic_type_uc + "_INTEL;\n")
    c("counter->data_type = GL_PERFQUERY_COUNTER_DATA_" + data_type_uc + "_INTEL;\n")
    # max_values entries already carry their own terminating ";".
    c("counter->raw_max = " + max_values[symbol] + "\n")

    value_size = sizeof(c_type)
    aligned_offset = pot_align(current_offset, value_size)
    c("counter->offset = " + str(aligned_offset) + ";\n")
    c("counter->size = sizeof(" + c_type + ");\n")

    if availability:
        c_outdent(3)
        c("}")

    return aligned_offset + value_size
381
382
# <register_config type="..."> value -> name of the register array it fills
# in the generated query structure.
register_types = dict(
    FLEX='flex_regs',
    NOA='mux_regs',
    OA='b_counter_regs',
)
388
def compute_register_lengths(set):
    """Count the <register> children of *set* per register array.

    Returns a dict mapping each register array name (see `register_types`)
    to the total number of registers over all of the set's
    <register_config> elements of that type.
    """
    totals = {}
    for config in set.findall('register_config'):
        array_name = register_types[config.get('type')]
        totals[array_name] = totals.get(array_name, 0) + len(config.findall('register'))
    return totals
400
401
def generate_register_configs(set):
    """Emit the C statements that append every register of *set* to the query.

    Each <register_config> contributes one assignment per <register>,
    wrapped in an `if` block when the config has an 'availability'
    attribute, and is followed by a blank line.
    """
    for config in set.findall('register_config'):
        config_type = config.get('type')
        array_name = register_types[config_type]

        availability = config.get('availability')
        if availability:
            output_availability(set, availability, config_type + ' register config')
            c_indent(3)

        for reg in config.findall('register'):
            c("query->%s[query->n_%s++] = (struct brw_perf_query_register_prog) { .reg = %s, .val = %s };" %
              (array_name, array_name, reg.get('address'), reg.get('value')))

        if availability:
            c_outdent(3)
            c("}")
        c("\n")
419 c("\n")
420
421
def _emit_file_preambles(chipset):
    """Write the license banner and the fixed declarations/includes to both outputs."""
    banner = textwrap.dedent("""\
        /* Autogenerated file, DO NOT EDIT manually! generated by {}
         *
         * Copyright (c) 2015 Intel Corporation
         *
         * Permission is hereby granted, free of charge, to any person obtaining a
         * copy of this software and associated documentation files (the "Software"),
         * to deal in the Software without restriction, including without limitation
         * the rights to use, copy, modify, merge, publish, distribute, sublicense,
         * and/or sell copies of the Software, and to permit persons to whom the
         * Software is furnished to do so, subject to the following conditions:
         *
         * The above copyright notice and this permission notice (including the next
         * paragraph) shall be included in all copies or substantial portions of the
         * Software.
         *
         * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
         * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
         * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
         * DEALINGS IN THE SOFTWARE.
         */

        """).format(os.path.basename(__file__))

    h(banner)
    h(textwrap.dedent("""\
        #pragma once

        struct brw_context;

        """))

    c(banner)
    c(textwrap.dedent("""\
        #include <stdint.h>
        #include <stdbool.h>

        #include "util/hash_table.h"

        """))

    c("#include \"brw_oa_" + chipset + ".h\"")

    c(textwrap.dedent("""\
        #include "brw_context.h"
        #include "brw_performance_query.h"


        #define MIN(a, b) ((a < b) ? (a) : (b))
        #define MAX(a, b) ((a > b) ? (a) : (b))

        """))


def _emit_metric_set(metric_set, chipset):
    """Emit the counter functions, query definition and register function for one <set>."""
    global max_values
    global read_funcs

    max_values = {}
    read_funcs = {}
    counter_vars = {}
    counters = metric_set.findall("counter")
    underscore_name = metric_set.get('underscore_name')

    # Raised explicitly (not asserted) so the check survives `python -O`.
    if metric_set.get('chipset').lower() != chipset:
        raise Exception("Metric set {0} is for chipset {1}, not {2}".format(
            metric_set.get('name'), metric_set.get('chipset'), chipset))

    for counter in counters:
        # Max equations are generated with no counter variables in scope;
        # read equations may reference the counters defined before them.
        empty_vars = {}
        read_funcs[counter.get('symbol_name')] = output_counter_read(metric_set, counter, counter_vars)
        max_values[counter.get('symbol_name')] = output_counter_max(metric_set, counter, empty_vars)
        counter_vars["$" + counter.get('symbol_name')] = counter

    c("\n")
    register_lengths = compute_register_lengths(metric_set)
    # .items() rather than the Python-2-only .iteritems().
    for reg_type, reg_length in register_lengths.items():
        c("static struct brw_perf_query_register_prog {0}_{1}_{2}[{3}];".format(chipset,
                                                                                 underscore_name,
                                                                                 reg_type, reg_length))

    c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(chipset, underscore_name, len(counters)))
    c("static struct brw_perf_query_info " + chipset + "_" + underscore_name + "_query = {\n")
    c_indent(3)

    c(".kind = OA_COUNTERS,\n")
    c(".name = \"" + metric_set.get('name') + "\",\n")
    c(".guid = \"" + metric_set.get('hw_config_guid') + "\",\n")

    c(".counters = {0}_{1}_query_counters,".format(chipset, underscore_name))
    c(".n_counters = 0,")
    c(".oa_metrics_set_id = 0, /* determined at runtime, via sysfs */")

    if chipset == "hsw":
        c(textwrap.dedent("""\
            .oa_format = I915_OA_FORMAT_A45_B8_C8,

            /* Accumulation buffer offsets... */
            .gpu_time_offset = 0,
            .a_offset = 1,
            .b_offset = 46,
            .c_offset = 54,
            """))
    else:
        c(textwrap.dedent("""\
            .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

            /* Accumulation buffer offsets... */
            .gpu_time_offset = 0,
            .gpu_clock_offset = 1,
            .a_offset = 2,
            .b_offset = 38,
            .c_offset = 46,
            """))

    for reg_type in register_lengths:
        c(".{0} = {1}_{2}_{3},".format(reg_type, chipset, underscore_name, reg_type))
        c(".n_{0} = 0, /* Determined at runtime */".format(reg_type))

    c_outdent(3)
    c("};\n")

    c("\nstatic void\n")
    c("register_" + underscore_name + "_counter_query(struct brw_context *brw)\n")
    c("{\n")
    c_indent(3)

    c("static struct brw_perf_query_info *query = &" + chipset + "_" + underscore_name + "_query;\n")
    c("struct brw_perf_query_counter *counter;\n")

    c("\n")
    c("/* Note: we're assuming there can't be any variation in the definition ")
    c(" * of a query between contexts so it's ok to describe a query within a ")
    c(" * global variable which only needs to be initialized once... */")
    c("\nif (!query->data_size) {")
    c_indent(3)

    generate_register_configs(metric_set)

    offset = 0
    for counter in counters:
        offset = output_counter_report(metric_set, counter, offset)

    c("\nquery->data_size = counter->offset + counter->size;\n")

    c_outdent(3)
    c("}")

    c("\n_mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);")

    c_outdent(3)
    c("}\n")


def _emit_registration_entry_point(metric_sets, chipset):
    """Emit brw_oa_register_queries_<chipset>(): its prototype and its definition."""
    h("void brw_oa_register_queries_" + chipset + "(struct brw_context *brw);\n")

    c("\nvoid")
    c("brw_oa_register_queries_" + chipset + "(struct brw_context *brw)")
    c("{")
    c_indent(3)

    for metric_set in metric_sets:
        c("register_" + metric_set.get('underscore_name') + "_counter_query(brw);")

    c_outdent(3)
    c("}")


def main():
    """Generate the C source and/or header for one chipset's OA metric sets.

    Command line: the XML metrics description, optional --header and --code
    output paths, and the required --chipset.  An output that is not
    requested is simply not written.  Raises Exception when a <set> in the
    XML is for a different chipset than the one requested.
    """
    global c_file
    global header_file

    parser = argparse.ArgumentParser()
    parser.add_argument("xml", help="XML description of metrics")
    parser.add_argument("--header", help="Header file to write")
    parser.add_argument("--code", help="C file to write")
    parser.add_argument("--chipset", help="Chipset to generate code for", required=True)

    args = parser.parse_args()

    chipset = args.chipset.lower()

    try:
        if args.header:
            header_file = open(args.header, 'w')

        if args.code:
            c_file = open(args.code, 'w')

        tree = et.parse(args.xml)
        metric_sets = tree.findall(".//set")

        _emit_file_preambles(chipset)
        for metric_set in metric_sets:
            _emit_metric_set(metric_set, chipset)
        _emit_registration_entry_point(metric_sets, chipset)
    finally:
        # Close (and so flush) only what this call opened, even on error,
        # and drop the globals so later c()/h() calls become no-ops instead
        # of writing to a closed file.
        if args.code and c_file:
            c_file.close()
            c_file = None
        if args.header and header_file:
            header_file.close()
            header_file = None
609
610
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()