i965: Calculate thread_count in brw_alloc_stage_scratch
[mesa.git] / src / mesa / drivers / dri / i965 / brw_oa.py
1 # Copyright (c) 2015-2017 Intel Corporation
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining a
4 # copy of this software and associated documentation files (the "Software"),
5 # to deal in the Software without restriction, including without limitation
6 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
7 # and/or sell copies of the Software, and to permit persons to whom the
8 # Software is furnished to do so, subject to the following conditions:
9 #
10 # The above copyright notice and this permission notice (including the next
11 # paragraph) shall be included in all copies or substantial portions of the
12 # Software.
13 #
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
21
22 import argparse
23 import os
24 import sys
25 import textwrap
26
27 import xml.etree.cElementTree as et
28
# Per-counter lookup tables; main() rebinds both for each metric set.
max_values = dict()
read_funcs = dict()

# Destination for generated C code (None disables all C output) and the
# number of columns of indentation currently applied to emitted lines.
c_file = None
_c_indent = 0


def c(*args):
    """Write the space-joined arguments to the C file, line by line.

    Every line of the joined text is prefixed with the current
    indentation, stripped of trailing whitespace and terminated with a
    newline.  Nothing happens when no C file is open.
    """
    if not c_file:
        return
    padding = ''.rjust(_c_indent)
    joined = ' '.join(str(arg) for arg in args)
    for piece in joined.splitlines():
        c_file.write((padding + piece).rstrip() + "\n")


def c_line_start(code):
    """Write *code* at the current indentation without a trailing newline."""
    if not c_file:
        return
    c_file.write(''.rjust(_c_indent) + code)


def c_raw(code):
    """Write *code* to the C file verbatim (no indentation, no newline)."""
    if not c_file:
        return
    c_file.write(code)


def c_indent(n):
    """Increase the C indentation by *n* columns."""
    global _c_indent
    _c_indent += n


def c_outdent(n):
    """Decrease the C indentation by *n* columns."""
    global _c_indent
    _c_indent -= n
56
# Destination for the generated header (None disables all header output)
# and the number of columns of indentation applied to emitted lines.
header_file = None
_h_indent = 0


def h(*args):
    """Write the space-joined arguments to the header file, line by line.

    Every line of the joined text is prefixed with the current header
    indentation, stripped of trailing whitespace and terminated with a
    newline.  Nothing happens when no header file is open.
    """
    if header_file:
        code = ' '.join(map(str, args))
        for line in code.splitlines():
            text = ''.rjust(_h_indent) + line
            header_file.write(text.rstrip() + "\n")


def h_indent(n):
    """Increase the header indentation by *n* columns."""
    # The counter being rebound is _h_indent, so that is the name that has
    # to be declared global; declaring _c_indent made the assignment below
    # raise UnboundLocalError.
    global _h_indent
    _h_indent = _h_indent + n


def h_outdent(n):
    """Decrease the header indentation by *n* columns."""
    global _h_indent
    _h_indent = _h_indent - n
73
74
# Every emitter receives the next free temporary id and the operands in the
# order they were popped from the RPN stack (so args[1] is the left-hand
# operand and args[0] the right-hand one), writes C statements through c()
# and returns the next free temporary id.  The result of the operation is
# always held by the last temporary written.

def emit_fadd(tmp_id, args):
    """Emit a double addition."""
    rhs, lhs = args[0], args[1]
    c("double tmp{0} = {1} + {2};".format(tmp_id, lhs, rhs))
    return tmp_id + 1


def emit_fdiv(tmp_id, args):
    """Emit a double division that yields 0 when the divisor is zero."""
    numerator_id, divisor_id, result_id = tmp_id, tmp_id + 1, tmp_id + 2
    c("double tmp{0} = {1};".format(numerator_id, args[1]))
    c("double tmp{0} = {1};".format(divisor_id, args[0]))
    c("double tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(result_id, divisor_id, numerator_id))
    return result_id + 1


def emit_fmax(tmp_id, args):
    """Emit the maximum of two double operands."""
    first_id, second_id, result_id = tmp_id, tmp_id + 1, tmp_id + 2
    c("double tmp{0} = {1};".format(first_id, args[1]))
    c("double tmp{0} = {1};".format(second_id, args[0]))
    c("double tmp{0} = MAX(tmp{1}, tmp{2});".format(result_id, first_id, second_id))
    return result_id + 1


def emit_fmul(tmp_id, args):
    """Emit a double multiplication."""
    rhs, lhs = args[0], args[1]
    c("double tmp{0} = {1} * {2};".format(tmp_id, lhs, rhs))
    return tmp_id + 1


def emit_fsub(tmp_id, args):
    """Emit a double subtraction."""
    rhs, lhs = args[0], args[1]
    c("double tmp{0} = {1} - {2};".format(tmp_id, lhs, rhs))
    return tmp_id + 1


def emit_read(tmp_id, args):
    """Emit a load from the accumulator at a lower-cased section offset plus an index."""
    index = args[0]
    section = args[1].lower()
    c("uint64_t tmp{0} = accumulator[query->{1}_offset + {2}];".format(tmp_id, section, index))
    return tmp_id + 1


def emit_uadd(tmp_id, args):
    """Emit a uint64_t addition."""
    rhs, lhs = args[0], args[1]
    c("uint64_t tmp{0} = {1} + {2};".format(tmp_id, lhs, rhs))
    return tmp_id + 1


def emit_udiv(tmp_id, args):
    """Emit a uint64_t division that yields 0 when the divisor is zero."""
    numerator_id, divisor_id, result_id = tmp_id, tmp_id + 1, tmp_id + 2
    c("uint64_t tmp{0} = {1};".format(numerator_id, args[1]))
    c("uint64_t tmp{0} = {1};".format(divisor_id, args[0]))
    c("uint64_t tmp{0} = tmp{1} ? tmp{2} / tmp{1} : 0;".format(result_id, divisor_id, numerator_id))
    return result_id + 1


def emit_umul(tmp_id, args):
    """Emit a uint64_t multiplication."""
    rhs, lhs = args[0], args[1]
    c("uint64_t tmp{0} = {1} * {2};".format(tmp_id, lhs, rhs))
    return tmp_id + 1


def emit_usub(tmp_id, args):
    """Emit a uint64_t subtraction."""
    rhs, lhs = args[0], args[1]
    c("uint64_t tmp{0} = {1} - {2};".format(tmp_id, lhs, rhs))
    return tmp_id + 1


def emit_umin(tmp_id, args):
    """Emit the minimum of two uint64_t operands."""
    rhs, lhs = args[0], args[1]
    c("uint64_t tmp{0} = MIN({1}, {2});".format(tmp_id, lhs, rhs))
    return tmp_id + 1


# RPN operator name -> (number of operands, emitter)
ops = {
    "FADD": (2, emit_fadd),
    "FDIV": (2, emit_fdiv),
    "FMAX": (2, emit_fmax),
    "FMUL": (2, emit_fmul),
    "FSUB": (2, emit_fsub),
    "READ": (2, emit_read),
    "UADD": (2, emit_uadd),
    "UDIV": (2, emit_udiv),
    "UMUL": (2, emit_umul),
    "USUB": (2, emit_usub),
    "UMIN": (2, emit_umin),
}
141
def brkt(subexp):
    """Return *subexp* wrapped in parentheses if it contains a space, else unchanged."""
    return "(" + subexp + ")" if " " in subexp else subexp


# The splicers receive operands in pop order: args[1] is the left-hand
# side of the resulting C expression and args[0] the right-hand side.

def splice_bitwise_and(args):
    """Build a C bitwise-AND expression from two operands."""
    return " & ".join((brkt(args[1]), brkt(args[0])))


def splice_logical_and(args):
    """Build a C logical-AND expression from two operands."""
    return " && ".join((brkt(args[1]), brkt(args[0])))


def splice_ult(args):
    """Build a C less-than comparison from two operands."""
    return " < ".join((brkt(args[1]), brkt(args[0])))


def splice_ugte(args):
    """Build a C greater-or-equal comparison from two operands."""
    return " >= ".join((brkt(args[1]), brkt(args[0])))


# RPN expression operator -> (number of operands, splicer)
exp_ops = {
    "AND": (2, splice_bitwise_and),
    "UGTE": (2, splice_ugte),
    "ULT": (2, splice_ult),
    "&&": (2, splice_logical_and),
}
166
167
# Equation variable -> C expression that is substituted for it in the
# generated code.
hw_vars = {
    "$EuCoresTotalCount": "brw->perfquery.sys_vars.n_eus",
    "$EuSlicesTotalCount": "brw->perfquery.sys_vars.n_eu_slices",
    "$EuSubslicesTotalCount": "brw->perfquery.sys_vars.n_eu_sub_slices",
    "$EuThreadsCount": "brw->perfquery.sys_vars.eu_threads_count",
    "$SliceMask": "brw->perfquery.sys_vars.slice_mask",
    "$SubsliceMask": "brw->perfquery.sys_vars.subslice_mask",
    "$GpuTimestampFrequency": "brw->perfquery.sys_vars.timestamp_frequency",
    "$GpuMinFrequency": "brw->perfquery.sys_vars.gt_min_freq",
    "$GpuMaxFrequency": "brw->perfquery.sys_vars.gt_max_freq",
}
178
def output_rpn_equation_code(set, counter, equation, counter_vars):
    """Emit the C statements evaluating an RPN *equation*, ending in a return.

    Tokens are pushed on a stack; whenever the top of the stack names an
    entry of ``ops`` the operator and its operands are popped, the emitter
    writes the C code and the temporary holding the result is pushed back.

    set, counter -- objects exposing ``get('name')``; only used to build
                    error messages.
    equation     -- whitespace separated RPN equation.
    counter_vars -- mapping whose keys are the ``$name`` variables that may
                    be resolved through ``read_funcs``.

    Raises Exception when an operator lacks operands, when a ``$`` variable
    is neither in ``hw_vars`` nor in ``counter_vars``, or when the equation
    does not reduce to exactly one value.
    """
    def location():
        # Built lazily so the success path never touches the names.
        return set.get('name') + " :: " + counter.get('name')

    def unresolved(variable):
        return Exception("Failed to resolve variable " + variable + " in equation " + equation + " for " + location())

    c("/* RPN equation: " + equation + " */")
    stack = []
    tmp_id = 0

    for token in equation.split():
        stack.append(token)
        while stack and stack[-1] in ops:
            op = stack.pop()
            argc, callback = ops[op]
            if len(stack) < argc:
                # Without this check the pop() below fails with a bare
                # IndexError that names neither the equation nor the counter.
                raise Exception("Too few operands for " + op + " in equation \"" + equation + "\" for " + location())

            args = []
            for _ in range(argc):
                operand = stack.pop()
                if operand[0] == "$":
                    if operand in hw_vars:
                        operand = hw_vars[operand]
                    elif operand in counter_vars:
                        operand = read_funcs[operand[1:]] + "(brw, query, accumulator)"
                    else:
                        raise unresolved(operand)
                args.append(operand)

            tmp_id = callback(tmp_id, args)

            # The emitter's result lives in the last temporary it wrote.
            stack.append("tmp{0}".format(tmp_id - 1))

    if len(stack) != 1:
        raise Exception("Spurious empty rpn code for " + set.get('name') + " :: " +
                        counter.get('name') + ".\nThis is probably due to some unhandled RPN function, in the equation \"" +
                        equation + "\"")

    value = stack[-1]

    if value in hw_vars:
        value = hw_vars[value]
    elif value in counter_vars:
        value = read_funcs[value[1:]] + "(brw, query, accumulator)"
    elif value[0] == "$":
        # A lone unresolved variable would otherwise be written verbatim
        # into the generated C.
        raise unresolved(value)

    c("\nreturn " + value + ";")
222
def splice_rpn_expression(set, counter, expression):
    """Convert an RPN *expression* into an infix C expression string.

    Operators come from ``exp_ops``; operands starting with ``$`` that are
    consumed by an operator must be ``hw_vars`` entries and are replaced by
    their C expression.  *set* and *counter* are only used in error
    messages.  Raises Exception for an unknown variable or when the
    expression does not reduce to a single value.
    """
    pending = []

    for word in expression.split():
        pending.append(word)
        while pending and pending[-1] in exp_ops:
            arity, splicer = exp_ops[pending.pop()]
            operands = []
            for _ in range(arity):
                item = pending.pop()
                if item[0] == "$":
                    if item not in hw_vars:
                        raise Exception("Failed to resolve variable " + item + " in expression " + expression + " for " + set.get('name') + " :: " + counter.get('name'))
                    item = hw_vars[item]
                operands.append(item)

            pending.append(splicer(operands))

    if len(pending) != 1:
        raise Exception("Spurious empty rpn expression for " + set.get('name') + " :: " +
                        counter.get('name') + ".\nThis is probably due to some unhandled RPN operation, in the expression \"" +
                        expression + "\"")

    return pending[-1]
252
def output_counter_read(set, counter, counter_vars):
    """Emit the static C function that evaluates a counter's equation.

    Returns the name of the emitted function, built from the set's
    ``chipset`` and ``underscore_name`` and the counter's
    ``underscore_name``.
    """
    c("\n")
    c("/* {0} :: {1} */".format(set.get('name'), counter.get('name')))

    declared_type = counter.get('data_type')
    ret_type = "uint64_t" if declared_type == "uint64" else declared_type
    c("static " + ret_type)

    read_sym = "{0}__{1}__{2}__read".format(set.get('chipset').lower(),
                                            set.get('underscore_name'),
                                            counter.get('underscore_name'))

    # Align the remaining parameters under the first one.
    hang = len(read_sym) + 1
    c(read_sym + "(struct brw_context *brw,\n")
    c_indent(hang)
    c("const struct brw_perf_query_info *query,\n")
    c("uint64_t *accumulator)\n")
    c_outdent(hang)

    c("{")
    c_indent(3)
    output_rpn_equation_code(set, counter, counter.get('equation'), counter_vars)
    c_outdent(3)
    c("}")

    return read_sym
277
def output_counter_max(set, counter, counter_vars):
    """Return the C text to assign to a counter's ``raw_max``.

    The result always ends in ``;``.  A missing ``max_equation`` and an
    equation that uses a ``$`` variable outside ``hw_vars`` both yield
    ``0`` with an explanatory comment, a plain number is passed through,
    and anything else is emitted as a static C function whose call is
    returned.
    """
    max_eq = counter.get('max_equation')
    if not max_eq:
        return "0; /* undefined */"

    # A literal number needs no generated function.
    try:
        float(max_eq)
    except ValueError:
        pass
    else:
        return max_eq + ";"

    # We can only report constant maximum values via INTEL_performance_query
    if any(word[0] == '$' and word not in hw_vars for word in max_eq.split()):
        return "0; /* unsupported (varies over time) */"

    c("\n")
    c("/* {0} :: {1} */".format(set.get('name'), counter.get('name')))

    declared_type = counter.get('data_type')
    ret_type = "uint64_t" if declared_type == "uint64" else declared_type
    c("static " + ret_type)

    max_sym = "{0}__{1}__{2}__max".format(set.get('chipset').lower(),
                                          set.get('underscore_name'),
                                          counter.get('underscore_name'))
    c(max_sym + "(struct brw_context *brw)\n")

    c("{")
    c_indent(3)
    output_rpn_equation_code(set, counter, max_eq, counter_vars)
    c_outdent(3)
    c("}")

    return max_sym + "(brw);"
314
# Size in bytes assumed for each C type that can appear in a report.
c_type_sizes = {
    "uint32_t": 4,
    "uint64_t": 8,
    "float": 4,
    "double": 8,
    "bool": 4,
}


def sizeof(c_type):
    """Return the size in bytes recorded for *c_type* (KeyError if unknown)."""
    return c_type_sizes[c_type]


def pot_align(base, pot_alignment):
    """Round *base* up to a multiple of the power-of-two *pot_alignment*."""
    mask = pot_alignment - 1
    return (base + mask) & ~mask
321
# Semantic types that are reported under a different name; any other
# semantic type is used as-is.
semantic_type_map = dict(duration="raw", ratio="event")


def output_counter_report(set, counter, current_offset):
    """Emit the C code registering one counter in its query.

    When the counter has an ``availability`` expression the registration
    is wrapped in a matching ``if`` whose ``&&`` clauses are written one
    per line.  Returns the offset just past this counter's aligned slot.
    """
    data_type = counter.get('data_type')
    data_type_uc = data_type.upper()
    c_type = data_type + "_t" if "uint" in data_type else data_type

    semantic_type = counter.get('semantic_type')
    semantic_type_uc = semantic_type_map.get(semantic_type, semantic_type).upper()

    c("\n")

    availability = counter.get('availability')
    if availability:
        clauses = splice_rpn_expression(set, counter, availability).split(' && ')
        if len(clauses) == 1:
            c("if (" + clauses[0] + ") {")
        else:
            c("if (" + clauses[0] + " &&")
            c_indent(4)
            for clause in clauses[1:-1]:
                c(clause + " &&")
            c(clauses[-1] + ") {")
            c_outdent(4)
        c_indent(3)

    symbol = counter.get('symbol_name')
    c("counter = &query->counters[query->n_counters++];\n")
    c("counter->oa_counter_read_" + data_type + " = " + read_funcs[symbol] + ";\n")
    c("counter->name = \"" + counter.get('name') + "\";\n")
    c("counter->desc = \"" + counter.get('description') + "\";\n")
    c("counter->type = GL_PERFQUERY_COUNTER_" + semantic_type_uc + "_INTEL;\n")
    c("counter->data_type = GL_PERFQUERY_COUNTER_DATA_" + data_type_uc + "_INTEL;\n")
    c("counter->raw_max = " + max_values[symbol] + "\n")

    # Each counter is placed at an offset aligned to its own size.
    size = sizeof(c_type)
    current_offset = pot_align(current_offset, size)
    c("counter->offset = " + str(current_offset) + ";\n")
    c("counter->size = sizeof(" + c_type + ");\n")

    if availability:
        c_outdent(3)
        c("}")

    return current_offset + size
374
375 return current_offset + sizeof(c_type)
376
377
def _generate(tree, chipset):
    """Emit the header and C source for every <set> element of *tree*.

    Output goes through h() and c(), so it lands in whichever of the
    header/C files are currently open.  Rebinds the module-level
    ``max_values`` and ``read_funcs`` tables for each metric set.
    """
    global max_values
    global read_funcs

    # NOTE(review): the indentation inside the dedent() literals below was
    # reconstructed (C comment continuation lines are assumed to start with
    # one space) -- confirm against the upstream file before merging.
    license_text = textwrap.dedent("""\
        /* Autogenerated file, DO NOT EDIT manually! generated by {}
         *
         * Copyright (c) 2015 Intel Corporation
         *
         * Permission is hereby granted, free of charge, to any person obtaining a
         * copy of this software and associated documentation files (the "Software"),
         * to deal in the Software without restriction, including without limitation
         * the rights to use, copy, modify, merge, publish, distribute, sublicense,
         * and/or sell copies of the Software, and to permit persons to whom the
         * Software is furnished to do so, subject to the following conditions:
         *
         * The above copyright notice and this permission notice (including the next
         * paragraph) shall be included in all copies or substantial portions of the
         * Software.
         *
         * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
         * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
         * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
         * DEALINGS IN THE SOFTWARE.
         */

        """).format(os.path.basename(__file__))

    h(license_text)
    h(textwrap.dedent("""\
        #pragma once

        struct brw_context;

        """))

    c(license_text)
    c(textwrap.dedent("""\
        #include <stdint.h>
        #include <stdbool.h>

        #include "util/hash_table.h"

        """))

    c("#include \"brw_oa_" + chipset + ".h\"")

    c(textwrap.dedent("""\
        #include "brw_context.h"
        #include "brw_performance_query.h"


        #define MIN(a, b) ((a < b) ? (a) : (b))
        #define MAX(a, b) ((a > b) ? (a) : (b))

        """))

    metric_sets = tree.findall(".//set")

    for metric_set in metric_sets:
        max_values = {}
        read_funcs = {}
        counter_vars = {}
        counters = metric_set.findall("counter")

        assert metric_set.get('chipset').lower() == chipset

        for counter in counters:
            # Maximum equations may only use hardware variables, so they
            # are resolved against an empty set of counter variables.
            empty_vars = {}
            read_funcs[counter.get('symbol_name')] = output_counter_read(metric_set, counter, counter_vars)
            max_values[counter.get('symbol_name')] = output_counter_max(metric_set, counter, empty_vars)
            counter_vars["$" + counter.get('symbol_name')] = counter

        c("\nstatic struct brw_perf_query_counter {0}_{1}_query_counters[{2}];\n".format(chipset, metric_set.get('underscore_name'), len(counters)))
        c("static struct brw_perf_query_info " + chipset + "_" + metric_set.get('underscore_name') + "_query = {\n")
        c_indent(3)

        c(".kind = OA_COUNTERS,\n")
        c(".name = \"" + metric_set.get('name') + "\",\n")
        c(".guid = \"" + metric_set.get('hw_config_guid') + "\",\n")

        c(".counters = {0}_{1}_query_counters,".format(chipset, metric_set.get('underscore_name')))
        c(".n_counters = 0,")
        c(".oa_metrics_set_id = 0, /* determined at runtime, via sysfs */")

        if chipset == "hsw":
            c(textwrap.dedent("""\
                .oa_format = I915_OA_FORMAT_A45_B8_C8,

                /* Accumulation buffer offsets... */
                .gpu_time_offset = 0,
                .a_offset = 1,
                .b_offset = 46,
                .c_offset = 54,
                """))
        else:
            c(textwrap.dedent("""\
                .oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8,

                /* Accumulation buffer offsets... */
                .gpu_time_offset = 0,
                .gpu_clock_offset = 1,
                .a_offset = 2,
                .b_offset = 38,
                .c_offset = 46,
                """))

        c_outdent(3)
        c("};\n")

        c("\nstatic void\n")
        c("register_" + metric_set.get('underscore_name') + "_counter_query(struct brw_context *brw)\n")
        c("{\n")
        c_indent(3)

        c("static struct brw_perf_query_info *query = &" + chipset + "_" + metric_set.get('underscore_name') + "_query;\n")
        c("struct brw_perf_query_counter *counter;\n")

        c("\n")
        c("/* Note: we're assuming there can't be any variation in the definition ")
        c(" * of a query between contexts so it's ok to describe a query within a ")
        c(" * global variable which only needs to be initialized once... */")
        c("\nif (!query->data_size) {")
        c_indent(3)

        offset = 0
        for counter in counters:
            offset = output_counter_report(metric_set, counter, offset)

        c("\nquery->data_size = counter->offset + counter->size;\n")

        c_outdent(3)
        c("}")

        c("\n_mesa_hash_table_insert(brw->perfquery.oa_metrics_table, query->guid, query);")

        c_outdent(3)
        c("}\n")

    h("void brw_oa_register_queries_" + chipset + "(struct brw_context *brw);\n")

    c("\nvoid")
    c("brw_oa_register_queries_" + chipset + "(struct brw_context *brw)")
    c("{")
    c_indent(3)

    for metric_set in metric_sets:
        c("register_" + metric_set.get('underscore_name') + "_counter_query(brw);")

    c_outdent(3)
    c("}")


def main():
    """Parse the command line and generate the requested header and/or C file.

    Command line: the XML metrics description (positional), ``--chipset``
    (required) and the optional ``--header`` / ``--code`` output paths.
    An output that is not requested is simply not written.
    """
    global c_file
    global header_file

    parser = argparse.ArgumentParser()
    parser.add_argument("xml", help="XML description of metrics")
    parser.add_argument("--header", help="Header file to write")
    parser.add_argument("--code", help="C file to write")
    parser.add_argument("--chipset", help="Chipset to generate code for", required=True)

    args = parser.parse_args()

    chipset = args.chipset.lower()

    try:
        if args.header:
            header_file = open(args.header, 'w')

        if args.code:
            c_file = open(args.code, 'w')

        _generate(et.parse(args.xml), chipset)
    finally:
        # Close (and thereby flush) the outputs even when generation fails,
        # and reset the globals so h()/c() never write to a closed file.
        for stream in (header_file, c_file):
            if stream:
                stream.close()
        header_file = None
        c_file = None


if __name__ == '__main__':
    main()