2 * Copyright © 2020 Valve Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "radv_private.h"
30 #define SQTT_BUFFER_ALIGN_SHIFT 12
33 radv_thread_trace_get_info_offset(unsigned se
)
35 return sizeof(struct radv_thread_trace_info
) * se
;
39 radv_thread_trace_get_data_offset(struct radv_device
*device
, unsigned se
)
43 data_offset
= align64(sizeof(struct radv_thread_trace_info
) * 4,
44 1 << SQTT_BUFFER_ALIGN_SHIFT
);
45 data_offset
+= device
->thread_trace_buffer_size
* se
;
51 radv_thread_trace_get_info_va(struct radv_device
*device
, unsigned se
)
53 uint64_t va
= radv_buffer_get_va(device
->thread_trace_bo
);
54 return va
+ radv_thread_trace_get_info_offset(se
);
58 radv_thread_trace_get_data_va(struct radv_device
*device
, unsigned se
)
60 uint64_t va
= radv_buffer_get_va(device
->thread_trace_bo
);
61 return va
+ radv_thread_trace_get_data_offset(device
, se
);
65 radv_emit_thread_trace_start(struct radv_device
*device
,
66 struct radeon_cmdbuf
*cs
,
67 uint32_t queue_family_index
)
69 uint32_t shifted_size
= device
->thread_trace_buffer_size
>> SQTT_BUFFER_ALIGN_SHIFT
;
70 unsigned max_se
= device
->physical_device
->rad_info
.max_se
;
72 assert(device
->physical_device
->rad_info
.chip_class
>= GFX8
);
74 for (unsigned se
= 0; se
< max_se
; se
++) {
75 uint64_t data_va
= radv_thread_trace_get_data_va(device
, se
);
76 uint64_t shifted_va
= data_va
>> SQTT_BUFFER_ALIGN_SHIFT
;
78 /* Target SEx and SH0. */
79 radeon_set_uconfig_reg(cs
, R_030800_GRBM_GFX_INDEX
,
80 S_030800_SE_INDEX(se
) |
81 S_030800_SH_INDEX(0) |
82 S_030800_INSTANCE_BROADCAST_WRITES(1));
84 if (device
->physical_device
->rad_info
.chip_class
== GFX10
) {
85 /* Order seems important for the following 2 registers. */
86 radeon_set_privileged_config_reg(cs
, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE
,
87 S_008D04_SIZE(shifted_size
) |
88 S_008D04_BASE_HI(shifted_va
>> 32));
90 radeon_set_privileged_config_reg(cs
, R_008D00_SQ_THREAD_TRACE_BUF0_BASE
,
91 S_008D00_BASE_LO(shifted_va
));
93 radeon_set_privileged_config_reg(cs
, R_008D14_SQ_THREAD_TRACE_MASK
,
94 S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
97 S_008D14_SIMD_SEL(0));
99 radeon_set_privileged_config_reg(cs
, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK
,
100 S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC
|
101 V_008D18_REG_INCLUDE_SHDEC
|
102 V_008D18_REG_INCLUDE_GFXUDEC
|
103 V_008D18_REG_INCLUDE_CONTEXT
|
104 V_008D18_REG_INCLUDE_COMP
|
105 V_008D18_REG_INCLUDE_CONTEXT
|
106 V_008D18_REG_INCLUDE_CONFIG
) |
107 S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF
));
109 /* Should be emitted last (it enables thread traces). */
110 radeon_set_privileged_config_reg(cs
, R_008D1C_SQ_THREAD_TRACE_CTRL
,
112 S_008D1C_HIWATER(5) |
113 S_008D1C_UTIL_TIMER(1) |
114 S_008D1C_RT_FREQ(2) | /* 4096 clk */
115 S_008D1C_DRAW_EVENT_EN(1) |
116 S_008D1C_REG_STALL_EN(1) |
117 S_008D1C_SPI_STALL_EN(1) |
118 S_008D1C_SQ_STALL_EN(1) |
119 S_008D1C_REG_DROP_ON_STALL(0));
121 /* Order seems important for the following 4 registers. */
122 radeon_set_uconfig_reg(cs
, R_030CDC_SQ_THREAD_TRACE_BASE2
,
123 S_030CDC_ADDR_HI(shifted_va
>> 32));
125 radeon_set_uconfig_reg(cs
, R_030CC0_SQ_THREAD_TRACE_BASE
,
126 S_030CC0_ADDR(shifted_va
));
128 radeon_set_uconfig_reg(cs
, R_030CC4_SQ_THREAD_TRACE_SIZE
,
129 S_030CC4_SIZE(shifted_size
));
131 radeon_set_uconfig_reg(cs
, R_030CD4_SQ_THREAD_TRACE_CTRL
,
132 S_030CD4_RESET_BUFFER(1));
134 uint32_t thread_trace_mask
= S_030CC8_CU_SEL(2) |
136 S_030CC8_SIMD_EN(0xf) |
137 S_030CC8_VM_ID_MASK(0) |
138 S_030CC8_REG_STALL_EN(1) |
139 S_030CC8_SPI_STALL_EN(1) |
140 S_030CC8_SQ_STALL_EN(1);
142 if (device
->physical_device
->rad_info
.chip_class
< GFX9
) {
143 thread_trace_mask
|= S_030CC8_RANDOM_SEED(0xffff);
146 radeon_set_uconfig_reg(cs
, R_030CC8_SQ_THREAD_TRACE_MASK
,
149 /* Trace all tokens and registers. */
150 radeon_set_uconfig_reg(cs
, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK
,
151 S_030CCC_TOKEN_MASK(0xbfff) |
152 S_030CCC_REG_MASK(0xff) |
153 S_030CCC_REG_DROP_ON_STALL(0));
155 /* Enable SQTT perf counters for all CUs. */
156 radeon_set_uconfig_reg(cs
, R_030CD0_SQ_THREAD_TRACE_PERF_MASK
,
157 S_030CD0_SH0_MASK(0xffff) |
158 S_030CD0_SH1_MASK(0xffff));
160 radeon_set_uconfig_reg(cs
, R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2
,
161 S_030CE0_INST_MASK(0xffffffff));
163 radeon_set_uconfig_reg(cs
, R_030CEC_SQ_THREAD_TRACE_HIWATER
,
164 S_030CEC_HIWATER(4));
166 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
167 /* Reset thread trace status errors. */
168 radeon_set_uconfig_reg(cs
, R_030CE8_SQ_THREAD_TRACE_STATUS
,
169 S_030CE8_UTC_ERROR(0));
172 /* Enable the thread trace mode. */
173 uint32_t thread_trace_mode
= S_030CD8_MASK_PS(1) |
174 S_030CD8_MASK_VS(1) |
175 S_030CD8_MASK_GS(1) |
176 S_030CD8_MASK_ES(1) |
177 S_030CD8_MASK_HS(1) |
178 S_030CD8_MASK_LS(1) |
179 S_030CD8_MASK_CS(1) |
180 S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
183 if (device
->physical_device
->rad_info
.chip_class
== GFX9
) {
184 /* Count SQTT traffic in TCC perf counters. */
185 thread_trace_mode
|= S_030CD8_TC_PERF_EN(1);
188 radeon_set_uconfig_reg(cs
, R_030CD8_SQ_THREAD_TRACE_MODE
,
193 /* Restore global broadcasting. */
194 radeon_set_uconfig_reg(cs
, R_030800_GRBM_GFX_INDEX
,
195 S_030800_SE_BROADCAST_WRITES(1) |
196 S_030800_SH_BROADCAST_WRITES(1) |
197 S_030800_INSTANCE_BROADCAST_WRITES(1));
199 /* Start the thread trace with a different event based on the queue. */
200 if (queue_family_index
== RADV_QUEUE_COMPUTE
&&
201 device
->physical_device
->rad_info
.chip_class
>= GFX7
) {
202 radeon_set_sh_reg(cs
, R_00B878_COMPUTE_THREAD_TRACE_ENABLE
,
203 S_00B878_THREAD_TRACE_ENABLE(1));
205 radeon_emit(cs
, PKT3(PKT3_EVENT_WRITE
, 0, 0));
206 radeon_emit(cs
, EVENT_TYPE(V_028A90_THREAD_TRACE_START
) | EVENT_INDEX(0));
210 static const uint32_t gfx8_thread_trace_info_regs
[] =
212 R_030CE4_SQ_THREAD_TRACE_WPTR
,
213 R_030CE8_SQ_THREAD_TRACE_STATUS
,
214 R_008E40_SQ_THREAD_TRACE_CNTR
,
217 static const uint32_t gfx9_thread_trace_info_regs
[] =
219 R_030CE4_SQ_THREAD_TRACE_WPTR
,
220 R_030CE8_SQ_THREAD_TRACE_STATUS
,
221 R_030CF0_SQ_THREAD_TRACE_CNTR
,
224 static const uint32_t gfx10_thread_trace_info_regs
[] =
226 R_008D10_SQ_THREAD_TRACE_WPTR
,
227 R_008D20_SQ_THREAD_TRACE_STATUS
,
228 R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR
,
232 radv_copy_thread_trace_info_regs(struct radv_device
*device
,
233 struct radeon_cmdbuf
*cs
,
236 const uint32_t *thread_trace_info_regs
= NULL
;
238 switch (device
->physical_device
->rad_info
.chip_class
) {
240 thread_trace_info_regs
= gfx10_thread_trace_info_regs
;
243 thread_trace_info_regs
= gfx9_thread_trace_info_regs
;
246 thread_trace_info_regs
= gfx8_thread_trace_info_regs
;
249 unreachable("Unsupported chip_class");
252 /* Get the VA where the info struct is stored for this SE. */
253 uint64_t info_va
= radv_thread_trace_get_info_va(device
, se_index
);
255 /* Copy back the info struct one DWORD at a time. */
256 for (unsigned i
= 0; i
< 3; i
++) {
257 radeon_emit(cs
, PKT3(PKT3_COPY_DATA
, 4, 0));
258 radeon_emit(cs
, COPY_DATA_SRC_SEL(COPY_DATA_PERF
) |
259 COPY_DATA_DST_SEL(COPY_DATA_TC_L2
) |
260 COPY_DATA_WR_CONFIRM
);
261 radeon_emit(cs
, thread_trace_info_regs
[i
] >> 2);
262 radeon_emit(cs
, 0); /* unused */
263 radeon_emit(cs
, (info_va
+ i
* 4));
264 radeon_emit(cs
, (info_va
+ i
* 4) >> 32);
/* Emits the PM4 commands that stop SQ thread tracing and drain the trace
 * buffers.
 *
 * A stop trigger is emitted first (SH register write on compute queues with
 * GFX7+, THREAD_TRACE_STOP event otherwise), followed by an unconditional
 * THREAD_TRACE_FINISH event. Then, per shader engine, tracing is disabled
 * and the CP busy-waits on the SQTT status register before the info
 * registers are copied back to memory. Broadcasting is restored at the end.
 * The order of waits and register writes follows hardware requirements —
 * do not reorder.
 */
static void
radv_emit_thread_trace_stop(struct radv_device *device,
                            struct radeon_cmdbuf *cs,
                            uint32_t queue_family_index)
{
   unsigned max_se = device->physical_device->rad_info.max_se;

   assert(device->physical_device->rad_info.chip_class >= GFX8);

   /* Stop the thread trace with a different event based on the queue. */
   if (queue_family_index == RADV_QUEUE_COMPUTE &&
       device->physical_device->rad_info.chip_class >= GFX7) {
      radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
                        S_00B878_THREAD_TRACE_ENABLE(0));
   } else {
      radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
      radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
   }

   /* Tell the hardware to flush outstanding trace data. */
   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
   radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));

   for (unsigned se = 0; se < max_se; se++) {
      /* Target SEi and SH0. */
      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                             S_030800_SE_INDEX(se) |
                             S_030800_SH_INDEX(0) |
                             S_030800_INSTANCE_BROADCAST_WRITES(1));

      if (device->physical_device->rad_info.chip_class == GFX10) {
         /* Make sure to wait for the trace buffer. */
         radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait while FINISH_DONE == reference (0) */
         radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
         radeon_emit(cs, 0);
         radeon_emit(cs, 0); /* reference value */
         radeon_emit(cs, S_008D20_FINISH_DONE(1)); /* mask */
         radeon_emit(cs, 4); /* poll interval */

         /* Disable the thread trace mode. */
         radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
                                          S_008D1C_MODE(0));

         /* Wait for thread trace completion. */
         radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until BUSY == reference (0) */
         radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
         radeon_emit(cs, 0);
         radeon_emit(cs, 0); /* reference value */
         radeon_emit(cs, S_008D20_BUSY(1)); /* mask */
         radeon_emit(cs, 4); /* poll interval */
      } else {
         /* Disable the thread trace mode. */
         radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE,
                                S_030CD8_MODE(0));

         /* Wait for thread trace completion. */
         radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until BUSY == reference (0) */
         radeon_emit(cs, R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
         radeon_emit(cs, 0);
         radeon_emit(cs, 0); /* reference value */
         radeon_emit(cs, S_030CE8_BUSY(1)); /* mask */
         radeon_emit(cs, 4); /* poll interval */
      }

      /* Read back WPTR/STATUS/CNTR for this SE into the BO. */
      radv_copy_thread_trace_info_regs(device, cs, se);
   }

   /* Restore global broadcasting. */
   radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                          S_030800_SE_BROADCAST_WRITES(1) |
                          S_030800_SH_BROADCAST_WRITES(1) |
                          S_030800_INSTANCE_BROADCAST_WRITES(1));
}
345 radv_emit_thread_trace_userdata(struct radeon_cmdbuf
*cs
,
346 const void *data
, uint32_t num_dwords
)
348 const uint32_t *dwords
= (uint32_t *)data
;
350 while (num_dwords
> 0) {
351 uint32_t count
= MIN2(num_dwords
, 2);
353 radeon_set_uconfig_reg_seq(cs
, R_030D08_SQ_THREAD_TRACE_USERDATA_2
, count
);
354 radeon_emit_array(cs
, dwords
, count
);
362 radv_emit_spi_config_cntl(struct radv_device
*device
,
363 struct radeon_cmdbuf
*cs
, bool enable
)
365 if (device
->physical_device
->rad_info
.chip_class
>= GFX9
) {
366 uint32_t spi_config_cntl
= S_031100_GPR_WRITE_PRIORITY(0x2c688) |
367 S_031100_EXP_PRIORITY_ORDER(3) |
368 S_031100_ENABLE_SQG_TOP_EVENTS(enable
) |
369 S_031100_ENABLE_SQG_BOP_EVENTS(enable
);
371 if (device
->physical_device
->rad_info
.chip_class
== GFX10
)
372 spi_config_cntl
|= S_031100_PS_PKR_PRIORITY_CNTL(3);
374 radeon_set_uconfig_reg(cs
, R_031100_SPI_CONFIG_CNTL
, spi_config_cntl
);
376 /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
377 radeon_set_privileged_config_reg(cs
, R_009100_SPI_CONFIG_CNTL
,
378 S_009100_ENABLE_SQG_TOP_EVENTS(enable
) |
379 S_009100_ENABLE_SQG_BOP_EVENTS(enable
));
/* Emits a full wait-for-idle + cache invalidation so the GPU is quiescent
 * before SQTT is started or stopped: CS (and PS on graphics queues) partial
 * flush plus I$/S$/V$/L2 invalidation.
 *
 * NOTE(review): the predicate mixes RING_COMPUTE (winsys ring type) with
 * RADV_QUEUE_COMPUTE (queue family) for the same 'family' value — they
 * appear to coincide numerically, but confirm and use one constant
 * consistently.
 */
static void
radv_emit_wait_for_idle(struct radv_device *device,
                        struct radeon_cmdbuf *cs, int family)
{
   si_cs_emit_cache_flush(cs, device->physical_device->rad_info.chip_class,
                          NULL, 0,
                          family == RING_COMPUTE &&
                          device->physical_device->rad_info.chip_class >= GFX7,
                          (family == RADV_QUEUE_COMPUTE ?
                           RADV_CMD_FLAG_CS_PARTIAL_FLUSH :
                           (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
                            RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
                          RADV_CMD_FLAG_INV_ICACHE |
                          RADV_CMD_FLAG_INV_SCACHE |
                          RADV_CMD_FLAG_INV_VCACHE |
                          RADV_CMD_FLAG_INV_L2, 0);
}
/* Pre-builds the internal start/stop command streams for both queue
 * families (0 = RADV_QUEUE_GENERAL, 1 = RADV_QUEUE_COMPUTE).
 *
 * Each CS begins with a queue-specific preamble (CONTEXT_CONTROL on the
 * graphics queue, a NOP on compute), references the thread trace BO,
 * waits for idle, and then emits the SQTT start or stop sequence. The
 * start CS additionally enables SQG events beforehand; the stop CS
 * disables them afterwards.
 *
 * NOTE(review): ws->cs_create() results are not checked for NULL here;
 * confirm a failed allocation cannot reach the radeon_emit() calls below.
 */
static void
radv_thread_trace_init_cs(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;

   /* Thread trace start CS. */
   for (int family = 0; family < 2; ++family) {
      device->thread_trace_start_cs[family] = ws->cs_create(ws, family);

      switch (family) {
      case RADV_QUEUE_GENERAL:
         radeon_emit(device->thread_trace_start_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
         radeon_emit(device->thread_trace_start_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
         radeon_emit(device->thread_trace_start_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
         break;
      case RADV_QUEUE_COMPUTE:
         radeon_emit(device->thread_trace_start_cs[family], PKT3(PKT3_NOP, 0, 0));
         radeon_emit(device->thread_trace_start_cs[family], 0);
         break;
      }

      radv_cs_add_buffer(ws, device->thread_trace_start_cs[family],
                         device->thread_trace_bo);

      /* Make sure to wait-for-idle before starting SQTT. */
      radv_emit_wait_for_idle(device,
                              device->thread_trace_start_cs[family],
                              family);

      /* Enable SQG events that collects thread trace data. */
      radv_emit_spi_config_cntl(device,
                                device->thread_trace_start_cs[family],
                                true);

      radv_emit_thread_trace_start(device,
                                   device->thread_trace_start_cs[family],
                                   family);

      ws->cs_finalize(device->thread_trace_start_cs[family]);
   }

   /* Thread trace stop CS. */
   for (int family = 0; family < 2; ++family) {
      device->thread_trace_stop_cs[family] = ws->cs_create(ws, family);

      switch (family) {
      case RADV_QUEUE_GENERAL:
         radeon_emit(device->thread_trace_stop_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
         radeon_emit(device->thread_trace_stop_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
         radeon_emit(device->thread_trace_stop_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
         break;
      case RADV_QUEUE_COMPUTE:
         radeon_emit(device->thread_trace_stop_cs[family], PKT3(PKT3_NOP, 0, 0));
         radeon_emit(device->thread_trace_stop_cs[family], 0);
         break;
      }

      radv_cs_add_buffer(ws, device->thread_trace_stop_cs[family],
                         device->thread_trace_bo);

      /* Make sure to wait-for-idle before stopping SQTT. */
      radv_emit_wait_for_idle(device,
                              device->thread_trace_stop_cs[family],
                              family);

      radv_emit_thread_trace_stop(device,
                                  device->thread_trace_stop_cs[family],
                                  family);

      /* Restore previous state by disabling SQG events. */
      radv_emit_spi_config_cntl(device,
                                device->thread_trace_stop_cs[family],
                                false);

      ws->cs_finalize(device->thread_trace_stop_cs[family]);
   }
}
477 radv_thread_trace_init_bo(struct radv_device
*device
)
479 struct radeon_winsys
*ws
= device
->ws
;
482 /* Compute total size of the thread trace BO for 4 SEs. */
483 size
= align64(sizeof(struct radv_thread_trace_info
) * 4,
484 1 << SQTT_BUFFER_ALIGN_SHIFT
);
485 size
+= device
->thread_trace_buffer_size
* 4;
487 device
->thread_trace_bo
= ws
->buffer_create(ws
, size
, 4096,
489 RADEON_FLAG_CPU_ACCESS
|
490 RADEON_FLAG_NO_INTERPROCESS_SHARING
|
491 RADEON_FLAG_ZERO_VRAM
,
492 RADV_BO_PRIORITY_SCRATCH
);
493 if (!device
->thread_trace_bo
)
496 device
->thread_trace_ptr
= ws
->buffer_map(device
->thread_trace_bo
);
497 if (!device
->thread_trace_ptr
)
/* Initializes SQTT support: allocates the trace BO, then pre-records the
 * start/stop command streams. Returns false if the BO cannot be created.
 */
bool
radv_thread_trace_init(struct radv_device *device)
{
   if (!radv_thread_trace_init_bo(device))
      return false;

   radv_thread_trace_init_cs(device);
   return true;
}
514 radv_thread_trace_finish(struct radv_device
*device
)
516 struct radeon_winsys
*ws
= device
->ws
;
518 if (unlikely(device
->thread_trace_bo
))
519 ws
->buffer_destroy(device
->thread_trace_bo
);
521 for (unsigned i
= 0; i
< 2; i
++) {
522 if (device
->thread_trace_start_cs
[i
])
523 ws
->cs_destroy(device
->thread_trace_start_cs
[i
]);
524 if (device
->thread_trace_stop_cs
[i
])
525 ws
->cs_destroy(device
->thread_trace_stop_cs
[i
]);
530 radv_begin_thread_trace(struct radv_queue
*queue
)
532 int family
= queue
->queue_family_index
;
533 struct radeon_cmdbuf
*cs
= queue
->device
->thread_trace_start_cs
[family
];
534 return radv_queue_internal_submit(queue
, cs
);
538 radv_end_thread_trace(struct radv_queue
*queue
)
540 int family
= queue
->queue_family_index
;
541 struct radeon_cmdbuf
*cs
= queue
->device
->thread_trace_stop_cs
[family
];
542 return radv_queue_internal_submit(queue
, cs
);
546 radv_is_thread_trace_complete(struct radv_device
*device
,
547 const struct radv_thread_trace_info
*info
)
549 if (device
->physical_device
->rad_info
.chip_class
== GFX10
) {
550 /* GFX10 doesn't have THREAD_TRACE_CNTR but it reports the
551 * number of dropped bytes for all SEs via
552 * THREAD_TRACE_DROPPED_CNTR.
554 return info
->gfx10_dropped_cntr
== 0;
557 /* Otherwise, compare the current thread trace offset with the number
560 return info
->cur_offset
== info
->gfx9_write_counter
;
564 radv_get_expected_buffer_size(struct radv_device
*device
,
565 const struct radv_thread_trace_info
*info
)
567 if (device
->physical_device
->rad_info
.chip_class
== GFX10
) {
568 uint32_t dropped_cntr_per_se
= info
->gfx10_dropped_cntr
/ device
->physical_device
->rad_info
.max_se
;
569 return ((info
->cur_offset
* 32) + dropped_cntr_per_se
) / 1024;
572 return (info
->gfx9_write_counter
* 32) / 1024;
576 radv_get_thread_trace(struct radv_queue
*queue
,
577 struct radv_thread_trace
*thread_trace
)
579 struct radv_device
*device
= queue
->device
;
580 unsigned max_se
= device
->physical_device
->rad_info
.max_se
;
581 void *thread_trace_ptr
= device
->thread_trace_ptr
;
583 memset(thread_trace
, 0, sizeof(*thread_trace
));
584 thread_trace
->num_traces
= max_se
;
586 for (unsigned se
= 0; se
< max_se
; se
++) {
587 uint64_t info_offset
= radv_thread_trace_get_info_offset(se
);
588 uint64_t data_offset
= radv_thread_trace_get_data_offset(device
, se
);
589 void *info_ptr
= thread_trace_ptr
+ info_offset
;
590 void *data_ptr
= thread_trace_ptr
+ data_offset
;
591 struct radv_thread_trace_info
*info
=
592 (struct radv_thread_trace_info
*)info_ptr
;
593 struct radv_thread_trace_se thread_trace_se
= {};
595 if (!radv_is_thread_trace_complete(device
, info
)) {
596 uint32_t expected_size
=
597 radv_get_expected_buffer_size(device
, info
);
598 uint32_t available_size
=
599 (info
->cur_offset
* 32) / 1024;
601 fprintf(stderr
, "Failed to get the thread trace "
602 "because the buffer is too small. The "
603 "hardware needs %d KB but the "
604 "buffer size is %d KB.\n",
605 expected_size
, available_size
);
606 fprintf(stderr
, "Please update the buffer size with "
607 "RADV_THREAD_TRACE_BUFFER_SIZE=<size_in_bytes>\n");
611 thread_trace_se
.data_ptr
= data_ptr
;
612 thread_trace_se
.info
= *info
;
613 thread_trace_se
.shader_engine
= se
;
614 thread_trace_se
.compute_unit
= 0;
616 thread_trace
->traces
[se
] = thread_trace_se
;