1 -- Parse cmdstream dump and analyse blits and batches
3 --local posix = require "posix"
5 function printf(fmt, ...)
6 return io.write(string.format(fmt, ...))
13 printf("Analyzing Data...\n")
15 local r = rnn.init("a630")
17 -- Each submit, all draws will target the same N MRTs:
19 local allmrts = {} -- includes historical render targets
20 function push_mrt(fmt, w, h, samples, base, flag, gmem)
21 dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base)
36 -- And each draw will read from M sources/textures:
38 function push_source(fmt, w, h, samples, base, flag)
39 dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base)
45 source.samples = samples
49 sources[base] = source
67 function start_cmdstream(name)
68 printf("Parsing %s\n", name)
87 function start_submit()
98 -- TODO we get false-positives for 'NULL BATCH!' because we don't have
99 -- a really good way to differentiate between submits and cmds. I.e.
100 -- with growable cmdstream, and a large # of tiles, IB1 can get split
101 -- across multiple buffers. Since we ignore GMEM draws for window-
102 -- offset != 0,0, the later cmds will appear as null batches
103 if draws == 0 and blits == 0 then
105 printf("NULL BATCH!\n");
113 printf(" # of draws: %u\n", draws)
114 printf(" mode: %s\n", drawmode)
115 if drawmode == "RM6_GMEM" then
116 printf(" bin size: %ux%u (%u bins)\n", binw, binh, nbins)
118 if depthtest or depthwrite then
128 if stenciltest or stencilwrite then
131 printf("STENCILTEST ")
134 printf("STENCILWRITE")
143 for base,mrt in pairs(mrts) do
144 printf(" MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples)
145 if drawmode == "RM6_GMEM" then
146 if cleared[mrt.gmem] then
149 if restored[mrt.gmem] then
152 if resolved[mrt.gmem] then
156 if cleared[mrt.base] then
163 function print_source(source)
164 printf(" SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples)
167 for base,source in pairs(sources) do
168 -- only show sources that have been previously rendered to, other
169 -- textures are less interesting. Possibly this should be an
173 elseif allmrts[base] or draws == 0 then
175 elseif source.flag and allmrts[source.flag] then
182 function end_submit()
187 -- Track the current mode:
189 function CP_SET_MARKER(pkt, size)
191 dbg("mode: %s\n", mode)
194 function CP_EVENT_WRITE(pkt, size)
195 if tostring(pkt[0].EVENT) ~= "BLIT" then
199 local m = tostring(mode)
200 if m == "RM6_GMEM" then
201 -- either clear or restore:
202 if r.RB_BLIT_INFO.CLEAR_MASK == 0 then
203 restored[r.RB_BLIT_BASE_GMEM] = 1
205 cleared[r.RB_BLIT_BASE_GMEM] = 1
207 -- push_mrt() because we could have GMEM
208 -- passes with only a clear and no draws:
211 -- try to match up the GMEM addr with the MRT/DEPTH state,
212 -- to avoid relying on RB_BLIT_DST also getting written:
213 for n = 0,r.RB_FS_OUTPUT_CNTL1.MRT-1 do
214 if r.RB_MRT[n].BASE_GMEM == r.RB_BLIT_BASE_GMEM then
215 sysmem = r.RB_MRT[n].BASE_LO | (r.RB_MRT[n].BASE_HI << 32)
216 flag = r.RB_MRT_FLAG_BUFFER[n].ADDR_LO | (r.RB_MRT_FLAG_BUFFER[n].ADDR_HI << 32)
220 if sysmem == 0 and r.RB_BLIT_BASE_GMEM == r.RB_DEPTH_BUFFER_BASE_GMEM then
221 sysmem = r.RB_DEPTH_BUFFER_BASE_LO | (r.RB_DEPTH_BUFFER_BASE_HI << 32)
222 flag = r.RB_DEPTH_FLAG_BUFFER_BASE_LO | (r.RB_DEPTH_FLAG_BUFFER_BASE_HI << 32)
225 --NOTE this can get confused by previous blits:
226 --if sysmem == 0 then
228 -- sysmem = r.RB_BLIT_DST_LO | (r.RB_BLIT_DST_HI << 32)
229 -- flag = r.RB_BLIT_FLAG_DST_LO | (r.RB_BLIT_FLAG_DST_HI << 32)
231 if not r.RB_BLIT_DST_INFO.FLAGS then
234 -- TODO maybe just emit RB_BLIT_DST_LO/HI for clears.. otherwise
235 -- we get confused by stale values in registers.. not sure
236 -- if this is a problem w/ blob
237 push_mrt(r.RB_BLIT_DST_INFO.COLOR_FORMAT,
238 r.RB_BLIT_SCISSOR_BR.X + 1,
239 r.RB_BLIT_SCISSOR_BR.Y + 1,
240 r.RB_BLIT_DST_INFO.SAMPLES,
244 elseif m == "RM6_RESOLVE" then
245 resolved[r.RB_BLIT_BASE_GMEM] = 1
247 printf("I am confused!!!\n")
251 function A6XX_TEX_CONST(pkt, size)
252 push_source(pkt[0].FMT,
253 pkt[1].WIDTH, pkt[1].HEIGHT,
255 pkt[4].BASE_LO | (pkt[5].BASE_HI << 32),
256 pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32))
259 function handle_blit()
260 -- blob sometimes uses CP_BLIT for resolves, so filter those out:
261 -- TODO it would be nice to not hard-code GMEM addr:
262 -- TODO I guess the src can be an offset from GMEM addr..
263 if r.SP_PS_2D_SRC_LO == 0x100000 and not r.RB_2D_BLIT_CNTL.SOLID_COLOR then
272 -- This kinda assumes that we are doing full img blits, which is maybe
273 -- not completely legit. We could perhaps instead just track pitch and
274 -- size/pitch?? Or maybe the size doesn't matter much
275 push_mrt(r.RB_2D_DST_INFO.COLOR_FORMAT,
276 r.GRAS_2D_DST_BR.X + 1,
277 r.GRAS_2D_DST_BR.Y + 1,
279 r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32),
280 r.RB_2D_DST_FLAGS_LO | (r.RB_2D_DST_FLAGS_HI << 32),
282 if r.RB_2D_BLIT_CNTL.SOLID_COLOR then
283 dbg("CLEAR=%x\n", r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32))
284 cleared[r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32)] = 1
286 push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT,
287 r.GRAS_2D_SRC_BR_X.X + 1,
288 r.GRAS_2D_SRC_BR_Y.Y + 1,
290 r.SP_PS_2D_SRC_LO | (r.SP_PS_2D_SRC_HI << 32),
291 r.SP_PS_2D_SRC_FLAGS_LO | (r.SP_PS_2D_SRC_FLAGS_HI << 32))
297 function valid_transition(curmode, newmode)
298 if curmode == "RM6_BINNING" and newmode == "RM6_GMEM" then
301 if curmode == "RM6_GMEM" and newmode == "RM6_RESOLVE" then
307 function draw(primtype, nindx)
308 dbg("draw: %s (%s)\n", primtype, mode)
310 if primtype == "BLIT_OP_SCALE" then
313 elseif primtype == "EVENT:BLIT" then
317 local m = tostring(mode)
319 -- detect changes in drawmode which indicate a different
320 -- pass.. BINNING->GMEM means same pass, but other
321 -- transitions mean different pass:
322 if drawmode and m ~= drawmode then
323 dbg("%s -> %s transition\n", drawmode, m)
324 if not valid_transition(drawmode, m) then
325 dbg("invalid transition, new render pass!\n")
331 if m ~= "RM6_GMEM" and m ~= "RM6_BYPASS" then
332 if m == "RM6_BINNING" then
336 if m == "RM6_RESOLVE" and primtype == "EVENT:BLIT" then
339 printf("unknown MODE %s for primtype %s\n", m, primtype)
343 -- Only count the first tile for GMEM mode to avoid counting
344 -- each draw for each tile
345 if m == "RM6_GMEM" then
346 if r.RB_WINDOW_OFFSET.X ~= 0 or r.RB_WINDOW_OFFSET.Y ~= 0 then
352 local render_components = {}
353 render_components[0] = r.RB_RENDER_COMPONENTS.RT0;
354 render_components[1] = r.RB_RENDER_COMPONENTS.RT1;
355 render_components[2] = r.RB_RENDER_COMPONENTS.RT2;
356 render_components[3] = r.RB_RENDER_COMPONENTS.RT3;
357 render_components[4] = r.RB_RENDER_COMPONENTS.RT4;
358 render_components[5] = r.RB_RENDER_COMPONENTS.RT5;
359 render_components[6] = r.RB_RENDER_COMPONENTS.RT6;
360 render_components[7] = r.RB_RENDER_COMPONENTS.RT7;
361 for n = 0,r.RB_FS_OUTPUT_CNTL1.MRT-1 do
362 if render_components[n] ~= 0 then
363 push_mrt(r.RB_MRT[n].BUF_INFO.COLOR_FORMAT,
364 r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
365 r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
366 r.RB_MSAA_CNTL.SAMPLES,
367 r.RB_MRT[n].BASE_LO | (r.RB_MRT[n].BASE_HI << 32),
368 r.RB_MRT_FLAG_BUFFER[n].ADDR_LO | (r.RB_MRT_FLAG_BUFFER[n].ADDR_HI << 32),
369 r.RB_MRT[n].BASE_GMEM)
373 local depthbase = r.RB_DEPTH_BUFFER_BASE_LO |
374 (r.RB_DEPTH_BUFFER_BASE_HI << 32)
376 if depthbase ~= 0 then
377 push_mrt(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT,
378 r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
379 r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
380 r.RB_MSAA_CNTL.SAMPLES,
382 r.RB_DEPTH_FLAG_BUFFER_BASE_LO | (r.RB_DEPTH_FLAG_BUFFER_BASE_HI << 32),
383 r.RB_DEPTH_BUFFER_BASE_GMEM)
386 if r.RB_DEPTH_CNTL.Z_WRITE_ENABLE then
390 if r.RB_DEPTH_CNTL.Z_ENABLE then
394 -- clearly 0 != false.. :-/
395 if r.RB_STENCILWRMASK.WRMASK ~= 0 then
399 if r.RB_STENCIL_CONTROL.STENCIL_ENABLE then
403 -- TODO should also check for stencil buffer for z32+s8 case
405 if m == "RM6_GMEM" then
406 binw = r.VSC_BIN_SIZE.WIDTH
407 binh = r.VSC_BIN_SIZE.HEIGHT
408 nbins = r.VSC_BIN_COUNT.NX * r.VSC_BIN_COUNT.NY