From b55a93fdd463b5f27c1c98a2be28960e59cab32d Mon Sep 17 00:00:00 2001 From: Jan Zielinski Date: Wed, 31 Jul 2019 15:43:47 +0200 Subject: [PATCH] swr/rasterizer: Events are now grouped and enabled by knobs All events are now grouped as follows: -Framework (i.e. ThreadStart) [always ON] -Api (i.e. SwrSync) [always ON] -Pipeline [default ON] -Shader [default ON] -SWTag [default OFF] -Memory [default OFF] Reviewed-by: Alok Hota --- .../swr/rasterizer/archrast/archrast.cpp | 33 ++-- .../swr/rasterizer/archrast/archrast.h | 1 + .../swr/rasterizer/archrast/events.proto | 158 +++++++++--------- .../rasterizer/archrast/events_private.proto | 52 +++--- .../swr/rasterizer/codegen/gen_archrast.py | 96 +++++++---- .../swr/rasterizer/codegen/knob_defs.py | 39 ++++- .../codegen/templates/gen_ar_event.cpp | 14 +- .../codegen/templates/gen_ar_event.hpp | 41 +++-- .../codegen/templates/gen_ar_eventhandler.hpp | 11 +- .../templates/gen_ar_eventhandlerfile.hpp | 23 ++- .../drivers/swr/rasterizer/core/api.cpp | 4 +- .../swr/rasterizer/jitter/builder_gfx_mem.cpp | 90 ++++++++-- .../swr/rasterizer/jitter/builder_gfx_mem.h | 8 +- .../swr/rasterizer/jitter/builder_mem.cpp | 2 +- .../swr/rasterizer/jitter/builder_mem.h | 2 +- 15 files changed, 372 insertions(+), 202 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp index ba99391ae76..03df614da2a 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.cpp @@ -31,6 +31,7 @@ #include "common/os.h" #include "archrast/archrast.h" #include "archrast/eventmanager.h" +#include "gen_ar_event.hpp" #include "gen_ar_eventhandlerfile.hpp" namespace ArchRast @@ -104,9 +105,9 @@ namespace ArchRast uint64_t tscMax; }; - struct AddressRangeComparator + struct AddressRangeComparator { - bool operator()(MemoryTrackerKey a, MemoryTrackerKey b) const + bool operator()(MemoryTrackerKey a, MemoryTrackerKey b) const { return (a.address & a.mask) < (b.address & b.mask); } @@ -260,7 +261,7 @@ namespace ArchRast // compute address mask for memory tracking mAddressMask = 0; - uint64_t addressRangeBytes = 64; + uint64_t addressRangeBytes = 4096; while (addressRangeBytes > 0) { mAddressMask = (mAddressMask << 1) | 1; @@ -687,7 +688,7 @@ namespace ArchRast mMemoryStats.TrackMemoryAccess(trackAddr, mAddressMask, event.data.isRead, event.data.tsc, size); sizeTracked += size; trackAddr = nextAddr; - } + } } virtual void Handle(const MemoryStatsEndEvent& event) @@ -695,13 +696,13 @@ namespace ArchRast MemoryStats::MemoryTrackerMap::iterator i = mMemoryStats.trackedMemory.begin(); while (i != mMemoryStats.trackedMemory.end()) { - MemoryStatsEvent mse(event.data.drawId, - i->first.address & mAddressMask, - i->second.accessCountRead, - i->second.accessCountWrite, - i->second.totalSizeRead, - i->second.totalSizeWrite, - i->second.tscMin, + MemoryStatsEvent mse(event.data.drawId, + i->first.address & mAddressMask, + i->second.accessCountRead, + i->second.accessCountWrite, + i->second.totalSizeRead, + i->second.totalSizeWrite, + i->second.tscMin, i->second.tscMax); EventHandlerFile::Handle(mse); i++; @@ -812,10 +813,12 @@ namespace ArchRast // Dispatch event for this thread. void Dispatch(HANDLE hThreadContext, const Event& event) { - EventManager* pManager = FromHandle(hThreadContext); - SWR_ASSERT(pManager != nullptr); - - pManager->Dispatch(event); + if (event.IsEnabled()) + { + EventManager* pManager = reinterpret_cast(hThreadContext); + SWR_ASSERT(pManager != nullptr); + pManager->Dispatch(event); + } } // Flush for this thread. diff --git a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h index d42c197bcda..a247443f54b 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/archrast.h +++ b/src/gallium/drivers/swr/rasterizer/archrast/archrast.h @@ -44,5 +44,6 @@ namespace ArchRast // Dispatch event for this thread. void Dispatch(HANDLE hThreadContext, const Event& event); + void FlushDraw(HANDLE hThreadContext, uint32_t drawId); }; // namespace ArchRast diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events.proto b/src/gallium/drivers/swr/rasterizer/archrast/events.proto index 471bd0e286a..8a6093f29be 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events.proto @@ -29,15 +29,15 @@ enum AR_DRAW_TYPE IndexedInstancedSplit = 3 }; -event ThreadStartApiEvent +event Framework::ThreadStartApiEvent { }; -event ThreadStartWorkerEvent +event Framework::ThreadStartWorkerEvent { }; -event DrawInfoEvent +event SwrApi::DrawInfoEvent { uint32_t drawId; AR_DRAW_TYPE type; @@ -55,7 +55,7 @@ event DrawInfoEvent uint32_t splitId; // Split draw count or id. }; -event DispatchEvent +event SwrApi::DispatchEvent { uint32_t drawId; uint32_t threadGroupCountX; @@ -63,37 +63,37 @@ event DispatchEvent uint32_t threadGroupCountZ; }; -event FrameEndEvent +event SwrApi::FrameEndEvent { uint32_t frameId; uint32_t nextDrawId; }; ///@brief API Stat: Synchonization event. -event SwrSyncEvent +event SwrApi::SwrSyncEvent { uint32_t drawId; }; ///@brief API Stat: Invalidate hot tiles (i.e. tile cache) -event SwrInvalidateTilesEvent +event SwrApi::SwrInvalidateTilesEvent { uint32_t drawId; }; ///@brief API Stat: Invalidate and discard hot tiles within pixel region -event SwrDiscardRectEvent +event SwrApi::SwrDiscardRectEvent { uint32_t drawId; }; ///@brief API Stat: Flush tiles out to memory that is typically owned by driver (e.g. Flush RT cache) -event SwrStoreTilesEvent +event SwrApi::SwrStoreTilesEvent { uint32_t drawId; }; -event FrontendStatsEvent +event Pipeline::FrontendStatsEvent { uint32_t drawId; uint64_t counter IaVertices; @@ -115,7 +115,7 @@ event FrontendStatsEvent uint64_t counter SoNumPrimsWritten3; }; -event BackendStatsEvent +event Pipeline::BackendStatsEvent { uint32_t drawId; uint64_t counter DepthPassCount; @@ -124,56 +124,56 @@ event BackendStatsEvent }; -event EarlyZSingleSample +event Pipeline::EarlyZSingleSample { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event LateZSingleSample +event Pipeline::LateZSingleSample { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event EarlyStencilSingleSample +event Pipeline::EarlyStencilSingleSample { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event LateStencilSingleSample +event Pipeline::LateStencilSingleSample { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event EarlyZSampleRate +event Pipeline::EarlyZSampleRate { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event LateZSampleRate +event Pipeline::LateZSampleRate { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event EarlyStencilSampleRate +event Pipeline::EarlyStencilSampleRate { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event LateStencilSampleRate +event Pipeline::LateStencilSampleRate { uint32_t drawId; uint64_t counter passCount; @@ -181,7 +181,7 @@ event LateStencilSampleRate }; // Total Early-Z counts, SingleSample and SampleRate -event EarlyZ +event Pipeline::EarlyZ { uint32_t drawId; uint64_t counter passCount; @@ -189,7 +189,7 @@ event EarlyZ }; // Total LateZ counts, SingleSample and SampleRate -event LateZ +event Pipeline::LateZ { uint32_t drawId; uint64_t counter passCount; @@ -197,7 +197,7 @@ event LateZ }; // Total EarlyStencil counts, SingleSample and SampleRate -event EarlyStencil +event Pipeline::EarlyStencil { uint32_t drawId; uint64_t counter passCount; @@ -205,35 +205,35 @@ event EarlyStencil }; // Total LateStencil counts, SingleSample and SampleRate -event LateStencil +event Pipeline::LateStencil { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event EarlyZNullPS +event Pipeline::EarlyZNullPS { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event EarlyStencilNullPS +event Pipeline::EarlyStencilNullPS { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event EarlyZPixelRate +event Pipeline::EarlyZPixelRate { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event LateZPixelRate +event Pipeline::LateZPixelRate { uint32_t drawId; uint64_t counter passCount; @@ -241,65 +241,65 @@ event LateZPixelRate }; -event EarlyOmZ +event Pipeline::EarlyOmZ { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event EarlyOmStencil +event Pipeline::EarlyOmStencil { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event LateOmZ +event Pipeline::LateOmZ { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event LateOmStencil +event Pipeline::LateOmStencil { uint32_t drawId; uint64_t counter passCount; uint64_t counter failCount; }; -event GSInputPrims +event Pipeline::GSInputPrims { uint32_t drawId; uint64_t counter inputPrimCount; }; -event GSPrimsGen +event Pipeline::GSPrimsGen { uint32_t drawId; uint64_t counter primGeneratedCount; }; -event GSVertsInput +event Pipeline::GSVertsInput { uint32_t drawId; uint64_t counter vertsInput; }; -event TessPrims +event Pipeline::TessPrims { uint32_t drawId; uint64_t counter primCount; }; -event RasterTiles +event Pipeline::RasterTiles { uint32_t drawId; uint32_t counter rastTileCount; }; -event ClipperEvent +event Pipeline::ClipperEvent { uint32_t drawId; uint32_t counter trivialRejectCount; @@ -307,21 +307,21 @@ event ClipperEvent uint32_t counter mustClipCount; }; -event CullEvent +event Pipeline::CullEvent { uint32_t drawId; uint64_t counter backfacePrimCount; uint64_t counter degeneratePrimCount; }; -event AlphaEvent +event Pipeline::AlphaEvent { uint32_t drawId; uint32_t counter alphaTestCount; uint32_t counter alphaBlendCount; }; -event VSInfo +event Shader::VSInfo { uint32_t drawId; uint32_t counter numInstExecuted; @@ -338,7 +338,7 @@ event VSInfo uint32_t counter numLodExecuted; }; -event HSInfo +event Shader::HSInfo { uint32_t drawId; uint32_t counter numInstExecuted; @@ -355,7 +355,7 @@ event HSInfo uint32_t counter numLodExecuted; }; -event DSInfo +event Shader::DSInfo { uint32_t drawId; uint32_t counter numInstExecuted; @@ -372,7 +372,7 @@ event DSInfo uint32_t counter numLodExecuted; }; -event GSInfo +event Shader::GSInfo { uint32_t drawId; uint32_t counter numInstExecuted; @@ -390,7 +390,7 @@ event GSInfo }; -event PSInfo +event Shader::PSInfo { uint32_t drawId; uint32_t counter numInstExecuted; @@ -407,7 +407,7 @@ event PSInfo uint32_t counter numLodExecuted; }; -event CSInfo +event Shader::CSInfo { uint32_t drawId; uint32_t counter numInstExecuted; @@ -424,64 +424,62 @@ event CSInfo uint32_t counter numLodExecuted; }; -event SWTagFrameEvent +event SWTagApi::SWTagEndFrameEvent { - uint64_t swTagFrame; + uint64_t frameCount; + uint32_t renderpassCount; + uint32_t drawOrDispatchCount; + uint32_t drawCount; + uint32_t dispatchCount; }; -event SWTagRenderpassEvent +event SWTagApi::SWTagRenderpassEvent { - uint64_t swTagFrame; - uint32_t swTagDrawOrDispatch; - uint32_t swTagDraw; - uint32_t swTagDispatch; - uint32_t swTagRenderpassCount; + uint64_t frameCount; + uint32_t renderpassCount; + uint32_t drawOrDispatchCount; + uint32_t drawCount; + uint32_t dispatchCount; }; -event SWTagDrawEvent +event SWTagApi::SWTagDrawEvent { - uint64_t swTagFrame; - uint32_t swTagDrawOrDispatch; - uint32_t swTagDraw; - uint32_t swTagDispatch; + uint64_t frameCount; + uint32_t renderpassCount; + uint32_t drawOrDispatchCount; + uint32_t drawCount; + uint32_t dispatchCount; }; -event SWTagDispatchEvent +event SWTagApi::SWTagDispatchEvent { - uint64_t swTagFrame; - uint32_t swTagDrawOrDispatch; - uint32_t swTagDraw; - uint32_t swTagDispatch; + uint64_t frameCount; + uint32_t renderpassCount; + uint32_t drawOrDispatchCount; + uint32_t drawCount; + uint32_t dispatchCount; }; -event SWTagFlushEvent +event SWTagApi::SWTagDriverCallEvent { - uint64_t swTagFrame; - uint32_t swTagDrawOrDispatch; - uint32_t swTagDraw; - uint32_t swTagDispatch; - uint32_t swTagFlushCounter; - char swTagFlushReason[256]; - uint32_t swTagFlushType; + char cmd[256]; }; -event SWTagApiCallEvent +event SWTag::SWTagFlushEvent { - uint64_t swTagFrame; - uint32_t swTagDrawOrDispatch; - uint32_t swTagDraw; - uint32_t swTagDispatch; - char swTagApiCall[256]; + uint32_t count; + char reason[256]; + uint32_t type; }; -event MemoryStatsEvent +event Memory::MemoryStatsEvent { uint32_t drawId; uint64_t baseAddr; uint32_t accessCountRead; uint32_t accessCountWrite; - uint32_t totalSizeRead; - uint32_t totalSizeWrite; + uint32_t totalSizeRead; + uint32_t totalSizeWrite; uint64_t tscMin; uint64_t tscMax; }; diff --git a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto index 19fb582a414..da4419a4626 100644 --- a/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto +++ b/src/gallium/drivers/swr/rasterizer/archrast/events_private.proto @@ -24,73 +24,73 @@ # ArchRast is to not pollute the Rasty code with lots of calculations, etc. that # are needed to compute per draw statistics, etc. -event EarlyDepthStencilInfoSingleSample +event Pipeline::EarlyDepthStencilInfoSingleSample { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event EarlyDepthStencilInfoSampleRate +event Pipeline::EarlyDepthStencilInfoSampleRate { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event EarlyDepthStencilInfoNullPS +event Pipeline::EarlyDepthStencilInfoNullPS { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event LateDepthStencilInfoSingleSample +event Pipeline::LateDepthStencilInfoSingleSample { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event LateDepthStencilInfoSampleRate +event Pipeline::LateDepthStencilInfoSampleRate { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event LateDepthStencilInfoNullPS +event Pipeline::LateDepthStencilInfoNullPS { uint64_t depthPassMask; uint64_t stencilPassMask; uint64_t coverageMask; }; -event EarlyDepthInfoPixelRate +event Pipeline::EarlyDepthInfoPixelRate { uint64_t depthPassCount; uint64_t activeLanes; }; -event LateDepthInfoPixelRate +event Pipeline::LateDepthInfoPixelRate { uint64_t depthPassCount; uint64_t activeLanes; }; -event BackendDrawEndEvent +event Pipeline::BackendDrawEndEvent { uint32_t drawId; }; -event FrontendDrawEndEvent +event Pipeline::FrontendDrawEndEvent { uint32_t drawId; }; -event MemoryAccessEvent +event Memory::MemoryAccessEvent { uint32_t drawId; uint64_t tsc; @@ -100,23 +100,23 @@ event MemoryAccessEvent uint8_t client; }; -event MemoryStatsEndEvent +event Memory::MemoryStatsEndEvent { uint32_t drawId; }; -event TessPrimCount +event Pipeline::TessPrimCount { uint64_t primCount; }; -event RasterTileCount +event Pipeline::RasterTileCount { uint32_t drawId; uint64_t rasterTiles; }; -event GSPrimInfo +event Pipeline::GSPrimInfo { uint64_t inputPrimCount; uint64_t primGeneratedCount; @@ -128,14 +128,14 @@ event GSPrimInfo // Trivial reject is numInvocations - pop_cnt32(validMask) // Trivial accept is validMask & ~clipMask // Must clip count is pop_cnt32(clipMask) -event ClipInfoEvent +event Pipeline::ClipInfoEvent { uint32_t numInvocations; uint32_t validMask; uint32_t clipMask; }; -event CullInfoEvent +event Pipeline::CullInfoEvent { uint32_t drawId; uint64_t degeneratePrimMask; @@ -143,14 +143,14 @@ event CullInfoEvent uint32_t validMask; }; -event AlphaInfoEvent +event Pipeline::AlphaInfoEvent { uint32_t drawId; uint32_t alphaTestEnable; uint32_t alphaBlendEnable; }; -event DrawInstancedEvent +event SwrApi::DrawInstancedEvent { uint32_t drawId; uint32_t topology; @@ -165,7 +165,7 @@ event DrawInstancedEvent uint32_t splitId; // Split draw count or id. }; -event DrawIndexedInstancedEvent +event SwrApi::DrawIndexedInstancedEvent { uint32_t drawId; uint32_t topology; @@ -181,32 +181,32 @@ event DrawIndexedInstancedEvent uint32_t splitId; // Split draw count or id. }; -event VSStats +event Shader::VSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event HSStats +event Shader::HSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event DSStats +event Shader::DSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event GSStats +event Shader::GSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event PSStats +event Shader::PSStats { HANDLE hStats; // SWR_SHADER_STATS }; -event CSStats +event Shader::CSStats { HANDLE hStats; // SWR_SHADER_STATS }; \ No newline at end of file diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py index b4b6127c8cc..44f2af036b9 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_archrast.py @@ -87,14 +87,20 @@ def parse_protos(files, verbose=False): """ Parses a proto file and returns a dictionary of event definitions """ - protos = {} - protos['events'] = {} # event dictionary containing events with their fields - protos['event_names'] = [] # needed to keep events in order parsed. dict is not ordered. - protos['event_map'] = {} # dictionary to map event ids to event names - protos['enums'] = {} - protos['enum_names'] = [] - - eventId = 0 + protos = { + 'events': { + 'defs': {}, # event dictionary containing events with their fields + 'map': {}, # dictionary to map event ids to event names + 'groups': {} # event keys stored by groups + }, + 'enums': { + 'defs': {}, + 'map': {} + } + } + + event_id = 0 + enum_id = 0 if type(files) is not list: files = [files] @@ -104,40 +110,74 @@ def parse_protos(files, verbose=False): print("Parsing proto file: %s" % os.path.normpath(filename)) with open(filename, 'r') as f: - lines=f.readlines() + lines = f.readlines() idx = 0 - - raw_text = [] while idx < len(lines): - line = lines[idx].rstrip() + line = lines[idx].strip() idx += 1 - # search for event definitions. - match = re.match(r'(\s*)event(\s*)(\w+)', line) - + # Match event definition + match = re.match(r'event(\s*)(((\w*)::){0,1}(\w+))', line) # i.e. "event SWTag::CounterEvent" if match: - eventId += 1 - event_name = match.group(3) - protos["event_names"].append(event_name) + event_id += 1 + + # Parse event attributes + event_key = match.group(2) # i.e. SWTag::CounterEvent + event_group = match.group(4) if match.group(4) else "" # i.e. SWTag + event_name = match.group(5) # i.e. CounterEvent - protos["events"][event_name] = {} - protos["events"][event_name]["event_id"] = eventId - protos["event_map"][eventId] = event_name - idx = parse_event_fields(lines, idx, protos["events"][event_name]) + # Define event attributes + event = { + 'id': event_id, + 'group': event_group, + 'name': event_name + } - # search for enums. - match = re.match(r'(\s*)enum(\s*)(\w+)', line) + # Now add event fields + idx = parse_event_fields(lines, idx, event) + protos['events']['defs'][event_key] = event + protos['events']['map'][event_id] = event_key + + continue + + # Match enum definition + match = re.match(r'enum(\s*)(\w+)', line) if match: - enum_name = match.group(3) - protos["enum_names"].append(enum_name) + enum_id += 1 + + # Parse enum attributes + enum_name = match.group(2) + + # Define enum attr + enum = { + 'name': enum_name + } + + # Now add enum fields + idx = parse_enums(lines, idx, enum) + + protos['enums']['defs'][enum_name] = enum + protos['enums']['map'][enum_id] = enum_name + + continue + + # Sort and group events + event_groups = protos['events']['groups'] + for key in sorted(protos['events']['defs']): + group = protos['events']['defs'][key]['group'] + if group not in event_groups: + event_groups[group] = [] + event_groups[group].append(key) - protos["enums"][enum_name] = {} - idx = parse_enums(lines, idx, protos["enums"][enum_name]) return protos +def get_sorted_protos(protos): + protos["groups"] + + def main(): # Parse args... diff --git a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py index c9d1f5d5a31..351587ad5ca 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py +++ b/src/gallium/drivers/swr/rasterizer/codegen/knob_defs.py @@ -304,7 +304,7 @@ KNOBS = [ 'NOTE: Requires KNOB_ENABLE_TOSS_POINTS to be enabled in core/knobs.h'], 'category' : 'perf_adv', }], - + ['DISABLE_SPLIT_DRAW', { 'type' : 'bool', 'default' : 'false', @@ -315,4 +315,41 @@ KNOBS = [ 'category' : 'perf_adv', }], + ['AR_ENABLE_PIPELINE_EVENTS', { + 'type' : 'bool', + 'default' : 'true', + 'desc' : ['Enable pipeline events when using Archrast'], + 'category' : 'archrast', + }], + + ['AR_ENABLE_SHADER_EVENTS', { + 'type' : 'bool', + 'default' : 'true', + 'desc' : ['Enable shader events when using Archrast'], + 'category' : 'archrast', + }], + + ['AR_ENABLE_SWTAG_EVENTS', { + 'type' : 'bool', + 'default' : 'false', + 'desc' : ['Enable SWTag events when using Archrast'], + 'category' : 'archrast', + }], + + ['AR_ENABLE_MEMORY_EVENTS', { + 'type' : 'bool', + 'default' : 'false', + 'desc' : ['Enable memory events when using Archrast'], + 'category' : 'archrast', + }], + + ['AR_MEM_SET_BYTE_GRANULARITY', { + 'type' : 'uint32_t', + 'default' : '64', + 'desc' : ['Granularity and alignment of tracking of memory accesses', + 'ONLY ACTIVE UNDER ArchRast.'], + 'category' : 'archrast', + }], + + ] diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp index e696dd2096a..e73a8110ee1 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.cpp @@ -36,12 +36,20 @@ #include "gen_ar_eventhandler.hpp" using namespace ArchRast; -% for name in protos['event_names']: -void ${name}::Accept(EventHandler* pHandler) const +<% sorted_groups = sorted(protos['events']['groups']) %> +% for group in sorted_groups: +% for event_key in protos['events']['groups'][group]: +<% + event = protos['events']['defs'][event_key] +%> +void ${event['name']}::Accept(EventHandler* pHandler) const { pHandler->Handle(*this); } -% endfor +% endfor +% endfor + + // clan-format on diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp index 6c592f74461..8079b0e187a 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_event.hpp @@ -36,11 +36,13 @@ #include "common/os.h" #include "core/state.h" +<% always_enabled_knob_groups = ['', 'Framework', 'SWTagApi', 'SwrApi'] %> namespace ArchRast { -% for name in protos['enum_names']: +<% sorted_enums = sorted(protos['enums']['defs']) %> +% for name in sorted_enums: enum ${name} - {<% names = protos['enums'][name]['names'] %> + {<% names = protos['enums']['defs'][name]['names'] %> % for i in range(len(names)): ${names[i].lstrip()} % endfor @@ -58,17 +60,23 @@ namespace ArchRast Event() {} virtual ~Event() {} + virtual bool IsEnabled() const { return true; }; virtual void Accept(EventHandler* pHandler) const = 0; }; -% for name in protos['event_names']: +<% sorted_groups = sorted(protos['events']['groups']) %> +% for group in sorted_groups: + % for event_key in protos['events']['groups'][group]: +<% + event = protos['events']['defs'][event_key] +%> ////////////////////////////////////////////////////////////////////////// - /// ${name}Data + /// ${event_key}Data ////////////////////////////////////////////////////////////////////////// #pragma pack(push, 1) - struct ${name}Data + struct ${event['name']}Data {<% - fields = protos['events'][name]['fields'] %> + fields = event['fields'] %> // Fields % for i in range(len(fields)): % if fields[i]['size'] > 1: @@ -81,15 +89,15 @@ namespace ArchRast #pragma pack(pop) ////////////////////////////////////////////////////////////////////////// - /// ${name} + /// ${event_key} ////////////////////////////////////////////////////////////////////////// - struct ${name} : Event + struct ${event['name']} : Event {<% - fields = protos['events'][name]['fields'] %> - ${name}Data data; + fields = event['fields'] %> + ${event['name']}Data data; // Constructor - ${name}( + ${event['name']}( % for i in range(len(fields)): % if i < len(fields)-1: % if fields[i]['size'] > 1: @@ -127,7 +135,18 @@ namespace ArchRast } virtual void Accept(EventHandler* pHandler) const; + % if group not in always_enabled_knob_groups: + <% group_knob_define = 'KNOB_AR_ENABLE_' + group.upper() + '_EVENTS' %> + bool IsEnabled() const + { + static const bool IsEventEnabled = true; // TODO: Replace with knob for each event + return ${group_knob_define} && IsEventEnabled; + } + % endif }; + % endfor + +% endfor } // namespace ArchRast // clang-format on diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp index 140dd00dbeb..d3e82e8a4ee 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandler.hpp @@ -48,9 +48,14 @@ namespace ArchRast virtual void FlushDraw(uint32_t drawId) {} -% for name in protos['event_names']: - virtual void Handle(const ${name}& event) {} -% endfor +<% sorted_groups = sorted(protos['events']['groups']) %> +% for group in sorted_groups: +% for event_key in protos['events']['groups'][group]: +<% + event = protos['events']['defs'][event_key] +%> virtual void Handle(const ${event['name']}& event) {} +% endfor +% endfor }; } // namespace ArchRast // clan-format off diff --git a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp index 7c10e124c3c..3f85c88bd7a 100644 --- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp +++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_ar_eventhandlerfile.hpp @@ -136,19 +136,24 @@ namespace ArchRast memcpy(&mBuffer[mBufOffset], pBlock, size); mBufOffset += size; } - -% for name in protos['event_names']: +<% sorted_groups = sorted(protos['events']['groups']) %> +% for group in sorted_groups: +% for event_key in protos['events']['groups'][group]: +<% + event = protos['events']['defs'][event_key] +%> ////////////////////////////////////////////////////////////////////////// - /// @brief Handle ${name} event - virtual void Handle(const ${name}& event) + /// @brief Handle ${event_key} event + virtual void Handle(const ${event['name']}& event) { -% if protos['events'][name]['num_fields'] == 0: - Write(${protos['events'][name]['event_id']}, (char*)&event.data, 0); +% if event['num_fields'] == 0: + Write(${event['id']}, (char*)&event.data, 0); % else: - Write(${protos['events'][name]['event_id']}, (char*)&event.data, sizeof(event.data)); -%endif + Write(${event['id']}, (char*)&event.data, sizeof(event.data)); +% endif } -% endfor +% endfor +% endfor ////////////////////////////////////////////////////////////////////////// /// @brief Everything written to buffer this point is the header. diff --git a/src/gallium/drivers/swr/rasterizer/core/api.cpp b/src/gallium/drivers/swr/rasterizer/core/api.cpp index 20f1a345880..3601aa3f509 100644 --- a/src/gallium/drivers/swr/rasterizer/core/api.cpp +++ b/src/gallium/drivers/swr/rasterizer/core/api.cpp @@ -1212,7 +1212,7 @@ void DrawInstanced(HANDLE hContext, uint32_t numVertsForDraw = (remainingVerts < maxVertsPerDraw) ? remainingVerts : maxVertsPerDraw; - bool isSplitDraw = (draw > 0) ? true : false; + bool isSplitDraw = (draw > 0) ? !KNOB_DISABLE_SPLIT_DRAW : false; DRAW_CONTEXT* pDC = GetDrawContext(pContext, isSplitDraw); InitDraw(pDC, isSplitDraw); @@ -1366,7 +1366,7 @@ void DrawIndexedInstance(HANDLE hContext, (remainingIndices < maxIndicesPerDraw) ? remainingIndices : maxIndicesPerDraw; // When breaking up draw, we need to obtain new draw context for each iteration. - bool isSplitDraw = (draw > 0) ? true : false; + bool isSplitDraw = (draw > 0) ? !KNOB_DISABLE_SPLIT_DRAW : false; pDC = GetDrawContext(pContext, isSplitDraw); InitDraw(pDC, isSplitDraw); diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp index 5f359ed2113..b67ffbfa7aa 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.cpp @@ -44,6 +44,7 @@ namespace SwrJit mpfnTrackMemAccess = nullptr; mpParamSimDC = nullptr; mpWorkerData = nullptr; + } void BuilderGfxMem::NotifyPrivateContextSet() @@ -96,6 +97,7 @@ namespace SwrJit uint8_t scale, MEM_CLIENT usage) { + // address may be coming in as 64bit int now so get the pointer if (pBase->getType() == mInt64Ty) { @@ -109,6 +111,7 @@ namespace SwrJit void BuilderGfxMem::SCATTERPS( Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask, MEM_CLIENT usage) { + // address may be coming in as 64bit int now so get the pointer if (pDst->getType() == mInt64Ty) { @@ -123,32 +126,83 @@ namespace SwrJit return ADD(base, offset); } - Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name) + Value* BuilderGfxMem::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name) { - Ptr = TranslationHelper(Ptr, Ty); - return Builder::GEP(Ptr, Idx, nullptr, Name); + bool xlate = (Ptr->getType() == mInt64Ty); + if (xlate) + { + Ptr = INT_TO_PTR(Ptr, Ty); + Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name); + Ptr = PTR_TO_INT(Ptr, mInt64Ty); + if (isReadOnly) + { + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + } + else + { + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForWrite); + } + } + else + { + Ptr = Builder::GEP(Ptr, Idx, nullptr, isReadOnly, Name); + } + return Ptr; } Value* BuilderGfxMem::GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name) { - Ptr = TranslationHelper(Ptr, Ty); - return Builder::GEP(Ty, Ptr, Idx, Name); + bool xlate = (Ptr->getType() == mInt64Ty); + if (xlate) + { + Ptr = INT_TO_PTR(Ptr, Ty); + Ptr = Builder::GEP(Ty, Ptr, Idx, Name); + Ptr = PTR_TO_INT(Ptr, mInt64Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + } + else + { + Ptr = Builder::GEP(Ty, Ptr, Idx, Name); + } + return Ptr; } Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list& indexList, Type* Ty) { - Ptr = TranslationHelper(Ptr, Ty); - return Builder::GEP(Ptr, indexList); + bool xlate = (Ptr->getType() == mInt64Ty); + if (xlate) + { + Ptr = INT_TO_PTR(Ptr, Ty); + Ptr = Builder::GEP(Ptr, indexList); + Ptr = PTR_TO_INT(Ptr, mInt64Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + } + else + { + Ptr = Builder::GEP(Ptr, indexList); + } + return Ptr; } Value* BuilderGfxMem::GEP(Value* Ptr, const std::initializer_list& indexList, Type* Ty) { - Ptr = TranslationHelper(Ptr, Ty); - return Builder::GEP(Ptr, indexList); + bool xlate = (Ptr->getType() == mInt64Ty); + if (xlate) + { + Ptr = INT_TO_PTR(Ptr, Ty); + Ptr = Builder::GEP(Ptr, indexList); + Ptr = PTR_TO_INT(Ptr, mInt64Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); + } + else + { + Ptr = Builder::GEP(Ptr, indexList); + } + return Ptr; } - Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty) + Value* BuilderGfxMem::TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress) { SWR_ASSERT(!(Ptr->getType() == mInt64Ty && Ty == nullptr), "Access of GFX pointers must have non-null type specified."); @@ -165,7 +219,7 @@ namespace SwrJit void BuilderGfxMem::TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead) { #if defined(KNOB_ENABLE_AR) - if (!KNOB_TRACK_MEMORY_WORKING_SET) + if (!KNOB_AR_ENABLE_MEMORY_EVENTS) { return; } @@ -214,7 +268,7 @@ namespace SwrJit AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::LOAD(Ptr, Name); } @@ -223,7 +277,7 @@ namespace SwrJit AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::LOAD(Ptr, Name); } @@ -233,7 +287,7 @@ namespace SwrJit AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::LOAD(Ptr, isVolatile, Name); } @@ -277,7 +331,7 @@ namespace SwrJit AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, true); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::MASKED_LOAD(Ptr, Align, Mask, PassThru, Name, Ty, usage); } @@ -287,7 +341,7 @@ namespace SwrJit AssertGFXMemoryParams(Ptr, usage); TrackerHelper(Ptr, Ty, usage, false); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::STORE(Val, Ptr, isVolatile, Ty, usage); } @@ -300,7 +354,7 @@ namespace SwrJit AssertGFXMemoryParams(BasePtr, usage); TrackerHelper(BasePtr, Ty, usage, false); - BasePtr = TranslationHelper(BasePtr, Ty); + BasePtr = TranslationHelper(BasePtr, Ty, mpfnTranslateGfxAddressForRead); return Builder::STORE(Val, BasePtr, offset, Ty, usage); } @@ -311,7 +365,7 @@ namespace SwrJit TrackerHelper(Ptr, Ty, usage, false); - Ptr = TranslationHelper(Ptr, Ty); + Ptr = TranslationHelper(Ptr, Ty, mpfnTranslateGfxAddressForRead); return Builder::MASKED_STORE(Val, Ptr, Align, Mask, Ty, usage); } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h index b6e8ed1d760..b1f662414ab 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_gfx_mem.h @@ -41,7 +41,7 @@ namespace SwrJit BuilderGfxMem(JitManager* pJitMgr); virtual ~BuilderGfxMem() {} - virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = ""); + virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = ""); virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = ""); virtual Value* GEP(Value* Ptr, const std::initializer_list& indexList, Type* Ty = nullptr); @@ -76,7 +76,7 @@ namespace SwrJit MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual StoreInst* STORE(Value *Val, Value *Ptr, bool isVolatile = false, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - + virtual StoreInst* STORE(Value* Val, Value* BasePtr, const std::initializer_list& offset, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); virtual CallInst* MASKED_STORE(Value *Val, Value *Ptr, unsigned Align, Value *Mask, Type* Ty = nullptr, MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); @@ -108,7 +108,7 @@ namespace SwrJit Type* PtrTy = nullptr, const Twine& Name = "", MEM_CLIENT usage = MEM_CLIENT::MEM_CLIENT_INTERNAL); - + protected: void AssertGFXMemoryParams(Value* ptr, MEM_CLIENT usage); @@ -116,7 +116,7 @@ namespace SwrJit virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); - Value* TranslationHelper(Value* Ptr, Type* Ty); + Value* TranslationHelper(Value* Ptr, Type* Ty, Value* pfnTranslateGfxAddress); void TrackerHelper(Value* Ptr, Type* Ty, MEM_CLIENT usage, bool isRead); FunctionType* GetTranslationFunctionType() { return mpTranslationFuncTy; } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp index 3ec2cb32522..b183a9e0082 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp @@ -41,7 +41,7 @@ namespace SwrJit "Address appears to be GFX access. Requires translation through BuilderGfxMem."); } - Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, const Twine& Name) + Value* Builder::GEP(Value* Ptr, Value* Idx, Type* Ty, bool isReadOnly, const Twine& Name) { return IRB()->CreateGEP(Ptr, Idx, Name); } diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h index e548d8dd138..934a8279c2f 100644 --- a/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h +++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h @@ -45,7 +45,7 @@ virtual Value* OFFSET_TO_NEXT_COMPONENT(Value* base, Constant* offset); void AssertMemoryUsageParams(Value* ptr, MEM_CLIENT usage); public: -virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, const Twine& Name = ""); +virtual Value* GEP(Value* Ptr, Value* Idx, Type* Ty = nullptr, bool isReadOnly = true, const Twine& Name = ""); virtual Value* GEP(Type* Ty, Value* Ptr, Value* Idx, const Twine& Name = ""); virtual Value* GEP(Value* ptr, const std::initializer_list& indexList, Type* Ty = nullptr); virtual Value* -- 2.30.2