From b14e15e26a8749935a28a7988e6dcd52a9454dc5 Mon Sep 17 00:00:00 2001 From: Mark Janes Date: Wed, 29 May 2019 15:31:58 -0700 Subject: [PATCH] intel/perf: move oa_sample_buf structure to perf oa_sample_buf holds the data provided by the kernel that will be collated into performance metrics. Since this functionality will be implemented in perf, the struct needs to be defined there. Reviewed-by: Kenneth Graunke --- src/intel/perf/gen_perf.h | 125 ++++++++++++++ .../drivers/dri/i965/brw_performance_query.c | 155 ++---------------- 2 files changed, 141 insertions(+), 139 deletions(-) diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index 2a8d9310f22..cb4d0d2da6c 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -31,6 +31,7 @@ #include #include "util/hash_table.h" +#include "compiler/glsl/list.h" #include "util/ralloc.h" struct gen_device_info; @@ -91,6 +92,9 @@ struct gen_pipeline_stat { #define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2) #define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8) +#define I915_PERF_OA_SAMPLE_SIZE (8 + /* drm_i915_perf_record_header */ \ + 256) /* OA counter report */ + struct gen_perf_query_result { /** * Storage for the final accumulated OA counters. @@ -224,6 +228,127 @@ struct gen_perf_config { } vtbl; }; +/** + * Periodic OA samples are read() into these buffer structures via the + * i915 perf kernel interface and appended to the + * brw->perfquery.sample_buffers linked list. When we process the + * results of an OA metrics query we need to consider all the periodic + * samples between the Begin and End MI_REPORT_PERF_COUNT command + * markers. + * + * 'Periodic' is a simplification as there are other automatic reports + * written by the hardware also buffered here. + * + * Considering three queries, A, B and C: + * + * Time ----> + * ________________A_________________ + * | | + * | ________B_________ _____C___________ + * | | | | | | + * + * And an illustration of sample buffers read over this time frame: + * [HEAD ][ ][ ][ ][ ][ ][ ][ ][TAIL ] + * + * These nodes may hold samples for query A: + * [ ][ ][ A ][ A ][ A ][ A ][ A ][ ][ ] + * + * These nodes may hold samples for query B: + * [ ][ ][ B ][ B ][ B ][ ][ ][ ][ ] + * + * These nodes may hold samples for query C: + * [ ][ ][ ][ ][ ][ C ][ C ][ C ][ ] + * + * The illustration assumes we have an even distribution of periodic + * samples so all nodes have the same size plotted against time: + * + * Note, to simplify code, the list is never empty. + * + * With overlapping queries we can see that periodic OA reports may + * relate to multiple queries and care needs to be take to keep + * track of sample buffers until there are no queries that might + * depend on their contents. + * + * We use a node ref counting system where a reference ensures that a + * node and all following nodes can't be freed/recycled until the + * reference drops to zero. + * + * E.g. with a ref of one here: + * [ 0 ][ 0 ][ 1 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ] + * + * These nodes could be freed or recycled ("reaped"): + * [ 0 ][ 0 ] + * + * These must be preserved until the leading ref drops to zero: + * [ 1 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ] + * + * When a query starts we take a reference on the current tail of + * the list, knowing that no already-buffered samples can possibly + * relate to the newly-started query. A pointer to this node is + * also saved in the query object's ->oa.samples_head. + * + * E.g. starting query A while there are two nodes in .sample_buffers: + * ________________A________ + * | + * + * [ 0 ][ 1 ] + * ^_______ Add a reference and store pointer to node in + * A->oa.samples_head + * + * Moving forward to when the B query starts with no new buffer nodes: + * (for reference, i915 perf reads() are only done when queries finish) + * ________________A_______ + * | ________B___ + * | | + * + * [ 0 ][ 2 ] + * ^_______ Add a reference and store pointer to + * node in B->oa.samples_head + * + * Once a query is finished, after an OA query has become 'Ready', + * once the End OA report has landed and after we we have processed + * all the intermediate periodic samples then we drop the + * ->oa.samples_head reference we took at the start. + * + * So when the B query has finished we have: + * ________________A________ + * | ______B___________ + * | | | + * [ 0 ][ 1 ][ 0 ][ 0 ][ 0 ] + * ^_______ Drop B->oa.samples_head reference + * + * We still can't free these due to the A->oa.samples_head ref: + * [ 1 ][ 0 ][ 0 ][ 0 ] + * + * When the A query finishes: (note there's a new ref for C's samples_head) + * ________________A_________________ + * | | + * | _____C_________ + * | | | + * [ 0 ][ 0 ][ 0 ][ 0 ][ 1 ][ 0 ][ 0 ] + * ^_______ Drop A->oa.samples_head reference + * + * And we can now reap these nodes up to the C->oa.samples_head: + * [ X ][ X ][ X ][ X ] + * keeping -> [ 1 ][ 0 ][ 0 ] + * + * We reap old sample buffers each time we finish processing an OA + * query by iterating the sample_buffers list from the head until we + * find a referenced node and stop. + * + * Reaped buffers move to a perfquery.free_sample_buffers list and + * when we come to read() we first look to recycle a buffer from the + * free_sample_buffers list before allocating a new buffer. + */ +struct oa_sample_buf { + struct exec_node link; + int refcount; + int len; + uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10]; + uint32_t last_timestamp; +}; + + static inline size_t gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter) { diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 6f7ab86d42b..0ecdac48253 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -86,129 +86,6 @@ #define OAREPORT_REASON_CTX_SWITCH (1<<3) #define OAREPORT_REASON_GO_TRANSITION (1<<4) -#define I915_PERF_OA_SAMPLE_SIZE (8 + /* drm_i915_perf_record_header */ \ - 256) /* OA counter report */ - -/** - * Periodic OA samples are read() into these buffer structures via the - * i915 perf kernel interface and appended to the - * brw->perfquery.sample_buffers linked list. When we process the - * results of an OA metrics query we need to consider all the periodic - * samples between the Begin and End MI_REPORT_PERF_COUNT command - * markers. - * - * 'Periodic' is a simplification as there are other automatic reports - * written by the hardware also buffered here. - * - * Considering three queries, A, B and C: - * - * Time ----> - * ________________A_________________ - * | | - * | ________B_________ _____C___________ - * | | | | | | - * - * And an illustration of sample buffers read over this time frame: - * [HEAD ][ ][ ][ ][ ][ ][ ][ ][TAIL ] - * - * These nodes may hold samples for query A: - * [ ][ ][ A ][ A ][ A ][ A ][ A ][ ][ ] - * - * These nodes may hold samples for query B: - * [ ][ ][ B ][ B ][ B ][ ][ ][ ][ ] - * - * These nodes may hold samples for query C: - * [ ][ ][ ][ ][ ][ C ][ C ][ C ][ ] - * - * The illustration assumes we have an even distribution of periodic - * samples so all nodes have the same size plotted against time: - * - * Note, to simplify code, the list is never empty. - * - * With overlapping queries we can see that periodic OA reports may - * relate to multiple queries and care needs to be take to keep - * track of sample buffers until there are no queries that might - * depend on their contents. - * - * We use a node ref counting system where a reference ensures that a - * node and all following nodes can't be freed/recycled until the - * reference drops to zero. - * - * E.g. with a ref of one here: - * [ 0 ][ 0 ][ 1 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ] - * - * These nodes could be freed or recycled ("reaped"): - * [ 0 ][ 0 ] - * - * These must be preserved until the leading ref drops to zero: - * [ 1 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ][ 0 ] - * - * When a query starts we take a reference on the current tail of - * the list, knowing that no already-buffered samples can possibly - * relate to the newly-started query. A pointer to this node is - * also saved in the query object's ->oa.samples_head. - * - * E.g. starting query A while there are two nodes in .sample_buffers: - * ________________A________ - * | - * - * [ 0 ][ 1 ] - * ^_______ Add a reference and store pointer to node in - * A->oa.samples_head - * - * Moving forward to when the B query starts with no new buffer nodes: - * (for reference, i915 perf reads() are only done when queries finish) - * ________________A_______ - * | ________B___ - * | | - * - * [ 0 ][ 2 ] - * ^_______ Add a reference and store pointer to - * node in B->oa.samples_head - * - * Once a query is finished, after an OA query has become 'Ready', - * once the End OA report has landed and after we we have processed - * all the intermediate periodic samples then we drop the - * ->oa.samples_head reference we took at the start. - * - * So when the B query has finished we have: - * ________________A________ - * | ______B___________ - * | | | - * [ 0 ][ 1 ][ 0 ][ 0 ][ 0 ] - * ^_______ Drop B->oa.samples_head reference - * - * We still can't free these due to the A->oa.samples_head ref: - * [ 1 ][ 0 ][ 0 ][ 0 ] - * - * When the A query finishes: (note there's a new ref for C's samples_head) - * ________________A_________________ - * | | - * | _____C_________ - * | | | - * [ 0 ][ 0 ][ 0 ][ 0 ][ 1 ][ 0 ][ 0 ] - * ^_______ Drop A->oa.samples_head reference - * - * And we can now reap these nodes up to the C->oa.samples_head: - * [ X ][ X ][ X ][ X ] - * keeping -> [ 1 ][ 0 ][ 0 ] - * - * We reap old sample buffers each time we finish processing an OA - * query by iterating the sample_buffers list from the head until we - * find a referenced node and stop. - * - * Reaped buffers move to a perfquery.free_sample_buffers list and - * when we come to read() we first look to recycle a buffer from the - * free_sample_buffers list before allocating a new buffer. - */ -struct brw_oa_sample_buf { - struct exec_node link; - int refcount; - int len; - uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10]; - uint32_t last_timestamp; -}; - /** Downcasting convenience macro. */ static inline struct brw_perf_query_object * brw_perf_query(struct gl_perf_query_object *o) @@ -304,14 +181,14 @@ dump_perf_queries(struct brw_context *brw) /******************************************************************************/ -static struct brw_oa_sample_buf * +static struct oa_sample_buf * get_free_sample_buf(struct brw_context *brw) { struct exec_node *node = exec_list_pop_head(&brw->perfquery.free_sample_buffers); - struct brw_oa_sample_buf *buf; + struct oa_sample_buf *buf; if (node) - buf = exec_node_data(struct brw_oa_sample_buf, node, link); + buf = exec_node_data(struct oa_sample_buf, node, link); else { buf = ralloc_size(brw, sizeof(*buf)); @@ -328,15 +205,15 @@ reap_old_sample_buffers(struct brw_context *brw) { struct exec_node *tail_node = exec_list_get_tail(&brw->perfquery.sample_buffers); - struct brw_oa_sample_buf *tail_buf = - exec_node_data(struct brw_oa_sample_buf, tail_node, link); + struct oa_sample_buf *tail_buf = + exec_node_data(struct oa_sample_buf, tail_node, link); /* Remove all old, unreferenced sample buffers walking forward from * the head of the list, except always leave at least one node in * the list so we always have a node to reference when we Begin * a new query. */ - foreach_list_typed_safe(struct brw_oa_sample_buf, buf, link, + foreach_list_typed_safe(struct oa_sample_buf, buf, link, &brw->perfquery.sample_buffers) { if (buf->refcount == 0 && buf != tail_buf) { @@ -350,7 +227,7 @@ reap_old_sample_buffers(struct brw_context *brw) static void free_sample_bufs(struct brw_context *brw) { - foreach_list_typed_safe(struct brw_oa_sample_buf, buf, link, + foreach_list_typed_safe(struct oa_sample_buf, buf, link, &brw->perfquery.free_sample_buffers) ralloc_free(buf); @@ -532,8 +409,8 @@ drop_from_unaccumulated_query_list(struct brw_context *brw, * referenced by any other queries... */ - struct brw_oa_sample_buf *buf = - exec_node_data(struct brw_oa_sample_buf, obj->oa.samples_head, link); + struct oa_sample_buf *buf = + exec_node_data(struct oa_sample_buf, obj->oa.samples_head, link); assert(buf->refcount > 0); buf->refcount--; @@ -604,12 +481,12 @@ read_oa_samples_until(struct brw_context *brw, { struct exec_node *tail_node = exec_list_get_tail(&brw->perfquery.sample_buffers); - struct brw_oa_sample_buf *tail_buf = - exec_node_data(struct brw_oa_sample_buf, tail_node, link); + struct oa_sample_buf *tail_buf = + exec_node_data(struct oa_sample_buf, tail_node, link); uint32_t last_timestamp = tail_buf->last_timestamp; while (1) { - struct brw_oa_sample_buf *buf = get_free_sample_buf(brw); + struct oa_sample_buf *buf = get_free_sample_buf(brw); uint32_t offset; int len; @@ -764,7 +641,7 @@ accumulate_oa_reports(struct brw_context *brw, */ first_samples_node = obj->oa.samples_head->next; - foreach_list_typed_from(struct brw_oa_sample_buf, buf, link, + foreach_list_typed_from(struct oa_sample_buf, buf, link, &brw->perfquery.sample_buffers, first_samples_node) { @@ -1162,8 +1039,8 @@ brw_begin_perf_query(struct gl_context *ctx, assert(!exec_list_is_empty(&brw->perfquery.sample_buffers)); obj->oa.samples_head = exec_list_get_tail(&brw->perfquery.sample_buffers); - struct brw_oa_sample_buf *buf = - exec_node_data(struct brw_oa_sample_buf, obj->oa.samples_head, link); + struct oa_sample_buf *buf = + exec_node_data(struct oa_sample_buf, obj->oa.samples_head, link); /* This reference will ensure that future/following sample * buffers (that may relate to this query) can't be freed until @@ -1805,7 +1682,7 @@ brw_init_perf_query_info(struct gl_context *ctx) * Begin an OA query we can always take a reference on a buffer * in this list. */ - struct brw_oa_sample_buf *buf = get_free_sample_buf(brw); + struct oa_sample_buf *buf = get_free_sample_buf(brw); exec_list_push_head(&brw->perfquery.sample_buffers, &buf->link); brw->perfquery.oa_stream_fd = -1; -- 2.30.2