X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_performance_query.h;h=66b32c0490b5fb275aa0e4e6db0bf58d27e8d7cc;hb=bb84fa146f2252f22999205a2904d8a948bffd3b;hp=11938b74df365e4e83dbdba9acb87d30ad30fb3f;hpb=8c8761b237d35d1a39ec9535e69d6aaa97b99c84;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h index 11938b74df3..66b32c0490b 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.h +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h @@ -27,33 +27,201 @@ #include #include "brw_context.h" +#include "brw_performance_query_metrics.h" -struct brw_pipeline_stat -{ - uint32_t reg; - uint32_t numerator; - uint32_t denominator; -}; +/* + * We currently allocate only one page for pipeline statistics queries. Here + * we derive the maximum number of counters for that amount. + */ +#define STATS_BO_SIZE 4096 +#define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2) +#define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8) + +/* + * The largest OA formats we can use include: + * For Haswell: + * 1 timestamp, 45 A counters, 8 B counters and 8 C counters. + * For Gen8+: + * 1 timestamp, 1 clock, 36 A counters, 8 B counters and 8 C counters + */ +#define MAX_OA_REPORT_COUNTERS 62 -struct brw_perf_query_counter +/** + * i965 representation of a performance query object. + * + * NB: We want to keep this structure relatively lean considering that + * applications may expect to allocate enough objects to be able to + * query around all draw calls in a frame. + */ +struct brw_perf_query_object { - const char *name; - const char *desc; - GLenum type; - GLenum data_type; - uint64_t raw_max; - size_t offset; - size_t size; + struct gl_perf_query_object base; + + const struct brw_perf_query_info *query; + /* See query->kind to know which state below is in use... 
*/ union { - uint64_t (*oa_counter_read_uint64)(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t *accumulator); - float (*oa_counter_read_float)(struct brw_context *brw, - const struct brw_perf_query_info *query, - uint64_t *accumulator); - struct brw_pipeline_stat pipeline_stat; + struct { + + /** + * BO containing OA counter snapshots at query Begin/End time. + */ + struct brw_bo *bo; + + /** + * Address at which @bo is mapped + */ + void *map; + + /** + * The MI_REPORT_PERF_COUNT command lets us specify a unique + * ID that will be reflected in the resulting OA report + * that's written by the GPU. This is the ID we're expecting + * in the begin report and the end report should be + * @begin_report_id + 1. + */ + int begin_report_id; + + /** + * Reference the head of the brw->perfquery.sample_buffers + * list at the time that the query started (so we only need + * to look at nodes after this point when looking for samples + * related to this query) + * + * (See struct brw_oa_sample_buf description for more details) + */ + struct exec_node *samples_head; + + /** + * Storage for the final accumulated OA counters. + */ + uint64_t accumulator[MAX_OA_REPORT_COUNTERS]; + + /** + * Hw ID used by the context on which the query was running. + */ + uint32_t hw_id; + + /** + * false while in the unaccumulated_elements list, and set to + * true when the final, end MI_RPC snapshot has been + * accumulated. + */ + bool results_accumulated; + + /** + * Number of reports accumulated to produce the results. + */ + uint32_t reports_accumulated; + + /** + * Frequency of the GT at begin and end of the query. + */ + uint64_t gt_frequency[2]; + + /** + * Frequency in the slices of the GT at the begin and end of the + * query. + */ + uint64_t slice_frequency[2]; + + /** + * Frequency in the unslice of the GT at the begin and end of the + * query. 
+ */ + uint64_t unslice_frequency[2]; + } oa; + + struct { + /** + * BO containing starting and ending snapshots for the + * statistics counters. + */ + struct brw_bo *bo; + } pipeline_stats; }; }; +static inline struct brw_perf_query_info * +brw_perf_query_append_query_info(struct brw_context *brw) +{ + brw->perfquery.queries = + reralloc(brw, brw->perfquery.queries, + struct brw_perf_query_info, ++brw->perfquery.n_queries); /* NOTE(review): the reralloc() result is stored and indexed below without a NULL check */ + + return &brw->perfquery.queries[brw->perfquery.n_queries - 1]; +} + +static inline void +brw_perf_query_info_add_stat_reg(struct brw_perf_query_info *query, + uint32_t reg, + uint32_t numerator, + uint32_t denominator, + const char *name, + const char *description) +{ + struct brw_perf_query_counter *counter; + + assert(query->n_counters < MAX_STAT_COUNTERS); /* the bound is only enforced in builds where assert() is active */ + + counter = &query->counters[query->n_counters]; + counter->name = name; + counter->desc = description; + counter->type = GL_PERFQUERY_COUNTER_RAW_INTEL; + counter->data_type = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL; + counter->size = sizeof(uint64_t); + counter->offset = sizeof(uint64_t) * query->n_counters; /* counters are laid out back to back, one uint64_t each */ + counter->pipeline_stat.reg = reg; + counter->pipeline_stat.numerator = numerator; + counter->pipeline_stat.denominator = denominator; + + query->n_counters++; +} + +static inline void +brw_perf_query_info_add_basic_stat_reg(struct brw_perf_query_info *query, + uint32_t reg, const char *name) +{ + brw_perf_query_info_add_stat_reg(query, reg, 1, 1, name, name); /* numerator and denominator are both 1; the name doubles as the description */ +} + +/* Accumulate the delta of a 32-bit OA counter; the unsigned 32-bit subtraction absorbs a single counter wrap */ +static inline void +brw_perf_query_accumulate_uint32(const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + *accumulator += (uint32_t)(*report1 - *report0); +} + +/* Accumulate 40-bit OA counters: the low 32 bits are read from dword a_index + 4 of each report and the high 8 bits from byte a_index of the byte array starting at dword 40 */ +static inline void +brw_perf_query_accumulate_uint40(int a_index, + const uint32_t *report0, + const uint32_t *report1, + uint64_t *accumulator) +{ + const uint8_t *high_bytes0 = (uint8_t *)(report0 + 40); + const uint8_t *high_bytes1 = (uint8_t *)(report1 
+ 40); + uint64_t high0 = (uint64_t)(high_bytes0[a_index]) << 32; + uint64_t high1 = (uint64_t)(high_bytes1[a_index]) << 32; + uint64_t value0 = report0[a_index + 4] | high0; + uint64_t value1 = report1[a_index + 4] | high1; + uint64_t delta; + + if (value0 > value1) + delta = (1ULL << 40) + value1 - value0; /* end value is smaller than begin value: treat it as one wrap of the 40-bit counter */ + else + delta = value1 - value0; + + *accumulator += delta; +} + +int brw_perf_query_get_mdapi_oa_data(struct brw_context *brw, + struct brw_perf_query_object *obj, + size_t data_size, + uint8_t *data); +void brw_perf_query_register_mdapi_oa_query(struct brw_context *brw); +void brw_perf_query_register_mdapi_statistic_query(struct brw_context *brw); + #endif /* BRW_PERFORMANCE_QUERY_H */