X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fperf%2Fgen_perf.h;h=790719ccbadad3960acea2d5f1d7e1af2d3f39ec;hb=HEAD;hp=a3e08d1cb712fc02f08fd0a8cc2cdfad36c68ff4;hpb=2df1ab4d48027c642abe316a10ad39fb399f3d60;p=mesa.git diff --git a/src/intel/perf/gen_perf.h b/src/intel/perf/gen_perf.h index a3e08d1cb71..790719ccbad 100644 --- a/src/intel/perf/gen_perf.h +++ b/src/intel/perf/gen_perf.h @@ -25,32 +25,27 @@ #define GEN_PERF_H #include +#include #include #include +#if defined(MAJOR_IN_SYSMACROS) #include +#elif defined(MAJOR_IN_MKDEV) +#include +#endif #include "util/hash_table.h" #include "compiler/glsl/list.h" #include "util/ralloc.h" +#include "drm-uapi/i915_drm.h" + struct gen_device_info; struct gen_perf_config; struct gen_perf_query_info; -#define GEN7_RPSTAT1 0xA01C -#define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7 -#define GEN7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7) -#define GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT 0 -#define GEN7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0) - -#define GEN9_RPSTAT0 0xA01C -#define GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT 23 -#define GEN9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23) -#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0 -#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0) - enum gen_perf_counter_type { GEN_PERF_COUNTER_TYPE_EVENT, GEN_PERF_COUNTER_TYPE_DURATION_NORM, @@ -68,6 +63,39 @@ enum gen_perf_counter_data_type { GEN_PERF_COUNTER_DATA_TYPE_DOUBLE, }; +enum gen_perf_counter_units { + /* size */ + GEN_PERF_COUNTER_UNITS_BYTES, + + /* frequency */ + GEN_PERF_COUNTER_UNITS_HZ, + + /* time */ + GEN_PERF_COUNTER_UNITS_NS, + GEN_PERF_COUNTER_UNITS_US, + + /**/ + GEN_PERF_COUNTER_UNITS_PIXELS, + GEN_PERF_COUNTER_UNITS_TEXELS, + GEN_PERF_COUNTER_UNITS_THREADS, + GEN_PERF_COUNTER_UNITS_PERCENT, + + /* events */ + GEN_PERF_COUNTER_UNITS_MESSAGES, + GEN_PERF_COUNTER_UNITS_NUMBER, + GEN_PERF_COUNTER_UNITS_CYCLES, + GEN_PERF_COUNTER_UNITS_EVENTS, + GEN_PERF_COUNTER_UNITS_UTILIZATION, + + /**/ + 
GEN_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES, + GEN_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES, + GEN_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES, + GEN_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE, + + GEN_PERF_COUNTER_UNITS_MAX +}; + struct gen_pipeline_stat { uint32_t reg; uint32_t numerator; @@ -83,19 +111,6 @@ struct gen_pipeline_stat { */ #define MAX_OA_REPORT_COUNTERS 62 -#define IA_VERTICES_COUNT 0x2310 -#define IA_PRIMITIVES_COUNT 0x2318 -#define VS_INVOCATION_COUNT 0x2320 -#define HS_INVOCATION_COUNT 0x2300 -#define DS_INVOCATION_COUNT 0x2308 -#define GS_INVOCATION_COUNT 0x2328 -#define GS_PRIMITIVES_COUNT 0x2330 -#define CL_INVOCATION_COUNT 0x2338 -#define CL_PRIMITIVES_COUNT 0x2340 -#define PS_INVOCATION_COUNT 0x2348 -#define CS_INVOCATION_COUNT 0x2290 -#define PS_DEPTH_COUNT 0x2350 - /* * When currently allocate only one page for pipeline statistics queries. Here * we derived the maximum number of counters for that amount. @@ -134,13 +149,26 @@ struct gen_perf_query_result { * query. */ uint64_t unslice_frequency[2]; + + /** + * Timestamp of the query. + */ + uint64_t begin_timestamp; + + /** + * Whether the query was interrupted by another workload (aka preemption). 
+ */ + bool query_disjoint; }; struct gen_perf_query_counter { const char *name; const char *desc; + const char *symbol_name; + const char *category; enum gen_perf_counter_type type; enum gen_perf_counter_data_type data_type; + enum gen_perf_counter_units units; uint64_t raw_max; size_t offset; @@ -160,6 +188,18 @@ struct gen_perf_query_register_prog { uint32_t val; }; +/* Register programming for a given query */ +struct gen_perf_registers { + const struct gen_perf_query_register_prog *flex_regs; + uint32_t n_flex_regs; + + const struct gen_perf_query_register_prog *mux_regs; + uint32_t n_mux_regs; + + const struct gen_perf_query_register_prog *b_counter_regs; + uint32_t n_b_counter_regs; +}; + struct gen_perf_query_info { enum gen_perf_query_type { GEN_PERF_QUERY_TYPE_OA, @@ -167,6 +207,7 @@ struct gen_perf_query_info { GEN_PERF_QUERY_TYPE_PIPELINE, } kind; const char *name; + const char *symbol_name; const char *guid; struct gen_perf_query_counter *counters; int n_counters; @@ -184,21 +225,40 @@ struct gen_perf_query_info { int b_offset; int c_offset; - /* Register programming for a given query */ - struct gen_perf_query_register_prog *flex_regs; - uint32_t n_flex_regs; + struct gen_perf_registers config; +}; - struct gen_perf_query_register_prog *mux_regs; - uint32_t n_mux_regs; +struct gen_perf_query_counter_info { + struct gen_perf_query_counter *counter; - struct gen_perf_query_register_prog *b_counter_regs; - uint32_t n_b_counter_regs; + uint64_t query_mask; + + /** + * Each counter can be a part of many groups, each time at different index. + * This struct stores one of those locations. + */ + struct { + int group_idx; /* query/group number */ + int counter_idx; /* index inside of query/group */ + } location; }; struct gen_perf_config { + /* Whether i915 has DRM_I915_QUERY_PERF_CONFIG support. */ + bool i915_query_supported; + + /* Version of the i915-perf subsystem, refer to i915_drm.h. 
*/ + int i915_perf_version; + + /* Powergating configuration for running the query. */ + struct drm_i915_gem_context_param_sseu sseu; + struct gen_perf_query_info *queries; int n_queries; + struct gen_perf_query_counter_info *counter_infos; + int n_counters; + /* Variables referenced in the XML meta data for OA performance * counters, e.g in the normalization equations. * @@ -223,6 +283,17 @@ struct gen_perf_config { */ struct hash_table *oa_metrics_table; + /* When MDAPI hasn't configured the metric we need to use by the time the + * query begins, this OA metric is used as a fallback. + */ + uint64_t fallback_raw_oa_metric; + + /* Whether we have support for this platform. If true && n_queries == 0, + * this means we will not be able to use i915-perf because it is in + * paranoid mode. + */ + bool platform_supported; + /* Location of the device's sysfs entry. */ char sysfs_dev_dir[256]; @@ -234,41 +305,63 @@ struct gen_perf_config { bool (*batch_references)(void *batch, void *bo); void (*bo_wait_rendering)(void *bo); int (*bo_busy)(void *bo); - void (*emit_mi_flush)(void *ctx); + void (*emit_stall_at_pixel_scoreboard)(void *ctx); void (*emit_mi_report_perf_count)(void *ctx, void *bo, uint32_t offset_in_bytes, uint32_t report_id); void (*batchbuffer_flush)(void *ctx, const char *file, int line); - void (*capture_frequency_stat_register)(void *ctx, void *bo, - uint32_t bo_offset); - void (*store_register_mem64)(void *ctx, void *bo, uint32_t reg, uint32_t offset); + void (*store_register_mem)(void *ctx, void *bo, uint32_t reg, uint32_t reg_size, uint32_t offset); } vtbl; }; -struct gen_perf_query_object; -const struct gen_perf_query_info* gen_perf_query_info(const struct gen_perf_query_object *); - -struct gen_perf_context; -struct gen_perf_context *gen_perf_new_context(void *parent); +struct gen_perf_counter_pass { + struct gen_perf_query_info *query; + struct gen_perf_query_counter *counter; + uint32_t pass; +}; void gen_perf_init_metrics(struct 
gen_perf_config *perf_cfg, const struct gen_device_info *devinfo, - int drm_fd); -void gen_perf_init_context(struct gen_perf_context *perf_ctx, - struct gen_perf_config *perf_cfg, - void * ctx, /* driver context (eg, brw_context) */ - void * bufmgr, /* eg brw_bufmgr */ - const struct gen_device_info *devinfo, - uint32_t hw_ctx, - int drm_fd); + int drm_fd, + bool include_pipeline_statistics); -struct gen_perf_config *gen_perf_config(struct gen_perf_context *ctx); +/** Query i915 for a metric id using guid. + */ +bool gen_perf_load_metric_id(struct gen_perf_config *perf_cfg, + const char *guid, + uint64_t *metric_id); + +/** Load a configuration's content from i915 using a guid. + */ +struct gen_perf_registers *gen_perf_load_configuration(struct gen_perf_config *perf_cfg, + int fd, const char *guid); -int gen_perf_active_queries(struct gen_perf_context *perf_ctx, - const struct gen_perf_query_info *query); +/** Store a configuration into i915 using guid and return a new metric id. + * + * If guid is NULL, then a generated one will be provided by hashing the + * content of the configuration. + */ +uint64_t gen_perf_store_configuration(struct gen_perf_config *perf_cfg, int fd, + const struct gen_perf_registers *config, + const char *guid); + +/** Read the slice/unslice frequency from 2 OA reports and store them into + * result. + */ +void gen_perf_query_result_read_frequencies(struct gen_perf_query_result *result, + const struct gen_device_info *devinfo, + const uint32_t *start, + const uint32_t *end); +/** Accumulate the delta between 2 OA reports into result for a given query. 
+ */ +void gen_perf_query_result_accumulate(struct gen_perf_query_result *result, + const struct gen_perf_query_info *query, + const uint32_t *start, + const uint32_t *end); +void gen_perf_query_result_clear(struct gen_perf_query_result *result); static inline size_t gen_perf_query_counter_get_size(const struct gen_perf_query_counter *counter) @@ -296,31 +389,13 @@ gen_perf_new(void *ctx) return perf; } -struct gen_perf_query_object * -gen_perf_new_query(struct gen_perf_context *, unsigned query_index); - - -bool gen_perf_begin_query(struct gen_perf_context *perf_ctx, - struct gen_perf_query_object *query); -void gen_perf_end_query(struct gen_perf_context *perf_ctx, - struct gen_perf_query_object *query); -void gen_perf_wait_query(struct gen_perf_context *perf_ctx, - struct gen_perf_query_object *query, - void *current_batch); -bool gen_perf_is_query_ready(struct gen_perf_context *perf_ctx, - struct gen_perf_query_object *query, - void *current_batch); -void gen_perf_delete_query(struct gen_perf_context *perf_ctx, - struct gen_perf_query_object *query); -void gen_perf_get_query_data(struct gen_perf_context *perf_ctx, - struct gen_perf_query_object *query, - int data_size, - unsigned *data, - unsigned *bytes_written); - -void gen_perf_dump_query_count(struct gen_perf_context *perf_ctx); -void gen_perf_dump_query(struct gen_perf_context *perf_ctx, - struct gen_perf_query_object *obj, - void *current_batch); +uint32_t gen_perf_get_n_passes(struct gen_perf_config *perf, + const uint32_t *counter_indices, + uint32_t counter_indices_count, + struct gen_perf_query_info **pass_queries); +void gen_perf_get_counters_passes(struct gen_perf_config *perf, + const uint32_t *counter_indices, + uint32_t counter_indices_count, + struct gen_perf_counter_pass *counter_pass); #endif /* GEN_PERF_H */