From ae3870df7043861632aa553e12cc9284a9aef827 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 16 Apr 2015 12:01:09 -0700 Subject: [PATCH] i965: Add a brw_compiler structure and store the register sets in it Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_context.h | 4 +- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 54 +++++++++--------- src/mesa/drivers/dri/i965/brw_shader.cpp | 13 +++++ src/mesa/drivers/dri/i965/brw_shader.h | 55 +++++++++++++++++++ .../dri/i965/brw_vec4_reg_allocate.cpp | 38 ++++++------- src/mesa/drivers/dri/i965/intel_screen.c | 5 +- src/mesa/drivers/dri/i965/intel_screen.h | 48 +--------------- 7 files changed, 120 insertions(+), 97 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 57249321c07..f79729baf6f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1613,10 +1613,10 @@ void brw_upload_cs_urb_state(struct brw_context *brw); /* brw_fs_reg_allocate.cpp */ -void brw_fs_alloc_reg_sets(struct intel_screen *screen); +void brw_fs_alloc_reg_sets(struct brw_compiler *compiler); /* brw_vec4_reg_allocate.cpp */ -void brw_vec4_alloc_reg_set(struct intel_screen *screen); +void brw_vec4_alloc_reg_set(struct brw_compiler *compiler); /* brw_disasm.c */ int brw_disassemble_inst(FILE *file, const struct brw_device_info *devinfo, diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 92d29a4d530..dc433b096e6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -71,9 +71,9 @@ fs_visitor::assign_regs_trivial() } static void -brw_alloc_reg_set(struct intel_screen *screen, int reg_width) +brw_alloc_reg_set(struct brw_compiler *compiler, int reg_width) { - const struct brw_device_info *devinfo = screen->devinfo; + const struct brw_device_info *devinfo = compiler->devinfo; int base_reg_count = BRW_MAX_GRF; int index = reg_width - 1; @@ -112,9 +112,9 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width) class_sizes[class_count++] = 8; } - memset(screen->wm_reg_sets[index].class_to_ra_reg_range, 0, - sizeof(screen->wm_reg_sets[index].class_to_ra_reg_range)); - int *class_to_ra_reg_range = screen->wm_reg_sets[index].class_to_ra_reg_range; + memset(compiler->fs_reg_sets[index].class_to_ra_reg_range, 0, + sizeof(compiler->fs_reg_sets[index].class_to_ra_reg_range)); + int *class_to_ra_reg_range = compiler->fs_reg_sets[index].class_to_ra_reg_range; /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; @@ -144,16 +144,16 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width) class_to_ra_reg_range[i] = class_to_ra_reg_range[i-1]; } - uint8_t *ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count); - struct ra_regs *regs = ra_alloc_reg_set(screen, ra_reg_count); + uint8_t *ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count); + struct ra_regs *regs = ra_alloc_reg_set(compiler, ra_reg_count); if (devinfo->gen >= 6) ra_set_allocate_round_robin(regs); - int *classes = ralloc_array(screen, int, class_count); + int *classes = ralloc_array(compiler, int, class_count); int aligned_pairs_class = -1; /* Allocate space for q values. We allocate class_count + 1 because we * want to leave room for the aligned pairs class if we have it. */ - unsigned int **q_values = ralloc_array(screen, unsigned int *, + unsigned int **q_values = ralloc_array(compiler, unsigned int *, class_count + 1); for (int i = 0; i < class_count + 1; ++i) q_values[i] = ralloc_array(q_values, unsigned int, class_count + 1); @@ -273,20 +273,20 @@ brw_alloc_reg_set(struct intel_screen *screen, int reg_width) ralloc_free(q_values); - screen->wm_reg_sets[index].regs = regs; - for (unsigned i = 0; i < ARRAY_SIZE(screen->wm_reg_sets[index].classes); i++) - screen->wm_reg_sets[index].classes[i] = -1; + compiler->fs_reg_sets[index].regs = regs; + for (unsigned i = 0; i < ARRAY_SIZE(compiler->fs_reg_sets[index].classes); i++) + compiler->fs_reg_sets[index].classes[i] = -1; for (int i = 0; i < class_count; i++) - screen->wm_reg_sets[index].classes[class_sizes[i] - 1] = classes[i]; - screen->wm_reg_sets[index].ra_reg_to_grf = ra_reg_to_grf; - screen->wm_reg_sets[index].aligned_pairs_class = aligned_pairs_class; + compiler->fs_reg_sets[index].classes[class_sizes[i] - 1] = classes[i]; + compiler->fs_reg_sets[index].ra_reg_to_grf = ra_reg_to_grf; + compiler->fs_reg_sets[index].aligned_pairs_class = aligned_pairs_class; } void -brw_fs_alloc_reg_sets(struct intel_screen *screen) +brw_fs_alloc_reg_sets(struct brw_compiler *compiler) { - brw_alloc_reg_set(screen, 1); - brw_alloc_reg_set(screen, 2); + brw_alloc_reg_set(compiler, 1); + brw_alloc_reg_set(compiler, 2); } static int @@ -524,7 +524,7 @@ fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) bool fs_visitor::assign_regs(bool allow_spilling) { - struct intel_screen *screen = brw->intelScreen; + struct brw_compiler *compiler = brw->intelScreen->compiler; /* Most of this allocation was written for a reg_width of 1 * (dispatch_width == 8). In extending to SIMD16, the code was * left in place and it was converted to have the hardware @@ -534,7 +534,7 @@ fs_visitor::assign_regs(bool allow_spilling) int reg_width = dispatch_width / 8; unsigned hw_reg_mapping[this->alloc.count]; int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width); - int rsi = reg_width - 1; /* Which screen->wm_reg_sets[] to use */ + int rsi = reg_width - 1; /* Which compiler->fs_reg_sets[] to use */ calculate_live_intervals(); int node_count = this->alloc.count; @@ -544,15 +544,15 @@ fs_visitor::assign_regs(bool allow_spilling) if (devinfo->gen >= 7) node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START; struct ra_graph *g = - ra_alloc_interference_graph(screen->wm_reg_sets[rsi].regs, node_count); + ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count); for (unsigned i = 0; i < this->alloc.count; i++) { unsigned size = this->alloc.sizes[i]; int c; - assert(size <= ARRAY_SIZE(screen->wm_reg_sets[rsi].classes) && + assert(size <= ARRAY_SIZE(compiler->fs_reg_sets[rsi].classes) && "Register allocation relies on split_virtual_grfs()"); - c = screen->wm_reg_sets[rsi].classes[size - 1]; + c = compiler->fs_reg_sets[rsi].classes[size - 1]; /* Special case: on pre-GEN6 hardware that supports PLN, the * second operand of a PLN instruction needs to be an @@ -563,10 +563,10 @@ fs_visitor::assign_regs(bool allow_spilling) * any other interpolation modes). So all we need to do is find * that register and set it to the appropriate class. */ - if (screen->wm_reg_sets[rsi].aligned_pairs_class >= 0 && + if (compiler->fs_reg_sets[rsi].aligned_pairs_class >= 0 && this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].file == GRF && this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) { - c = screen->wm_reg_sets[rsi].aligned_pairs_class; + c = compiler->fs_reg_sets[rsi].aligned_pairs_class; } ra_set_node_class(g, i, c); @@ -595,7 +595,7 @@ fs_visitor::assign_regs(bool allow_spilling) */ if (inst->eot) { int size = alloc.sizes[inst->src[0].reg]; - int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1; + int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1; ra_set_node_reg(g, inst->src[0].reg, reg); break; } @@ -663,7 +663,7 @@ fs_visitor::assign_regs(bool allow_spilling) for (unsigned i = 0; i < this->alloc.count; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = screen->wm_reg_sets[rsi].ra_reg_to_grf[reg]; + hw_reg_mapping[i] = compiler->fs_reg_sets[rsi].ra_reg_to_grf[reg]; this->grf_used = MAX2(this->grf_used, hw_reg_mapping[i] + this->alloc.sizes[i]); } diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 2e340574079..79f0e1cffa7 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -32,6 +32,19 @@ #include "glsl/glsl_parser_extras.h" #include "main/shaderapi.h" +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) +{ + struct brw_compiler *compiler = rzalloc(mem_ctx, struct brw_compiler); + + compiler->devinfo = devinfo; + + brw_fs_alloc_reg_sets(compiler); + brw_vec4_alloc_reg_set(compiler); + + return compiler; +} + struct gl_shader * brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 72e8be5b11a..ac4e62a9a27 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -36,6 +36,58 @@ #define MAX_SAMPLER_MESSAGE_SIZE 11 #define MAX_VGRF_SIZE 16 +struct brw_compiler { + const struct brw_device_info *devinfo; + + struct { + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used. + */ + int *classes; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + } vec4_reg_set; + + struct { + struct ra_regs *regs; + + /** + * Array of the ra classes for the unaligned contiguous register + * block sizes used, indexed by register size. + */ + int classes[16]; + + /** + * Mapping from classes to ra_reg ranges. Each of the per-size + * classes corresponds to a range of ra_reg nodes. This array stores + * those ranges in the form of first ra_reg in each class and the + * total number of ra_reg elements in the last array element. This + * way the range of the i'th class is given by: + * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] ) + */ + int class_to_ra_reg_range[17]; + + /** + * Mapping for register-allocated objects in *regs to the first + * GRF for that object. + */ + uint8_t *ra_reg_to_grf; + + /** + * ra class for the aligned pairs we use for PLN, which doesn't + * appear in *classes. + */ + int aligned_pairs_class; + } fs_reg_sets[2]; +}; + enum PACKED register_file { BAD_FILE, GRF, @@ -223,6 +275,9 @@ bool brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg); extern "C" { #endif +struct brw_compiler * +brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo); + bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *shader_prog, struct gl_program *prog); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 3f2bb059122..5368a75bc0f 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -91,10 +91,10 @@ vec4_visitor::reg_allocate_trivial() } extern "C" void -brw_vec4_alloc_reg_set(struct intel_screen *screen) +brw_vec4_alloc_reg_set(struct brw_compiler *compiler) { int base_reg_count = - screen->devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; + compiler->devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF; /* After running split_virtual_grfs(), almost all VGRFs will be of size 1. * SEND-from-GRF sources cannot be split, so we also need classes for each @@ -112,14 +112,14 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) ra_reg_count += base_reg_count - (class_sizes[i] - 1); } - ralloc_free(screen->vec4_reg_set.ra_reg_to_grf); - screen->vec4_reg_set.ra_reg_to_grf = ralloc_array(screen, uint8_t, ra_reg_count); - ralloc_free(screen->vec4_reg_set.regs); - screen->vec4_reg_set.regs = ra_alloc_reg_set(screen, ra_reg_count); - if (screen->devinfo->gen >= 6) - ra_set_allocate_round_robin(screen->vec4_reg_set.regs); - ralloc_free(screen->vec4_reg_set.classes); - screen->vec4_reg_set.classes = ralloc_array(screen, int, class_count); + ralloc_free(compiler->vec4_reg_set.ra_reg_to_grf); + compiler->vec4_reg_set.ra_reg_to_grf = ralloc_array(compiler, uint8_t, ra_reg_count); + ralloc_free(compiler->vec4_reg_set.regs); + compiler->vec4_reg_set.regs = ra_alloc_reg_set(compiler, ra_reg_count); + if (compiler->devinfo->gen >= 6) + ra_set_allocate_round_robin(compiler->vec4_reg_set.regs); + ralloc_free(compiler->vec4_reg_set.classes); + compiler->vec4_reg_set.classes = ralloc_array(compiler, int, class_count); /* Now, add the registers to their classes, and add the conflicts * between them and the base GRF registers (and also each other). @@ -128,19 +128,19 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) unsigned *q_values[MAX_VGRF_SIZE]; for (int i = 0; i < class_count; i++) { int class_reg_count = base_reg_count - (class_sizes[i] - 1); - screen->vec4_reg_set.classes[i] = ra_alloc_reg_class(screen->vec4_reg_set.regs); + compiler->vec4_reg_set.classes[i] = ra_alloc_reg_class(compiler->vec4_reg_set.regs); q_values[i] = new unsigned[MAX_VGRF_SIZE]; for (int j = 0; j < class_reg_count; j++) { - ra_class_add_reg(screen->vec4_reg_set.regs, screen->vec4_reg_set.classes[i], reg); + ra_class_add_reg(compiler->vec4_reg_set.regs, compiler->vec4_reg_set.classes[i], reg); - screen->vec4_reg_set.ra_reg_to_grf[reg] = j; + compiler->vec4_reg_set.ra_reg_to_grf[reg] = j; for (int base_reg = j; base_reg < j + class_sizes[i]; base_reg++) { - ra_add_transitive_reg_conflict(screen->vec4_reg_set.regs, base_reg, reg); + ra_add_transitive_reg_conflict(compiler->vec4_reg_set.regs, base_reg, reg); } reg++; @@ -158,7 +158,7 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) } assert(reg == ra_reg_count); - ra_set_finalize(screen->vec4_reg_set.regs, q_values); + ra_set_finalize(compiler->vec4_reg_set.regs, q_values); for (int i = 0; i < MAX_VGRF_SIZE; i++) delete[] q_values[i]; @@ -191,7 +191,7 @@ vec4_visitor::setup_payload_interference(struct ra_graph *g, bool vec4_visitor::reg_allocate() { - struct intel_screen *screen = brw->intelScreen; + struct brw_compiler *compiler = brw->intelScreen->compiler; unsigned int hw_reg_mapping[alloc.count]; int payload_reg_count = this->first_non_payload_grf; @@ -207,12 +207,12 @@ vec4_visitor::reg_allocate() int first_payload_node = node_count; node_count += payload_reg_count; struct ra_graph *g = - ra_alloc_interference_graph(screen->vec4_reg_set.regs, node_count); + ra_alloc_interference_graph(compiler->vec4_reg_set.regs, node_count); for (unsigned i = 0; i < alloc.count; i++) { int size = this->alloc.sizes[i]; assert(size >= 1 && size <= MAX_VGRF_SIZE); - ra_set_node_class(g, i, screen->vec4_reg_set.classes[size - 1]); + ra_set_node_class(g, i, compiler->vec4_reg_set.classes[size - 1]); for (unsigned j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { @@ -248,7 +248,7 @@ vec4_visitor::reg_allocate() for (unsigned i = 0; i < alloc.count; i++) { int reg = ra_get_node_reg(g, i); - hw_reg_mapping[i] = screen->vec4_reg_set.ra_reg_to_grf[reg]; + hw_reg_mapping[i] = compiler->vec4_reg_set.ra_reg_to_grf[reg]; prog_data->total_grf = MAX2(prog_data->total_grf, hw_reg_mapping[i] + alloc.sizes[i]); } diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index f5e15a06664..92e638f7b75 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -38,6 +38,7 @@ #include "main/version.h" #include "swrast/s_renderbuffer.h" #include "util/ralloc.h" +#include "brw_shader.h" #include "utils.h" #include "xmlpool.h" @@ -1406,8 +1407,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) psp->extensions = !intelScreen->has_context_reset_notification ? intelScreenExtensions : intelRobustScreenExtensions; - brw_fs_alloc_reg_sets(intelScreen); - brw_vec4_alloc_reg_set(intelScreen); + intelScreen->compiler = brw_compiler_create(intelScreen, + intelScreen->devinfo); return (const __DRIconfig**) intel_screen_make_configs(psp); } diff --git a/src/mesa/drivers/dri/i965/intel_screen.h b/src/mesa/drivers/dri/i965/intel_screen.h index 393315ea292..f814ed017b1 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.h +++ b/src/mesa/drivers/dri/i965/intel_screen.h @@ -63,53 +63,7 @@ struct intel_screen int winsys_msaa_samples_override; - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used. - */ - int *classes; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - } vec4_reg_set; - - struct { - struct ra_regs *regs; - - /** - * Array of the ra classes for the unaligned contiguous register - * block sizes used, indexed by register size. - */ - int classes[16]; - - /** - * Mapping from classes to ra_reg ranges. Each of the per-size - * classes corresponds to a range of ra_reg nodes. This array stores - * those ranges in the form of first ra_reg in each class and the - * total number of ra_reg elements in the last array element. This - * way the range of the i'th class is given by: - * [ class_to_ra_reg_range[i], class_to_ra_reg_range[i+1] ) - */ - int class_to_ra_reg_range[17]; - - /** - * Mapping for register-allocated objects in *regs to the first - * GRF for that object. - */ - uint8_t *ra_reg_to_grf; - - /** - * ra class for the aligned pairs we use for PLN, which doesn't - * appear in *classes. - */ - int aligned_pairs_class; - } wm_reg_sets[2]; + struct brw_compiler *compiler; /** * Configuration cache with default values for all contexts -- 2.30.2