From 886a4d4a6ad8a67e02e67d5dd79ae37a592930c6 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 6 Sep 2012 00:20:27 -0400 Subject: [PATCH] r300g: Init regalloc state during context creation Initializing the regalloc state is expensive, and since it is always the same for every compile we only need to initialize it once per context. This should help improve shader compile times for the driver. --- .../drivers/r300/compiler/radeon_compiler.c | 4 +- .../drivers/r300/compiler/radeon_compiler.h | 3 +- .../r300/compiler/radeon_pair_regalloc.c | 275 ++++++++---------- .../drivers/r300/compiler/radeon_regalloc.h | 62 ++++ src/gallium/drivers/r300/r300_context.c | 6 + src/gallium/drivers/r300/r300_context.h | 5 + src/gallium/drivers/r300/r300_fs.c | 2 +- src/gallium/drivers/r300/r300_vs.c | 2 +- 8 files changed, 204 insertions(+), 155 deletions(-) create mode 100644 src/gallium/drivers/r300/compiler/radeon_regalloc.h diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.c b/src/gallium/drivers/r300/compiler/radeon_compiler.c index 4d4eb645841..081cd2d0d55 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler.c +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.c @@ -29,10 +29,11 @@ #include "radeon_dataflow.h" #include "radeon_program.h" #include "radeon_program_pair.h" +#include "radeon_regalloc.h" #include "radeon_compiler_util.h" -void rc_init(struct radeon_compiler * c) +void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs) { memset(c, 0, sizeof(*c)); @@ -40,6 +41,7 @@ void rc_init(struct radeon_compiler * c) c->Program.Instructions.Prev = &c->Program.Instructions; c->Program.Instructions.Next = &c->Program.Instructions; c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; + c->regalloc_state = rs; } void rc_destroy(struct radeon_compiler * c) diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h index f78a51c19c3..e2710d848c5 100644 --- a/src/gallium/drivers/r300/compiler/radeon_compiler.h +++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h @@ -44,6 +44,7 @@ enum rc_program_type { struct radeon_compiler { struct memory_pool Pool; struct rc_program Program; + const struct rc_regalloc_state *regalloc_state; enum rc_program_type type; unsigned Debug:2; unsigned Error:1; @@ -77,7 +78,7 @@ struct radeon_compiler { unsigned initial_num_insts; /* Number of instructions at start. */ }; -void rc_init(struct radeon_compiler * c); +void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs); void rc_destroy(struct radeon_compiler * c); void rc_debug(struct radeon_compiler * c, const char * fmt, ...); diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c index 12594c81966..0a0ca116deb 100644 --- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c @@ -39,6 +39,7 @@ #include "radeon_compiler_util.h" #include "radeon_dataflow.h" #include "radeon_list.h" +#include "radeon_regalloc.h" #include "radeon_variable.h" #define VERBOSE 0 @@ -70,47 +71,96 @@ struct regalloc_state { int LoopEnd; }; -enum rc_reg_class { - RC_REG_CLASS_SINGLE, - RC_REG_CLASS_DOUBLE, - RC_REG_CLASS_TRIPLE, - RC_REG_CLASS_ALPHA, - RC_REG_CLASS_SINGLE_PLUS_ALPHA, - RC_REG_CLASS_DOUBLE_PLUS_ALPHA, - RC_REG_CLASS_TRIPLE_PLUS_ALPHA, - RC_REG_CLASS_X, - RC_REG_CLASS_Y, - RC_REG_CLASS_Z, - RC_REG_CLASS_XY, - RC_REG_CLASS_YZ, - RC_REG_CLASS_XZ, - RC_REG_CLASS_XW, - RC_REG_CLASS_YW, - RC_REG_CLASS_ZW, - RC_REG_CLASS_XYW, - RC_REG_CLASS_YZW, - RC_REG_CLASS_XZW, - RC_REG_CLASS_COUNT -}; - struct rc_class { - enum rc_reg_class Class; + enum rc_reg_class ID; unsigned int WritemaskCount; - /** This is 1 if this class is being used by the register allocator - * and 0 otherwise */ - unsigned int Used; - - /** This is the ID number assigned to this class by ra. */ - unsigned int Id; - /** List of writemasks that belong to this class */ unsigned int Writemasks[3]; }; +static const struct rc_class rc_class_list [] = { + {RC_REG_CLASS_SINGLE, 3, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z}}, + {RC_REG_CLASS_DOUBLE, 3, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z}}, + {RC_REG_CLASS_TRIPLE, 1, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ALPHA, 1, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_X, 1, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Y, 1, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Z, 1, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XY, 1, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZ, 1, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZ, 1, + {RC_MASK_X | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XW, 1, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YW, 1, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ZW, 1, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XYW, 1, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZW, 1, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZW, 1, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}} +}; + static void print_live_intervals(struct live_intervals * src) { if (!src || !src->Used) { @@ -234,7 +284,7 @@ static unsigned int is_derivative(rc_opcode op) } static int find_class( - struct rc_class * classes, + const struct rc_class * classes, unsigned int writemask, unsigned int max_writemask_count) { @@ -274,7 +324,7 @@ static void variable_get_class_read_cb( static enum rc_reg_class variable_get_class( struct rc_variable * variable, - struct rc_class * classes) + const struct rc_class * classes) { unsigned int i; unsigned int can_change_writemask= 1; @@ -380,7 +430,7 @@ static enum rc_reg_class variable_get_class( can_change_writemask ? 3 : 1); done: if (class_index > -1) { - return classes[class_index].Class; + return classes[class_index].ID; } else { error: rc_error(variable->C, @@ -458,95 +508,14 @@ static void add_register_conflicts( static void do_advanced_regalloc(struct regalloc_state * s) { - struct rc_class rc_class_list [] = { - {RC_REG_CLASS_SINGLE, 3, 0, 0, - {RC_MASK_X, - RC_MASK_Y, - RC_MASK_Z}}, - {RC_REG_CLASS_DOUBLE, 3, 0, 0, - {RC_MASK_X | RC_MASK_Y, - RC_MASK_X | RC_MASK_Z, - RC_MASK_Y | RC_MASK_Z}}, - {RC_REG_CLASS_TRIPLE, 1, 0, 0, - {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_ALPHA, 1, 0, 0, - {RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, - {RC_MASK_X | RC_MASK_W, - RC_MASK_Y | RC_MASK_W, - RC_MASK_Z | RC_MASK_W}}, - {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, - {RC_MASK_X | RC_MASK_Y | RC_MASK_W, - RC_MASK_X | RC_MASK_Z | RC_MASK_W, - RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, - {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, - {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_X, 1, 0, 0, - {RC_MASK_X, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_Y, 1, 0, 0, - {RC_MASK_Y, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_Z, 1, 0, 0, - {RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XY, 1, 0, 0, - {RC_MASK_X | RC_MASK_Y, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_YZ, 1, 0, 0, - {RC_MASK_Y | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XZ, 1, 0, 0, - {RC_MASK_X | RC_MASK_Z, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XW, 1, 0, 0, - {RC_MASK_X | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_YW, 1, 0, 0, - {RC_MASK_Y | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_ZW, 1, 0, 0, - {RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XYW, 1, 0, 0, - {RC_MASK_X | RC_MASK_Y | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_YZW, 1, 0, 0, - {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}}, - {RC_REG_CLASS_XZW, 1, 0, 0, - {RC_MASK_X | RC_MASK_Z | RC_MASK_W, - RC_MASK_NONE, - RC_MASK_NONE}} - }; - - unsigned int i, j, index, input_node, node_count, node_index; + + unsigned int i, input_node, node_count, node_index; unsigned int * node_classes; struct rc_instruction * inst; struct rc_list * var_ptr; struct rc_list * variables; - struct ra_regs * regs; struct ra_graph * graph; - - /* Allocate the main ra data structure */ - regs = ra_alloc_reg_set(NULL, s->C->max_temp_regs * RC_MASK_XYZW); + const struct rc_regalloc_state *ra_state = s->C->regalloc_state; /* Get list of program variables */ variables = rc_get_variables(s->C); @@ -561,37 +530,10 @@ static void do_advanced_regalloc(struct regalloc_state * s) rc_variable_compute_live_intervals(var_ptr->Item); class_index = variable_get_class(var_ptr->Item, rc_class_list); - - /* If we haven't used this register class yet, mark it - * as used and allocate space for it. */ - if (!rc_class_list[class_index].Used) { - rc_class_list[class_index].Used = 1; - rc_class_list[class_index].Id = ra_alloc_reg_class(regs); - } - - node_classes[node_index] = rc_class_list[class_index].Id; + node_classes[node_index] = ra_state->class_ids[class_index]; } - /* Assign registers to the classes */ - for (i = 0; i < RC_REG_CLASS_COUNT; i++) { - struct rc_class class = rc_class_list[i]; - if (!class.Used) { - continue; - } - - for (index = 0; index < s->C->max_temp_regs; index++) { - for (j = 0; j < class.WritemaskCount; j++) { - int reg_id = get_reg_id(index, - class.Writemasks[j]); - ra_class_add_reg(regs, class.Id, reg_id); - } - } - } - - /* Add register conflicts */ - add_register_conflicts(regs, s->C->max_temp_regs); - /* Calculate live intervals for input registers */ for (inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; @@ -609,7 +551,7 @@ static void do_advanced_regalloc(struct regalloc_state * s) /* Compute the writemask for inputs. */ for (i = 0; i < s->NumInputs; i++) { - unsigned int chan, class_id, writemask = 0; + unsigned int chan, writemask = 0; for (chan = 0; chan < 4; chan++) { if (s->Input[i].Live[chan].Used) { writemask |= (1 << chan); @@ -618,9 +560,8 @@ static void do_advanced_regalloc(struct regalloc_state * s) s->Input[i].Writemask = writemask; } - ra_set_finalize(regs, NULL); - - graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); + graph = ra_alloc_interference_graph(ra_state->regs, + node_count + s->NumInputs); /* Build the interference graph */ for (var_ptr = variables, node_index = 0; var_ptr; @@ -691,7 +632,39 @@ static void do_advanced_regalloc(struct regalloc_state * s) } ralloc_free(graph); - ralloc_free(regs); +} + +void rc_init_regalloc_state(struct rc_regalloc_state *s) +{ + unsigned i, j, index; + /* Allocate the main ra data structure */ + s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW); + + /* Create the register classes */ + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + const struct rc_class *class = &rc_class_list[i]; + s->class_ids[class->ID] = ra_alloc_reg_class(s->regs); + + /* Assign registers to the classes */ + for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) { + for (j = 0; j < class->WritemaskCount; j++) { + int reg_id = get_reg_id(index, + class->Writemasks[j]); + ra_class_add_reg(s->regs, + s->class_ids[class->ID], reg_id); + } + } + } + + /* Add register conflicts */ + add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS); + + ra_set_finalize(s->regs, NULL); +} + +void rc_destroy_regalloc_state(struct rc_regalloc_state *s) +{ + ralloc_free(s->regs); } /** diff --git a/src/gallium/drivers/r300/compiler/radeon_regalloc.h b/src/gallium/drivers/r300/compiler/radeon_regalloc.h new file mode 100644 index 00000000000..260a3caf91d --- /dev/null +++ b/src/gallium/drivers/r300/compiler/radeon_regalloc.h @@ -0,0 +1,62 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Tom Stellard + */ + +#ifndef RADEON_REGALLOC_H +#define RADEON_REGALLOC_H + +struct ra_regs; + +enum rc_reg_class { + RC_REG_CLASS_SINGLE, + RC_REG_CLASS_DOUBLE, + RC_REG_CLASS_TRIPLE, + RC_REG_CLASS_ALPHA, + RC_REG_CLASS_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_X, + RC_REG_CLASS_Y, + RC_REG_CLASS_Z, + RC_REG_CLASS_XY, + RC_REG_CLASS_YZ, + RC_REG_CLASS_XZ, + RC_REG_CLASS_XW, + RC_REG_CLASS_YW, + RC_REG_CLASS_ZW, + RC_REG_CLASS_XYW, + RC_REG_CLASS_YZW, + RC_REG_CLASS_XZW, + RC_REG_CLASS_COUNT +}; + +struct rc_regalloc_state { + struct ra_regs *regs; + unsigned class_ids[RC_REG_CLASS_COUNT]; +}; + +void rc_init_regalloc_state(struct rc_regalloc_state *s); +void rc_destroy_regalloc_state(struct rc_regalloc_state *s); + +#endif /* RADEON_REGALLOC_H */ diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c index 8cbe246f787..76c2e232009 100644 --- a/src/gallium/drivers/r300/r300_context.c +++ b/src/gallium/drivers/r300/r300_context.c @@ -35,6 +35,7 @@ #include "r300_emit.h" #include "r300_screen.h" #include "r300_screen_buffer.h" +#include "compiler/radeon_regalloc.h" static void r300_release_referenced_objects(struct r300_context *r300) { @@ -89,6 +90,8 @@ static void r300_destroy_context(struct pipe_context* context) if (r300->cs) r300->rws->cs_destroy(r300->cs); + rc_destroy_regalloc_state(&r300->fs_regalloc_state); + /* XXX: No way to tell if this was initialized or not? */ util_slab_destroy(&r300->pool_transfers); @@ -466,6 +469,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen, r300->hyperz_time_of_last_flush = os_time_get(); + /* Register allocator state */ + rc_init_regalloc_state(&r300->fs_regalloc_state); + /* Print driver info. */ #ifdef DEBUG { diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h index 2c7b477685a..fe253089094 100644 --- a/src/gallium/drivers/r300/r300_context.h +++ b/src/gallium/drivers/r300/r300_context.h @@ -33,6 +33,7 @@ #include "r300_defines.h" #include "r300_screen.h" +#include "compiler/radeon_regalloc.h" #include "../../winsys/radeon/drm/radeon_winsys.h" struct u_upload_mgr; @@ -622,6 +623,10 @@ struct r300_context { boolean hiz_in_use; /* Whether HIZ is enabled. */ enum r300_hiz_func hiz_func; /* HiZ function. Can be either MIN or MAX. */ uint32_t hiz_clear_value; /* HiZ clear value. */ + + /* Compiler state. */ + struct rc_regalloc_state fs_regalloc_state; /* Register allocator info for + * fragment shaders. */ }; #define foreach_atom(r300, atom) \ diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 8a492d2b40d..0842f9ad5dc 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -442,7 +442,7 @@ static void r300_translate_fragment_shader( /* Setup the compiler. */ memset(&compiler, 0, sizeof(compiler)); - rc_init(&compiler.Base); + rc_init(&compiler.Base, &r300->fs_regalloc_state); DBG_ON(r300, DBG_FP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 4faf2b58b0f..ea0621e2d3e 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -210,7 +210,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, /* Setup the compiler */ memset(&compiler, 0, sizeof(compiler)); - rc_init(&compiler.Base); + rc_init(&compiler.Base, NULL); DBG_ON(r300, DBG_VP) ? compiler.Base.Debug |= RC_DBG_LOG : 0; DBG_ON(r300, DBG_P_STAT) ? compiler.Base.Debug |= RC_DBG_STATS : 0; -- 2.30.2