From fe622bac0c1b5b9f2a9fcf9f35b51232a06bea42 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 11 Jan 2011 00:05:08 -0800 Subject: [PATCH] r300/compiler: Rewrite register allocator The new allocator uses ra and does swizzle packing. Also, a data structure (struct rc_variable) and associated functions have been added for generating UD and DU chains. --- src/gallium/drivers/r300/r300_tgsi_to_rc.c | 1 + src/mesa/drivers/dri/r300/compiler/Makefile | 3 + src/mesa/drivers/dri/r300/compiler/SConscript | 2 + .../dri/r300/compiler/r300_fragprog_emit.c | 2 +- .../drivers/dri/r300/compiler/r3xx_fragprog.c | 3 +- .../dri/r300/compiler/r500_fragprog_emit.c | 9 +- .../dri/r300/compiler/radeon_compiler_util.c | 256 ++++++ .../dri/r300/compiler/radeon_compiler_util.h | 25 + .../dri/r300/compiler/radeon_dataflow.c | 57 +- .../dri/r300/compiler/radeon_dataflow.h | 10 + .../drivers/dri/r300/compiler/radeon_list.c | 88 +++ .../drivers/dri/r300/compiler/radeon_list.h | 46 ++ .../dri/r300/compiler/radeon_pair_regalloc.c | 742 ++++++++++++------ .../dri/r300/compiler/radeon_program.h | 3 + .../r300/compiler/radeon_program_constants.h | 1 + .../dri/r300/compiler/radeon_program_pair.c | 14 + .../dri/r300/compiler/radeon_program_pair.h | 4 + .../dri/r300/compiler/radeon_variable.c | 434 ++++++++++ .../dri/r300/compiler/radeon_variable.h | 84 ++ 19 files changed, 1548 insertions(+), 236 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_list.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_list.h create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_variable.c create mode 100644 src/mesa/drivers/dri/r300/compiler/radeon_variable.h diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 6a000cfe2c6..0561ab9bfa4 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -266,6 +266,7 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi *shadowSamplers |= 1 << dst->U.I.TexSrcUnit; break; } + dst->U.I.TexSwizzle = RC_SWIZZLE_XYZW; } static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src) diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile index 90bd8e86768..5c9f57b4eac 100644 --- a/src/mesa/drivers/dri/r300/compiler/Makefile +++ b/src/mesa/drivers/dri/r300/compiler/Makefile @@ -24,9 +24,11 @@ C_SOURCES = \ radeon_dataflow.c \ radeon_dataflow_deadcode.c \ radeon_dataflow_swizzles.c \ + radeon_list.c \ radeon_optimize.c \ radeon_remove_constants.c \ radeon_rename_regs.c \ + radeon_variable.c \ r3xx_fragprog.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ @@ -49,6 +51,7 @@ INCLUDES = \ -I. \ -I$(TOP)/include \ -I$(TOP)/src/mesa \ + -I$(TOP)/src/glsl \ ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript index d44b745562c..bebb9ebe623 100755 --- a/src/mesa/drivers/dri/r300/compiler/SConscript +++ b/src/mesa/drivers/dri/r300/compiler/SConscript @@ -31,6 +31,8 @@ r300compiler = env.ConvenienceLibrary( 'radeon_dataflow.c', 'radeon_dataflow_deadcode.c', 'radeon_dataflow_swizzles.c', + 'radeon_variable.c', + 'radeon_list.c', 'r3xx_fragprog.c', 'r300_fragprog.c', 'r300_fragprog_swizzle.c', diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 8b73409136f..e6fd1fde62d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -93,7 +93,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r if (src.File == RC_FILE_CONSTANT) { return src.Index | (1 << 5); - } else if (src.File == RC_FILE_TEMPORARY) { + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { use_temporary(code, src.Index); return src.Index & 0x1f; } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index cdfda0b3196..ff022a51037 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -149,8 +149,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, {"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL}, - {"register allocation", 1, opt, rc_pair_regalloc, NULL}, - {"dumb register allocation", 1, !opt, rc_pair_regalloc_inputs_only, NULL}, + {"register allocation", 1, 1, rc_pair_regalloc, opt}, {"final code validation", 0, 1, rc_validate_final_shader, NULL}, {"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL}, {"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL}, diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index c7f79bc53c7..5f2588bdfe5 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -207,7 +207,7 @@ static unsigned int use_source(struct r500_fragment_program_code* code, struct r if (src.File == RC_FILE_CONSTANT) { return src.Index | R500_RGB_ADDR0_CONST; - } else if (src.File == RC_FILE_TEMPORARY) { + } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) { use_temporary(code, src.Index); return src.Index; } @@ -407,8 +407,11 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) | R500_TEX_DST_ADDR(inst->DstReg.Index) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + | (GET_SWZ(inst->TexSwizzle, 0) << 24) + | (GET_SWZ(inst->TexSwizzle, 1) << 26) + | (GET_SWZ(inst->TexSwizzle, 2) << 28) + | (GET_SWZ(inst->TexSwizzle, 3) << 30) + ; return 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index 15ec4418cb8..ae61d20fb9b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -124,6 +124,160 @@ unsigned swizzle_mask(unsigned swizzle, unsigned mask) return ret; } +static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info) +{ + if (info->HasTexture) { + return 0; + } + switch (info->Opcode) { + case RC_OPCODE_DP2: + case RC_OPCODE_DP3: + case RC_OPCODE_DP4: + case RC_OPCODE_DDX: + case RC_OPCODE_DDY: + return 0; + default: + return 1; + } +} + +static unsigned int adjust_channels( + unsigned int old_swizzle, + unsigned int conversion_swizzle) +{ + unsigned int i; + unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + for (i = 0; i < 4; i++) { + unsigned int new_chan = get_swz(conversion_swizzle, i); + if (new_chan == RC_SWIZZLE_UNUSED) { + continue; + } + SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i)); + } + return new_swizzle; +} + +static unsigned int rewrite_writemask( + unsigned int old_mask, + unsigned int conversion_swizzle) +{ + unsigned int new_mask = 0; + unsigned int i; + + for (i = 0; i < 4; i++) { + if (!GET_BIT(old_mask, i) + || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) { + continue; + } + new_mask |= (1 << GET_SWZ(conversion_swizzle, i)); + } + + return new_mask; +} + +/** + * This function rewrites the writemask of sub and adjusts the swizzles + * of all its source registers based on the conversion_swizzle. + * conversion_swizzle represents a mapping of the old writemask to the + * new writemask. For a detailed description of how conversion swizzles + * work see rc_rewrite_swizzle(). + */ +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle) +{ + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + unsigned int i; + + sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle); + + if (!srcs_need_rewrite(info)) { + return ; + } + + for (i = 0; i < info->NumSrcRegs; i++) { + sub->Arg[i].Swizzle = + adjust_channels(sub->Arg[i].Swizzle, conversion_swizzle); + } +} + +static void normal_rewrite_writemask_cb( + void * userdata, + struct rc_instruction * inst, + struct rc_src_register * src) +{ + unsigned int * new_mask = (unsigned int *)userdata; + src->Swizzle = adjust_channels(src->Swizzle, *new_mask); +} + +/** + * This function is the same as rc_pair_rewrite_writemask() except it + * operates on normal instructions. + */ +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle) +{ + unsigned int new_mask; + struct rc_sub_instruction * sub = &inst->U.I; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + sub->DstReg.WriteMask = + rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle); + + if (info->HasTexture) { + unsigned int i; + assert(sub->TexSwizzle == RC_SWIZZLE_XYZW); + for (i = 0; i < 4; i++) { + unsigned int swz = GET_SWZ(conversion_swizzle, i); + if (swz > 3) + continue; + SET_SWZ(sub->TexSwizzle, swz, i); + } + } + + if (!srcs_need_rewrite(info)) { + return; + } + + new_mask = sub->DstReg.WriteMask; + rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask); +} + +/** + * This function replaces each value 'swz' in swizzle with the value of + * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's + * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want + * to change all the Y's in swizzle to X, then conversion_swizzle should be + * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then + * conversion swizzle should be YX__ (0xfc1). + * @param swizzle The swizzle to change + * @param conversion_swizzle Describes the conversion to perform on the swizzle + * @return A converted swizzle + */ +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int conversion_swizzle) +{ + unsigned int chan; + unsigned int out_swizzle = swizzle; + + for (chan = 0; chan < 4; chan++) { + unsigned int swz = GET_SWZ(swizzle, chan); + unsigned int new_swz; + if (swz > 3) { + SET_SWZ(out_swizzle, chan, swz); + } else { + new_swz = GET_SWZ(conversion_swizzle, swz); + if (new_swz != RC_SWIZZLE_UNUSED) { + SET_SWZ(out_swizzle, chan, new_swz); + } else { + SET_SWZ(out_swizzle, chan, swz); + } + } + } + return out_swizzle; +} + /** * Left multiplication of a register with a swizzle */ @@ -281,3 +435,105 @@ unsigned int rc_inst_can_use_presub( return 1; } +struct max_data { + unsigned int Max; + unsigned int HasFileType; + rc_register_file File; +}; + +static void max_callback( + void * userdata, + struct rc_instruction * inst, + rc_register_file file, + unsigned int index, + unsigned int mask) +{ + struct max_data * d = (struct max_data*)userdata; + if (file == d->File && (!d->HasFileType || index > d->Max)) { + d->Max = index; + d->HasFileType = 1; + } +} + +/** + * @return The maximum index of the specified register file used by the + * program. + */ +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file) +{ + struct max_data data; + data.Max = 0; + data.HasFileType = 0; + data.File = file; + struct rc_instruction * inst; + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, max_callback, &data); + rc_for_all_writes_mask(inst, max_callback, &data); + } + if (!data.HasFileType) { + return -1; + } else { + return data.Max; + } +} + +static unsigned int get_source_readmask( + struct rc_pair_sub_instruction * sub, + unsigned int source, + unsigned int src_type) +{ + unsigned int i; + unsigned int readmask = 0; + const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode); + + for (i = 0; i < info->NumSrcRegs; i++) { + if (sub->Arg[i].Source != source + || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) { + continue; + } + readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle); + } + return readmask; +} + +/** + * This function attempts to remove a source from a pair instructions. + * @param inst + * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd + * @param source The index of the source to remove + * @param new_readmask A mask representing the components that are read by + * the source that is intended to replace the one you are removing. If you + * want to remove a source only and not replace it, this parameter should be + * zero. + * @return 1 if the source was successfully removed, 0 if it was not + */ +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask) +{ + unsigned int readmask = 0; + + readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type); + readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type); + + if ((new_readmask & readmask) != readmask) + return 0; + + if (src_type & RC_SOURCE_RGB) { + memset(&inst->U.P.RGB.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + if (src_type & RC_SOURCE_ALPHA) { + memset(&inst->U.P.Alpha.Src[source], 0, + sizeof(struct rc_pair_instruction_source)); + } + + return 1; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index dd0f6c66156..43ef8772092 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -3,7 +3,10 @@ #ifndef RADEON_PROGRAM_UTIL_H #define RADEON_PROGRAM_UTIL_H +struct radeon_compiler; struct rc_instruction; +struct rc_pair_instruction; +struct rc_pair_sub_instruction; struct rc_src_register; unsigned int rc_swizzle_to_writemask(unsigned int swz); @@ -22,6 +25,18 @@ rc_swizzle rc_mask_to_swizzle(unsigned int mask); unsigned swizzle_mask(unsigned swizzle, unsigned mask); +void rc_pair_rewrite_writemask( + struct rc_pair_sub_instruction * sub, + unsigned int conversion_swizzle); + +void rc_normal_rewrite_writemask( + struct rc_instruction * inst, + unsigned int conversion_swizzle); + +unsigned int rc_rewrite_swizzle( + unsigned int swizzle, + unsigned int new_mask); + struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg); void reset_srcreg(struct rc_src_register* reg); @@ -46,4 +61,14 @@ unsigned int rc_inst_can_use_presub( struct rc_src_register presub_src0, struct rc_src_register presub_src1); +int rc_get_max_index( + struct radeon_compiler * c, + rc_register_file file); + +unsigned int rc_pair_remove_src( + struct rc_instruction * inst, + unsigned int src_type, + unsigned int source, + unsigned int new_readmask); + #endif /* RADEON_PROGRAM_UTIL_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index 3c0ab10aa56..966b7f825f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -151,6 +151,7 @@ static void pair_sub_for_all_args( unsigned int presub_src_count; struct rc_pair_instruction_source * src_array; unsigned int j; +// fprintf(stderr, "Presubtract reader\n"); if (src_type & RC_SOURCE_RGB) { presub_type = fullinst-> U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index; @@ -165,7 +166,9 @@ static void pair_sub_for_all_args( for(j = 0; j < presub_src_count; j++) { cb(userdata, fullinst, &sub->Arg[i], &src_array[j]); +// fprintf(stderr, "Callback for presub %u type=%u\n", j, src_type); } +// fprintf(stderr, "Done presubtract reader\n"); } else { struct rc_pair_instruction_source * src = rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]); @@ -586,6 +589,8 @@ static void get_readers_pair_read_callback( 0 /*Pair Instructions don't use RelAddr*/, src->File, src->Index, arg->Swizzle); +// fprintf(stderr, "Shared mask = %u for [%u].%u writemask=%u abort=%u exit=%u\n", +// shared_mask, src->Index, arg->Swizzle, d->AliveWriteMask,d->ReaderData->Abort, d->ReaderData->ExitOnAbort); if (shared_mask == RC_MASK_NONE) return; @@ -775,6 +780,26 @@ static void get_readers_for_single_write( } } +static void init_get_readers_callback_data( + struct get_readers_callback_data * d, + struct rc_reader_data * reader_data, + struct radeon_compiler * c, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + reader_data->Abort = 0; + reader_data->ReaderCount = 0; + reader_data->ReadersReserved = 0; + reader_data->Readers = NULL; + + d->C = c; + d->ReaderData = reader_data; + d->ReadNormalCB = read_normal_cb; + d->ReadPairCB = read_pair_cb; + d->WriteCB = write_cb; +} + /** * This function will create a list of readers via the rc_reader_data struct. * This function will abort (set the flag data->Abort) and return if it @@ -823,16 +848,28 @@ void rc_get_readers( { struct get_readers_callback_data d; - data->Abort = 0; - data->ReaderCount = 0; - data->ReadersReserved = 0; - data->Readers = NULL; - - d.C = c; - d.ReaderData = data; - d.ReadNormalCB = read_normal_cb; - d.ReadPairCB = read_pair_cb; - d.WriteCB = write_cb; + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); rc_for_all_writes_mask(writer, get_readers_for_single_write, &d); } + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb) +{ + struct get_readers_callback_data d; + + init_get_readers_callback_data(&d, data, c, read_normal_cb, + read_pair_cb, write_cb); + + if (sub_writer->WriteMask) { + get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY, + sub_writer->DestIndex, sub_writer->WriteMask); + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h index 1e30cc69695..6667ae1c4f3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h @@ -37,6 +37,7 @@ struct rc_swizzle_caps; struct rc_src_register; struct rc_pair_instruction_arg; struct rc_pair_instruction_source; +struct rc_pair_sub_instruction; struct rc_compiler; @@ -107,6 +108,15 @@ void rc_get_readers( rc_read_src_fn read_normal_cb, rc_pair_read_arg_fn read_pair_cb, rc_read_write_mask_fn write_cb); + +void rc_get_readers_sub( + struct radeon_compiler * c, + struct rc_instruction * writer, + struct rc_pair_sub_instruction * sub_writer, + struct rc_reader_data * data, + rc_read_src_fn read_normal_cb, + rc_pair_read_arg_fn read_pair_cb, + rc_read_write_mask_fn write_cb); /** * Compiler passes based on dataflow analysis. */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.c b/src/mesa/drivers/dri/r300/compiler/radeon_list.c new file mode 100644 index 00000000000..9b2ba80a8fa --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_list.c @@ -0,0 +1,88 @@ +/* + * Copyright 2011 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_list.h" + +#include +#include + +#include "memory_pool.h" + +struct rc_list * rc_list(struct memory_pool * pool, void * item) +{ + struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list)); + new->Item = item; + new->Next = NULL; + new->Prev = NULL; +} + +void rc_list_add(struct rc_list ** list, struct rc_list * new_value) +{ + struct rc_list * temp; + + if (*list == NULL) { + *list = new_value; + return; + } + + for (temp = *list; temp->Next; temp = temp->Next); + + temp->Next = new_value; + new_value->Prev = temp; +} + +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value) +{ + if (*list == rm_value) { + *list = rm_value->Next; + return; + } + + rm_value->Prev->Next = rm_value->Next; + if (rm_value->Next) { + rm_value->Next->Prev = rm_value->Prev; + } +} + +unsigned int rc_list_count(struct rc_list * list) +{ + unsigned int count = 0; + while (list) { + count++; + list = list->Next; + } + return count; +} + +void rc_list_print(struct rc_list * list) +{ + while(list) { + fprintf(stderr, "%p->", list->Item); + list = list->Next; + } + fprintf(stderr, "\n"); +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_list.h b/src/mesa/drivers/dri/r300/compiler/radeon_list.h new file mode 100644 index 00000000000..b3c8f89cc68 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_list.h @@ -0,0 +1,46 @@ +/* + * Copyright 2011 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_LIST_H +#define RADEON_LIST_H + +struct memory_pool; + +struct rc_list { + void * Item; + struct rc_list * Prev; + struct rc_list * Next; +}; + +struct rc_list * rc_list(struct memory_pool * pool, void * item); +void rc_list_add(struct rc_list ** list, struct rc_list * new_value); +void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value); +unsigned int rc_list_count(struct rc_list * list); +void rc_list_print(struct rc_list * list); + +#endif /* RADEON_LIST_H */ + diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index d53181e1f75..52c0216b64b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2009 Nicolai Haehnle. + * Copyright 2011 Tom Stellard * * All Rights Reserved. * @@ -29,125 +30,125 @@ #include +#include "main/glheader.h" +#include "program/register_allocate.h" +#include "ralloc.h" + #include "radeon_compiler.h" +#include "radeon_compiler_util.h" #include "radeon_dataflow.h" - +#include "radeon_list.h" +#include "radeon_variable.h" #define VERBOSE 0 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) -struct live_intervals { - int Start; - int End; - struct live_intervals * Next; -}; struct register_info { - struct live_intervals Live; + struct live_intervals Live[4]; unsigned int Used:1; unsigned int Allocated:1; unsigned int File:3; unsigned int Index:RC_REGISTER_INDEX_BITS; -}; - -struct hardware_register { - struct live_intervals * Used; + unsigned int Writemask; }; struct regalloc_state { struct radeon_compiler * C; - struct register_info Input[RC_REGISTER_MAX_INDEX]; - struct register_info Temporary[RC_REGISTER_MAX_INDEX]; - - struct hardware_register * HwTemporary; - unsigned int NumHwTemporaries; - /** - * If an instruction is inside of a loop, EndLoop will be the - * IP of the ENDLOOP instruction, and BeginLoop will be the IP - * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop - * will be -1. - */ - int EndLoop; - int BeginLoop; + struct register_info * Input; + unsigned int NumInputs; + + struct register_info * Temporary; + unsigned int NumTemporaries; + + unsigned int Simple; + unsigned int HasLoop; +}; + +enum rc_reg_class { + RC_REG_CLASS_SINGLE, + RC_REG_CLASS_DOUBLE, + RC_REG_CLASS_TRIPLE, + RC_REG_CLASS_ALPHA, + RC_REG_CLASS_SINGLE_PLUS_ALPHA, + RC_REG_CLASS_DOUBLE_PLUS_ALPHA, + RC_REG_CLASS_TRIPLE_PLUS_ALPHA, + RC_REG_CLASS_X, + RC_REG_CLASS_Y, + RC_REG_CLASS_Z, + RC_REG_CLASS_XY, + RC_REG_CLASS_YZ, + RC_REG_CLASS_XZ, + RC_REG_CLASS_XW, + RC_REG_CLASS_YW, + RC_REG_CLASS_ZW, + RC_REG_CLASS_XYW, + RC_REG_CLASS_YZW, + RC_REG_CLASS_XZW, + RC_REG_CLASS_COUNT +}; + +struct rc_class { + enum rc_reg_class Class; + + unsigned int WritemaskCount; + + /** This is 1 if this class is being used by the register allocator + * and 0 otherwise */ + unsigned int Used; + + /** This is the ID number assigned to this class by ra. */ + unsigned int Id; + + /** List of writemasks that belong to this class */ + unsigned int Writemasks[3]; + + }; static void print_live_intervals(struct live_intervals * src) { - if (!src) { + if (!src || !src->Used) { DBG("(null)"); return; } - while(src) { - DBG("(%i,%i)", src->Start, src->End); - src = src->Next; - } + DBG("(%i,%i)", src->Start, src->End); } -static void add_live_intervals(struct regalloc_state * s, - struct live_intervals ** dst, struct live_intervals * src) +static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) { - struct live_intervals ** dst_backup = dst; - if (VERBOSE) { - DBG("add_live_intervals: "); - print_live_intervals(*dst); + DBG("overlap_live_intervals: "); + print_live_intervals(a); DBG(" to "); - print_live_intervals(src); + print_live_intervals(b); DBG("\n"); } - while(src) { - if (*dst && (*dst)->End < src->Start) { - dst = &(*dst)->Next; - } else if (!*dst || (*dst)->Start > src->End) { - struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li)); - li->Start = src->Start; - li->End = src->End; - li->Next = *dst; - *dst = li; - src = src->Next; - } else { - if (src->End > (*dst)->End) - (*dst)->End = src->End; - if (src->Start < (*dst)->Start) - (*dst)->Start = src->Start; - src = src->Next; - } - } - - if (VERBOSE) { - DBG(" result: "); - print_live_intervals(*dst_backup); - DBG("\n"); - } -} - -static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src) -{ - if (VERBOSE) { - DBG("overlap_live_intervals: "); - print_live_intervals(dst); - DBG(" to "); - print_live_intervals(src); - DBG("\n"); + if (!a->Used || !b->Used) { + DBG(" unused interval\n"); + return 0; } - while(src && dst) { - if (dst->End <= src->Start) { - dst = dst->Next; - } else if (dst->End <= src->End) { + if (a->Start > b->Start) { + if (a->Start < b->End) { DBG(" overlap\n"); return 1; - } else if (dst->Start < src->End) { + } + } else if (b->Start > a->Start) { + if (b->Start < a->End) { + DBG(" overlap\n"); + return 1; + } + } else { /* a->Start == b->Start */ + if (a->Start != a->End && b->Start != b->End) { DBG(" overlap\n"); return 1; - } else { - src = src->Next; } } @@ -156,92 +157,26 @@ static int overlap_live_intervals(struct live_intervals * dst, struct live_inter return 0; } -static int try_add_live_intervals(struct regalloc_state * s, - struct live_intervals ** dst, struct live_intervals * src) -{ - if (overlap_live_intervals(*dst, src)) - return 0; - - add_live_intervals(s, dst, src); - return 1; -} - -static void scan_callback(void * data, struct rc_instruction * inst, +static void scan_read_callback(void * data, struct rc_instruction * inst, rc_register_file file, unsigned int index, unsigned int mask) { struct regalloc_state * s = data; struct register_info * reg; + unsigned int i; - if (file == RC_FILE_TEMPORARY) - reg = &s->Temporary[index]; - else if (file == RC_FILE_INPUT) - reg = &s->Input[index]; - else + if (file != RC_FILE_INPUT) return; - if (!reg->Used) { - reg->Used = 1; - if (file == RC_FILE_INPUT) - reg->Live.Start = -1; - else if (s->BeginLoop >= 0) - reg->Live.Start = s->BeginLoop; - else - reg->Live.Start = inst->IP; - reg->Live.End = inst->IP; - } else if (s->EndLoop >= 0) - reg->Live.End = s->EndLoop; - else if (inst->IP > reg->Live.End) - reg->Live.End = inst->IP; -} - -static void compute_live_intervals(struct radeon_compiler *c, - struct regalloc_state *s) -{ - memset(s, 0, sizeof(*s)); - s->C = c; - s->NumHwTemporaries = c->max_temp_regs; - s->BeginLoop = -1; - s->EndLoop = -1; - s->HwTemporary = - memory_pool_malloc(&c->Pool, - s->NumHwTemporaries * sizeof(struct hardware_register)); - memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register)); - - rc_recompute_ips(s->C); - - for(struct rc_instruction * inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - - /* For all instructions inside of a loop, the ENDLOOP - * instruction is used as the end of the live interval and - * the BGNLOOP instruction is used as the beginning. */ - if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) { - int loops = 1; - struct rc_instruction * tmp; - s->BeginLoop = inst->IP; - for(tmp = inst->Next; - tmp != &s->C->Program.Instructions; - tmp = tmp->Next) { - if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) { - loops++; - } else if (tmp->U.I.Opcode - == RC_OPCODE_ENDLOOP) { - if(!--loops) { - s->EndLoop = tmp->IP; - break; - } - } - } - } + s->Input[index].Used = 1; + reg = &s->Input[index]; - if (inst->IP == s->EndLoop) { - s->EndLoop = -1; - s->BeginLoop = -1; + for (i = 0; i < 4; i++) { + if (!((mask >> i) & 0x1)) { + continue; } - - rc_for_all_reads_mask(inst, scan_callback, s); - rc_for_all_writes_mask(inst, scan_callback, s); + reg->Live[i].Used = 1; + reg->Live[i].Start = 0; + reg->Live[i].End = inst->IP; } } @@ -251,7 +186,7 @@ static void remap_register(void * data, struct rc_instruction * inst, struct regalloc_state * s = data; const struct register_info * reg; - if (*file == RC_FILE_TEMPORARY) + if (*file == RC_FILE_TEMPORARY && s->Simple) reg = &s->Temporary[*index]; else if (*file == RC_FILE_INPUT) reg = &s->Input[*index]; @@ -259,106 +194,473 @@ static void remap_register(void * data, struct rc_instruction * inst, return; if (reg->Allocated) { - *file = reg->File; *index = reg->Index; } } -static void do_regalloc(struct regalloc_state * s) +static void alloc_input_simple(void * data, unsigned int input, + unsigned int hwreg) { - /* Simple and stupid greedy register allocation */ - for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) { - struct register_info * reg = &s->Temporary[index]; + struct regalloc_state * s = data; - if (!reg->Used) - continue; + if (input >= s->NumInputs) + return; + + s->Input[input].Allocated = 1; + s->Input[input].File = RC_FILE_TEMPORARY; + s->Input[input].Index = hwreg; +} - for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) { - if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, ®->Live)) { - reg->Allocated = 1; - reg->File = RC_FILE_TEMPORARY; - reg->Index = hwreg; - goto success; +/* This functions offsets the temporary register indices by the number + * of input registers, because input registers are actually temporaries and + * should not occupy the same space. + * + * This pass is supposed to be used to maintain correct allocation of inputs + * if the standard register allocation is disabled. */ +static void do_regalloc_inputs_only(struct regalloc_state * s) +{ + for (unsigned i = 0; i < s->NumTemporaries; i++) { + s->Temporary[i].Allocated = 1; + s->Temporary[i].File = RC_FILE_TEMPORARY; + s->Temporary[i].Index = i + s->NumInputs; + } +} + +static unsigned int is_derivative(rc_opcode op) +{ + return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); +} + +static enum rc_reg_class variable_get_class( + struct rc_variable * variable, + struct rc_class * classes) +{ + unsigned int i; + unsigned int can_change_writemask= 1; + unsigned int writemask = rc_variable_writemask_sum(variable); + struct rc_list * readers = rc_variable_readers_union(variable); + + if (!variable->C->is_r500) { + unsigned int mask_count = 0; + /* The assumption here is that if an instruction has type + * RC_INSTRUCTION_NORMAL then it is a TEX instruction. + * r300 and r400 can't swizzle the result of a TEX lookup. */ + if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = RC_MASK_XYZW; + } + for (i = 0; i < 4; i++) { + if (GET_BIT(writemask, i)) { + mask_count++; } } + /* XXX We should do swizzle packing for r300 and r400 here. + * We need to figure out how not to create non-native + * swizzles. */ + if (mask_count > 1) { + can_change_writemask = 0; + } + } - rc_error(s->C, "Ran out of hardware temporaries\n"); - return; - - success:; + if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { + /* DDX/DDY seem to always fail when their writemasks are + * changed.*/ + if (is_derivative(variable->Inst->U.P.RGB.Opcode) + || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + } } + for ( ; readers; readers = readers->Next) { + struct rc_reader * r = readers->Item; + if (r->Inst->Type == RC_INSTRUCTION_PAIR) { + if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { + can_change_writemask = 0; + break; + } + /* DDX/DDY also fail when their swizzles are changed. */ + if (is_derivative(r->Inst->U.P.RGB.Opcode) + || is_derivative(r->Inst->U.P.Alpha.Opcode)) { + can_change_writemask = 0; + break; + } + } + } + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + unsigned int j; + if (!can_change_writemask && classes[i].WritemaskCount > 1) { + continue; + } + for (j = 0; j < 3; j++) { + if (classes[i].Writemasks[j] == writemask) { + return classes[i].Class; + } + } + } + rc_error(variable->C, "Could not find class for index=%u mask=%u\n", + variable->Dst.Index, writemask); + return 0; +} - /* Rewrite all instructions based on the translation table we built */ - for(struct rc_instruction * inst = s->C->Program.Instructions.Next; - inst != &s->C->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_register, s); +static unsigned int overlap_live_intervals_array( + struct live_intervals * a, + struct live_intervals * b) +{ + unsigned int a_chan, b_chan; + for (a_chan = 0; a_chan < 4; a_chan++) { + for (b_chan = 0; b_chan < 4; b_chan++) { + if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { + return 1; + } + } } + return 0; } -static void alloc_input(void * data, unsigned int input, unsigned int hwreg) +static unsigned int reg_get_index(int reg) { - struct regalloc_state * s = data; + return reg / RC_MASK_XYZW; +} - if (!s->Input[input].Used) - return; +static unsigned int reg_get_writemask(int reg) +{ + return (reg % RC_MASK_XYZW) + 1; +} - add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live); +static int get_reg_id(unsigned int index, unsigned int writemask) +{ + assert(writemask); + if (writemask == 0) { + return 0; + } + return (index * RC_MASK_XYZW) + (writemask - 1); +} - s->Input[input].Allocated = 1; - s->Input[input].File = RC_FILE_TEMPORARY; - s->Input[input].Index = hwreg; +#if VERBOSE +static void print_reg(int reg) +{ + unsigned int index = reg_get_index(reg); + unsigned int mask = reg_get_writemask(reg); + fprintf(stderr, "Temp[%u].%c%c%c%c", index, + mask & RC_MASK_X ? 'x' : '_', + mask & RC_MASK_Y ? 'y' : '_', + mask & RC_MASK_Z ? 'z' : '_', + mask & RC_MASK_W ? 'w' : '_'); +} +#endif +static void add_register_conflicts( + struct ra_regs * regs, + unsigned int max_temp_regs) +{ + unsigned int index, a_mask, b_mask; + for (index = 0; index < max_temp_regs; index++) { + for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { + for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; + b_mask++) { + if (a_mask & b_mask) { + ra_add_reg_conflict(regs, + get_reg_id(index, a_mask), + get_reg_id(index, b_mask)); + } + } + } + } } -void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +static void do_advanced_regalloc(struct regalloc_state * s) { - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct regalloc_state s; + struct rc_class rc_class_list [] = { + {RC_REG_CLASS_SINGLE, 3, 0, 0, + {RC_MASK_X, + RC_MASK_Y, + RC_MASK_Z}}, + {RC_REG_CLASS_DOUBLE, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_X | RC_MASK_Z, + RC_MASK_Y | RC_MASK_Z}}, + {RC_REG_CLASS_TRIPLE, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ALPHA, 1, 0, 0, + {RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_Y | RC_MASK_W, + RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, + {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_X, 1, 0, 0, + {RC_MASK_X, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Y, 1, 0, 0, + {RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_Z, 1, 0, 0, + {RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XY, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZ, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZ, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XW, 1, 0, 0, + {RC_MASK_X | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_ZW, 1, 0, 0, + {RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XYW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Y | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_YZW, 1, 0, 0, + {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}}, + {RC_REG_CLASS_XZW, 1, 0, 0, + {RC_MASK_X | RC_MASK_Z | RC_MASK_W, + RC_MASK_NONE, + RC_MASK_NONE}} + }; + + unsigned int i, j, index, input_node, node_count, node_index; + unsigned int * node_classes; + unsigned int * input_classes; + struct rc_instruction * inst; + struct rc_list * var_ptr; + struct rc_list * variables; + struct ra_regs * regs; + struct ra_graph * graph; + + /* Allocate the main ra data structure */ + regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW); + + /* Get list of program variables */ + variables = rc_get_variables(s->C); + node_count = rc_list_count(variables); + node_classes = memory_pool_malloc(&s->C->Pool, + node_count * sizeof(unsigned int)); + input_classes = memory_pool_malloc(&s->C->Pool, + s->NumInputs * sizeof(unsigned int)); + + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + unsigned int class_index; + /* Compute the live intervals */ + rc_variable_compute_live_intervals(var_ptr->Item); + + class_index = variable_get_class(var_ptr->Item, rc_class_list); + + /* If we haven't used this register class yet, mark it + * as used and allocate space for it. */ + if (!rc_class_list[class_index].Used) { + rc_class_list[class_index].Used = 1; + rc_class_list[class_index].Id = ra_alloc_reg_class(regs); + } - compute_live_intervals(cc, &s); + node_classes[node_index] = rc_class_list[class_index].Id; + } - c->AllocateHwInputs(c, &alloc_input, &s); - do_regalloc(&s); -} + /* Assign registers to the classes */ + for (i = 0; i < RC_REG_CLASS_COUNT; i++) { + struct rc_class class = rc_class_list[i]; + if (!class.Used) { + continue; + } -/* This functions offsets the temporary register indices by the number - * of input registers, because input registers are actually temporaries and - * should not occupy the same space. - * - * This pass is supposed to be used to maintain correct allocation of inputs - * if the standard register allocation is disabled. */ -void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user) -{ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; - struct regalloc_state s; - int temp_reg_offset; + for (index = 0; index < s->C->max_temp_regs; index++) { + for (j = 0; j < class.WritemaskCount; j++) { + int reg_id = get_reg_id(index, + class.Writemasks[j]); + ra_class_add_reg(regs, class.Id, reg_id); + } + } + } + + /* Add register conflicts */ + add_register_conflicts(regs, s->C->max_temp_regs); + + /* Calculate live intervals for input registers */ + for (inst = s->C->Program.Instructions.Next; + inst != &s->C->Program.Instructions; + inst = inst->Next) { + rc_for_all_reads_mask(inst, scan_read_callback, s); + } + + /* Create classes for input registers */ + for (i = 0; i < s->NumInputs; i++) { + unsigned int chan, class_id, writemask = 0; + for (chan = 0; chan < 4; chan++) { + if (s->Input[i].Live[chan].Used) { + writemask |= (1 << chan); + } + } + s->Input[i].Writemask = writemask; + if (!writemask) { + continue; + } + + class_id = ra_alloc_reg_class(regs); + input_classes[i] = class_id; + ra_class_add_reg(regs, class_id, + get_reg_id(s->Input[i].Index, writemask)); + } + + ra_set_finalize(regs); + + graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); - compute_live_intervals(cc, &s); + /* Build the interference graph */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next,node_index++) { + struct rc_list * a, * b; + unsigned int b_index; - c->AllocateHwInputs(c, &alloc_input, &s); + ra_set_node_class(graph, node_index, node_classes[node_index]); - temp_reg_offset = 0; - for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index) - temp_reg_offset = s.Input[i].Index + 1; + for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; + b; b = b->Next, b_index++) { + struct rc_variable * var_a = a->Item; + while (var_a) { + struct rc_variable * var_b = b->Item; + while (var_b) { + if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { + ra_add_node_interference(graph, + node_index, b_index); + } + var_b = var_b->Friend; + } + var_a = var_a->Friend; + } + } } - if (temp_reg_offset) { - for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) { - if (s.Temporary[i].Used) { - s.Temporary[i].Allocated = 1; - s.Temporary[i].File = RC_FILE_TEMPORARY; - s.Temporary[i].Index = i + temp_reg_offset; + /* Add input registers to the interference graph */ + for (i = 0, input_node = 0; i< s->NumInputs; i++) { + if (!s->Input[i].Writemask) { + continue; + } + ra_set_node_class(graph, node_count + input_node, + input_classes[i]); + for (var_ptr = variables, node_index = 0; + var_ptr; var_ptr = var_ptr->Next, node_index++) { + struct rc_variable * var = var_ptr->Item; + if (overlap_live_intervals_array(s->Input[i].Live, + var->Live)) { + ra_add_node_interference(graph, node_index, + node_count + input_node); } } + /* Manually allocate a register for this input */ + ra_set_node_reg(graph, node_count + input_node, get_reg_id( + s->Input[i].Index, s->Input[i].Writemask)); + input_node++; + } + + if (!ra_allocate_no_spills(graph)) { + rc_error(s->C, "Ran out of hardware temporaries\n"); + return; + } + + /* Rewrite the registers */ + for (var_ptr = variables, node_index = 0; var_ptr; + var_ptr = var_ptr->Next, node_index++) { + int reg = ra_get_node_reg(graph, node_index); + unsigned int writemask = reg_get_writemask(reg); + unsigned int index = reg_get_index(reg); + struct rc_variable * var = var_ptr->Item; + + if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { + writemask = rc_variable_writemask_sum(var); + } + + if (var->Dst.File == RC_FILE_INPUT) { + continue; + } + rc_variable_change_dst(var, index, writemask); + } + + ralloc_free(graph); + ralloc_free(regs); +} + +/** + * @param user This parameter should be a pointer to an integer value. If this + * integer value is zero, then a simple register allocator will be used that + * only allocates space for input registers (\sa do_regalloc_inputs_only). If + * user is non-zero, then the regular register allocator will be used + * (\sa do_regalloc). + */ +void rc_pair_regalloc(struct radeon_compiler *cc, void *user) +{ + struct r300_fragment_program_compiler *c = + (struct r300_fragment_program_compiler*)cc; + struct regalloc_state s; + int do_full_regalloc = (int)user; + struct rc_instruction * inst; + + memset(&s, 0, sizeof(s)); + s.C = cc; + s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; + s.Input = memory_pool_malloc(&cc->Pool, + s.NumInputs * sizeof(struct register_info)); + memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); + + s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; + s.Temporary = memory_pool_malloc(&cc->Pool, + s.NumTemporaries * sizeof(struct register_info)); + memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); + + for(inst = cc->Program.Instructions.Next; + inst != &cc->Program.Instructions; + inst = inst->Next) { - /* Rewrite all registers. */ - for (struct rc_instruction *inst = cc->Program.Instructions.Next; - inst != &cc->Program.Instructions; - inst = inst->Next) { - rc_remap_registers(inst, &remap_register, &s); + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + s.HasLoop = 1; + break; } } + + rc_recompute_ips(s.C); + + c->AllocateHwInputs(c, &alloc_input_simple, &s); + if (!s.HasLoop && do_full_regalloc) { + do_advanced_regalloc(&s); + } else { + s.Simple = 1; + do_regalloc_inputs_only(&s); + } + + /* Rewrite inputs and if we are doing the simple allocation, rewrite + * temporaries too. */ + for (struct rc_instruction *inst = s.C->Program.Instructions.Next; + inst != &s.C->Program.Instructions; + inst = inst->Next) { + rc_remap_registers(inst, &remap_register, &s); + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index a07f6b63c6e..b899eccbf53 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -108,6 +108,9 @@ struct rc_sub_instruction { /** True if tex instruction should do shadow comparison */ unsigned int TexShadow:1; + + /**R500 Only. How to swizzle the result of a TEX lookup*/ + unsigned int TexSwizzle:12; /*@}*/ /** This holds information about the presubtract operation used by diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h index 45f79ece5ba..24577333450 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h @@ -129,6 +129,7 @@ typedef enum { #define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO) #define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE) #define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF) +#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED) /** * \name Bitmasks for components of vectors. diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 68874795b8a..52315957520 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -223,3 +223,17 @@ struct rc_pair_instruction_source * rc_pair_get_src( return NULL; } } + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src) +{ + int i; + for (i = 0; i < 3; i++) { + if (&pair_inst->RGB.Src[i] == src + || &pair_inst->Alpha.Src[i] == src) { + return i; + } + } + return -1; +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index 82d7815aa78..a957ea9f7a0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -114,6 +114,10 @@ void rc_pair_foreach_source_that_rgb_reads( struct rc_pair_instruction_source * rc_pair_get_src( struct rc_pair_instruction * pair_inst, struct rc_pair_instruction_arg * arg); + +int rc_pair_get_src_index( + struct rc_pair_instruction * pair_inst, + struct rc_pair_instruction_source * src); /*@}*/ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.c b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c new file mode 100644 index 00000000000..91a4d45dc0d --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.c @@ -0,0 +1,434 @@ +/* + * Copyright 2011 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_variable.h" + +#include "memory_pool.h" +#include "radeon_compiler_util.h" +#include "radeon_dataflow.h" +#include "radeon_list.h" +#include "radeon_opcodes.h" +#include "radeon_program.h" + +/** + * Rewrite the index and writemask for the destination register of var + * and its friends to new_index and new_writemask. This function also takes + * care of rewriting the swizzles for the sources of var. + */ +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask) +{ + unsigned int new_idx, old_idx; + unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0); + struct rc_variable * var_ptr; + struct rc_list * readers; + unsigned int old_mask = rc_variable_writemask_sum(var); + + new_idx = 0; + for (old_idx = 0; old_idx < 4; old_idx++) { + if (!GET_BIT(old_mask, old_idx)) + continue; + for ( ; new_idx < 4; new_idx++) { + if (GET_BIT(new_writemask, new_idx)) { + SET_SWZ(conversion_swizzle, old_idx, new_idx); + new_idx++; + break; + } + } + } + + for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) { + if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { + rc_normal_rewrite_writemask(var_ptr->Inst, + conversion_swizzle); + var_ptr->Inst->U.I.DstReg.Index = new_index; + } else { + struct rc_pair_sub_instruction * sub; + if (var_ptr->Dst.WriteMask == RC_MASK_W) { + assert(new_writemask & RC_MASK_W); + sub = &var_ptr->Inst->U.P.Alpha; + } else { + sub = &var_ptr->Inst->U.P.RGB; + rc_pair_rewrite_writemask(sub, + conversion_swizzle); + } + sub->DestIndex = new_index; + } + } + + readers = rc_variable_readers_union(var); + + for ( ; readers; readers = readers->Next) { + struct rc_reader * reader = readers->Item; + if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) { + reader->U.I.Src->Index = new_index; + reader->U.I.Src->Swizzle = rc_rewrite_swizzle( + reader->U.I.Src->Swizzle, conversion_swizzle); + } else { + struct rc_pair_instruction * pair_inst = + &reader->Inst->U.P; + unsigned int src_type = rc_source_type_swz( + reader->U.P.Arg->Swizzle); + + int src_index = reader->U.P.Arg->Source; + if (src_index == RC_PAIR_PRESUB_SRC) { + src_index = rc_pair_get_src_index( + pair_inst, reader->U.P.Src); + } + /* Try to delete the old src, it is OK if this fails, + * because rc_pair_alloc_source might be able to + * find a source the ca be reused. + */ + if (rc_pair_remove_src(reader->Inst, src_type, + src_index, old_mask)) { + /* Reuse the source index of the source that + * was just deleted and set its register + * index. We can't use rc_pair_alloc_source + * for this becuase it might return a source + * index that is already being used. */ + if (src_type & RC_SOURCE_RGB) { + pair_inst->RGB.Src[src_index] + .Used = 1; + pair_inst->RGB.Src[src_index] + .Index = new_index; + pair_inst->RGB.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + if (src_type & RC_SOURCE_ALPHA) { + pair_inst->Alpha.Src[src_index] + .Used = 1; + pair_inst->Alpha.Src[src_index] + .Index = new_index; + pair_inst->Alpha.Src[src_index] + .File = RC_FILE_TEMPORARY; + } + } else { + src_index = rc_pair_alloc_source( + &reader->Inst->U.P, + src_type & RC_SOURCE_RGB, + src_type & RC_SOURCE_ALPHA, + RC_FILE_TEMPORARY, + new_index); + if (src_index < 0) { + rc_error(var->C, "Rewrite of inst %u failed " + "Can't allocate source for " + "Inst %u src_type=%x " + "new_index=%u new_mask=%u\n", + var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask); + continue; + } + } + reader->U.P.Arg->Swizzle = rc_rewrite_swizzle( + reader->U.P.Arg->Swizzle, conversion_swizzle); + if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) { + reader->U.P.Arg->Source = src_index; + } + } + } +} + +/** + * Compute the live intervals for var and its friends. + */ +void rc_variable_compute_live_intervals(struct rc_variable * var) +{ + while(var) { + unsigned int i; + unsigned int start = var->Inst->IP; + + for (i = 0; i < var->ReaderCount; i++) { + unsigned int chan; + unsigned int mask = var->Readers[i].WriteMask; + for (chan = 0; chan < 4; chan++) { + if ((mask >> chan) & 0x1) { + var->Live[chan].Start = start; + var->Live[chan].End = + var->Readers[i].Inst->IP; + var->Live[chan].Used = 1; + } + } + } + var = var->Friend; + } +} + +/** + * @return 1 if a and b share a reader + * @return 0 if they do not + */ +static unsigned int readers_intersect( + struct rc_variable * a, + struct rc_variable * b) +{ + unsigned int a_index, b_index; + for (a_index = 0; a_index < a->ReaderCount; a_index++) { + struct rc_reader reader_a = a->Readers[a_index]; + for (b_index = 0; b_index < b->ReaderCount; b_index++) { + struct rc_reader reader_b = b->Readers[b_index]; + if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL + && reader_a.U.I.Src == reader_b.U.I.Src) { + + return 1; + } + + if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR + && reader_b.Inst->Type == RC_INSTRUCTION_PAIR + && reader_a.U.P.Arg == reader_b.U.P.Arg) { + + return 1; + } + } + } + return 0; +} + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend) +{ + while(var->Friend) { + var = var->Friend; + } + var->Friend = friend; +} + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data) +{ + struct rc_variable * new = + memory_pool_malloc(&c->Pool, sizeof(struct rc_variable)); + memset(new, 0, sizeof(struct rc_variable)); + new->C = c; + new->Dst.File = DstFile; + new->Dst.Index = DstIndex; + new->Dst.WriteMask = DstWriteMask; + if (reader_data) { + new->Inst = reader_data->Writer; + new->ReaderCount = reader_data->ReaderCount; + new->Readers = reader_data->Readers; + } + return new; +} + +static void get_variable_helper( + struct rc_list ** aborted_list, + struct rc_list ** variable_list, + unsigned int aborted, + struct rc_variable * variable) +{ + if (aborted) { + rc_list_add(aborted_list, rc_list(&variable->C->Pool, variable)); + } else { + rc_list_add(variable_list, rc_list(&variable->C->Pool, variable)); + } +} + +static void get_variable_pair_helper( + struct rc_list ** aborted_list, + struct rc_list ** variable_list, + struct radeon_compiler * c, + struct rc_instruction * inst, + struct rc_pair_sub_instruction * sub_inst) +{ + struct rc_reader_data reader_data; + struct rc_variable * new_var; + rc_register_file file; + unsigned int writemask; + + if (sub_inst->Opcode == RC_OPCODE_NOP) { + return; + } + memset(&reader_data, 0, sizeof(struct rc_reader_data)); + rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL); + + if (reader_data.ReaderCount == 0) { + return; + } + + if (sub_inst->WriteMask) { + file = RC_FILE_TEMPORARY; + writemask = sub_inst->WriteMask; + } else if (sub_inst->OutputWriteMask) { + file = RC_FILE_OUTPUT; + writemask = sub_inst->OutputWriteMask; + } else { + writemask = 0; + file = RC_FILE_NONE; + } + new_var = rc_variable(c, file, sub_inst->DestIndex, writemask, + &reader_data); + get_variable_helper(aborted_list, variable_list, reader_data.Abort, + new_var); +} + +/** + * Generate a list of variables used by the shader program. Each instruction + * that writes to a register is considered a variable. The struct rc_variable + * data structure includes a list of readers and is essentially a + * definition-use chain. Any two variables that share a reader are considered + * "friends" and they are linked together via the Friend attribute. + */ +struct rc_list * rc_get_variables(struct radeon_compiler * c) +{ + struct rc_instruction * inst; + struct rc_list * aborted_list = NULL; + struct rc_list * variable_list = NULL; + struct rc_list * var_ptr; + struct rc_list * search_ptr; + + for (inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + struct rc_reader_data reader_data; + struct rc_variable * new_var; + memset(&reader_data, 0, sizeof(reader_data)); + + if (inst->Type == RC_INSTRUCTION_NORMAL) { + rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL); + if (reader_data.ReaderCount == 0) { + continue; + } + new_var = rc_variable(c, inst->U.I.DstReg.File, + inst->U.I.DstReg.Index, + inst->U.I.DstReg.WriteMask, &reader_data); + get_variable_helper(&aborted_list, &variable_list, + reader_data.Abort, new_var); + } else { + get_variable_pair_helper(&aborted_list, &variable_list, + c, inst, &inst->U.P.RGB); + get_variable_pair_helper(&aborted_list, &variable_list, + c, inst, &inst->U.P.Alpha); + } + } + + /* The aborted_list contains a list of variables that might share a + * reader with another variable. We need to search through this list + * and pair together variables that do share the same reader. + */ + while (aborted_list) { + struct rc_list * search_ptr_next; + var_ptr = aborted_list; + + search_ptr = var_ptr->Next; + while(search_ptr) { + search_ptr_next = search_ptr->Next; + if (readers_intersect(var_ptr->Item, search_ptr->Item)){ + rc_list_remove(&aborted_list, search_ptr); + rc_variable_add_friend(var_ptr->Item, + search_ptr->Item); + } + search_ptr = search_ptr_next; + } + rc_list_remove(&aborted_list, var_ptr); + rc_list_add(&variable_list, rc_list( + &((struct rc_variable*)(var_ptr->Item))->C->Pool, + var_ptr->Item)); + } + return variable_list; +} + +/** + * @return The bitwise or of the writemasks of a variable and all of its + * friends. + */ +unsigned int rc_variable_writemask_sum(struct rc_variable * var) +{ + unsigned int writemask = 0; + while(var) { + writemask |= var->Dst.WriteMask; + var = var->Friend; + } + return writemask; +} + +/* + * @return A list of readers for a variable and its friends. Readers + * that read from two different variable friends are only included once in + * this list. + */ +struct rc_list * rc_variable_readers_union(struct rc_variable * var) +{ + struct rc_list * list = NULL; + while (var) { + unsigned int i; + for (i = 0; i < var->ReaderCount; i++) { + struct rc_list * temp; + struct rc_reader * a = &var->Readers[i]; + unsigned int match = 0; + for (temp = list; temp; temp = temp->Next) { + struct rc_reader * b = temp->Item; + if (a->Inst->Type != b->Inst->Type) { + continue; + } + if (a->Inst->Type == RC_INSTRUCTION_NORMAL) { + if (a->U.I.Src == b->U.I.Src) { + match = 1; + break; + } + } + if (a->Inst->Type == RC_INSTRUCTION_PAIR) { + if (a->U.P.Arg == b->U.P.Arg + && a->U.P.Src == b->U.P.Src) { + match = 1; + break; + } + } + } + if (match) { + continue; + } + rc_list_add(&list, rc_list(&var->C->Pool, a)); + } + var = var->Friend; + } + return list; +} + +void rc_variable_print(struct rc_variable * var) +{ + unsigned int i; + while (var) { + fprintf(stderr, "%u: TEMP[%u].%u: ", + var->Inst->IP, var->Dst.Index, var->Dst.WriteMask); + for (i = 0; i < 4; i++) { + fprintf(stderr, "chan %u: start=%u end=%u ", i, + var->Live[i].Start, var->Live[i].End); + } + fprintf(stderr, "%u readers\n", var->ReaderCount); + if (var->Friend) { + fprintf(stderr, "Friend: \n\t"); + } + var = var->Friend; + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_variable.h b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h new file mode 100644 index 00000000000..b8fbcaa4029 --- /dev/null +++ b/src/mesa/drivers/dri/r300/compiler/radeon_variable.h @@ -0,0 +1,84 @@ +/* + * Copyright 2011 Tom Stellard + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_VARIABLE_H +#define RADEON_VARIABLE_H + +#include "radeon_compiler.h" + +struct radeon_compiler; +struct rc_list; +struct rc_reader_data; +struct rc_readers; + +struct live_intervals { + int Start; + int End; + int Used; +}; + +struct rc_variable { + struct radeon_compiler * C; + struct rc_dst_register Dst; + + struct rc_instruction * Inst; + unsigned int ReaderCount; + struct rc_reader * Readers; + struct live_intervals Live[4]; + + /* A friend is a variable that shares a reader with another variable. + */ + struct rc_variable * Friend; +}; + +void rc_variable_change_dst( + struct rc_variable * var, + unsigned int new_index, + unsigned int new_writemask); + +void rc_variable_compute_live_intervals(struct rc_variable * var); + +void rc_variable_add_friend( + struct rc_variable * var, + struct rc_variable * friend); + +struct rc_variable * rc_variable( + struct radeon_compiler * c, + unsigned int DstFile, + unsigned int DstIndex, + unsigned int DstWriteMask, + struct rc_reader_data * reader_data); + +struct rc_list * rc_get_variables(struct radeon_compiler * c); + +unsigned int rc_variable_writemask_sum(struct rc_variable * var); + +struct rc_list * rc_variable_readers_union(struct rc_variable * var); + +void rc_variable_print(struct rc_variable * var); + +#endif /* RADEON_VARIABLE_H */ -- 2.30.2