The new allocator uses ra and does swizzle packing.
Also, a data structure (struct rc_variable) and associated functions have
been added for generating UD and DU chains.
*shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
break;
}
+ dst->U.I.TexSwizzle = RC_SWIZZLE_XYZW;
}
static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src)
radeon_dataflow.c \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
+ radeon_list.c \
radeon_optimize.c \
radeon_remove_constants.c \
radeon_rename_regs.c \
+ radeon_variable.c \
r3xx_fragprog.c \
r300_fragprog.c \
r300_fragprog_swizzle.c \
-I. \
-I$(TOP)/include \
-I$(TOP)/src/mesa \
+ -I$(TOP)/src/glsl \
##### TARGETS #####
'radeon_dataflow.c',
'radeon_dataflow_deadcode.c',
'radeon_dataflow_swizzles.c',
+ 'radeon_variable.c',
+ 'radeon_list.c',
'r3xx_fragprog.c',
'r300_fragprog.c',
'r300_fragprog_swizzle.c',
if (src.File == RC_FILE_CONSTANT) {
return src.Index | (1 << 5);
- } else if (src.File == RC_FILE_TEMPORARY) {
+ } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
use_temporary(code, src.Index);
return src.Index & 0x1f;
}
{"pair translate", 1, 1, rc_pair_translate, NULL},
{"pair scheduling", 1, 1, rc_pair_schedule, NULL},
{"dead sources", 1, 1, rc_pair_remove_dead_sources, NULL},
- {"register allocation", 1, opt, rc_pair_regalloc, NULL},
- {"dumb register allocation", 1, !opt, rc_pair_regalloc_inputs_only, NULL},
+ {"register allocation", 1, 1, rc_pair_regalloc, opt},
{"final code validation", 0, 1, rc_validate_final_shader, NULL},
{"machine code generation", 0, is_r500, r500BuildFragmentProgramHwCode, NULL},
{"machine code generation", 0, !is_r500, r300BuildFragmentProgramHwCode, NULL},
if (src.File == RC_FILE_CONSTANT) {
return src.Index | R500_RGB_ADDR0_CONST;
- } else if (src.File == RC_FILE_TEMPORARY) {
+ } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
use_temporary(code, src.Index);
return src.Index;
}
code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
| R500_TEX_DST_ADDR(inst->DstReg.Index)
- | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
- | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
+ | (GET_SWZ(inst->TexSwizzle, 0) << 24)
+ | (GET_SWZ(inst->TexSwizzle, 1) << 26)
+ | (GET_SWZ(inst->TexSwizzle, 2) << 28)
+ | (GET_SWZ(inst->TexSwizzle, 3) << 30)
+ ;
return 1;
}
return ret;
}
+static unsigned int srcs_need_rewrite(const struct rc_opcode_info * info)
+{
+ if (info->HasTexture) {
+ return 0;
+ }
+ switch (info->Opcode) {
+ case RC_OPCODE_DP2:
+ case RC_OPCODE_DP3:
+ case RC_OPCODE_DP4:
+ case RC_OPCODE_DDX:
+ case RC_OPCODE_DDY:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static unsigned int adjust_channels(
+ unsigned int old_swizzle,
+ unsigned int conversion_swizzle)
+{
+ unsigned int i;
+ unsigned int new_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+ for (i = 0; i < 4; i++) {
+ unsigned int new_chan = get_swz(conversion_swizzle, i);
+ if (new_chan == RC_SWIZZLE_UNUSED) {
+ continue;
+ }
+ SET_SWZ(new_swizzle, new_chan, GET_SWZ(old_swizzle, i));
+ }
+ return new_swizzle;
+}
+
+static unsigned int rewrite_writemask(
+ unsigned int old_mask,
+ unsigned int conversion_swizzle)
+{
+ unsigned int new_mask = 0;
+ unsigned int i;
+
+ for (i = 0; i < 4; i++) {
+ if (!GET_BIT(old_mask, i)
+ || GET_SWZ(conversion_swizzle, i) == RC_SWIZZLE_UNUSED) {
+ continue;
+ }
+ new_mask |= (1 << GET_SWZ(conversion_swizzle, i));
+ }
+
+ return new_mask;
+}
+
+/**
+ * This function rewrites the writemask of sub and adjusts the swizzles
+ * of all its source registers based on the conversion_swizzle.
+ * conversion_swizzle represents a mapping of the old writemask to the
+ * new writemask. For a detailed description of how conversion swizzles
+ * work see rc_rewrite_swizzle().
+ */
+void rc_pair_rewrite_writemask(
+ struct rc_pair_sub_instruction * sub,
+ unsigned int conversion_swizzle)
+{
+ const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+ unsigned int i;
+
+ sub->WriteMask = rewrite_writemask(sub->WriteMask, conversion_swizzle);
+
+ if (!srcs_need_rewrite(info)) {
+ return ;
+ }
+
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ sub->Arg[i].Swizzle =
+ adjust_channels(sub->Arg[i].Swizzle, conversion_swizzle);
+ }
+}
+
+static void normal_rewrite_writemask_cb(
+ void * userdata,
+ struct rc_instruction * inst,
+ struct rc_src_register * src)
+{
+ unsigned int * new_mask = (unsigned int *)userdata;
+ src->Swizzle = adjust_channels(src->Swizzle, *new_mask);
+}
+
+/**
+ * This function is the same as rc_pair_rewrite_writemask() except it
+ * operates on normal instructions.
+ */
+void rc_normal_rewrite_writemask(
+ struct rc_instruction * inst,
+ unsigned int conversion_swizzle)
+{
+ unsigned int new_mask;
+ struct rc_sub_instruction * sub = &inst->U.I;
+ const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+ sub->DstReg.WriteMask =
+ rewrite_writemask(sub->DstReg.WriteMask, conversion_swizzle);
+
+ if (info->HasTexture) {
+ unsigned int i;
+ assert(sub->TexSwizzle == RC_SWIZZLE_XYZW);
+ for (i = 0; i < 4; i++) {
+ unsigned int swz = GET_SWZ(conversion_swizzle, i);
+ if (swz > 3)
+ continue;
+ SET_SWZ(sub->TexSwizzle, swz, i);
+ }
+ }
+
+ if (!srcs_need_rewrite(info)) {
+ return;
+ }
+
+ new_mask = sub->DstReg.WriteMask;
+ rc_for_all_reads_src(inst, normal_rewrite_writemask_cb, &new_mask);
+}
+
+/**
+ * This function replaces each value 'swz' in swizzle with the value of
+ * GET_SWZ(conversion_swizzle, swz). So, if you want to change all the X's
+ * in swizzle to Y, then conversion_swizzle should be Y___ (0xff9). If you want
+ * to change all the Y's in swizzle to X, then conversion_swizzle should be
+ * _X__ (0xfc7). If you want to change the Y's to X and the X's to Y, then
+ * conversion swizzle should be YX__ (0xfc1).
+ * @param swizzle The swizzle to change
+ * @param conversion_swizzle Describes the conversion to perform on the swizzle
+ * @return A converted swizzle
+ */
+unsigned int rc_rewrite_swizzle(
+ unsigned int swizzle,
+ unsigned int conversion_swizzle)
+{
+ unsigned int chan;
+ unsigned int out_swizzle = swizzle;
+
+ for (chan = 0; chan < 4; chan++) {
+ unsigned int swz = GET_SWZ(swizzle, chan);
+ unsigned int new_swz;
+ if (swz > 3) {
+ SET_SWZ(out_swizzle, chan, swz);
+ } else {
+ new_swz = GET_SWZ(conversion_swizzle, swz);
+ if (new_swz != RC_SWIZZLE_UNUSED) {
+ SET_SWZ(out_swizzle, chan, new_swz);
+ } else {
+ SET_SWZ(out_swizzle, chan, swz);
+ }
+ }
+ }
+ return out_swizzle;
+}
+
/**
* Left multiplication of a register with a swizzle
*/
return 1;
}
+struct max_data {
+ unsigned int Max;
+ unsigned int HasFileType;
+ rc_register_file File;
+};
+
+static void max_callback(
+ void * userdata,
+ struct rc_instruction * inst,
+ rc_register_file file,
+ unsigned int index,
+ unsigned int mask)
+{
+ struct max_data * d = (struct max_data*)userdata;
+ if (file == d->File && (!d->HasFileType || index > d->Max)) {
+ d->Max = index;
+ d->HasFileType = 1;
+ }
+}
+
+/**
+ * @return The maximum index of the specified register file used by the
+ * program.
+ */
+int rc_get_max_index(
+ struct radeon_compiler * c,
+ rc_register_file file)
+{
+ struct max_data data;
+ data.Max = 0;
+ data.HasFileType = 0;
+ data.File = file;
+ struct rc_instruction * inst;
+ for (inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, max_callback, &data);
+ rc_for_all_writes_mask(inst, max_callback, &data);
+ }
+ if (!data.HasFileType) {
+ return -1;
+ } else {
+ return data.Max;
+ }
+}
+
+static unsigned int get_source_readmask(
+ struct rc_pair_sub_instruction * sub,
+ unsigned int source,
+ unsigned int src_type)
+{
+ unsigned int i;
+ unsigned int readmask = 0;
+ const struct rc_opcode_info * info = rc_get_opcode_info(sub->Opcode);
+
+ for (i = 0; i < info->NumSrcRegs; i++) {
+ if (sub->Arg[i].Source != source
+ || src_type != rc_source_type_swz(sub->Arg[i].Swizzle)) {
+ continue;
+ }
+ readmask |= rc_swizzle_to_writemask(sub->Arg[i].Swizzle);
+ }
+ return readmask;
+}
+
+/**
+ * This function attempts to remove a source from a pair instructions.
+ * @param inst
+ * @param src_type RC_SOURCE_RGB, RC_SOURCE_ALPHA, or both bitwise or'd
+ * @param source The index of the source to remove
+ * @param new_readmask A mask representing the components that are read by
+ * the source that is intended to replace the one you are removing. If you
+ * want to remove a source only and not replace it, this parameter should be
+ * zero.
+ * @return 1 if the source was successfully removed, 0 if it was not
+ */
+unsigned int rc_pair_remove_src(
+ struct rc_instruction * inst,
+ unsigned int src_type,
+ unsigned int source,
+ unsigned int new_readmask)
+{
+ unsigned int readmask = 0;
+
+ readmask |= get_source_readmask(&inst->U.P.RGB, source, src_type);
+ readmask |= get_source_readmask(&inst->U.P.Alpha, source, src_type);
+
+ if ((new_readmask & readmask) != readmask)
+ return 0;
+
+ if (src_type & RC_SOURCE_RGB) {
+ memset(&inst->U.P.RGB.Src[source], 0,
+ sizeof(struct rc_pair_instruction_source));
+ }
+
+ if (src_type & RC_SOURCE_ALPHA) {
+ memset(&inst->U.P.Alpha.Src[source], 0,
+ sizeof(struct rc_pair_instruction_source));
+ }
+
+ return 1;
+}
#ifndef RADEON_PROGRAM_UTIL_H
#define RADEON_PROGRAM_UTIL_H
+struct radeon_compiler;
struct rc_instruction;
+struct rc_pair_instruction;
+struct rc_pair_sub_instruction;
struct rc_src_register;
unsigned int rc_swizzle_to_writemask(unsigned int swz);
unsigned swizzle_mask(unsigned swizzle, unsigned mask);
+void rc_pair_rewrite_writemask(
+ struct rc_pair_sub_instruction * sub,
+ unsigned int conversion_swizzle);
+
+void rc_normal_rewrite_writemask(
+ struct rc_instruction * inst,
+ unsigned int conversion_swizzle);
+
+unsigned int rc_rewrite_swizzle(
+ unsigned int swizzle,
+ unsigned int new_mask);
+
struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
void reset_srcreg(struct rc_src_register* reg);
struct rc_src_register presub_src0,
struct rc_src_register presub_src1);
+int rc_get_max_index(
+ struct radeon_compiler * c,
+ rc_register_file file);
+
+unsigned int rc_pair_remove_src(
+ struct rc_instruction * inst,
+ unsigned int src_type,
+ unsigned int source,
+ unsigned int new_readmask);
+
#endif /* RADEON_PROGRAM_UTIL_H */
unsigned int presub_src_count;
struct rc_pair_instruction_source * src_array;
unsigned int j;
+// fprintf(stderr, "Presubtract reader\n");
if (src_type & RC_SOURCE_RGB) {
presub_type = fullinst->
U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Index;
for(j = 0; j < presub_src_count; j++) {
cb(userdata, fullinst, &sub->Arg[i],
&src_array[j]);
+// fprintf(stderr, "Callback for presub %u type=%u\n", j, src_type);
}
+// fprintf(stderr, "Done presubtract reader\n");
} else {
struct rc_pair_instruction_source * src =
rc_pair_get_src(&fullinst->U.P, &sub->Arg[i]);
0 /*Pair Instructions don't use RelAddr*/,
src->File, src->Index, arg->Swizzle);
+// fprintf(stderr, "Shared mask = %u for [%u].%u writemask=%u abort=%u exit=%u\n",
+// shared_mask, src->Index, arg->Swizzle, d->AliveWriteMask,d->ReaderData->Abort, d->ReaderData->ExitOnAbort);
if (shared_mask == RC_MASK_NONE)
return;
}
}
+static void init_get_readers_callback_data(
+ struct get_readers_callback_data * d,
+ struct rc_reader_data * reader_data,
+ struct radeon_compiler * c,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
+ rc_read_write_mask_fn write_cb)
+{
+ reader_data->Abort = 0;
+ reader_data->ReaderCount = 0;
+ reader_data->ReadersReserved = 0;
+ reader_data->Readers = NULL;
+
+ d->C = c;
+ d->ReaderData = reader_data;
+ d->ReadNormalCB = read_normal_cb;
+ d->ReadPairCB = read_pair_cb;
+ d->WriteCB = write_cb;
+}
+
/**
* This function will create a list of readers via the rc_reader_data struct.
* This function will abort (set the flag data->Abort) and return if it
{
struct get_readers_callback_data d;
- data->Abort = 0;
- data->ReaderCount = 0;
- data->ReadersReserved = 0;
- data->Readers = NULL;
-
- d.C = c;
- d.ReaderData = data;
- d.ReadNormalCB = read_normal_cb;
- d.ReadPairCB = read_pair_cb;
- d.WriteCB = write_cb;
+ init_get_readers_callback_data(&d, data, c, read_normal_cb,
+ read_pair_cb, write_cb);
rc_for_all_writes_mask(writer, get_readers_for_single_write, &d);
}
+
+void rc_get_readers_sub(
+ struct radeon_compiler * c,
+ struct rc_instruction * writer,
+ struct rc_pair_sub_instruction * sub_writer,
+ struct rc_reader_data * data,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
+ rc_read_write_mask_fn write_cb)
+{
+ struct get_readers_callback_data d;
+
+ init_get_readers_callback_data(&d, data, c, read_normal_cb,
+ read_pair_cb, write_cb);
+
+ if (sub_writer->WriteMask) {
+ get_readers_for_single_write(&d, writer, RC_FILE_TEMPORARY,
+ sub_writer->DestIndex, sub_writer->WriteMask);
+ }
+}
struct rc_src_register;
struct rc_pair_instruction_arg;
struct rc_pair_instruction_source;
+struct rc_pair_sub_instruction;
struct rc_compiler;
rc_read_src_fn read_normal_cb,
rc_pair_read_arg_fn read_pair_cb,
rc_read_write_mask_fn write_cb);
+
+void rc_get_readers_sub(
+ struct radeon_compiler * c,
+ struct rc_instruction * writer,
+ struct rc_pair_sub_instruction * sub_writer,
+ struct rc_reader_data * data,
+ rc_read_src_fn read_normal_cb,
+ rc_pair_read_arg_fn read_pair_cb,
+ rc_read_write_mask_fn write_cb);
/**
* Compiler passes based on dataflow analysis.
*/
--- /dev/null
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_list.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "memory_pool.h"
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item)
+{
+ struct rc_list * new = memory_pool_malloc(pool, sizeof(struct rc_list));
+ new->Item = item;
+ new->Next = NULL;
+ new->Prev = NULL;
+}
+
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value)
+{
+ struct rc_list * temp;
+
+ if (*list == NULL) {
+ *list = new_value;
+ return;
+ }
+
+ for (temp = *list; temp->Next; temp = temp->Next);
+
+ temp->Next = new_value;
+ new_value->Prev = temp;
+}
+
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value)
+{
+ if (*list == rm_value) {
+ *list = rm_value->Next;
+ return;
+ }
+
+ rm_value->Prev->Next = rm_value->Next;
+ if (rm_value->Next) {
+ rm_value->Next->Prev = rm_value->Prev;
+ }
+}
+
+unsigned int rc_list_count(struct rc_list * list)
+{
+ unsigned int count = 0;
+ while (list) {
+ count++;
+ list = list->Next;
+ }
+ return count;
+}
+
+void rc_list_print(struct rc_list * list)
+{
+ while(list) {
+ fprintf(stderr, "%p->", list->Item);
+ list = list->Next;
+ }
+ fprintf(stderr, "\n");
+}
--- /dev/null
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_LIST_H
+#define RADEON_LIST_H
+
+struct memory_pool;
+
+struct rc_list {
+ void * Item;
+ struct rc_list * Prev;
+ struct rc_list * Next;
+};
+
+struct rc_list * rc_list(struct memory_pool * pool, void * item);
+void rc_list_add(struct rc_list ** list, struct rc_list * new_value);
+void rc_list_remove(struct rc_list ** list, struct rc_list * rm_value);
+unsigned int rc_list_count(struct rc_list * list);
+void rc_list_print(struct rc_list * list);
+
+#endif /* RADEON_LIST_H */
+
/*
* Copyright (C) 2009 Nicolai Haehnle.
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
*
* All Rights Reserved.
*
#include <stdio.h>
+#include "main/glheader.h"
+#include "program/register_allocate.h"
+#include "ralloc.h"
+
#include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
#include "radeon_dataflow.h"
-
+#include "radeon_list.h"
+#include "radeon_variable.h"
#define VERBOSE 0
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
-struct live_intervals {
- int Start;
- int End;
- struct live_intervals * Next;
-};
struct register_info {
- struct live_intervals Live;
+ struct live_intervals Live[4];
unsigned int Used:1;
unsigned int Allocated:1;
unsigned int File:3;
unsigned int Index:RC_REGISTER_INDEX_BITS;
-};
-
-struct hardware_register {
- struct live_intervals * Used;
+ unsigned int Writemask;
};
struct regalloc_state {
struct radeon_compiler * C;
- struct register_info Input[RC_REGISTER_MAX_INDEX];
- struct register_info Temporary[RC_REGISTER_MAX_INDEX];
-
- struct hardware_register * HwTemporary;
- unsigned int NumHwTemporaries;
- /**
- * If an instruction is inside of a loop, EndLoop will be the
- * IP of the ENDLOOP instruction, and BeginLoop will be the IP
- * of the BGNLOOP instruction. Otherwise, EndLoop and BeginLoop
- * will be -1.
- */
- int EndLoop;
- int BeginLoop;
+ struct register_info * Input;
+ unsigned int NumInputs;
+
+ struct register_info * Temporary;
+ unsigned int NumTemporaries;
+
+ unsigned int Simple;
+ unsigned int HasLoop;
+};
+
+enum rc_reg_class {
+ RC_REG_CLASS_SINGLE,
+ RC_REG_CLASS_DOUBLE,
+ RC_REG_CLASS_TRIPLE,
+ RC_REG_CLASS_ALPHA,
+ RC_REG_CLASS_SINGLE_PLUS_ALPHA,
+ RC_REG_CLASS_DOUBLE_PLUS_ALPHA,
+ RC_REG_CLASS_TRIPLE_PLUS_ALPHA,
+ RC_REG_CLASS_X,
+ RC_REG_CLASS_Y,
+ RC_REG_CLASS_Z,
+ RC_REG_CLASS_XY,
+ RC_REG_CLASS_YZ,
+ RC_REG_CLASS_XZ,
+ RC_REG_CLASS_XW,
+ RC_REG_CLASS_YW,
+ RC_REG_CLASS_ZW,
+ RC_REG_CLASS_XYW,
+ RC_REG_CLASS_YZW,
+ RC_REG_CLASS_XZW,
+ RC_REG_CLASS_COUNT
+};
+
+struct rc_class {
+ enum rc_reg_class Class;
+
+ unsigned int WritemaskCount;
+
+ /** This is 1 if this class is being used by the register allocator
+ * and 0 otherwise */
+ unsigned int Used;
+
+ /** This is the ID number assigned to this class by ra. */
+ unsigned int Id;
+
+ /** List of writemasks that belong to this class */
+ unsigned int Writemasks[3];
+
+
};
static void print_live_intervals(struct live_intervals * src)
{
- if (!src) {
+ if (!src || !src->Used) {
DBG("(null)");
return;
}
- while(src) {
- DBG("(%i,%i)", src->Start, src->End);
- src = src->Next;
- }
+ DBG("(%i,%i)", src->Start, src->End);
}
-static void add_live_intervals(struct regalloc_state * s,
- struct live_intervals ** dst, struct live_intervals * src)
+static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
{
- struct live_intervals ** dst_backup = dst;
-
if (VERBOSE) {
- DBG("add_live_intervals: ");
- print_live_intervals(*dst);
+ DBG("overlap_live_intervals: ");
+ print_live_intervals(a);
DBG(" to ");
- print_live_intervals(src);
+ print_live_intervals(b);
DBG("\n");
}
- while(src) {
- if (*dst && (*dst)->End < src->Start) {
- dst = &(*dst)->Next;
- } else if (!*dst || (*dst)->Start > src->End) {
- struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
- li->Start = src->Start;
- li->End = src->End;
- li->Next = *dst;
- *dst = li;
- src = src->Next;
- } else {
- if (src->End > (*dst)->End)
- (*dst)->End = src->End;
- if (src->Start < (*dst)->Start)
- (*dst)->Start = src->Start;
- src = src->Next;
- }
- }
-
- if (VERBOSE) {
- DBG(" result: ");
- print_live_intervals(*dst_backup);
- DBG("\n");
- }
-}
-
-static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
-{
- if (VERBOSE) {
- DBG("overlap_live_intervals: ");
- print_live_intervals(dst);
- DBG(" to ");
- print_live_intervals(src);
- DBG("\n");
+ if (!a->Used || !b->Used) {
+ DBG(" unused interval\n");
+ return 0;
}
- while(src && dst) {
- if (dst->End <= src->Start) {
- dst = dst->Next;
- } else if (dst->End <= src->End) {
+ if (a->Start > b->Start) {
+ if (a->Start < b->End) {
DBG(" overlap\n");
return 1;
- } else if (dst->Start < src->End) {
+ }
+ } else if (b->Start > a->Start) {
+ if (b->Start < a->End) {
+ DBG(" overlap\n");
+ return 1;
+ }
+ } else { /* a->Start == b->Start */
+ if (a->Start != a->End && b->Start != b->End) {
DBG(" overlap\n");
return 1;
- } else {
- src = src->Next;
}
}
return 0;
}
-static int try_add_live_intervals(struct regalloc_state * s,
- struct live_intervals ** dst, struct live_intervals * src)
-{
- if (overlap_live_intervals(*dst, src))
- return 0;
-
- add_live_intervals(s, dst, src);
- return 1;
-}
-
-static void scan_callback(void * data, struct rc_instruction * inst,
+static void scan_read_callback(void * data, struct rc_instruction * inst,
rc_register_file file, unsigned int index, unsigned int mask)
{
struct regalloc_state * s = data;
struct register_info * reg;
+ unsigned int i;
- if (file == RC_FILE_TEMPORARY)
- reg = &s->Temporary[index];
- else if (file == RC_FILE_INPUT)
- reg = &s->Input[index];
- else
+ if (file != RC_FILE_INPUT)
return;
- if (!reg->Used) {
- reg->Used = 1;
- if (file == RC_FILE_INPUT)
- reg->Live.Start = -1;
- else if (s->BeginLoop >= 0)
- reg->Live.Start = s->BeginLoop;
- else
- reg->Live.Start = inst->IP;
- reg->Live.End = inst->IP;
- } else if (s->EndLoop >= 0)
- reg->Live.End = s->EndLoop;
- else if (inst->IP > reg->Live.End)
- reg->Live.End = inst->IP;
-}
-
-static void compute_live_intervals(struct radeon_compiler *c,
- struct regalloc_state *s)
-{
- memset(s, 0, sizeof(*s));
- s->C = c;
- s->NumHwTemporaries = c->max_temp_regs;
- s->BeginLoop = -1;
- s->EndLoop = -1;
- s->HwTemporary =
- memory_pool_malloc(&c->Pool,
- s->NumHwTemporaries * sizeof(struct hardware_register));
- memset(s->HwTemporary, 0, s->NumHwTemporaries * sizeof(struct hardware_register));
-
- rc_recompute_ips(s->C);
-
- for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
- inst != &s->C->Program.Instructions;
- inst = inst->Next) {
-
- /* For all instructions inside of a loop, the ENDLOOP
- * instruction is used as the end of the live interval and
- * the BGNLOOP instruction is used as the beginning. */
- if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && s->EndLoop < 0) {
- int loops = 1;
- struct rc_instruction * tmp;
- s->BeginLoop = inst->IP;
- for(tmp = inst->Next;
- tmp != &s->C->Program.Instructions;
- tmp = tmp->Next) {
- if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
- loops++;
- } else if (tmp->U.I.Opcode
- == RC_OPCODE_ENDLOOP) {
- if(!--loops) {
- s->EndLoop = tmp->IP;
- break;
- }
- }
- }
- }
+ s->Input[index].Used = 1;
+ reg = &s->Input[index];
- if (inst->IP == s->EndLoop) {
- s->EndLoop = -1;
- s->BeginLoop = -1;
+ for (i = 0; i < 4; i++) {
+ if (!((mask >> i) & 0x1)) {
+ continue;
}
-
- rc_for_all_reads_mask(inst, scan_callback, s);
- rc_for_all_writes_mask(inst, scan_callback, s);
+ reg->Live[i].Used = 1;
+ reg->Live[i].Start = 0;
+ reg->Live[i].End = inst->IP;
}
}
struct regalloc_state * s = data;
const struct register_info * reg;
- if (*file == RC_FILE_TEMPORARY)
+ if (*file == RC_FILE_TEMPORARY && s->Simple)
reg = &s->Temporary[*index];
else if (*file == RC_FILE_INPUT)
reg = &s->Input[*index];
return;
if (reg->Allocated) {
- *file = reg->File;
*index = reg->Index;
}
}
-static void do_regalloc(struct regalloc_state * s)
+static void alloc_input_simple(void * data, unsigned int input,
+ unsigned int hwreg)
{
- /* Simple and stupid greedy register allocation */
- for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
- struct register_info * reg = &s->Temporary[index];
+ struct regalloc_state * s = data;
- if (!reg->Used)
- continue;
+ if (input >= s->NumInputs)
+ return;
+
+ s->Input[input].Allocated = 1;
+ s->Input[input].File = RC_FILE_TEMPORARY;
+ s->Input[input].Index = hwreg;
+}
- for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
- if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, ®->Live)) {
- reg->Allocated = 1;
- reg->File = RC_FILE_TEMPORARY;
- reg->Index = hwreg;
- goto success;
+/* This functions offsets the temporary register indices by the number
+ * of input registers, because input registers are actually temporaries and
+ * should not occupy the same space.
+ *
+ * This pass is supposed to be used to maintain correct allocation of inputs
+ * if the standard register allocation is disabled. */
+static void do_regalloc_inputs_only(struct regalloc_state * s)
+{
+ for (unsigned i = 0; i < s->NumTemporaries; i++) {
+ s->Temporary[i].Allocated = 1;
+ s->Temporary[i].File = RC_FILE_TEMPORARY;
+ s->Temporary[i].Index = i + s->NumInputs;
+ }
+}
+
+static unsigned int is_derivative(rc_opcode op)
+{
+ return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY);
+}
+
+static enum rc_reg_class variable_get_class(
+ struct rc_variable * variable,
+ struct rc_class * classes)
+{
+ unsigned int i;
+ unsigned int can_change_writemask= 1;
+ unsigned int writemask = rc_variable_writemask_sum(variable);
+ struct rc_list * readers = rc_variable_readers_union(variable);
+
+ if (!variable->C->is_r500) {
+ unsigned int mask_count = 0;
+ /* The assumption here is that if an instruction has type
+ * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
+ * r300 and r400 can't swizzle the result of a TEX lookup. */
+ if (variable->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ writemask = RC_MASK_XYZW;
+ }
+ for (i = 0; i < 4; i++) {
+ if (GET_BIT(writemask, i)) {
+ mask_count++;
}
}
+ /* XXX We should do swizzle packing for r300 and r400 here.
+ * We need to figure out how not to create non-native
+ * swizzles. */
+ if (mask_count > 1) {
+ can_change_writemask = 0;
+ }
+ }
- rc_error(s->C, "Ran out of hardware temporaries\n");
- return;
-
- success:;
+ if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
+ /* DDX/DDY seem to always fail when their writemasks are
+ * changed.*/
+ if (is_derivative(variable->Inst->U.P.RGB.Opcode)
+ || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
+ can_change_writemask = 0;
+ }
}
+ for ( ; readers; readers = readers->Next) {
+ struct rc_reader * r = readers->Item;
+ if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
+ if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
+ can_change_writemask = 0;
+ break;
+ }
+ /* DDX/DDY also fail when their swizzles are changed. */
+ if (is_derivative(r->Inst->U.P.RGB.Opcode)
+ || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
+ can_change_writemask = 0;
+ break;
+ }
+ }
+ }
+ for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+ unsigned int j;
+ if (!can_change_writemask && classes[i].WritemaskCount > 1) {
+ continue;
+ }
+ for (j = 0; j < 3; j++) {
+ if (classes[i].Writemasks[j] == writemask) {
+ return classes[i].Class;
+ }
+ }
+ }
+ rc_error(variable->C, "Could not find class for index=%u mask=%u\n",
+ variable->Dst.Index, writemask);
+ return 0;
+}
- /* Rewrite all instructions based on the translation table we built */
- for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
- inst != &s->C->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, &remap_register, s);
+static unsigned int overlap_live_intervals_array(
+ struct live_intervals * a,
+ struct live_intervals * b)
+{
+ unsigned int a_chan, b_chan;
+ for (a_chan = 0; a_chan < 4; a_chan++) {
+ for (b_chan = 0; b_chan < 4; b_chan++) {
+ if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
+ return 1;
+ }
+ }
}
+ return 0;
}
-static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+static unsigned int reg_get_index(int reg)
{
- struct regalloc_state * s = data;
+ return reg / RC_MASK_XYZW;
+}
- if (!s->Input[input].Used)
- return;
+static unsigned int reg_get_writemask(int reg)
+{
+ return (reg % RC_MASK_XYZW) + 1;
+}
- add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
+static int get_reg_id(unsigned int index, unsigned int writemask)
+{
+ assert(writemask);
+ if (writemask == 0) {
+ return 0;
+ }
+ return (index * RC_MASK_XYZW) + (writemask - 1);
+}
- s->Input[input].Allocated = 1;
- s->Input[input].File = RC_FILE_TEMPORARY;
- s->Input[input].Index = hwreg;
+#if VERBOSE
+static void print_reg(int reg)
+{
+ unsigned int index = reg_get_index(reg);
+ unsigned int mask = reg_get_writemask(reg);
+ fprintf(stderr, "Temp[%u].%c%c%c%c", index,
+ mask & RC_MASK_X ? 'x' : '_',
+ mask & RC_MASK_Y ? 'y' : '_',
+ mask & RC_MASK_Z ? 'z' : '_',
+ mask & RC_MASK_W ? 'w' : '_');
+}
+#endif
+static void add_register_conflicts(
+ struct ra_regs * regs,
+ unsigned int max_temp_regs)
+{
+ unsigned int index, a_mask, b_mask;
+ for (index = 0; index < max_temp_regs; index++) {
+ for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
+ for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
+ b_mask++) {
+ if (a_mask & b_mask) {
+ ra_add_reg_conflict(regs,
+ get_reg_id(index, a_mask),
+ get_reg_id(index, b_mask));
+ }
+ }
+ }
+ }
}
-void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+static void do_advanced_regalloc(struct regalloc_state * s)
{
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
- struct regalloc_state s;
+ struct rc_class rc_class_list [] = {
+ {RC_REG_CLASS_SINGLE, 3, 0, 0,
+ {RC_MASK_X,
+ RC_MASK_Y,
+ RC_MASK_Z}},
+ {RC_REG_CLASS_DOUBLE, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_Y,
+ RC_MASK_X | RC_MASK_Z,
+ RC_MASK_Y | RC_MASK_Z}},
+ {RC_REG_CLASS_TRIPLE, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_ALPHA, 1, 0, 0,
+ {RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_W,
+ RC_MASK_Y | RC_MASK_W,
+ RC_MASK_Z | RC_MASK_W}},
+ {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+ RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
+ {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_X, 1, 0, 0,
+ {RC_MASK_X,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_Y, 1, 0, 0,
+ {RC_MASK_Y,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_Z, 1, 0, 0,
+ {RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XY, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YZ, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XZ, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Z,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YW, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_ZW, 1, 0, 0,
+ {RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XYW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_YZW, 1, 0, 0,
+ {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}},
+ {RC_REG_CLASS_XZW, 1, 0, 0,
+ {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
+ RC_MASK_NONE,
+ RC_MASK_NONE}}
+ };
+
+ unsigned int i, j, index, input_node, node_count, node_index;
+ unsigned int * node_classes;
+ unsigned int * input_classes;
+ struct rc_instruction * inst;
+ struct rc_list * var_ptr;
+ struct rc_list * variables;
+ struct ra_regs * regs;
+ struct ra_graph * graph;
+
+ /* Allocate the main ra data structure */
+ regs = ra_alloc_reg_set(s->C->max_temp_regs * RC_MASK_XYZW);
+
+ /* Get list of program variables */
+ variables = rc_get_variables(s->C);
+ node_count = rc_list_count(variables);
+ node_classes = memory_pool_malloc(&s->C->Pool,
+ node_count * sizeof(unsigned int));
+ input_classes = memory_pool_malloc(&s->C->Pool,
+ s->NumInputs * sizeof(unsigned int));
+
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next, node_index++) {
+ unsigned int class_index;
+ /* Compute the live intervals */
+ rc_variable_compute_live_intervals(var_ptr->Item);
+
+ class_index = variable_get_class(var_ptr->Item, rc_class_list);
+
+ /* If we haven't used this register class yet, mark it
+ * as used and allocate space for it. */
+ if (!rc_class_list[class_index].Used) {
+ rc_class_list[class_index].Used = 1;
+ rc_class_list[class_index].Id = ra_alloc_reg_class(regs);
+ }
- compute_live_intervals(cc, &s);
+ node_classes[node_index] = rc_class_list[class_index].Id;
+ }
- c->AllocateHwInputs(c, &alloc_input, &s);
- do_regalloc(&s);
-}
+ /* Assign registers to the classes */
+ for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
+ struct rc_class class = rc_class_list[i];
+ if (!class.Used) {
+ continue;
+ }
-/* This functions offsets the temporary register indices by the number
- * of input registers, because input registers are actually temporaries and
- * should not occupy the same space.
- *
- * This pass is supposed to be used to maintain correct allocation of inputs
- * if the standard register allocation is disabled. */
-void rc_pair_regalloc_inputs_only(struct radeon_compiler *cc, void *user)
-{
- struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
- struct regalloc_state s;
- int temp_reg_offset;
+ for (index = 0; index < s->C->max_temp_regs; index++) {
+ for (j = 0; j < class.WritemaskCount; j++) {
+ int reg_id = get_reg_id(index,
+ class.Writemasks[j]);
+ ra_class_add_reg(regs, class.Id, reg_id);
+ }
+ }
+ }
+
+ /* Add register conflicts */
+ add_register_conflicts(regs, s->C->max_temp_regs);
+
+ /* Calculate live intervals for input registers */
+ for (inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, scan_read_callback, s);
+ }
+
+ /* Create classes for input registers */
+ for (i = 0; i < s->NumInputs; i++) {
+ unsigned int chan, class_id, writemask = 0;
+ for (chan = 0; chan < 4; chan++) {
+ if (s->Input[i].Live[chan].Used) {
+ writemask |= (1 << chan);
+ }
+ }
+ s->Input[i].Writemask = writemask;
+ if (!writemask) {
+ continue;
+ }
+
+ class_id = ra_alloc_reg_class(regs);
+ input_classes[i] = class_id;
+ ra_class_add_reg(regs, class_id,
+ get_reg_id(s->Input[i].Index, writemask));
+ }
+
+ ra_set_finalize(regs);
+
+ graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs);
- compute_live_intervals(cc, &s);
+ /* Build the interference graph */
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next,node_index++) {
+ struct rc_list * a, * b;
+ unsigned int b_index;
- c->AllocateHwInputs(c, &alloc_input, &s);
+ ra_set_node_class(graph, node_index, node_classes[node_index]);
- temp_reg_offset = 0;
- for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (s.Input[i].Allocated && temp_reg_offset <= s.Input[i].Index)
- temp_reg_offset = s.Input[i].Index + 1;
+ for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
+ b; b = b->Next, b_index++) {
+ struct rc_variable * var_a = a->Item;
+ while (var_a) {
+ struct rc_variable * var_b = b->Item;
+ while (var_b) {
+ if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
+ ra_add_node_interference(graph,
+ node_index, b_index);
+ }
+ var_b = var_b->Friend;
+ }
+ var_a = var_a->Friend;
+ }
+ }
}
- if (temp_reg_offset) {
- for (unsigned i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
- if (s.Temporary[i].Used) {
- s.Temporary[i].Allocated = 1;
- s.Temporary[i].File = RC_FILE_TEMPORARY;
- s.Temporary[i].Index = i + temp_reg_offset;
+ /* Add input registers to the interference graph */
+ for (i = 0, input_node = 0; i< s->NumInputs; i++) {
+ if (!s->Input[i].Writemask) {
+ continue;
+ }
+ ra_set_node_class(graph, node_count + input_node,
+ input_classes[i]);
+ for (var_ptr = variables, node_index = 0;
+ var_ptr; var_ptr = var_ptr->Next, node_index++) {
+ struct rc_variable * var = var_ptr->Item;
+ if (overlap_live_intervals_array(s->Input[i].Live,
+ var->Live)) {
+ ra_add_node_interference(graph, node_index,
+ node_count + input_node);
}
}
+ /* Manually allocate a register for this input */
+ ra_set_node_reg(graph, node_count + input_node, get_reg_id(
+ s->Input[i].Index, s->Input[i].Writemask));
+ input_node++;
+ }
+
+ if (!ra_allocate_no_spills(graph)) {
+ rc_error(s->C, "Ran out of hardware temporaries\n");
+ return;
+ }
+
+ /* Rewrite the registers */
+ for (var_ptr = variables, node_index = 0; var_ptr;
+ var_ptr = var_ptr->Next, node_index++) {
+ int reg = ra_get_node_reg(graph, node_index);
+ unsigned int writemask = reg_get_writemask(reg);
+ unsigned int index = reg_get_index(reg);
+ struct rc_variable * var = var_ptr->Item;
+
+ if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ writemask = rc_variable_writemask_sum(var);
+ }
+
+ if (var->Dst.File == RC_FILE_INPUT) {
+ continue;
+ }
+ rc_variable_change_dst(var, index, writemask);
+ }
+
+ ralloc_free(graph);
+ ralloc_free(regs);
+}
+
+/**
+ * @param user This parameter should be a pointer to an integer value. If this
+ * integer value is zero, then a simple register allocator will be used that
+ * only allocates space for input registers (\sa do_regalloc_inputs_only). If
+ * user is non-zero, then the regular register allocator will be used
+ * (\sa do_regalloc).
+ */
+void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
+{
+ struct r300_fragment_program_compiler *c =
+ (struct r300_fragment_program_compiler*)cc;
+ struct regalloc_state s;
+ int do_full_regalloc = (int)user;
+ struct rc_instruction * inst;
+
+ memset(&s, 0, sizeof(s));
+ s.C = cc;
+ s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
+ s.Input = memory_pool_malloc(&cc->Pool,
+ s.NumInputs * sizeof(struct register_info));
+ memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
+
+ s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
+ s.Temporary = memory_pool_malloc(&cc->Pool,
+ s.NumTemporaries * sizeof(struct register_info));
+ memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
+
+ for(inst = cc->Program.Instructions.Next;
+ inst != &cc->Program.Instructions;
+ inst = inst->Next) {
- /* Rewrite all registers. */
- for (struct rc_instruction *inst = cc->Program.Instructions.Next;
- inst != &cc->Program.Instructions;
- inst = inst->Next) {
- rc_remap_registers(inst, &remap_register, &s);
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ s.HasLoop = 1;
+ break;
}
}
+
+ rc_recompute_ips(s.C);
+
+ c->AllocateHwInputs(c, &alloc_input_simple, &s);
+ if (!s.HasLoop && do_full_regalloc) {
+ do_advanced_regalloc(&s);
+ } else {
+ s.Simple = 1;
+ do_regalloc_inputs_only(&s);
+ }
+
+ /* Rewrite inputs and if we are doing the simple allocation, rewrite
+ * temporaries too. */
+ for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
+ inst != &s.C->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, &remap_register, &s);
+ }
}
/** True if tex instruction should do shadow comparison */
unsigned int TexShadow:1;
+
+ /**R500 Only. How to swizzle the result of a TEX lookup*/
+ unsigned int TexSwizzle:12;
/*@}*/
/** This holds information about the presubtract operation used by
#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+#define RC_SWIZZLE_UUUU RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_UNUSED)
/**
* \name Bitmasks for components of vectors.
return NULL;
}
}
+
+int rc_pair_get_src_index(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_source * src)
+{
+ int i;
+ for (i = 0; i < 3; i++) {
+ if (&pair_inst->RGB.Src[i] == src
+ || &pair_inst->Alpha.Src[i] == src) {
+ return i;
+ }
+ }
+ return -1;
+}
struct rc_pair_instruction_source * rc_pair_get_src(
struct rc_pair_instruction * pair_inst,
struct rc_pair_instruction_arg * arg);
+
+int rc_pair_get_src_index(
+ struct rc_pair_instruction * pair_inst,
+ struct rc_pair_instruction_source * src);
/*@}*/
--- /dev/null
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_variable.h"
+
+#include "memory_pool.h"
+#include "radeon_compiler_util.h"
+#include "radeon_dataflow.h"
+#include "radeon_list.h"
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+/**
+ * Rewrite the index and writemask for the destination register of var
+ * and its friends to new_index and new_writemask. This function also takes
+ * care of rewriting the swizzles for the sources of var.
+ */
+void rc_variable_change_dst(
+ struct rc_variable * var,
+ unsigned int new_index,
+ unsigned int new_writemask)
+{
+ unsigned int new_idx, old_idx;
+ unsigned int conversion_swizzle = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
+ struct rc_variable * var_ptr;
+ struct rc_list * readers;
+ unsigned int old_mask = rc_variable_writemask_sum(var);
+
+ new_idx = 0;
+ for (old_idx = 0; old_idx < 4; old_idx++) {
+ if (!GET_BIT(old_mask, old_idx))
+ continue;
+ for ( ; new_idx < 4; new_idx++) {
+ if (GET_BIT(new_writemask, new_idx)) {
+ SET_SWZ(conversion_swizzle, old_idx, new_idx);
+ new_idx++;
+ break;
+ }
+ }
+ }
+
+ for (var_ptr = var; var_ptr; var_ptr = var_ptr->Friend) {
+ if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ rc_normal_rewrite_writemask(var_ptr->Inst,
+ conversion_swizzle);
+ var_ptr->Inst->U.I.DstReg.Index = new_index;
+ } else {
+ struct rc_pair_sub_instruction * sub;
+ if (var_ptr->Dst.WriteMask == RC_MASK_W) {
+ assert(new_writemask & RC_MASK_W);
+ sub = &var_ptr->Inst->U.P.Alpha;
+ } else {
+ sub = &var_ptr->Inst->U.P.RGB;
+ rc_pair_rewrite_writemask(sub,
+ conversion_swizzle);
+ }
+ sub->DestIndex = new_index;
+ }
+ }
+
+ readers = rc_variable_readers_union(var);
+
+ for ( ; readers; readers = readers->Next) {
+ struct rc_reader * reader = readers->Item;
+ if (reader->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ reader->U.I.Src->Index = new_index;
+ reader->U.I.Src->Swizzle = rc_rewrite_swizzle(
+ reader->U.I.Src->Swizzle, conversion_swizzle);
+ } else {
+ struct rc_pair_instruction * pair_inst =
+ &reader->Inst->U.P;
+ unsigned int src_type = rc_source_type_swz(
+ reader->U.P.Arg->Swizzle);
+
+ int src_index = reader->U.P.Arg->Source;
+ if (src_index == RC_PAIR_PRESUB_SRC) {
+ src_index = rc_pair_get_src_index(
+ pair_inst, reader->U.P.Src);
+ }
+ /* Try to delete the old src, it is OK if this fails,
+ * because rc_pair_alloc_source might be able to
+ * find a source the ca be reused.
+ */
+ if (rc_pair_remove_src(reader->Inst, src_type,
+ src_index, old_mask)) {
+ /* Reuse the source index of the source that
+ * was just deleted and set its register
+ * index. We can't use rc_pair_alloc_source
+ * for this becuase it might return a source
+ * index that is already being used. */
+ if (src_type & RC_SOURCE_RGB) {
+ pair_inst->RGB.Src[src_index]
+ .Used = 1;
+ pair_inst->RGB.Src[src_index]
+ .Index = new_index;
+ pair_inst->RGB.Src[src_index]
+ .File = RC_FILE_TEMPORARY;
+ }
+ if (src_type & RC_SOURCE_ALPHA) {
+ pair_inst->Alpha.Src[src_index]
+ .Used = 1;
+ pair_inst->Alpha.Src[src_index]
+ .Index = new_index;
+ pair_inst->Alpha.Src[src_index]
+ .File = RC_FILE_TEMPORARY;
+ }
+ } else {
+ src_index = rc_pair_alloc_source(
+ &reader->Inst->U.P,
+ src_type & RC_SOURCE_RGB,
+ src_type & RC_SOURCE_ALPHA,
+ RC_FILE_TEMPORARY,
+ new_index);
+ if (src_index < 0) {
+ rc_error(var->C, "Rewrite of inst %u failed "
+ "Can't allocate source for "
+ "Inst %u src_type=%x "
+ "new_index=%u new_mask=%u\n",
+ var->Inst->IP, reader->Inst->IP, src_type, new_index, new_writemask);
+ continue;
+ }
+ }
+ reader->U.P.Arg->Swizzle = rc_rewrite_swizzle(
+ reader->U.P.Arg->Swizzle, conversion_swizzle);
+ if (reader->U.P.Arg->Source != RC_PAIR_PRESUB_SRC) {
+ reader->U.P.Arg->Source = src_index;
+ }
+ }
+ }
+}
+
+/**
+ * Compute the live intervals for var and its friends.
+ */
+void rc_variable_compute_live_intervals(struct rc_variable * var)
+{
+ while(var) {
+ unsigned int i;
+ unsigned int start = var->Inst->IP;
+
+ for (i = 0; i < var->ReaderCount; i++) {
+ unsigned int chan;
+ unsigned int mask = var->Readers[i].WriteMask;
+ for (chan = 0; chan < 4; chan++) {
+ if ((mask >> chan) & 0x1) {
+ var->Live[chan].Start = start;
+ var->Live[chan].End =
+ var->Readers[i].Inst->IP;
+ var->Live[chan].Used = 1;
+ }
+ }
+ }
+ var = var->Friend;
+ }
+}
+
+/**
+ * @return 1 if a and b share a reader
+ * @return 0 if they do not
+ */
+static unsigned int readers_intersect(
+ struct rc_variable * a,
+ struct rc_variable * b)
+{
+ unsigned int a_index, b_index;
+ for (a_index = 0; a_index < a->ReaderCount; a_index++) {
+ struct rc_reader reader_a = a->Readers[a_index];
+ for (b_index = 0; b_index < b->ReaderCount; b_index++) {
+ struct rc_reader reader_b = b->Readers[b_index];
+ if (reader_a.Inst->Type == RC_INSTRUCTION_NORMAL
+ && reader_b.Inst->Type == RC_INSTRUCTION_NORMAL
+ && reader_a.U.I.Src == reader_b.U.I.Src) {
+
+ return 1;
+ }
+
+ if (reader_a.Inst->Type == RC_INSTRUCTION_PAIR
+ && reader_b.Inst->Type == RC_INSTRUCTION_PAIR
+ && reader_a.U.P.Arg == reader_b.U.P.Arg) {
+
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+void rc_variable_add_friend(
+ struct rc_variable * var,
+ struct rc_variable * friend)
+{
+ while(var->Friend) {
+ var = var->Friend;
+ }
+ var->Friend = friend;
+}
+
+struct rc_variable * rc_variable(
+ struct radeon_compiler * c,
+ unsigned int DstFile,
+ unsigned int DstIndex,
+ unsigned int DstWriteMask,
+ struct rc_reader_data * reader_data)
+{
+ struct rc_variable * new =
+ memory_pool_malloc(&c->Pool, sizeof(struct rc_variable));
+ memset(new, 0, sizeof(struct rc_variable));
+ new->C = c;
+ new->Dst.File = DstFile;
+ new->Dst.Index = DstIndex;
+ new->Dst.WriteMask = DstWriteMask;
+ if (reader_data) {
+ new->Inst = reader_data->Writer;
+ new->ReaderCount = reader_data->ReaderCount;
+ new->Readers = reader_data->Readers;
+ }
+ return new;
+}
+
+static void get_variable_helper(
+ struct rc_list ** aborted_list,
+ struct rc_list ** variable_list,
+ unsigned int aborted,
+ struct rc_variable * variable)
+{
+ if (aborted) {
+ rc_list_add(aborted_list, rc_list(&variable->C->Pool, variable));
+ } else {
+ rc_list_add(variable_list, rc_list(&variable->C->Pool, variable));
+ }
+}
+
+static void get_variable_pair_helper(
+ struct rc_list ** aborted_list,
+ struct rc_list ** variable_list,
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ struct rc_pair_sub_instruction * sub_inst)
+{
+ struct rc_reader_data reader_data;
+ struct rc_variable * new_var;
+ rc_register_file file;
+ unsigned int writemask;
+
+ if (sub_inst->Opcode == RC_OPCODE_NOP) {
+ return;
+ }
+ memset(&reader_data, 0, sizeof(struct rc_reader_data));
+ rc_get_readers_sub(c, inst, sub_inst, &reader_data, NULL, NULL, NULL);
+
+ if (reader_data.ReaderCount == 0) {
+ return;
+ }
+
+ if (sub_inst->WriteMask) {
+ file = RC_FILE_TEMPORARY;
+ writemask = sub_inst->WriteMask;
+ } else if (sub_inst->OutputWriteMask) {
+ file = RC_FILE_OUTPUT;
+ writemask = sub_inst->OutputWriteMask;
+ } else {
+ writemask = 0;
+ file = RC_FILE_NONE;
+ }
+ new_var = rc_variable(c, file, sub_inst->DestIndex, writemask,
+ &reader_data);
+ get_variable_helper(aborted_list, variable_list, reader_data.Abort,
+ new_var);
+}
+
+/**
+ * Generate a list of variables used by the shader program. Each instruction
+ * that writes to a register is considered a variable. The struct rc_variable
+ * data structure includes a list of readers and is essentially a
+ * definition-use chain. Any two variables that share a reader are considered
+ * "friends" and they are linked together via the Friend attribute.
+ */
+struct rc_list * rc_get_variables(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst;
+ struct rc_list * aborted_list = NULL;
+ struct rc_list * variable_list = NULL;
+ struct rc_list * var_ptr;
+ struct rc_list * search_ptr;
+
+ for (inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ struct rc_reader_data reader_data;
+ struct rc_variable * new_var;
+ memset(&reader_data, 0, sizeof(reader_data));
+
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ rc_get_readers(c, inst, &reader_data, NULL, NULL, NULL);
+ if (reader_data.ReaderCount == 0) {
+ continue;
+ }
+ new_var = rc_variable(c, inst->U.I.DstReg.File,
+ inst->U.I.DstReg.Index,
+ inst->U.I.DstReg.WriteMask, &reader_data);
+ get_variable_helper(&aborted_list, &variable_list,
+ reader_data.Abort, new_var);
+ } else {
+ get_variable_pair_helper(&aborted_list, &variable_list,
+ c, inst, &inst->U.P.RGB);
+ get_variable_pair_helper(&aborted_list, &variable_list,
+ c, inst, &inst->U.P.Alpha);
+ }
+ }
+
+ /* The aborted_list contains a list of variables that might share a
+ * reader with another variable. We need to search through this list
+ * and pair together variables that do share the same reader.
+ */
+ while (aborted_list) {
+ struct rc_list * search_ptr_next;
+ var_ptr = aborted_list;
+
+ search_ptr = var_ptr->Next;
+ while(search_ptr) {
+ search_ptr_next = search_ptr->Next;
+ if (readers_intersect(var_ptr->Item, search_ptr->Item)){
+ rc_list_remove(&aborted_list, search_ptr);
+ rc_variable_add_friend(var_ptr->Item,
+ search_ptr->Item);
+ }
+ search_ptr = search_ptr_next;
+ }
+ rc_list_remove(&aborted_list, var_ptr);
+ rc_list_add(&variable_list, rc_list(
+ &((struct rc_variable*)(var_ptr->Item))->C->Pool,
+ var_ptr->Item));
+ }
+ return variable_list;
+}
+
+/**
+ * @return The bitwise or of the writemasks of a variable and all of its
+ * friends.
+ */
+unsigned int rc_variable_writemask_sum(struct rc_variable * var)
+{
+ unsigned int writemask = 0;
+ while(var) {
+ writemask |= var->Dst.WriteMask;
+ var = var->Friend;
+ }
+ return writemask;
+}
+
+/*
+ * @return A list of readers for a variable and its friends. Readers
+ * that read from two different variable friends are only included once in
+ * this list.
+ */
+struct rc_list * rc_variable_readers_union(struct rc_variable * var)
+{
+ struct rc_list * list = NULL;
+ while (var) {
+ unsigned int i;
+ for (i = 0; i < var->ReaderCount; i++) {
+ struct rc_list * temp;
+ struct rc_reader * a = &var->Readers[i];
+ unsigned int match = 0;
+ for (temp = list; temp; temp = temp->Next) {
+ struct rc_reader * b = temp->Item;
+ if (a->Inst->Type != b->Inst->Type) {
+ continue;
+ }
+ if (a->Inst->Type == RC_INSTRUCTION_NORMAL) {
+ if (a->U.I.Src == b->U.I.Src) {
+ match = 1;
+ break;
+ }
+ }
+ if (a->Inst->Type == RC_INSTRUCTION_PAIR) {
+ if (a->U.P.Arg == b->U.P.Arg
+ && a->U.P.Src == b->U.P.Src) {
+ match = 1;
+ break;
+ }
+ }
+ }
+ if (match) {
+ continue;
+ }
+ rc_list_add(&list, rc_list(&var->C->Pool, a));
+ }
+ var = var->Friend;
+ }
+ return list;
+}
+
+void rc_variable_print(struct rc_variable * var)
+{
+ unsigned int i;
+ while (var) {
+ fprintf(stderr, "%u: TEMP[%u].%u: ",
+ var->Inst->IP, var->Dst.Index, var->Dst.WriteMask);
+ for (i = 0; i < 4; i++) {
+ fprintf(stderr, "chan %u: start=%u end=%u ", i,
+ var->Live[i].Start, var->Live[i].End);
+ }
+ fprintf(stderr, "%u readers\n", var->ReaderCount);
+ if (var->Friend) {
+ fprintf(stderr, "Friend: \n\t");
+ }
+ var = var->Friend;
+ }
+}
--- /dev/null
+/*
+ * Copyright 2011 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_VARIABLE_H
+#define RADEON_VARIABLE_H
+
+#include "radeon_compiler.h"
+
+struct radeon_compiler;
+struct rc_list;
+struct rc_reader_data;
+struct rc_readers;
+
+struct live_intervals {
+ int Start;
+ int End;
+ int Used;
+};
+
+struct rc_variable {
+ struct radeon_compiler * C;
+ struct rc_dst_register Dst;
+
+ struct rc_instruction * Inst;
+ unsigned int ReaderCount;
+ struct rc_reader * Readers;
+ struct live_intervals Live[4];
+
+ /* A friend is a variable that shares a reader with another variable.
+ */
+ struct rc_variable * Friend;
+};
+
+void rc_variable_change_dst(
+ struct rc_variable * var,
+ unsigned int new_index,
+ unsigned int new_writemask);
+
+void rc_variable_compute_live_intervals(struct rc_variable * var);
+
+void rc_variable_add_friend(
+ struct rc_variable * var,
+ struct rc_variable * friend);
+
+struct rc_variable * rc_variable(
+ struct radeon_compiler * c,
+ unsigned int DstFile,
+ unsigned int DstIndex,
+ unsigned int DstWriteMask,
+ struct rc_reader_data * reader_data);
+
+struct rc_list * rc_get_variables(struct radeon_compiler * c);
+
+unsigned int rc_variable_writemask_sum(struct rc_variable * var);
+
+struct rc_list * rc_variable_readers_union(struct rc_variable * var);
+
+void rc_variable_print(struct rc_variable * var);
+
+#endif /* RADEON_VARIABLE_H */