From: Christian König Date: Sat, 18 Dec 2010 16:56:36 +0000 (+0100) Subject: r600g: rework bank swizzle code X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a25b91c2c2118741389f418a66de7ca145b8600b;p=mesa.git r600g: rework bank swizzle code --- diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index b0567dd55de..4ceb1d71a0a 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -32,6 +32,9 @@ #include "r600_formats.h" #include "r600d.h" +#define NUM_OF_CYCLES 3 +#define NUM_OF_COMPONENTS 4 + static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) { if(alu->is_op3) @@ -340,228 +343,220 @@ static int assign_alu_units(struct r600_bc_alu *alu_first, struct r600_bc_alu *a return 0; } -const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000 - SQ_ALU_VEC_120, //001 - SQ_ALU_VEC_102, //010 - - SQ_ALU_VEC_201, //011 - SQ_ALU_VEC_012, //100 - SQ_ALU_VEC_021, //101 - - SQ_ALU_VEC_012, //110 - SQ_ALU_VEC_012}; //111 - -const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000 - SQ_ALU_SCL_122, //001 - SQ_ALU_SCL_122, //010 - - SQ_ALU_SCL_221, //011 - SQ_ALU_SCL_212, //100 - SQ_ALU_SCL_122, //101 - - SQ_ALU_SCL_122, //110 - SQ_ALU_SCL_122}; //111 - -static int init_gpr(struct r600_bc_alu *alu) +struct alu_bank_swizzle { + int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; + int hw_cfile_addr[4]; + int hw_cfile_elem[4]; +}; + +const unsigned cycle_for_bank_swizzle_vec[][3] = { + [SQ_ALU_VEC_012] = { 0, 1, 2 }, + [SQ_ALU_VEC_021] = { 0, 2, 1 }, + [SQ_ALU_VEC_120] = { 1, 2, 0 }, + [SQ_ALU_VEC_102] = { 1, 0, 2 }, + [SQ_ALU_VEC_201] = { 2, 0, 1 }, + [SQ_ALU_VEC_210] = { 2, 1, 0 } +}; + +const unsigned cycle_for_bank_swizzle_scl[][3] = { + [SQ_ALU_SCL_210] = { 2, 1, 0 }, + [SQ_ALU_SCL_122] = { 1, 2, 2 }, + [SQ_ALU_SCL_212] = { 2, 1, 2 }, + [SQ_ALU_SCL_221] = { 2, 2, 1 } +}; + +static void init_bank_swizzle(struct alu_bank_swizzle *bs) { - int cycle, component; + int i, cycle, component; /* set up gpr use */ for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) for (component = 0; component < NUM_OF_COMPONENTS; component++) - alu->hw_gpr[cycle][component] = -1; - return 0; + bs->hw_gpr[cycle][component] = -1; + for (i = 0; i < 4; i++) + bs->hw_cfile_addr[i] = -1; + for (i = 0; i < 4; i++) + bs->hw_cfile_elem[i] = -1; } - -#if 0 -static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle) + +static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) { - if (alu->hw_gpr[cycle][chan] < 0) - alu->hw_gpr[cycle][chan] = sel; - else if (alu->hw_gpr[cycle][chan] != (int)sel) { - R600_ERR("Another scalar operation has already used GPR read port for channel\n"); + if (bs->hw_gpr[cycle][chan] == -1) + bs->hw_gpr[cycle][chan] = sel; + else if (bs->hw_gpr[cycle][chan] != (int)sel) { + // Another scalar operation has already used GPR read port for channel return -1; } return 0; } - -static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) + +static int reserve_cfile(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan) { - int table[3]; - int ret = 0; - switch (swiz) { - case SQ_ALU_SCL_210: - table[0] = 2; table[1] = 1; table[2] = 0; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_122: - table[0] = 1; table[1] = 2; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_212: - table[0] = 2; table[1] = 1; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_SCL_221: - table[0] = 2; table[1] = 2; table[2] = 1; - *p_cycle = table[sel]; - break; - break; - default: - R600_ERR("bad scalar bank swizzle value\n"); - ret = -1; - break; + int res, resmatch = -1, resempty = -1; + for (res = 3; res >= 0; --res) { + if (bs->hw_cfile_addr[res] == -1) + resempty = res; + else if (bs->hw_cfile_addr[res] == sel && + bs->hw_cfile_elem[res] == chan) + resmatch = res; } - return ret; -} - -static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) -{ - int table[3]; - int ret; - - switch (swiz) { - case SQ_ALU_VEC_012: - table[0] = 0; table[1] = 1; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_021: - table[0] = 0; table[1] = 2; table[2] = 1; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_120: - table[0] = 1; table[1] = 2; table[2] = 0; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_102: - table[0] = 1; table[1] = 0; table[2] = 2; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_201: - table[0] = 2; table[1] = 0; table[2] = 1; - *p_cycle = table[sel]; - break; - case SQ_ALU_VEC_210: - table[0] = 2; table[1] = 1; table[2] = 0; - *p_cycle = table[sel]; - break; - default: - R600_ERR("bad vector bank swizzle value\n"); - ret = -1; - break; - } - return ret; -} - - - -static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter) -{ - int num_src; - int i; - int channel_swizzle; - - num_src = r600_bc_get_num_operands(alu); - - for (i = 0; i < num_src; i++) { - channel_swizzle = alu->src[i].chan; - if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3) - chan_counter[channel_swizzle]++; + if (resmatch != -1) + return 0; // Read for this scalar element already reserved, nothing to do here. + else if (resempty != -1) { + bs->hw_cfile_addr[resempty] = sel; + bs->hw_cfile_elem[resempty] = chan; + } else { + // All cfile read ports are used, cannot reference vector element + return -1; } + return 0; } - -/* we need something like this I think - but this is bogus */ -int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first) + +static int is_gpr(unsigned sel) { - struct r600_bc_alu *alu; - int chan_counter[4] = { 0 }; - - update_chan_counter(alu_first, chan_counter); - - LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { - update_chan_counter(alu, chan_counter); - } - - if (chan_counter[0] > 3 || - chan_counter[1] > 3 || - chan_counter[2] > 3 || - chan_counter[3] > 3) { - R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n", - alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]); - return -1; - } - return 0; + return (sel >= 0 && sel <= 127); } -#endif /* CB constants start at 512, and get translated to a kcache index when ALU * clauses are constructed. Note that we handle kcache constants the same way * as (the now gone) cfile constants, is that really required? */ +static int is_cfile(unsigned sel) +{ + return (sel > 255 && sel < 512) || + (sel > 511 && sel < 4607) || // Kcache before translate + (sel > 127 && sel < 192); // Kcache after translate +} + static int is_const(int sel) { - if (sel > 511 && sel < 4607) - return 1; - return 0; + return is_cfile(sel) || + (sel >= V_SQ_ALU_SRC_0 && + sel <= V_SQ_ALU_SRC_LITERAL); } - -static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu) + +static int check_vector(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle) { - unsigned swizzle_key; - - if (alu->bank_swizzle_force) { - alu->bank_swizzle = alu->bank_swizzle_force; - return 0; + int r, src, num_src, sel, elem, cycle; + + num_src = r600_bc_get_num_operands(alu); + for (src = 0; src < num_src; src++) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_gpr(sel)) { + cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src]; + if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan) + // Nothing to do; special-case optimization, + // second source uses first source’s reservation + continue; + else { + r = reserve_gpr(bs, sel, elem, cycle); + if (r) + return r; + } + } else if (is_cfile(sel)) { + r = reserve_cfile(bs, sel, elem); + if (r) + return r; + } + // No restrictions on PV, PS, literal or special constants } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 1 : 0 ); - - alu->bank_swizzle = bank_swizzle_scl[swizzle_key]; return 0; } - -static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu) + +static int check_scalar(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle) { - unsigned swizzle_key; - - if (alu->bank_swizzle_force) { - alu->bank_swizzle = alu->bank_swizzle_force; - return 0; + int r, src, num_src, const_count, sel, elem, cycle; + + num_src = r600_bc_get_num_operands(alu); + for (const_count = 0, src = 0; src < num_src; ++src) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_const(sel)) { // Any constant, including literal and inline constants + if (const_count >= 2) + // More than two references to a constant in + // transcendental operation. + return -1; + else + const_count++; + } + if (is_cfile(sel)) { + r = reserve_cfile(bs, sel, elem); + if (r) + return r; + } + } + for (src = 0; src < num_src; ++src) { + sel = alu->src[src].sel; + elem = alu->src[src].chan; + if (is_gpr(sel)) { + cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src]; + if (cycle < const_count) + // Cycle for GPR load conflicts with + // constant load in transcendental operation. + return -1; + r = reserve_gpr(bs, sel, elem, cycle); + if (r) + return r; + } + // Constants already processed + // No restrictions on PV, PS } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 1 : 0 ); - - alu->bank_swizzle = bank_swizzle_vec[swizzle_key]; return 0; } - + static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first) { struct r600_bc_alu *assignment[5]; + struct alu_bank_swizzle bs; + int bank_swizzle[5]; int i, r; - - r = init_gpr(alu_first); - if (r) - return r; - + r = assign_alu_units(alu_first, assignment); if (r) return r; + + if(alu_first->bank_swizzle_force) { + for (i = 0; i < 5; i++) + if (assignment[i]) + assignment[i]->bank_swizzle = assignment[i]->bank_swizzle_force; + return 0; + } + // just check every possible combination of bank swizzle + // not very efficent, but works on the first try in most of the cases for (i = 0; i < 4; i++) - if (assignment[i]) { - r = check_vector(bc, assignment[i]); - if (r) - return r; + bank_swizzle[i] = SQ_ALU_VEC_012; + bank_swizzle[4] = SQ_ALU_SCL_210; + while(bank_swizzle[4] <= SQ_ALU_SCL_221) { + init_bank_swizzle(&bs); + for (i = 0; i < 4; i++) { + if (assignment[i]) { + r = check_vector(assignment[i], &bs, bank_swizzle[i]); + if (r) + break; + } + } + if (!r && assignment[4]) { + r = check_scalar(assignment[4], &bs, bank_swizzle[4]); + } + if (!r) { + for (i = 0; i < 5; i++) { + if (assignment[i]) + assignment[i]->bank_swizzle = bank_swizzle[i]; + } + return 0; } - if (assignment[4]) { - r = check_scalar(bc, assignment[4]); - if (r) - return r; + for (i = 0; i < 5; i++) { + bank_swizzle[i]++; + if (bank_swizzle[i] <= SQ_ALU_VEC_210) + break; + else + bank_swizzle[i] = SQ_ALU_VEC_012; + } } - - return 0; + + // couldn't find a working swizzle + return -1; } /* This code handles kcache lines as single blocks of 32 constants. We could diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 4763ce03ec4..a9123299f1e 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -25,9 +25,6 @@ #include "util/u_double_list.h" -#define NUM_OF_CYCLES 3 -#define NUM_OF_COMPONENTS 4 - struct r600_vertex_element; struct r600_pipe_context; @@ -61,7 +58,6 @@ struct r600_bc_alu { unsigned bank_swizzle; unsigned bank_swizzle_force; u32 value[4]; - int hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; unsigned omod; };