X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr600%2Fr600_asm.c;h=8c01987318a0b77bac82f2d08c9b81bc562c2f2b;hb=ed4f740127d6896506a91c8c5b9c527a971e982b;hp=03fe950186783779de9f8c7f8c394f736e000e4f;hpb=09547e1bcee7df3444dd8682770d1b31da1a5822;p=mesa.git diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 03fe9501867..8c01987318a 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -20,13 +20,65 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "r600_asm.h" +#include "radeon.h" #include "r600_context.h" #include "util/u_memory.h" #include "r600_sq.h" +#include "r600_opcodes.h" +#include "r600_asm.h" #include #include +static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu) +{ + if(alu->is_op3) + return 3; + + switch (alu->inst) { + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP: + return 0; + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE: + return 2; + + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS: + return 1; + default: R600_ERR( + "Need instruction operand number for 0x%x.\n", alu->inst); + }; + + return 3; +} + int r700_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id); static struct r600_bc_cf *r600_bc_cf(void) @@ -49,6 +101,7 @@ static struct r600_bc_alu *r600_bc_alu(void) if (alu == NULL) return NULL; LIST_INITHEAD(&alu->list); + LIST_INITHEAD(&alu->bs_list); return alu; } @@ -93,6 +146,13 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family) case CHIP_RV740: bc->chiprev = 1; break; + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + bc->chiprev = 2; + break; default: R600_ERR("unknown family %d\n", bc->family); return -EINVAL; @@ -128,6 +188,224 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) return 0; } +const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210, //000 + SQ_ALU_VEC_120, //001 + SQ_ALU_VEC_102, //010 + + SQ_ALU_VEC_201, //011 + SQ_ALU_VEC_012, //100 + SQ_ALU_VEC_021, //101 + + SQ_ALU_VEC_012, //110 + SQ_ALU_VEC_012}; //111 + +const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210, //000 + SQ_ALU_SCL_122, //001 + SQ_ALU_SCL_122, //010 + + SQ_ALU_SCL_221, //011 + SQ_ALU_SCL_212, //100 + SQ_ALU_SCL_122, //101 + + SQ_ALU_SCL_122, //110 + SQ_ALU_SCL_122}; //111 + +static int init_gpr(struct r600_bc_alu *alu) +{ + int cycle, component; + /* set up gpr use */ + for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++) + for (component = 0; component < NUM_OF_COMPONENTS; component++) + alu->hw_gpr[cycle][component] = -1; + return 0; +} + +#if 0 +static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle) +{ + if (alu->hw_gpr[cycle][chan] < 0) + alu->hw_gpr[cycle][chan] = sel; + else if (alu->hw_gpr[cycle][chan] != (int)sel) { + R600_ERR("Another scalar operation has already used GPR read port for channel\n"); + return -1; + } + return 0; +} + +static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) +{ + int table[3]; + int ret = 0; + switch (swiz) { + case SQ_ALU_SCL_210: + table[0] = 2; table[1] = 1; table[2] = 0; + *p_cycle = table[sel]; + break; + case SQ_ALU_SCL_122: + table[0] = 1; table[1] = 2; table[2] = 2; + *p_cycle = table[sel]; + break; + case SQ_ALU_SCL_212: + table[0] = 2; table[1] = 1; table[2] = 2; + *p_cycle = table[sel]; + break; + case SQ_ALU_SCL_221: + table[0] = 2; table[1] = 2; table[2] = 1; + *p_cycle = table[sel]; + break; + break; + default: + R600_ERR("bad scalar bank swizzle value\n"); + ret = -1; + break; + } + return ret; +} + +static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle) +{ + int table[3]; + int ret; + + switch (swiz) { + case SQ_ALU_VEC_012: + table[0] = 0; table[1] = 1; table[2] = 2; + *p_cycle = table[sel]; + break; + case SQ_ALU_VEC_021: + table[0] = 0; table[1] = 2; table[2] = 1; + *p_cycle = table[sel]; + break; + case SQ_ALU_VEC_120: + table[0] = 1; table[1] = 2; table[2] = 0; + *p_cycle = table[sel]; + break; + case SQ_ALU_VEC_102: + table[0] = 1; table[1] = 0; table[2] = 2; + *p_cycle = table[sel]; + break; + case SQ_ALU_VEC_201: + table[0] = 2; table[1] = 0; table[2] = 1; + *p_cycle = table[sel]; + break; + case SQ_ALU_VEC_210: + table[0] = 2; table[1] = 1; table[2] = 0; + *p_cycle = table[sel]; + break; + default: + R600_ERR("bad vector bank swizzle value\n"); + ret = -1; + break; + } + return ret; +} + + + +static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter) +{ + int num_src; + int i; + int channel_swizzle; + + num_src = r600_bc_get_num_operands(alu); + + for (i = 0; i < num_src; i++) { + channel_swizzle = alu->src[i].chan; + if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3) + chan_counter[channel_swizzle]++; + } +} + +/* we need something like this I think - but this is bogus */ +int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first) +{ + struct r600_bc_alu *alu; + int chan_counter[4] = { 0 }; + + update_chan_counter(alu_first, chan_counter); + + LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { + update_chan_counter(alu, chan_counter); + } + + if (chan_counter[0] > 3 || + chan_counter[1] > 3 || + chan_counter[2] > 3 || + chan_counter[3] > 3) { + R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n", + alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]); + return -1; + } + return 0; +} +#endif + +static int is_const(int sel) +{ + if (sel > 255 && sel < 512) + return 1; + if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL) + return 1; + return 0; +} + +static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + unsigned swizzle_key; + + if (alu->bank_swizzle_force) { + alu->bank_swizzle = alu->bank_swizzle_force; + return 0; + } + swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + + (is_const(alu->src[1].sel) ? 2 : 0 ) + + (is_const(alu->src[2].sel) ? 1 : 0 ); + + alu->bank_swizzle = bank_swizzle_scl[swizzle_key]; + return 0; +} + +static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu) +{ + unsigned swizzle_key; + + if (alu->bank_swizzle_force) { + alu->bank_swizzle = alu->bank_swizzle_force; + return 0; + } + swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + + (is_const(alu->src[1].sel) ? 2 : 0 ) + + (is_const(alu->src[2].sel) ? 1 : 0 ); + + alu->bank_swizzle = bank_swizzle_vec[swizzle_key]; + return 0; +} + +static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first) +{ + struct r600_bc_alu *alu = NULL; + int num_instr = 1; + + init_gpr(alu_first); + + LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { + num_instr++; + } + + if (num_instr == 1) { + check_scalar(bc, alu_first); + + } else { +/* check_read_slots(bc, bc->cf_last->curr_bs_head);*/ + check_vector(bc, alu_first); + LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) { + check_vector(bc, alu); + } + } + return 0; +} + int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) { struct r600_bc_alu *nalu = r600_bc_alu(); @@ -150,6 +428,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int } bc->cf_last->inst = (type << 3); } + if (!bc->cf_last->curr_bs_head) { + bc->cf_last->curr_bs_head = nalu; + LIST_INITHEAD(&nalu->bs_list); + } else { + LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list); + } if (alu->last && (bc->cf_last->ndw >> 1) >= 124) { bc->force_add_cf = 1; } @@ -180,12 +464,21 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* each alu use 2 dwords */ bc->cf_last->ndw += 2; bc->ndw += 2; + + if (bc->use_mem_constant) + bc->cf_last->kcache0_mode = 2; + + /* process cur ALU instructions for bank swizzle */ + if (alu->last) { + check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head); + bc->cf_last->curr_bs_head = NULL; + } return 0; } int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) { - return r600_bc_add_alu_type(bc, alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU); + return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU)); } int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) @@ -198,6 +491,7 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { return 0; } + /* all same on EG */ if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP || bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE || bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL || @@ -207,6 +501,7 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) { return 0; } + /* same on EG */ if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) && (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) || LIST_IS_EMPTY(&bc->cf_last->alu)) { @@ -289,6 +584,7 @@ int r600_bc_add_cfinst(struct r600_bc *bc, int inst) return 0; } +/* common to all 3 families */ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | @@ -306,6 +602,7 @@ static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsign return 0; } +/* common to all 3 families */ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsigned id) { bc->bytecode[id++] = S_SQ_TEX_WORD0_TEX_INST(tex->inst) | @@ -335,45 +632,50 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign return 0; } +/* r600 only, r700/eg bits in r700_asm.c */ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id) { unsigned i; /* don't replace gpr by pv or ps for destination register */ + bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | + S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) | + S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | + S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | + S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | + S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) | + S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | + S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | + S_SQ_ALU_WORD0_LAST(alu->last); + if (alu->is_op3) { - bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_LAST(alu->last); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) | + S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) | S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) | S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) | S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle); } else { - bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) | - S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) | - S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) | - S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) | - S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) | - S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) | - S_SQ_ALU_WORD0_LAST(alu->last); bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) | S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) | + S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) | S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) | S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) | S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0) | + S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) | S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } if (alu->last) { + if (alu->nliteral && !alu->literal_added) { + R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst); + } for (i = 0; i < alu->nliteral; i++) { bc->bytecode[id++] = alu->value[i]; } @@ -381,6 +683,7 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign return 0; } +/* common for r600/r700 - eg in eg_asm.c */ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) { unsigned id = cf->id; @@ -388,9 +691,12 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): - bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1); + bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) | + S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode); + bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | S_SQ_CF_ALU_WORD1_BARRIER(1) | + S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == 0 ? cf->r6xx_uses_waterfall : 0) | S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1); break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: @@ -465,6 +771,8 @@ int r600_bc_build(struct r600_bc *bc) break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: break; case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: @@ -488,7 +796,10 @@ int r600_bc_build(struct r600_bc *bc) return -ENOMEM; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { addr = cf->addr; - r = r600_bc_cf_build(bc, cf); + if (bc->chiprev == 2) + r = eg_bc_cf_build(bc, cf); + else + r = r600_bc_cf_build(bc, cf); if (r) return r; switch (cf->inst) { @@ -500,6 +811,7 @@ int r600_bc_build(struct r600_bc *bc) r = r600_bc_alu_build(bc, alu, addr); break; case 1: + case 2: /* eg alu is same encoding as r700 */ r = r700_bc_alu_build(bc, alu, addr); break; default: @@ -533,6 +845,8 @@ int r600_bc_build(struct r600_bc *bc) break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: + case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: