From a03d456f5a41926e39194de70b2d50776e64b8a2 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 25 Aug 2010 15:57:41 +1000 Subject: [PATCH] r600g: add initial if/else/endif support this adds handling for some more CF instructions and conditions also adds parameter for stack size emission These seem to pass on VS with the stack size hack but not on FS, TODO: fix FS + stack size calcs --- src/gallium/drivers/r600/r600_asm.c | 58 +++++++++++++-- src/gallium/drivers/r600/r600_asm.h | 21 +++++- src/gallium/drivers/r600/r600_shader.c | 98 ++++++++++++++++++++++++-- src/gallium/drivers/r600/r600_sq.h | 5 ++ 4 files changed, 170 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index e6efae4c56d..d83bb346484 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -128,7 +128,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output) return 0; } -int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) { struct r600_bc_alu *nalu = r600_bc_alu(); struct r600_bc_alu *lalu; @@ -140,7 +140,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) nalu->nliteral = 0; /* cf can contains only alu or only vtx or only tex */ - if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) || bc->force_add_cf) { /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */ r = r600_bc_add_cf(bc); @@ -148,7 +148,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) free(nalu); return r; } - bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3; + bc->cf_last->inst = (type << 3); } if (alu->last && (bc->cf_last->ndw >> 1) >= 124) { bc->force_add_cf = 1; @@ -183,6 +183,11 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) return 0; } +int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu) +{ + return r600_bc_add_alu_type(bc, alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU); +} + int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) { struct r600_bc_alu *alu; @@ -193,7 +198,13 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value) if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) { return 0; } - if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) || + if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP || + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE || + bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) { + return 0; + } + if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) && + (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) || LIST_IS_EMPTY(&bc->cf_last->alu)) { R600_ERR("last CF is not ALU (%p)\n", bc->cf_last); return -EINVAL; @@ -262,6 +273,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex) return 0; } +int r600_bc_add_cfinst(struct r600_bc *bc, int inst) +{ + int r; + r = r600_bc_add_cf(bc); + if (r) + return r; + + bc->cf_last->cond = V_SQ_CF_COND_ACTIVE; + bc->cf_last->inst = inst; + return 0; +} + static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id) { bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | @@ -342,7 +365,9 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) | S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) | S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) | - S_SQ_ALU_WORD1_BANK_SWIZZLE(0); + S_SQ_ALU_WORD1_BANK_SWIZZLE(0) | + S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) | + S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate); } if (alu->last) { for (i = 0; i < alu->nliteral; i++) { @@ -358,6 +383,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1); bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) | S_SQ_CF_ALU_WORD1_BARRIER(1) | @@ -385,6 +411,16 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf) S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) | S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program); break; + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: + bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1); + bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) | + S_SQ_CF_WORD1_BARRIER(1) | + S_SQ_CF_WORD1_COND(cf->cond) | + S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); + + break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); return -EINVAL; @@ -401,13 +437,13 @@ int r600_bc_build(struct r600_bc *bc) unsigned addr; int r; - /* first path compute addr of each CF block */ /* addr start after all the CF instructions */ addr = bc->cf_last->id + 2; LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) { switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): break; case V_SQ_CF_WORD1_SQ_CF_INST_TEX: case V_SQ_CF_WORD1_SQ_CF_INST_VTX: @@ -419,6 +455,12 @@ int r600_bc_build(struct r600_bc *bc) case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: break; + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: + /* hack */ + bc->nstack = 3; + break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); return -EINVAL; @@ -438,6 +480,7 @@ int r600_bc_build(struct r600_bc *bc) return r; switch (cf->inst) { case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3): + case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3): LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) { switch(bc->chiprev) { case 0: @@ -477,6 +520,9 @@ int r600_bc_build(struct r600_bc *bc) break; case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE: + case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: + case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: + case V_SQ_CF_WORD1_SQ_CF_INST_POP: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index e944bd02de3..dbd885caf91 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -47,6 +47,7 @@ struct r600_bc_alu { unsigned inst; unsigned last; unsigned is_op3; + unsigned predicate; unsigned nliteral; unsigned literal_added; u32 value[4]; @@ -114,12 +115,25 @@ struct r600_bc_cf { unsigned addr; unsigned ndw; unsigned id; + unsigned cond; + unsigned pop_count; + unsigned cf_addr; /* control flow addr */ struct list_head alu; struct list_head tex; struct list_head vtx; struct r600_bc_output output; }; +#define FC_NONE 0 +#define FC_IF 1 +#define FC_LOOP 2 + +struct r600_cf_stack_entry { + int type; + struct r600_bc_cf *start; + struct r600_bc_cf *mid; /* used to store the else point */ +}; + struct r600_bc { enum radeon_family family; int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */ @@ -128,9 +142,13 @@ struct r600_bc { unsigned ndw; unsigned ncf; unsigned ngpr; + unsigned nstack; unsigned nresource; unsigned force_add_cf; u32 *bytecode; + + u32 fc_sp; + struct r600_cf_stack_entry fc_stack[32]; }; int r600_bc_init(struct r600_bc *bc, enum radeon_family family); @@ -140,5 +158,6 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx); int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex); int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output); int r600_bc_build(struct r600_bc *bc); - +int r600_bc_add_cfinst(struct r600_bc *bc, int inst); +int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type); #endif diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 1470bb5072a..052b4971f31 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -144,7 +144,8 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp; } state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2); - state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr); + state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack); rpshader->rstate = state; rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo); @@ -200,7 +201,8 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) | S_0286CC_PERSP_GRADIENT_ENA(1); state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000; - state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr); + state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) | + S_028868_STACK_SIZE(rshader->bc.nstack); state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps; rpshader->rstate = state; rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo); @@ -276,10 +278,12 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx) R600_ERR("predicate unsupported\n"); return -EINVAL; } +#if 0 if (i->Instruction.Label) { R600_ERR("label unsupported\n"); return -EINVAL; } +#endif for (j = 0; j < i->Instruction.NumSrcRegs; j++) { if (i->Src[j].Register.Indirect || i->Src[j].Register.Dimension || @@ -1721,6 +1725,90 @@ static int tgsi_exp(struct r600_shader_ctx *ctx) return tgsi_helper_copy(ctx, inst); } +static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + struct r600_bc_alu alu, *lalu; + struct r600_bc_cf *last; + int r; + + memset(&alu, 0, sizeof(struct r600_bc_alu)); + alu.inst = opcode; + alu.predicate = 1; + + alu.dst.sel = ctx->temp_reg; + alu.dst.write = 1; + alu.dst.chan = 0; + + r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]); + if (r) + return r; + alu.src[0].chan = tgsi_chan(&inst->Src[0], 0); + alu.src[1].sel = V_SQ_ALU_SRC_0; + alu.src[1].chan = 0; + + alu.last = 1; + + r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE); + if (r) + return r; + + return 0; +} + +static int pops(struct r600_shader_ctx *ctx, int pops) +{ + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP); + ctx->bc->cf_last->pop_count = pops; + return 0; +} + +static int tgsi_if(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + + emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE); + + ctx->bc->fc_sp++; + ctx->bc->fc_stack[ctx->bc->fc_sp].type = FC_IF; + ctx->bc->fc_stack[ctx->bc->fc_sp].mid = NULL; + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP); + + ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last; + return 0; +} + +static int tgsi_else(struct r600_shader_ctx *ctx) +{ + struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; + r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE); + ctx->bc->cf_last->pop_count = 1; + + /* fixup mid */ + ctx->bc->fc_stack[ctx->bc->fc_sp].mid = ctx->bc->cf_last; + ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id; + return 0; +} + +static int tgsi_endif(struct r600_shader_ctx *ctx) +{ + pops(ctx, 1); + if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) { + R600_ERR("if/endif unbalanced in shader\n"); + return -1; + } + + if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) { + ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2; + ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1; + } else { + ctx->bc->fc_stack[ctx->bc->fc_sp].mid->cf_addr = ctx->bc->cf_last->id + 2; + } + ctx->bc->fc_sp--; + + return 0; +} + static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_MOV, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2}, @@ -1799,12 +1887,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_DP2, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp}, {TGSI_OPCODE_TXL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_BRK, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_IF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if}, /* gap */ {75, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {76, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_ELSE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else}, + {TGSI_OPCODE_ENDIF, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif}, /* gap */ {79, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index ad4de0b0726..b4ed435e91f 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -603,4 +603,9 @@ #define G_SQ_TEX_WORD2_SRC_SEL_W(x) (((x) >> 29) & 0x7) #define C_SQ_TEX_WORD2_SRC_SEL_W 0x1FFFFFFF +#define V_SQ_CF_COND_ACTIVE 0x00 +#define V_SQ_CF_COND_FALSE 0x01 +#define V_SQ_CF_COND_BOOL 0x02 +#define V_SQ_CF_COND_NOT_BOOL 0x03 + #endif -- 2.30.2