r600g: add initial if/else/endif support
author Dave Airlie <airlied@redhat.com>
Wed, 25 Aug 2010 05:57:41 +0000 (15:57 +1000)
committer Dave Airlie <airlied@redhat.com>
Fri, 27 Aug 2010 05:30:07 +0000 (15:30 +1000)
This adds handling for some more CF instructions and conditions,
and also adds a parameter for stack size emission.

These seem to pass on VS with the stack size hack, but not on FS.

TODO: fix FS + stack size calcs

src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_sq.h

index e6efae4c56d3131e0f571fcbe3ba7c80605d1069..d83bb346484583167d2b79fc1159c128a552b6da 100644 (file)
@@ -128,7 +128,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
        return 0;
 }
 
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
 {
        struct r600_bc_alu *nalu = r600_bc_alu();
        struct r600_bc_alu *lalu;
@@ -140,7 +140,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
        nalu->nliteral = 0;
 
        /* cf can contains only alu or only vtx or only tex */
-       if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+       if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
                bc->force_add_cf) {
                /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */
                r = r600_bc_add_cf(bc);
@@ -148,7 +148,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
                        free(nalu);
                        return r;
                }
-               bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3;
+               bc->cf_last->inst = (type << 3);
        }
        if (alu->last && (bc->cf_last->ndw >> 1) >= 124) {
                bc->force_add_cf = 1;
@@ -183,6 +183,11 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
        return 0;
 }
 
+int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+{
+       return r600_bc_add_alu_type(bc, alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
+}
+
 int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
 {
        struct r600_bc_alu *alu;
@@ -193,7 +198,13 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
        if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
                return 0;
        }
-       if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+       if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
+               return 0;
+       }
+       if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
+            (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
                LIST_IS_EMPTY(&bc->cf_last->alu)) {
                R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
                return -EINVAL;
@@ -262,6 +273,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
        return 0;
 }
 
+/* Add a CF via r600_bc_add_cf(); on success set cf_last's cond (ACTIVE) and inst. */
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
+{
+       int r = r600_bc_add_cf(bc);
+
+       if (r == 0) {
+               bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
+               bc->cf_last->inst = inst;
+       }
+       return r;
+}
+
 static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
 {
        bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
@@ -342,7 +365,9 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
                                        S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
                                        S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
                                        S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
-                                       S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
+                                       S_SQ_ALU_WORD1_BANK_SWIZZLE(0) |
+                                       S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
+                                       S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
        }
        if (alu->last) {
                for (i = 0; i < alu->nliteral; i++) {
@@ -358,6 +383,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 
        switch (cf->inst) {
        case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+       case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
                bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1);
                bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
                                        S_SQ_CF_ALU_WORD1_BARRIER(1) |
@@ -385,6 +411,16 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
                        S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
                break;
+       case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+       case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+       case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+               bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
+               bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+                                       S_SQ_CF_WORD1_BARRIER(1) |
+                                       S_SQ_CF_WORD1_COND(cf->cond) |
+                                       S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+
+               break;
        default:
                R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
                return -EINVAL;
@@ -401,13 +437,13 @@ int r600_bc_build(struct r600_bc *bc)
        unsigned addr;
        int r;
 
-
        /* first path compute addr of each CF block */
        /* addr start after all the CF instructions */
        addr = bc->cf_last->id + 2;
        LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
                switch (cf->inst) {
                case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+               case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
                        break;
                case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
                case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -419,6 +455,12 @@ int r600_bc_build(struct r600_bc *bc)
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
                        break;
+               case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+               case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+                       /* hack */
+                       bc->nstack = 3;
+                       break;
                default:
                        R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
                        return -EINVAL;
@@ -438,6 +480,7 @@ int r600_bc_build(struct r600_bc *bc)
                        return r;
                switch (cf->inst) {
                case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+               case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
                        LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
                                switch(bc->chiprev) {
                                case 0:
@@ -477,6 +520,9 @@ int r600_bc_build(struct r600_bc *bc)
                        break;
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+               case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_POP:
                        break;
                default:
                        R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
index e944bd02de39d9a5cced84a91d00fad9e2f08f48..dbd885caf91747bfaf2f1a4937cac93bcb544de5 100644 (file)
@@ -47,6 +47,7 @@ struct r600_bc_alu {
        unsigned                        inst;
        unsigned                        last;
        unsigned                        is_op3;
+       unsigned                        predicate;
        unsigned                        nliteral;
        unsigned                        literal_added;
        u32                             value[4];
@@ -114,12 +115,25 @@ struct r600_bc_cf {
        unsigned                        addr;
        unsigned                        ndw;
        unsigned                        id;
+       unsigned                        cond;
+       unsigned                        pop_count;
+       unsigned                        cf_addr; /* control flow addr */
        struct list_head                alu;
        struct list_head                tex;
        struct list_head                vtx;
        struct r600_bc_output           output;
 };
 
+#define FC_NONE 0
+#define FC_IF 1
+#define FC_LOOP 2
+
+struct r600_cf_stack_entry {
+       int type; /* FC_* construct kind; tgsi_if stores FC_IF here */
+       struct r600_bc_cf *start; /* CF that opened the construct; tgsi_if stores its JUMP here */
+       struct r600_bc_cf *mid; /* used to store the else point */
+};
+       
 struct r600_bc {
        enum radeon_family              family;
        int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */
@@ -128,9 +142,13 @@ struct r600_bc {
        unsigned                        ndw;
        unsigned                        ncf;
        unsigned                        ngpr;
+       unsigned                        nstack;
        unsigned                        nresource;
        unsigned                        force_add_cf;
        u32                             *bytecode;
+
+       u32 fc_sp;
+       struct r600_cf_stack_entry fc_stack[32];
 };
 
 int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
@@ -140,5 +158,6 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
 int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
 int r600_bc_build(struct r600_bc *bc);
-
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
 #endif
index 1470bb5072a98c2d1f5fc11741abedcfbf99d561..052b4971f316f74c9a36cc477b45013da935160c 100644 (file)
@@ -144,7 +144,8 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
                state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
        }
        state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
-       state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
+       state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
+               S_028868_STACK_SIZE(rshader->bc.nstack);
        rpshader->rstate = state;
        rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
        rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
@@ -200,7 +201,8 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
        state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
                                                        S_0286CC_PERSP_GRADIENT_ENA(1);
        state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
-       state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
+       state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
+               S_028868_STACK_SIZE(rshader->bc.nstack);
        state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
        rpshader->rstate = state;
        rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
@@ -276,10 +278,12 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
                R600_ERR("predicate unsupported\n");
                return -EINVAL;
        }
+#if 0
        if (i->Instruction.Label) {
                R600_ERR("label unsupported\n");
                return -EINVAL;
        }
+#endif
        for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
                if (i->Src[j].Register.Indirect ||
                        i->Src[j].Register.Dimension ||
@@ -1721,6 +1725,90 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
        return tgsi_helper_copy(ctx, inst);
 }
 
+/*
+ * Emit one ALU instruction `opcode` with alu.predicate set, added through
+ * r600_bc_add_alu_type() with the ALU_PUSH_BEFORE CF type.
+ *
+ * Source 0 is set up from the current TGSI instruction's Src[0] by
+ * tgsi_src()/tgsi_chan(); source 1 is V_SQ_ALU_SRC_0. The destination is
+ * channel 0 of ctx->temp_reg.
+ *
+ * Returns 0 on success, otherwise the error reported by tgsi_src() or
+ * r600_bc_add_alu_type().
+ */
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bc_alu alu;
+       int r;
+
+       memset(&alu, 0, sizeof(struct r600_bc_alu));
+       alu.inst = opcode;
+       alu.predicate = 1;
+
+       alu.dst.sel = ctx->temp_reg;
+       alu.dst.write = 1;
+       alu.dst.chan = 0;
+
+       r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+       if (r)
+               return r;
+       alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+       alu.src[1].sel = V_SQ_ALU_SRC_0;
+       alu.src[1].chan = 0;
+
+       alu.last = 1;
+
+       /* the add status is this function's status; no extra bookkeeping follows */
+       return r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
+}
+
+/*
+ * Add a POP CF instruction and set its pop_count to `pops`.
+ * Returns 0 on success or the error from r600_bc_add_cfinst().
+ */
+static int pops(struct r600_shader_ctx *ctx, int pops)
+{
+       int r = r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);
+
+       /* if the add failed, no POP CF was set up, so cf_last must not be patched */
+       if (r)
+               return r;
+       ctx->bc->cf_last->pop_count = pops;
+       return 0;
+}
+
+/*
+ * TGSI IF: emit a PRED_SETNE predicate op, add a JUMP CF, and push an FC_IF
+ * entry on bc->fc_stack whose `start` is that JUMP. The JUMP's cf_addr is
+ * left for tgsi_else()/tgsi_endif() to fill in.
+ *
+ * Returns 0 on success, -EINVAL if the flow-control stack is full, or the
+ * error from emit_logic_pred()/r600_bc_add_cfinst().
+ */
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+       int r;
+
+       /* fc_sp is pre-incremented below, so the next index must still be in range */
+       if (ctx->bc->fc_sp + 1 >= sizeof(ctx->bc->fc_stack) / sizeof(ctx->bc->fc_stack[0])) {
+               R600_ERR("if nesting too deep\n");
+               return -EINVAL;
+       }
+
+       r = emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+       if (r)
+               return r;
+
+       r = r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+       if (r)
+               return r;
+
+       /* push only once the JUMP exists, so a failure leaves the stack untouched */
+       ctx->bc->fc_sp++;
+       ctx->bc->fc_stack[ctx->bc->fc_sp].type = FC_IF;
+       ctx->bc->fc_stack[ctx->bc->fc_sp].mid = NULL;
+       ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+       return 0;
+}
+
+/*
+ * TGSI ELSE: add an ELSE CF (pop_count 1), record it as the `mid` of the
+ * innermost FC_IF entry, and point that entry's JUMP at the ELSE.
+ *
+ * Returns 0 on success, -EINVAL if the innermost entry is not an IF, or the
+ * error from r600_bc_add_cfinst().
+ */
+static int tgsi_else(struct r600_shader_ctx *ctx)
+{
+       struct r600_cf_stack_entry *fc = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+       int r;
+
+       /* fc->start is only set by tgsi_if(); refuse to patch through it otherwise */
+       if (fc->type != FC_IF) {
+               R600_ERR("else without matching if in shader\n");
+               return -EINVAL;
+       }
+
+       r = r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
+       if (r)
+               return r;
+       ctx->bc->cf_last->pop_count = 1;
+
+       /* fixup mid */
+       fc->mid = ctx->bc->cf_last;
+       fc->start->cf_addr = ctx->bc->cf_last->id;
+       return 0;
+}
+
+/*
+ * TGSI ENDIF: add a POP CF, patch the pending branch of the innermost FC_IF
+ * entry to (POP id + 2), and pop that entry off bc->fc_stack.
+ *
+ * Returns 0 on success, -1 if the innermost entry is not an IF, or the
+ * error from pops().
+ */
+static int tgsi_endif(struct r600_shader_ctx *ctx)
+{
+       struct r600_cf_stack_entry *fc = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+       int r;
+
+       /* check balance first so a stray ENDIF does not emit a POP */
+       if (fc->type != FC_IF) {
+               R600_ERR("if/endif unbalanced in shader\n");
+               return -1;
+       }
+
+       r = pops(ctx, 1);
+       if (r)
+               return r;
+
+       if (fc->mid == NULL) {
+               /* no ELSE was seen: the JUMP itself gets the target and a pop_count */
+               fc->start->cf_addr = ctx->bc->cf_last->id + 2;
+               fc->start->pop_count = 1;
+       } else {
+               fc->mid->cf_addr = ctx->bc->cf_last->id + 2;
+       }
+       ctx->bc->fc_sp--;
+
+       return 0;
+}
+
 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_ARL,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_MOV,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
@@ -1799,12 +1887,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_DP2,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
        {TGSI_OPCODE_TXL,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_BRK,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_IF,        0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_IF,        0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
        /* gap */
        {75,                    0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {76,                    0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_ELSE,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_ENDIF,     0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ELSE,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
+       {TGSI_OPCODE_ENDIF,     0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
        /* gap */
        {79,                    0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {80,                    0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
index ad4de0b0726e5f9e03047d5d2aff41f17336f361..b4ed435e91f77d8dfb782ef0edae17d9561e7168 100644 (file)
 #define   G_SQ_TEX_WORD2_SRC_SEL_W(x)                                (((x) >> 29) & 0x7)
 #define   C_SQ_TEX_WORD2_SRC_SEL_W                                   0x1FFFFFFF
 
+#define V_SQ_CF_COND_ACTIVE                             0x00
+#define V_SQ_CF_COND_FALSE                              0x01
+#define V_SQ_CF_COND_BOOL                               0x02
+#define V_SQ_CF_COND_NOT_BOOL                           0x03
+
 #endif