r600g : add basic loop support.
authorDave Airlie <airlied@redhat.com>
Fri, 27 Aug 2010 06:08:55 +0000 (16:08 +1000)
committerDave Airlie <airlied@redhat.com>
Mon, 30 Aug 2010 03:38:33 +0000 (13:38 +1000)
Adds BGNLOOP, BRK, CONT, ENDLOOP support, ported from r600c.

17 piglits more on r300g.tests.

src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_shader.c

index d83bb346484583167d2b79fc1159c128a552b6da..03fe950186783779de9f8c7f8c394f736e000e4f 100644 (file)
@@ -200,6 +200,10 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
        }
        if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
            bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
            bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
                return 0;
        }
@@ -414,6 +418,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
        case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
        case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
        case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
                bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
                bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
                                        S_SQ_CF_WORD1_BARRIER(1) |
@@ -437,6 +445,9 @@ int r600_bc_build(struct r600_bc *bc)
        unsigned addr;
        int r;
 
+       if (bc->callstack[0].max > 0)
+           bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+
        /* first path compute addr of each CF block */
        /* addr start after all the CF instructions */
        addr = bc->cf_last->id + 2;
@@ -458,8 +469,10 @@ int r600_bc_build(struct r600_bc *bc)
                case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
                case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
                case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-                       /* hack */
-                       bc->nstack = 3;
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
                        break;
                default:
                        R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -520,6 +533,10 @@ int r600_bc_build(struct r600_bc *bc)
                        break;
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
                case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
                case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
                case V_SQ_CF_WORD1_SQ_CF_INST_POP:
index dbd885caf91747bfaf2f1a4937cac93bcb544de5..bb4f4b77b3343a15086ec24ffc781ed1ee386709 100644 (file)
@@ -127,11 +127,23 @@ struct r600_bc_cf {
 #define FC_NONE 0
 #define FC_IF 1
 #define FC_LOOP 2
+#define FC_REP 3
+#define FC_PUSH_VPM 4
+#define FC_PUSH_WQM 5
 
 struct r600_cf_stack_entry {
        int type;
        struct r600_bc_cf *start;
-       struct r600_bc_cf *mid; /* used to store the else point */
+       struct r600_bc_cf **mid; /* used to store the else point */
+       int num_mid;
+};
+
+#define SQ_MAX_CALL_DEPTH 0x00000020
+struct r600_cf_callstack {
+       unsigned fc_sp_before_entry;
+       int sub_desc_index;
+       int current;
+       int max;
 };
        
 struct r600_bc {
@@ -149,6 +161,9 @@ struct r600_bc {
 
        u32 fc_sp;
        struct r600_cf_stack_entry fc_stack[32];
+
+       unsigned call_sp;
+       struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
 };
 
 int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
index dabc7bec3058e568f1a06a70404f96d55e6ba6e7..82f4d73e43fe44e46afe979be5cbf42d5c3123ce 100644 (file)
@@ -1650,8 +1650,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
        struct r600_bc_alu_src r600_src[3];
        struct r600_bc_alu alu;
-       uint32_t use_temp = 0;
-       int i, r;
+       int r;
 
        /* result.x = 2^floor(src); */
        if (inst->Dst[0].Register.WriteMask & 1) {
@@ -1753,8 +1752,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-       struct r600_bc_alu alu, *lalu;
-       struct r600_bc_cf *last;
+       struct r600_bc_alu alu;
        int r;
 
        memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1777,7 +1775,6 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
        r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
        if (r)
                return r;
-
        return 0;
 }
 
@@ -1788,29 +1785,158 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
        return 0;
 }
 
-static int tgsi_if(struct r600_shader_ctx *ctx)
+static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
 {
-       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       switch(reason) {
+       case FC_PUSH_VPM:
+               ctx->bc->callstack[ctx->bc->call_sp].current--;
+               break;
+       case FC_PUSH_WQM:
+       case FC_LOOP:
+               ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+               break;
+       case FC_REP:
+               /* TOODO : for 16 vp asic should -= 2; */
+               ctx->bc->callstack[ctx->bc->call_sp].current --;
+               break;
+       }
+}
 
-       emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
+{
+       if (check_max_only) {
+               int diff;
+               switch (reason) {
+               case FC_PUSH_VPM:
+                       diff = 1;
+                       break;
+               case FC_PUSH_WQM:
+                       diff = 4;
+                       break;
+               }
+               if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
+                   ctx->bc->callstack[ctx->bc->call_sp].max) {
+                       ctx->bc->callstack[ctx->bc->call_sp].max =
+                               ctx->bc->callstack[ctx->bc->call_sp].current + diff;
+               }
+               return;
+       }                                       
+       switch (reason) {
+       case FC_PUSH_VPM:
+               ctx->bc->callstack[ctx->bc->call_sp].current++;
+               break;
+       case FC_PUSH_WQM:
+       case FC_LOOP:
+               ctx->bc->callstack[ctx->bc->call_sp].current += 4;
+               break;
+       case FC_REP:
+               ctx->bc->callstack[ctx->bc->call_sp].current++;
+               break;
+       }
+
+       if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
+           ctx->bc->callstack[ctx->bc->call_sp].max) {
+               ctx->bc->callstack[ctx->bc->call_sp].max =
+                       ctx->bc->callstack[ctx->bc->call_sp].current;
+       }
+}
+
+static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
+{
+       struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
+
+       sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
+                                               sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
+       sp->mid[sp->num_mid] = ctx->bc->cf_last;
+       sp->num_mid++;
+}
 
+static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
+{
        ctx->bc->fc_sp++;
-       ctx->bc->fc_stack[ctx->bc->fc_sp].type = FC_IF;
-       ctx->bc->fc_stack[ctx->bc->fc_sp].mid = NULL;
+       ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
+       ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+}
+
+static void fc_poplevel(struct r600_shader_ctx *ctx)
+{
+       struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+       if (sp->mid) {
+               free(sp->mid);
+               sp->mid = NULL;
+       }
+       sp->num_mid = 0;
+       sp->start = NULL;
+       sp->type = 0;
+       ctx->bc->fc_sp--;
+}
+
+#if 0
+static int emit_return(struct r600_shader_ctx *ctx)
+{
+       r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
+       return 0;
+}
+
+static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
+{
+
        r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+       ctx->bc->cf_last->pop_count = pops;
+       /* TODO work out offset */
+       return 0;
+}
 
-       ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
+{
+       return 0;
+}
+
+static void emit_testflag(struct r600_shader_ctx *ctx)
+{
+       
+}
+
+static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
+{
+       emit_testflag(ctx);
+       emit_jump_to_offset(ctx, 1, 4);
+       emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
+       pops(ctx, ifidx + 1);
+       emit_return(ctx);
+}
+
+static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
+{
+       emit_testflag(ctx);
+
+       r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+       ctx->bc->cf_last->pop_count = 1;
+
+       fc_set_mid(ctx, fc_sp);
+
+       pops(ctx, 1);
+}
+#endif
+
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+       emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+
+       r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+
+       fc_pushlevel(ctx, FC_IF);
+
+       callstack_check_depth(ctx, FC_PUSH_VPM, 0);
        return 0;
 }
 
 static int tgsi_else(struct r600_shader_ctx *ctx)
 {
-       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
        r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
        ctx->bc->cf_last->pop_count = 1;
 
-       /* fixup mid */
-       ctx->bc->fc_stack[ctx->bc->fc_sp].mid = ctx->bc->cf_last;
+       fc_set_mid(ctx, ctx->bc->fc_sp);
        ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
        return 0;
 }
@@ -1827,10 +1953,76 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
                ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
                ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
        } else {
-               ctx->bc->fc_stack[ctx->bc->fc_sp].mid->cf_addr = ctx->bc->cf_last->id + 2;
+               ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
        }
-       ctx->bc->fc_sp--;
+       fc_poplevel(ctx);
+
+       callstack_decrease_current(ctx, FC_PUSH_VPM);
+       return 0;
+}
+
+static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
+{
+       r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
+
+       fc_pushlevel(ctx, FC_LOOP);
 
+       /* check stack depth */
+       callstack_check_depth(ctx, FC_LOOP, 0);
+       return 0;
+}
+
+static int tgsi_endloop(struct r600_shader_ctx *ctx)
+{
+       int i;
+
+       r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);
+
+       if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
+               R600_ERR("loop/endloop in shader code are not paired.\n");
+               return -EINVAL;
+       }
+
+       /* fixup loop pointers - from r600isa
+          LOOP END points to CF after LOOP START,
+          LOOP START point to CF after LOOP END
+          BRK/CONT point to LOOP END CF
+       */
+       ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
+
+       ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+
+       for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
+               ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
+       }
+       /* TODO add LOOPRET support */
+       fc_poplevel(ctx);
+       callstack_decrease_current(ctx, FC_LOOP);
+       return 0;
+}
+
+static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
+{
+       unsigned int fscp;
+
+       for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+       {
+               if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+                       break;
+       }
+
+       if (fscp == 0) {
+               R600_ERR("Break not inside loop/endloop pair\n");
+               return -EINVAL;
+       }
+
+       r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+       ctx->bc->cf_last->pop_count = 1;
+
+       fc_set_mid(ctx, fscp);
+
+       pops(ctx, 1);
+       callstack_check_depth(ctx, FC_PUSH_VPM, 1);
        return 0;
 }
 
@@ -1911,7 +2103,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_DIV,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_DP2,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
        {TGSI_OPCODE_TXL,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_BRK,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_BRK,       0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
        {TGSI_OPCODE_IF,        0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
        /* gap */
        {75,                    0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1937,12 +2129,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
        {TGSI_OPCODE_SAD,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_TXF,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_TXQ,       0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_CONT,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_CONT,      0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
        {TGSI_OPCODE_EMIT,      0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        {TGSI_OPCODE_ENDPRIM,   0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_BGNLOOP,   0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_BGNLOOP,   0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
        {TGSI_OPCODE_BGNSUB,    0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-       {TGSI_OPCODE_ENDLOOP,   0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+       {TGSI_OPCODE_ENDLOOP,   0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
        {TGSI_OPCODE_ENDSUB,    0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
        /* gap */
        {103,                   0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},