r600g: add missing literals
[mesa.git] / src / gallium / drivers / r600 / r600_asm.c
index 386adde6b8718ac8b7d7afad0fd573d5d83e8b6c..6483dac70398dfed47c849b01d6e73796796b525 100644 (file)
@@ -76,6 +76,27 @@ int r600_bc_init(struct r600_bc *bc, enum radeon_family family)
 {
        LIST_INITHEAD(&bc->cf);
        bc->family = family;
+       switch (bc->family) {
+       case CHIP_R600:
+       case CHIP_RV610:
+       case CHIP_RV630:
+       case CHIP_RV670:
+       case CHIP_RV620:
+       case CHIP_RV635:
+       case CHIP_RS780:
+       case CHIP_RS880:
+               bc->chiprev = 0;
+               break;
+       case CHIP_RV770:
+       case CHIP_RV730:
+       case CHIP_RV710:
+       case CHIP_RV740:
+               bc->chiprev = 1;
+               break;
+       default:
+               R600_ERR("unknown family %d\n", bc->family);
+               return -EINVAL;
+       }
        return 0;
 }
 
@@ -91,6 +112,7 @@ static int r600_bc_add_cf(struct r600_bc *bc)
        bc->cf_last = cf;
        bc->ncf++;
        bc->ndw += 2;
+       bc->force_add_cf = 0;
        return 0;
 }
 
@@ -106,7 +128,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
        return 0;
 }
 
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
 {
        struct r600_bc_alu *nalu = r600_bc_alu();
        struct r600_bc_alu *lalu;
@@ -118,15 +140,18 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
        nalu->nliteral = 0;
 
        /* cf can contains only alu or only vtx or only tex */
-       if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
-               (bc->cf_last->ndw >> 1) >= 120) {
+       if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
+               bc->force_add_cf) {
                /* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */
                r = r600_bc_add_cf(bc);
                if (r) {
                        free(nalu);
                        return r;
                }
-               bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3;
+               bc->cf_last->inst = (type << 3);
+       }
+       if (alu->last && (bc->cf_last->ndw >> 1) >= 124) {
+               bc->force_add_cf = 1;
        }
        /* number of gpr == the last gpr used in any alu */
        for (i = 0; i < 3; i++) {
@@ -158,29 +183,44 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
        return 0;
 }
 
+int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+{
+       return r600_bc_add_alu_type(bc, alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
+}
+
 int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
 {
        struct r600_bc_alu *alu;
 
        if (bc->cf_last == NULL) {
-               R600_ERR("no last CF\n");
-               return -EINVAL;
+               return 0;
        }
        if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
                return 0;
        }
-       if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+       if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
+           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
+               return 0;
+       }
+       if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
+            (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
                LIST_IS_EMPTY(&bc->cf_last->alu)) {
                R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
                return -EINVAL;
        }
        alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
-       if (!alu->last || !alu->nliteral) {
+       if (!alu->last || !alu->nliteral || alu->literal_added) {
                return 0;
        }
        memcpy(alu->value, value, 4 * 4);
        bc->cf_last->ndw += alu->nliteral;
        bc->ndw += alu->nliteral;
+       alu->literal_added = 1;
        return 0;
 }
 
@@ -237,6 +277,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
        return 0;
 }
 
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
+{
+       int r;
+       r = r600_bc_add_cf(bc);
+       if (r)
+               return r;
+
+       bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
+       bc->cf_last->inst = inst;
+       return 0;
+}
+
 static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
 {
        bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
@@ -283,43 +335,49 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
        return 0;
 }
 
-int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
+static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 {
        unsigned i;
 
        /* don't replace gpr by pv or ps for destination register */
+       bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
+                               S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
+                               S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
+                               S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
+                               S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
+                               S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
+                               S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
+                               S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
+                               S_SQ_ALU_WORD0_LAST(alu->last);
+
        if (alu->is_op3) {
-               bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
-                                       S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
-                                       S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
-                                       S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
-                                       S_SQ_ALU_WORD0_LAST(alu->last);
                bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
                                        S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+                                       S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
                                        S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
                                        S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
+                                       S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
                                        S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
                                        S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
                                        S_SQ_ALU_WORD1_OP3_ALU_INST(alu->inst) |
                                        S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
        } else {
-               bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
-                                       S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
-                                       S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
-                                       S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
-                                       S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
-                                       S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
-                                       S_SQ_ALU_WORD0_LAST(alu->last);
                bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
                                        S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
+                                       S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
                                        S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
                                        S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
                                        S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
                                        S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
                                        S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
-                                       S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
+                                       S_SQ_ALU_WORD1_BANK_SWIZZLE(0) |
+                                       S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
+                                       S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
        }
        if (alu->last) {
+               if (alu->nliteral && !alu->literal_added) {
+                       R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n");
+               }
                for (i = 0; i < alu->nliteral; i++) {
                        bc->bytecode[id++] = alu->value[i];
                }
@@ -327,12 +385,13 @@ int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
        return 0;
 }
 
-int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
+static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 {
        unsigned id = cf->id;
 
        switch (cf->inst) {
        case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+       case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
                bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1);
                bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
                                        S_SQ_CF_ALU_WORD1_BARRIER(1) |
@@ -360,6 +419,20 @@ int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
                        S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
                break;
+       case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+       case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+       case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+               bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
+               bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+                                       S_SQ_CF_WORD1_BARRIER(1) |
+                                       S_SQ_CF_WORD1_COND(cf->cond) |
+                                       S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+
+               break;
        default:
                R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
                return -EINVAL;
@@ -376,6 +449,8 @@ int r600_bc_build(struct r600_bc *bc)
        unsigned addr;
        int r;
 
+       if (bc->callstack[0].max > 0)
+           bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
 
        /* first path compute addr of each CF block */
        /* addr start after all the CF instructions */
@@ -383,6 +458,7 @@ int r600_bc_build(struct r600_bc *bc)
        LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
                switch (cf->inst) {
                case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+               case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
                        break;
                case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
                case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -394,6 +470,14 @@ int r600_bc_build(struct r600_bc *bc)
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
                        break;
+               case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+               case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+                       break;
                default:
                        R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
                        return -EINVAL;
@@ -413,22 +497,13 @@ int r600_bc_build(struct r600_bc *bc)
                        return r;
                switch (cf->inst) {
                case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+               case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
                        LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
-                               switch (bc->family) {
-                               case CHIP_R600:
-                               case CHIP_RV610:
-                               case CHIP_RV630:
-                               case CHIP_RV670:
-                               case CHIP_RV620:
-                               case CHIP_RV635:
-                               case CHIP_RS780:
-                               case CHIP_RS880:
+                               switch(bc->chiprev) {
+                               case 0:
                                        r = r600_bc_alu_build(bc, alu, addr);
                                        break;
-                               case CHIP_RV770:
-                               case CHIP_RV730:
-                               case CHIP_RV710:
-                               case CHIP_RV740:
+                               case 1:
                                        r = r700_bc_alu_build(bc, alu, addr);
                                        break;
                                default:
@@ -462,6 +537,13 @@ int r600_bc_build(struct r600_bc *bc)
                        break;
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
                case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+               case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+               case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+               case V_SQ_CF_WORD1_SQ_CF_INST_POP:
                        break;
                default:
                        R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);