bc->ncf++;
bc->ndw += 2;
bc->force_add_cf = 0;
+ bc->ar_loaded = 0;
return 0;
}
return 0;
}
+/* Load the AR register from the GPR bc->ar_reg with MOVA_INT.
+ *
+ * Does nothing and returns 0 when bc->ar_loaded is already set.
+ * Otherwise emits a single MOVA_INT ALU instruction whose source 0 is
+ * bc->ar_reg, then sets r6xx_uses_waterfall on bc->cf_last and marks
+ * bc->ar_loaded.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * r600_bytecode_add_alu() if the instruction could not be added (in that
+ * case neither r6xx_uses_waterfall nor ar_loaded is touched). */
+static int load_ar(struct r600_bytecode *bc)
+{
+ struct r600_bytecode_alu alu;
+ int r;
+
+ if (bc->ar_loaded)
+ return 0;
+
+ /* hack to avoid making MOVA the last instruction in the clause:
+ * once (ndw >> 1) of the current CF reaches 110, set force_add_cf
+ * before adding the MOVA.
+ * NOTE(review): presumably force_add_cf makes the add below start a
+ * new CF - confirm in r600_bytecode_add_alu_type. */
+ if ((bc->cf_last->ndw>>1) >= 110)
+ bc->force_add_cf = 1;
+
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = BC_INST(bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+ alu.src[0].sel = bc->ar_reg;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(bc, &alu);
+ if (r)
+ return r;
+
+ bc->cf_last->r6xx_uses_waterfall = 1;
+ bc->ar_loaded = 1; /* later calls return early until this is cleared again */
+ return 0;
+}
+
int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu, int type)
{
struct r600_bytecode_alu *nalu = r600_bytecode_alu();
}
bc->cf_last->inst = (type << 3);
+ /* Check AR usage and load it if required */
+ for (i = 0; i < 3; i++)
+ if (nalu->src[i].rel && !bc->ar_loaded)
+ load_ar(bc);
+
+ if (nalu->dst.rel && !bc->ar_loaded)
+ load_ar(bc);
+
/* Setup the kcache for this ALU instruction. This will start a new
* ALU clause if needed. */
if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
- unsigned ar_reg;
struct r600_shader_tgsi_instruction *inst_info;
struct r600_bytecode *bc;
struct r600_shader *shader;
memset(&alu, 0, sizeof(alu));
alu.inst = CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD_INT);
- alu.src[0].sel = ctx->ar_reg;
+ alu.src[0].sel = ctx->bc->ar_reg;
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[1].value = offset;
ar_reg = dst_reg;
} else {
- ar_reg = ctx->ar_reg;
+ ar_reg = ctx->bc->ar_reg;
}
memset(&vtx, 0, sizeof(vtx));
ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
- ctx.ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
+ ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
- ctx.temp_reg = ctx.ar_reg + 1;
+ ctx.temp_reg = ctx.bc->ar_reg + 1;
ctx.nliterals = 0;
ctx.literals = NULL;
alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
break;
case TGSI_OPCODE_UARL:
+ alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
break;
default:
assert(0);
return -1;
}
- if (alu.inst) {
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- alu.last = 1;
- alu.dst.sel = ctx->ar_reg;
- alu.dst.write = 1;
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- }
-
- /* TODO: Note that the MOVA can be avoided if we never use AR for
- * indexing non-CB registers in the current ALU clause. Similarly, we
- * need to load AR from ar_reg again if we started a new clause
- * between ARL and AR usage. The easy way to do that is to remove
- * the MOVA here, and load it for the first AR access after ar_reg
- * has been modified in each clause. */
- memset(&alu, 0, sizeof(struct r600_bytecode_alu));
- alu.inst = EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- if (inst->Instruction.Opcode == TGSI_OPCODE_UARL)
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- else {
- alu.src[0].sel = ctx->ar_reg;
- alu.src[0].chan = 0;
- }
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.last = 1;
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.write = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
+
+ ctx->bc->ar_loaded = 0;
return 0;
}
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
- /* TODO from r600c, ar values don't persist between clauses */
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- alu.dst.sel = ctx->ar_reg;
+ alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
- alu.src[0].sel = ctx->ar_reg;
- alu.dst.sel = ctx->ar_reg;
+ alu.src[0].sel = ctx->bc->ar_reg;
+ alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
memset(&alu, 0, sizeof(alu));
alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- alu.dst.sel = ctx->ar_reg;
+ alu.dst.sel = ctx->bc->ar_reg;
alu.dst.write = 1;
alu.last = 1;
return r;
break;
case TGSI_OPCODE_UARL:
+ memset(&alu, 0, sizeof(alu));
+ alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
break;
default:
assert(0);
return -1;
}
- memset(&alu, 0, sizeof(alu));
- alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT;
- if (inst->Instruction.Opcode == TGSI_OPCODE_UARL)
- r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
- else
- alu.src[0].sel = ctx->ar_reg;
- alu.last = 1;
-
- r = r600_bytecode_add_alu(ctx->bc, &alu);
- if (r)
- return r;
- ctx->bc->cf_last->r6xx_uses_waterfall = 1;
+ ctx->bc->ar_loaded = 0;
return 0;
}