+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = op;
+ r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
+ if (r)
+ return r;
+ r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
+ if (r)
+ return r;
+ r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
+ if (r)
+ return r;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.is_op3 = 1;
+ if (i == lasti)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int tgsi_ucmp(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, r;
+ int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+ for (i = 0; i < lasti + 1; i++) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP3_CNDE_INT;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
+ r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ alu.is_op3 = 1;
+ if (i == lasti)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int tgsi_exp(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int r;
+ unsigned i;
+
+ /* result.x = 2^floor(src); */
+ if (inst->Dst[0].Register.WriteMask & 1) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_FLOOR;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ alu.op = ALU_OP1_EXP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ alu.dst.write = i == 0;
+ alu.last = i == 2;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ alu.op = ALU_OP1_EXP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+
+ /* result.y = tmp - floor(tmp); */
+ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_FRACT;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+#if 0
+ r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+ if (r)
+ return r;
+#endif
+ alu.dst.write = 1;
+ alu.dst.chan = 1;
+
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* result.z = RoughApprox2ToX(tmp);*/
+ if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_EXP_IEEE;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 2) {
+ alu.dst.write = 1;
+ alu.last = 1;
+ }
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_EXP_IEEE;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 2;
+
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+
+ /* result.w = 1.0;*/
+ if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return tgsi_helper_copy(ctx, inst);
+}
+
+static int tgsi_log(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int r;
+ unsigned i;
+
+ /* result.x = floor(log2(|src|)); */
+ if (inst->Dst[0].Register.WriteMask & 1) {
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_LOG_IEEE;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ r600_bytecode_src_set_abs(&alu.src[0]);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 0)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_LOG_IEEE;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ r600_bytecode_src_set_abs(&alu.src[0]);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ alu.op = ALU_OP1_FLOOR;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
+ if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
+
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_LOG_IEEE;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ r600_bytecode_src_set_abs(&alu.src[0]);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 1)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_LOG_IEEE;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ r600_bytecode_src_set_abs(&alu.src[0]);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_FLOOR;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_EXP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 1)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_EXP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_RECIP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = i;
+ if (i == 1)
+ alu.dst.write = 1;
+ if (i == 2)
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_RECIP_IEEE;
+ alu.src[0].sel = ctx->temp_reg;
+ alu.src[0].chan = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP2_MUL;
+
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ r600_bytecode_src_set_abs(&alu.src[0]);
+
+ alu.src[1].sel = ctx->temp_reg;
+ alu.src[1].chan = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 1;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ /* result.z = log2(|src|);*/
+ if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
+ if (ctx->bc->chip_class == CAYMAN) {
+ for (i = 0; i < 3; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_LOG_IEEE;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ r600_bytecode_src_set_abs(&alu.src[0]);
+
+ alu.dst.sel = ctx->temp_reg;
+ if (i == 2)
+ alu.dst.write = 1;
+ alu.dst.chan = i;
+ if (i == 2)
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ } else {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_LOG_IEEE;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
+ r600_bytecode_src_set_abs(&alu.src[0]);
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 2;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ }
+
+ /* result.w = 1.0; */
+ if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ alu.src[0].chan = 0;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.chan = 3;
+ alu.dst.write = 1;
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ return tgsi_helper_copy(ctx, inst);
+}
+
+static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int r;
+ int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+ unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);
+
+ assert(inst->Dst[0].Register.Index < 3);
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL:
+ alu.op = ALU_OP1_FLT_TO_INT_FLOOR;
+ break;
+ case TGSI_OPCODE_ARR:
+ alu.op = ALU_OP1_FLT_TO_INT;
+ break;
+ case TGSI_OPCODE_UARL:
+ alu.op = ALU_OP1_MOV;
+ break;
+ default:
+ assert(0);
+ return -1;
+ }
+
+ for (i = 0; i <= lasti; ++i) {
+ if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+ continue;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ alu.dst.sel = reg;
+ alu.dst.chan = i;
+ alu.dst.write = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+
+ if (inst->Dst[0].Register.Index > 0)
+ ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
+ else
+ ctx->bc->ar_loaded = 0;
+
+ return 0;
+}
+static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int r;
+ int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_ARL:
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP1_FLOOR;
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.write = 1;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
+
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP1_FLT_TO_INT;
+ alu.src[0].sel = ctx->bc->ar_reg;
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.write = 1;
+ /* FLT_TO_INT is trans-only on r600/r700 */
+ alu.last = TRUE;
+ for (i = 0; i <= lasti; ++i) {
+ alu.dst.chan = i;
+ alu.src[0].chan = i;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ break;
+ case TGSI_OPCODE_ARR:
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP1_FLT_TO_INT;
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.write = 1;
+ /* FLT_TO_INT is trans-only on r600/r700 */
+ alu.last = TRUE;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
+ break;
+ case TGSI_OPCODE_UARL:
+ memset(&alu, 0, sizeof(alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->bc->ar_reg;
+ alu.dst.write = 1;
+ for (i = 0; i <= lasti; ++i) {
+ if (inst->Dst[0].Register.WriteMask & (1 << i)) {
+ alu.dst.chan = i;
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ alu.last = i == lasti;
+ if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
+ return r;
+ }
+ }
+ break;
+ default:
+ assert(0);
+ return -1;
+ }
+
+ ctx->bc->ar_loaded = 0;
+ return 0;
+}
+
+static int tgsi_opdst(struct r600_shader_ctx *ctx)
+{
+ struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+ struct r600_bytecode_alu alu;
+ int i, r = 0;
+
+ for (i = 0; i < 4; i++) {
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+
+ alu.op = ALU_OP2_MUL;
+ tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
+
+ if (i == 0 || i == 3) {
+ alu.src[0].sel = V_SQ_ALU_SRC_1;
+ } else {
+ r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
+ }
+
+ if (i == 0 || i == 2) {
+ alu.src[1].sel = V_SQ_ALU_SRC_1;
+ } else {
+ r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
+ }
+ if (i == 3)
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type,
+ struct r600_bytecode_alu_src *src)
+{
+ struct r600_bytecode_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = opcode;
+ alu.execute_mask = 1;
+ alu.update_pred = 1;
+
+ alu.dst.sel = ctx->temp_reg;
+ alu.dst.write = 1;
+ alu.dst.chan = 0;
+
+ alu.src[0] = *src;
+ alu.src[1].sel = V_SQ_ALU_SRC_0;
+ alu.src[1].chan = 0;
+
+ alu.last = 1;
+
+ r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int pops(struct r600_shader_ctx *ctx, int pops)
+{
+ unsigned force_pop = ctx->bc->force_add_cf;
+
+ if (!force_pop) {
+ int alu_pop = 3;
+ if (ctx->bc->cf_last) {
+ if (ctx->bc->cf_last->op == CF_OP_ALU)
+ alu_pop = 0;
+ else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
+ alu_pop = 1;
+ }
+ alu_pop += pops;
+ if (alu_pop == 1) {
+ ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
+ ctx->bc->force_add_cf = 1;
+ } else if (alu_pop == 2) {
+ ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
+ ctx->bc->force_add_cf = 1;
+ } else {
+ force_pop = 1;
+ }
+ }
+
+ if (force_pop) {
+ r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
+ ctx->bc->cf_last->pop_count = pops;
+ ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
+ }
+
+ return 0;
+}
+
+static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
+ unsigned reason)
+{
+ struct r600_stack_info *stack = &ctx->bc->stack;
+ unsigned elements;
+ int entries;
+
+ unsigned entry_size = stack->entry_size;
+
+ elements = (stack->loop + stack->push_wqm ) * entry_size;
+ elements += stack->push;
+
+ switch (ctx->bc->chip_class) {
+ case R600:
+ case R700:
+ /* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
+ * the stack must be reserved to hold the current active/continue
+ * masks */
+ if (reason == FC_PUSH_VPM) {