r300g: only check for an empty shader if there are no compile errors
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
index 23b3eabf52651b720356c2d08f9437d6e175b0cc..0ba26a23112f82ae36dc0adb9e4458a144ca3f02 100644 (file)
@@ -50,6 +50,7 @@ struct r600_shader_ctx {
        u32                                     value[4];
        u32                                     *literals;
        u32                                     nliterals;
+       u32                                     max_driver_temp_used;
 };
 
 struct r600_shader_tgsi_instruction {
@@ -133,10 +134,9 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
        struct radeon_state *state;
        unsigned i, tmp;
 
-       rpshader->rstate = radeon_state_decref(rpshader->rstate);
-       state = radeon_state(rscreen->rw, R600_VS_SHADER_TYPE, R600_VS_SHADER);
-       if (state == NULL)
-               return -ENOMEM;
+       state = &rpshader->rstate[0];
+       radeon_state_fini(&rpshader->rstate[0]);
+       radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_VS);
        for (i = 0; i < 10; i++) {
                state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i] = 0;
        }
@@ -148,12 +148,11 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
        state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
        state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
                S_028868_STACK_SIZE(rshader->bc.nstack);
-       rpshader->rstate = state;
-       rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
-       rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
-       rpshader->rstate->nbo = 2;
-       rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
-       rpshader->rstate->placement[2] = RADEON_GEM_DOMAIN_GTT;
+       state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+       state->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+       state->nbo = 2;
+       state->placement[0] = RADEON_GEM_DOMAIN_GTT;
+       state->placement[2] = RADEON_GEM_DOMAIN_GTT;
        return radeon_state_pm4(state);
 }
 
@@ -165,17 +164,20 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
        struct r600_context *rctx = r600_context(ctx);
        struct radeon_state *state;
        unsigned i, tmp, exports_ps, num_cout;
+       boolean have_pos = FALSE;
 
+       state = &rpshader->rstate[0];
        rasterizer = &rctx->rasterizer->state.rasterizer;
-       rpshader->rstate = radeon_state_decref(rpshader->rstate);
-       state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
-       if (state == NULL)
-               return -ENOMEM;
+       radeon_state_fini(state);
+       radeon_state_init(state, rscreen->rw, R600_STATE_SHADER, 0, R600_SHADER_PS);
        for (i = 0; i < rshader->ninput; i++) {
                tmp = S_028644_SEMANTIC(i);
                tmp |= S_028644_SEL_CENTROID(1);
+               if (rshader->input[i].name == TGSI_SEMANTIC_POSITION)
+                       have_pos = TRUE;
                if (rshader->input[i].name == TGSI_SEMANTIC_COLOR ||
-                       rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
+                   rshader->input[i].name == TGSI_SEMANTIC_BCOLOR ||
+                   rshader->input[i].name == TGSI_SEMANTIC_POSITION) {
                        tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
                }
                if (rasterizer->sprite_coord_enable & (1 << i)) {
@@ -200,14 +202,18 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
        }
        state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
                                                        S_0286CC_PERSP_GRADIENT_ENA(1);
+       if (have_pos) {
+               state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] |=  S_0286CC_POSITION_ENA(1) |
+                                                                      S_0286CC_BARYC_SAMPLE_CNTL(1);
+               state->states[R600_PS_SHADER__SPI_INPUT_Z] |= 1;
+       }
        state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
        state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
                S_028868_STACK_SIZE(rshader->bc.nstack);
        state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
-       rpshader->rstate = state;
-       rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
-       rpshader->rstate->nbo = 1;
-       rpshader->rstate->placement[0] = RADEON_GEM_DOMAIN_GTT;
+       state->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
+       state->nbo = 1;
+       state->placement[0] = RADEON_GEM_DOMAIN_GTT;
        return radeon_state_pm4(state);
 }
 
@@ -354,6 +360,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
        return 0;
 }
 
+static int r600_get_temp(struct r600_shader_ctx *ctx)
+{
+       return ctx->temp_reg + ctx->max_driver_temp_used++; /* post-increment: yields temp_reg + current count, then bumps the count */
+}
+
 int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *shader)
 {
        struct tgsi_full_immediate *immediate;
@@ -436,6 +447,9 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
                        r = tgsi_is_supported(&ctx);
                        if (r)
                                goto out_err;
+                       ctx.max_driver_temp_used = 0;
+                       /* reserve first tmp for everyone */
+                       r600_get_temp(&ctx);
                        opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
                        ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
                        r = ctx.inst_info->process(&ctx);
@@ -486,6 +500,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
                                output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                        } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
                                output[i].array_base = 61;
+                               output[i].swizzle_x = 2;
+                               output[i].swizzle_y = output[i].swizzle_z = output[i].swizzle_w = 7;
                                output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                        } else {
                                R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
@@ -647,12 +663,13 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
        }
        for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
                if (inst->Src[j].Register.File == TGSI_FILE_CONSTANT && j > 0) {
+                       int treg = r600_get_temp(ctx);
                        for (k = 0; k < 4; k++) {
                                memset(&alu, 0, sizeof(struct r600_bc_alu));
                                alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
-                               alu.src[0].sel = r600_src[0].sel;
+                               alu.src[0].sel = r600_src[j].sel;
                                alu.src[0].chan = k;
-                               alu.dst.sel = ctx->temp_reg + j;
+                               alu.dst.sel = treg;
                                alu.dst.chan = k;
                                alu.dst.write = 1;
                                if (k == 3)
@@ -661,13 +678,52 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
                                if (r)
                                        return r;
                        }
-                       r600_src[0].sel = ctx->temp_reg + j;
+                       r600_src[j].sel = treg;
                        j--;
                }
        }
        return 0;
 }
 
+/*
+ * Move every TGSI immediate source into a driver temp (one MOV per channel) and
+ * point r600_src[] at that temp - for trig functions which use literal for PI
+ * stuff.  Returns 0 on success, or the first non-zero code from the bytecode helpers.
+ */
+static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_src r600_src[3])
+{
+       struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+       struct r600_bc_alu alu;
+       int i, k, r;
+
+       /* index Src[] and r600_src[] by i so an immediate that follows a non-immediate source is not skipped */
+       for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
+               if (inst->Src[i].Register.File == TGSI_FILE_IMMEDIATE) {
+                       int treg = r600_get_temp(ctx);
+                       for (k = 0; k < 4; k++) {
+                               memset(&alu, 0, sizeof(struct r600_bc_alu));
+                               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+                               alu.src[0].sel = r600_src[i].sel;
+                               alu.src[0].chan = k;
+                               alu.dst.sel = treg;
+                               alu.dst.chan = k;
+                               alu.dst.write = 1;
+                               if (k == 3)
+                                       alu.last = 1;
+                               r = r600_bc_add_alu(ctx->bc, &alu);
+                               if (r)
+                                       return r;
+                       }
+                       /* NOTE(review): assumes ctx->value holds the literal for this source - confirm for multi-immediate ops */
+                       r = r600_bc_add_literal(ctx->bc, ctx->value);
+                       if (r)
+                               return r;
+                       r600_src[i].sel = treg;
+               }
+       }
+       return 0;
+}
+
 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -755,6 +811,11 @@ static int tgsi_setup_trig(struct r600_shader_ctx *ctx,
        r = tgsi_split_constant(ctx, r600_src);
        if (r)
                return r;
+
+       r = tgsi_split_literal_constant(ctx, r600_src);
+       if (r)
+               return r;
+
        lit_vals[0] = fui(1.0 /(3.1415926535 * 2));
        lit_vals[1] = fui(0.5f);
 
@@ -834,10 +895,7 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
        struct r600_bc_alu_src r600_src[3];
        struct r600_bc_alu alu;
        int i, r;
-
-       r = tgsi_split_constant(ctx, r600_src);
-       if (r)
-               return r;
+       int lasti = 0;
 
        r = tgsi_setup_trig(ctx, r600_src);
        if (r)
@@ -858,15 +916,21 @@ static int tgsi_trig(struct r600_shader_ctx *ctx)
 
        /* replicate result */
        for (i = 0; i < 4; i++) {
+               if (inst->Dst[0].Register.WriteMask & (1 << i))
+                       lasti = i;
+       }
+       for (i = 0; i < lasti + 1; i++) {
+               if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
+                       continue;
+
                memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.src[0].sel = ctx->temp_reg;
                alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
-               alu.dst.chan = i;
+
+               alu.src[0].sel = ctx->temp_reg;
                r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
                if (r)
                        return r;
-               alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
-               if (i == 3)
+               if (i == lasti)
                        alu.last = 1;
                r = r600_bc_add_alu(ctx->bc, &alu);
                if (r)
@@ -882,10 +946,6 @@ static int tgsi_scs(struct r600_shader_ctx *ctx)
        struct r600_bc_alu alu;
        int r;
 
-       r = tgsi_split_constant(ctx, r600_src);
-       if (r)
-               return r;
-
        r = tgsi_setup_trig(ctx, r600_src);
        if (r)
                return r;
@@ -965,8 +1025,16 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
 {
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
        struct r600_bc_alu alu;
+       struct r600_bc_alu_src r600_src[3];
        int r;
 
+       r = tgsi_split_constant(ctx, r600_src);
+       if (r)
+               return r;
+       r = tgsi_split_literal_constant(ctx, r600_src);
+       if (r)
+               return r;
+
        /* dst.x, <- 1.0  */
        memset(&alu, 0, sizeof(struct r600_bc_alu));
        alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
@@ -983,11 +1051,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
        /* dst.y = max(src.x, 0.0) */
        memset(&alu, 0, sizeof(struct r600_bc_alu));
        alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX;
-       r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
-       if (r)
-               return r;
+       alu.src[0] = r600_src[0];
        alu.src[1].sel  = V_SQ_ALU_SRC_0; /*0.0*/
-       alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
+       alu.src[1].chan = 0;
        r = tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
        if (r)
                return r;
@@ -996,14 +1062,6 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
        if (r)
                return r;
 
-       /* dst.z = NOP - fill Z slot */
-       memset(&alu, 0, sizeof(struct r600_bc_alu));
-       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP;
-       alu.dst.chan = 2;
-       r = r600_bc_add_alu(ctx->bc, &alu);
-       if (r)
-               return r;
-
        /* dst.w, <- 1.0  */
        memset(&alu, 0, sizeof(struct r600_bc_alu));
        alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
@@ -1018,6 +1076,10 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
        if (r)
                return r;
 
+       r = r600_bc_add_literal(ctx->bc, ctx->value);
+       if (r)
+               return r;
+
        if (inst->Dst[0].Register.WriteMask & (1 << 2))
        {
                int chan;
@@ -1026,9 +1088,7 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
                /* dst.z = log(src.y) */
                memset(&alu, 0, sizeof(struct r600_bc_alu));
                alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED;
-               r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
-               if (r)
-                       return r;
+               alu.src[0] = r600_src[0];
                alu.src[0].chan = tgsi_chan(&inst->Src[0], 1);
                r = tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst);
                if (r)
@@ -1038,21 +1098,22 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
                if (r)
                        return r;
 
+               r = r600_bc_add_literal(ctx->bc, ctx->value);
+               if (r)
+                       return r;
+
                chan = alu.dst.chan;
                sel = alu.dst.sel;
 
                /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
                memset(&alu, 0, sizeof(struct r600_bc_alu));
                alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
-               r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
-               if (r)
-                       return r;
+               alu.src[0] = r600_src[0];
                alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
                alu.src[1].sel  = sel;
                alu.src[1].chan = chan;
-               r = tgsi_src(ctx, &inst->Src[0], &alu.src[2]);
-               if (r)
-                       return r;
+
+               alu.src[2] = r600_src[0];
                alu.src[2].chan = tgsi_chan(&inst->Src[0], 0);
                alu.dst.sel = ctx->temp_reg;
                alu.dst.chan = 0;
@@ -1063,6 +1124,9 @@ static int tgsi_lit(struct r600_shader_ctx *ctx)
                if (r)
                        return r;
 
+               r = r600_bc_add_literal(ctx->bc, ctx->value);
+               if (r)
+                       return r;
                /* dst.z = exp(tmp.x) */
                memset(&alu, 0, sizeof(struct r600_bc_alu));
                alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
@@ -1149,6 +1213,9 @@ static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx)
        alu.dst.write = 1;
        alu.last = 1;
        r = r600_bc_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       r = r600_bc_add_literal(ctx->bc, ctx->value);
        if (r)
                return r;
        /* replicate result */
@@ -1399,6 +1466,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        struct r600_bc_alu alu;
        unsigned src_gpr;
        int r, i;
+       int opcode;
+       boolean src_not_temp = inst->Src[0].Register.File != TGSI_FILE_TEMPORARY;
+       uint32_t lit_vals[4];
 
        src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
 
@@ -1406,7 +1476,10 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                /* Add perspective divide */
                memset(&alu, 0, sizeof(struct r600_bc_alu));
                alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
-               alu.src[0].sel = src_gpr;
+               r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+               if (r)
+                       return r;
+
                alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
                alu.dst.sel = ctx->temp_reg;
                alu.dst.chan = 3;
@@ -1421,7 +1494,9 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                        alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
                        alu.src[0].sel = ctx->temp_reg;
                        alu.src[0].chan = 3;
-                       alu.src[1].sel = src_gpr;
+                       r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
+                       if (r)
+                               return r;
                        alu.src[1].chan = tgsi_chan(&inst->Src[0], i);
                        alu.dst.sel = ctx->temp_reg;
                        alu.dst.chan = i;
@@ -1441,8 +1516,122 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                r = r600_bc_add_alu(ctx->bc, &alu);
                if (r)
                        return r;
+               src_not_temp = false;
                src_gpr = ctx->temp_reg;
-       } else if (inst->Src[0].Register.File != TGSI_FILE_TEMPORARY) {
+       }
+
+       if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
+               int src_chan, src2_chan;
+
+               /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
+               for (i = 0; i < 4; i++) {
+                       memset(&alu, 0, sizeof(struct r600_bc_alu));
+                       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+                       switch (i) {
+                       case 0:
+                               src_chan = 2;
+                               src2_chan = 1;
+                               break;
+                       case 1:
+                               src_chan = 2;
+                               src2_chan = 0;
+                               break;
+                       case 2:
+                               src_chan = 0;
+                               src2_chan = 2;
+                               break;
+                       case 3:
+                               src_chan = 1;
+                               src2_chan = 2;
+                               break;
+                       }
+                       r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+                       if (r)
+                               return r;
+                       alu.src[0].chan = tgsi_chan(&inst->Src[0], src_chan);
+                       r = tgsi_src(ctx, &inst->Src[0], &alu.src[1]);
+                       if (r)
+                               return r;
+                       alu.src[1].chan = tgsi_chan(&inst->Src[0], src2_chan);
+                       alu.dst.sel = ctx->temp_reg;
+                       alu.dst.chan = i;
+                       if (i == 3)
+                               alu.last = 1;
+                       alu.dst.write = 1;
+                       r = r600_bc_add_alu(ctx->bc, &alu);
+                       if (r)
+                               return r;
+               }
+
+               /* tmp1.z = RCP_e(|tmp1.z|) */
+               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
+               alu.src[0].sel = ctx->temp_reg;
+               alu.src[0].chan = 2;
+               alu.src[0].abs = 1;
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = 2;
+               alu.dst.write = 1;
+               alu.last = 1;
+               r = r600_bc_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+               
+               /* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
+                * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
+                * muladd has no writemask, have to use another temp 
+                */
+               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+               alu.is_op3 = 1;
+
+               alu.src[0].sel = ctx->temp_reg;
+               alu.src[0].chan = 0;
+               alu.src[1].sel = ctx->temp_reg;
+               alu.src[1].chan = 2;
+               
+               alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+               alu.src[2].chan = 0;
+
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = 0;
+               alu.dst.write = 1;
+
+               r = r600_bc_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+
+               memset(&alu, 0, sizeof(struct r600_bc_alu));
+               alu.inst = V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MULADD;
+               alu.is_op3 = 1;
+
+               alu.src[0].sel = ctx->temp_reg;
+               alu.src[0].chan = 1;
+               alu.src[1].sel = ctx->temp_reg;
+               alu.src[1].chan = 2;
+               
+               alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+               alu.src[2].chan = 0;
+
+               alu.dst.sel = ctx->temp_reg;
+               alu.dst.chan = 1;
+               alu.dst.write = 1;
+
+               alu.last = 1;
+               r = r600_bc_add_alu(ctx->bc, &alu);
+               if (r)
+                       return r;
+
+               lit_vals[0] = fui(1.5f);
+
+               r = r600_bc_add_literal(ctx->bc, lit_vals);
+               if (r)
+                       return r;
+               src_not_temp = false;
+               src_gpr = ctx->temp_reg;
+       }
+
+       if (src_not_temp) {
                for (i = 0; i < 4; i++) {
                        memset(&alu, 0, sizeof(struct r600_bc_alu));
                        alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
@@ -1459,9 +1648,14 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                }
                src_gpr = ctx->temp_reg;
        }
+       
+       opcode = ctx->inst_info->r600_opcode;
+       if (opcode == SQ_TEX_INST_SAMPLE &&
+           (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D))
+               opcode = SQ_TEX_INST_SAMPLE_C;
 
        memset(&tex, 0, sizeof(struct r600_bc_tex));
-       tex.inst = ctx->inst_info->r600_opcode;
+       tex.inst = opcode;
        tex.resource_id = ctx->file_offset[inst->Src[1].Register.File] + inst->Src[1].Register.Index;
        tex.sampler_id = tex.resource_id;
        tex.src_gpr = src_gpr;
@@ -1475,13 +1669,30 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        tex.src_sel_z = 2;
        tex.src_sel_w = 3;
 
+       if (inst->Texture.Texture == TGSI_TEXTURE_CUBE) {
+               tex.src_sel_x = 1;
+               tex.src_sel_y = 0;
+               tex.src_sel_z = 3;
+               tex.src_sel_w = 1;
+       }
+
        if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
                tex.coord_type_x = 1;
                tex.coord_type_y = 1;
                tex.coord_type_z = 1;
                tex.coord_type_w = 1;
        }
-       return r600_bc_add_tex(ctx->bc, &tex);
+
+       if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D)
+               tex.src_sel_w = 2;
+
+       r = r600_bc_add_tex(ctx->bc, &tex);
+       if (r)
+               return r;
+
+       /* add shadow ambient support  - gallium doesn't do it yet */
+       return 0;
+       
 }
 
 static int tgsi_lrp(struct r600_shader_ctx *ctx)
@@ -1760,6 +1971,10 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
                if (r)
                        return r;
 
+               r = r600_bc_add_literal(ctx->bc, ctx->value);
+               if (r)
+                       return r;
+
                alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE;
                alu.src[0].sel = ctx->temp_reg;
                alu.src[0].chan = 0;
@@ -1771,6 +1986,10 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
                r = r600_bc_add_alu(ctx->bc, &alu);
                if (r)
                        return r;
+
+               r = r600_bc_add_literal(ctx->bc, ctx->value);
+               if (r)
+                       return r;
        }
                
        /* result.y = tmp - floor(tmp); */
@@ -1796,6 +2015,9 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
                r = r600_bc_add_alu(ctx->bc, &alu);
                if (r)
                        return r;
+               r = r600_bc_add_literal(ctx->bc, ctx->value);
+               if (r)
+                       return r;
        }
 
        /* result.z = RoughApprox2ToX(tmp);*/
@@ -1816,7 +2038,9 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
                r = r600_bc_add_alu(ctx->bc, &alu);
                if (r)
                        return r;
-
+               r = r600_bc_add_literal(ctx->bc, ctx->value);
+               if (r)
+                       return r;
        }
 
        /* result.w = 1.0;*/
@@ -1834,6 +2058,9 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
                r = r600_bc_add_alu(ctx->bc, &alu);
                if (r)
                        return r;
+               r = r600_bc_add_literal(ctx->bc, ctx->value);
+               if (r)
+                       return r;
        }
        return tgsi_helper_copy(ctx, inst);
 }