r600g: add point/sprite rendering support
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
index 43b3e40fad11735abee26b5ac6813a47ff0aacf7..ca65bff24c1bdedf6aeb939544424f630e32e3db 100644 (file)
@@ -155,11 +155,14 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
 
 static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_state *rpshader)
 {
+       const struct pipe_rasterizer_state *rasterizer;
        struct r600_screen *rscreen = r600_screen(ctx->screen);
        struct r600_shader *rshader = &rpshader->shader;
+       struct r600_context *rctx = r600_context(ctx);
        struct radeon_state *state;
-       unsigned i, tmp;
+       unsigned i, tmp, exports_ps, num_cout;
 
+       rasterizer = &rctx->rasterizer->state.rasterizer;
        rpshader->rstate = radeon_state_decref(rpshader->rstate);
        state = radeon_state(rscreen->rw, R600_PS_SHADER_TYPE, R600_PS_SHADER);
        if (state == NULL)
@@ -171,13 +174,27 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
                        rshader->input[i].name == TGSI_SEMANTIC_BCOLOR) {
                        tmp |= S_028644_FLAT_SHADE(rshader->flat_shade);
                }
+               if (rasterizer->sprite_coord_enable & (1 << i)) {
+                       tmp |= S_028644_PT_SPRITE_TEX(1);
+               }
                state->states[R600_PS_SHADER__SPI_PS_INPUT_CNTL_0 + i] = tmp;
        }
+
+       exports_ps = 0;
+       num_cout = 0;
+       for (i = 0; i < rshader->noutput; i++) {
+               if (rshader->output[i].name == TGSI_SEMANTIC_POSITION)
+                       exports_ps |= 1;
+               else if (rshader->output[i].name == TGSI_SEMANTIC_COLOR) {
+                       exports_ps |= (1 << (num_cout+1));
+                       num_cout++;
+               }
+       }
        state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
                                                        S_0286CC_PERSP_GRADIENT_ENA(1);
        state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
        state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
-       state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = 0x00000002;
+       state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
        rpshader->rstate = state;
        rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
        rpshader->rstate->nbo = 1;
@@ -328,7 +345,8 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
 {
        struct tgsi_full_immediate *immediate;
        struct r600_shader_ctx ctx;
-       struct r600_bc_output output;
+       struct r600_bc_output output[32];
+       unsigned output_done, noutput;
        unsigned opcode;
        int i, r = 0, pos0;
 
@@ -406,31 +424,41 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
                }
        }
        /* export output */
-       for (i = 0, pos0 = 0; i < shader->noutput; i++) {
-               memset(&output, 0, sizeof(struct r600_bc_output));
-               output.gpr = shader->output[i].gpr;
-               output.elem_size = 3;
-               output.swizzle_x = 0;
-               output.swizzle_y = 1;
-               output.swizzle_z = 2;
-               output.swizzle_w = 3;
-               output.barrier = 1;
-               output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
-               output.array_base = i - pos0;
-               output.inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
-               switch (ctx.type == TGSI_PROCESSOR_VERTEX) {
+       noutput = shader->noutput;
+       for (i = 0, pos0 = 0; i < noutput; i++) {
+               memset(&output[i], 0, sizeof(struct r600_bc_output));
+               output[i].gpr = shader->output[i].gpr;
+               output[i].elem_size = 3;
+               output[i].swizzle_x = 0;
+               output[i].swizzle_y = 1;
+               output[i].swizzle_z = 2;
+               output[i].swizzle_w = 3;
+               output[i].barrier = 1;
+               output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+               output[i].array_base = i - pos0;
+               output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
+               switch (ctx.type) {
                case TGSI_PROCESSOR_VERTEX:
                        if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
-                               output.array_base = 60;
-                               output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                               output[i].array_base = 60;
+                               output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
                                /* position doesn't count in array_base */
-                               pos0 = 1;
+                               pos0++;
+                       }
+                       if (shader->output[i].name == TGSI_SEMANTIC_PSIZE) {
+                               output[i].array_base = 61;
+                               output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
+                               /* point size is a position-type export, so it doesn't count in array_base either */
+                               pos0++;
                        }
                        break;
                case TGSI_PROCESSOR_FRAGMENT:
                        if (shader->output[i].name == TGSI_SEMANTIC_COLOR) {
-                               output.array_base = 0;
-                               output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                               output[i].array_base = shader->output[i].sid;
+                               output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
+                       } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) {
+                               output[i].array_base = 61;
+                               output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
                        } else {
                                R600_ERR("unsupported fragment output name %d\n", shader->output[i].name);
                                r = -EINVAL;
@@ -442,10 +470,43 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
                        r = -EINVAL;
                        goto out_err;
                }
-               if (i == (shader->noutput - 1)) {
-                       output.end_of_program = 1;
+       }
+       /* add fake param output for vertex shader if no param is exported */
+       if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+               for (i = 0, pos0 = 0; i < noutput; i++) {
+                       if (output[i].type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM) {
+                               pos0 = 1;
+                               break;
+                       }
                }
-               r = r600_bc_add_output(ctx.bc, &output);
+               if (!pos0) {
+                       memset(&output[i], 0, sizeof(struct r600_bc_output));
+                       output[i].gpr = 0;
+                       output[i].elem_size = 3;
+                       output[i].swizzle_x = 0;
+                       output[i].swizzle_y = 1;
+                       output[i].swizzle_z = 2;
+                       output[i].swizzle_w = 3;
+                       output[i].barrier = 1;
+                       output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
+                       output[i].array_base = 0;
+                       output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT;
+                       noutput++;
+               }
+       }
+       /* set export done on last export of each type */
+       for (i = noutput - 1, output_done = 0; i >= 0; i--) {
+               if (i == (noutput - 1)) {
+                       output[i].end_of_program = 1;
+               }
+               if (!(output_done & (1 << output[i].type))) {
+                       output_done |= (1 << output[i].type);
+                       output[i].inst = V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE;
+               }
+       }
+       /* add output to bytecode */
+       for (i = 0; i < noutput; i++) {
+               r = r600_bc_add_output(ctx.bc, &output[i]);
                if (r)
                        goto out_err;
        }
@@ -963,68 +1024,66 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        src_gpr = ctx->file_offset[inst->Src[0].Register.File] + inst->Src[0].Register.Index;
 
        /* Add perspective divide */
-       if (ctx->inst_info->tgsi_opcode == TGSI_OPCODE_TXP) {
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
-               alu.src[0].sel = src_gpr;
-               alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 3;
-               alu.last = 1;
-               alu.dst.write = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
+       memset(&alu, 0, sizeof(struct r600_bc_alu));
+       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE;
+       alu.src[0].sel = src_gpr;
+       alu.src[0].chan = tgsi_chan(&inst->Src[0], 3);
+       alu.dst.sel = ctx->temp_reg;
+       alu.dst.chan = 3;
+       alu.last = 1;
+       alu.dst.write = 1;
+       r = r600_bc_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
 
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 3;
-               alu.src[1].sel = src_gpr;
-               alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 0;
-               alu.dst.write = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 3;
-               alu.src[1].sel = src_gpr;
-               alu.src[1].chan = tgsi_chan(&inst->Src[0], 1);
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 1;
-               alu.dst.write = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
-               alu.src[0].sel = ctx->temp_reg;
-               alu.src[0].chan = 3;
-               alu.src[1].sel = src_gpr;
-               alu.src[1].chan = tgsi_chan(&inst->Src[0], 2);
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 2;
-               alu.dst.write = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
-               memset(&alu, 0, sizeof(struct r600_bc_alu));
-               alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
-               alu.src[0].sel = 249;
-               alu.src[0].chan = 0;
-               alu.dst.sel = ctx->temp_reg;
-               alu.dst.chan = 3;
-               alu.last = 1;
-               alu.dst.write = 1;
-               r = r600_bc_add_alu(ctx->bc, &alu);
-               if (r)
-                       return r;
-               src_gpr = ctx->temp_reg;
-       }
+       memset(&alu, 0, sizeof(struct r600_bc_alu));
+       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+       alu.src[0].sel = ctx->temp_reg;
+       alu.src[0].chan = 3;
+       alu.src[1].sel = src_gpr;
+       alu.src[1].chan = tgsi_chan(&inst->Src[0], 0);
+       alu.dst.sel = ctx->temp_reg;
+       alu.dst.chan = 0;
+       alu.dst.write = 1;
+       r = r600_bc_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       memset(&alu, 0, sizeof(struct r600_bc_alu));
+       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+       alu.src[0].sel = ctx->temp_reg;
+       alu.src[0].chan = 3;
+       alu.src[1].sel = src_gpr;
+       alu.src[1].chan = tgsi_chan(&inst->Src[0], 1);
+       alu.dst.sel = ctx->temp_reg;
+       alu.dst.chan = 1;
+       alu.dst.write = 1;
+       r = r600_bc_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       memset(&alu, 0, sizeof(struct r600_bc_alu));
+       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL;
+       alu.src[0].sel = ctx->temp_reg;
+       alu.src[0].chan = 3;
+       alu.src[1].sel = src_gpr;
+       alu.src[1].chan = tgsi_chan(&inst->Src[0], 2);
+       alu.dst.sel = ctx->temp_reg;
+       alu.dst.chan = 2;
+       alu.dst.write = 1;
+       r = r600_bc_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       memset(&alu, 0, sizeof(struct r600_bc_alu));
+       alu.inst = V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV;
+       alu.src[0].sel = 249;
+       alu.src[0].chan = 0;
+       alu.dst.sel = ctx->temp_reg;
+       alu.dst.chan = 3;
+       alu.last = 1;
+       alu.dst.write = 1;
+       r = r600_bc_add_alu(ctx->bc, &alu);
+       if (r)
+               return r;
+       src_gpr = ctx->temp_reg;
 
        /* TODO use temp if src_gpr is not a temporary reg (File != TEMPORARY) */
        memset(&tex, 0, sizeof(struct r600_bc_tex));
@@ -1041,6 +1100,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
        tex.src_sel_y = 1;
        tex.src_sel_z = 2;
        tex.src_sel_w = 3;
+
        if (inst->Texture.Texture != TGSI_TEXTURE_RECT) {
                tex.coord_type_x = 1;
                tex.coord_type_y = 1;