r600g/sb: initial commit of the optimizing shader backend
[mesa.git] / src / gallium / drivers / r600 / r600_shader.c
index 465186d8a494d7c35eed8dc8614a3994f32e1aa6..f0d3be405d23ae089f1317cb120261c807fa3eb6 100644 (file)
@@ -271,14 +271,16 @@ int r600_compute_shader_create(struct pipe_context * ctx,
        unsigned char * bytes;
        unsigned byte_count;
        struct r600_shader_ctx shader_ctx;
+       boolean use_kill = false;
        bool dump = (r600_ctx->screen->debug_flags & DBG_CS) != 0;
 
-       r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family , dump);
        shader_ctx.bc = bytecode;
        r600_bytecode_init(shader_ctx.bc, r600_ctx->chip_class, r600_ctx->family,
                           r600_ctx->screen->msaa_texture_support);
        shader_ctx.bc->type = TGSI_PROCESSOR_COMPUTE;
        shader_ctx.bc->isa = r600_ctx->isa;
+       r600_llvm_compile(mod, &bytes, &byte_count, r600_ctx->family,
+                               shader_ctx.bc, &use_kill, dump);
        r600_bytecode_from_byte_stream(&shader_ctx, bytes, byte_count);
        if (shader_ctx.bc->chip_class == CAYMAN) {
                cm_bytecode_add_cf_end(shader_ctx.bc);
@@ -590,7 +592,6 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx,
                                unsigned char * bytes,  unsigned num_bytes)
 {
        unsigned bytes_read = 0;
-       ctx->bc->nstack = bytes[bytes_read++];
        unsigned i, byte;
        while (bytes_read < num_bytes) {
                char inst_type = bytes[bytes_read++];
@@ -1445,6 +1446,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                struct radeon_llvm_context radeon_llvm_ctx;
                LLVMModuleRef mod;
                bool dump = r600_can_dump_shader(rscreen, ctx.type);
+               boolean use_kill = false;
 
                memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
                radeon_llvm_ctx.type = ctx.type;
@@ -1461,7 +1463,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                mod = r600_tgsi_llvm(&radeon_llvm_ctx, tokens);
 
                if (r600_llvm_compile(mod, &inst_bytes, &inst_byte_count,
-                                     rscreen->family, &ctx.bc->ngpr, dump)) {
+                                     rscreen->family, ctx.bc, &use_kill, dump)) {
                        FREE(inst_bytes);
                        radeon_llvm_dispose(&radeon_llvm_ctx);
                        use_llvm = 0;
@@ -1471,6 +1473,8 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                        ctx.file_offset[TGSI_FILE_OUTPUT] =
                                        ctx.file_offset[TGSI_FILE_INPUT];
                }
+               if (use_kill)
+                       ctx.shader->uses_kill = use_kill;
                radeon_llvm_dispose(&radeon_llvm_ctx);
        }
 #endif
@@ -1898,7 +1902,7 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                }
        }
        /* add program end */
-       if (ctx.bc->chip_class == CAYMAN)
+       if (!use_llvm && ctx.bc->chip_class == CAYMAN)
                cm_bytecode_add_cf_end(ctx.bc);
 
        /* check GPR limit - we have 124 = 128 - 4
@@ -4739,6 +4743,26 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
                /* the array index is read from Z */
                tex.coord_type_z = 0;
 
+       /* mask unused source components */
+       if (opcode == FETCH_OP_SAMPLE) {
+               switch (inst->Texture.Texture) {
+               case TGSI_TEXTURE_2D:
+               case TGSI_TEXTURE_RECT:
+                       tex.src_sel_z = 7;
+                       tex.src_sel_w = 7;
+                       break;
+               case TGSI_TEXTURE_1D_ARRAY:
+                       tex.src_sel_y = 7;
+                       tex.src_sel_w = 7;
+                       break;
+               case TGSI_TEXTURE_1D:
+                       tex.src_sel_y = 7;
+                       tex.src_sel_z = 7;
+                       tex.src_sel_w = 7;
+                       break;
+               }
+       }
+
        r = r600_bytecode_add_tex(ctx->bc, &tex);
        if (r)
                return r;