r600g: improve inputs/interpolation handling with llvm backend
author: Vadim Girlin <vadimgirlin@gmail.com>
Wed, 23 Jan 2013 20:30:02 +0000 (21:30 +0100)
committer: Tom Stellard <thomas.stellard@amd.com>
Mon, 28 Jan 2013 18:30:38 +0000 (18:30 +0000)
Get rid of special handling for reserved regs.
Use one intrinsic for all kinds of interpolation.

v2[Vincent Lejeune]: Rebased against current master

Reviewed-by: Tom Stellard <thomas.stellard@amd.com>
Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
src/gallium/drivers/r600/r600_llvm.c
src/gallium/drivers/r600/r600_shader.c
src/gallium/drivers/r600/r600_shader.h
src/gallium/drivers/radeon/radeon_llvm.h

index 9f8a0954f75ee5970e0a41bb367fa03fec61b9c6..913dccc53a2dc5694257e889d41ec295bc449795 100644 (file)
@@ -83,48 +83,40 @@ static LLVMValueRef llvm_fetch_system_value(
 static LLVMValueRef
 llvm_load_input_helper(
        struct radeon_llvm_context * ctx,
-       const char *intrinsic, unsigned idx)
+       unsigned idx, int interp, int ij_index)
 {
-       LLVMValueRef reg = lp_build_const_int32(
-               ctx->soa.bld_base.base.gallivm,
-               idx);
-       return build_intrinsic(
-               ctx->soa.bld_base.base.gallivm->builder,
-               intrinsic,
-               ctx->soa.bld_base.base.elem_type, &reg, 1,
-               LLVMReadNoneAttribute);
+       const struct lp_build_context * bb = &ctx->soa.bld_base.base;
+       LLVMValueRef arg[2];
+       int arg_count;
+       const char * intrinsic;
+
+       arg[0] = lp_build_const_int32(bb->gallivm, idx);
+
+       if (interp) {
+               intrinsic = "llvm.R600.interp.input";
+               arg[1] = lp_build_const_int32(bb->gallivm, ij_index);
+               arg_count = 2;
+       } else {
+               intrinsic = "llvm.R600.load.input";
+               arg_count = 1;
+       }
+
+       return build_intrinsic(bb->gallivm->builder, intrinsic,
+               bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute);
 }
 
 static LLVMValueRef
 llvm_face_select_helper(
        struct radeon_llvm_context * ctx,
-       const char *intrinsic, unsigned face_register,
-       unsigned frontcolor_register, unsigned backcolor_regiser)
+       unsigned face_loc, LLVMValueRef front_color, LLVMValueRef back_color)
 {
-
-       LLVMValueRef backcolor = llvm_load_input_helper(
-               ctx,
-               intrinsic,
-               backcolor_regiser);
-       LLVMValueRef front_color = llvm_load_input_helper(
-               ctx,
-               intrinsic,
-               frontcolor_register);
-       LLVMValueRef face = llvm_load_input_helper(
-               ctx,
-               "llvm.R600.load.input",
-               face_register);
-       LLVMValueRef is_face_positive = LLVMBuildFCmp(
-               ctx->soa.bld_base.base.gallivm->builder,
-               LLVMRealUGT, face,
-               lp_build_const_float(ctx->soa.bld_base.base.gallivm, 0.0f),
-               "");
-       return LLVMBuildSelect(
-               ctx->soa.bld_base.base.gallivm->builder,
-               is_face_positive,
-               front_color,
-               backcolor,
-               "");
+       const struct lp_build_context * bb = &ctx->soa.bld_base.base;
+       LLVMValueRef face = llvm_load_input_helper(ctx, face_loc, 0, 0);
+       LLVMValueRef is_front = LLVMBuildFCmp(
+               bb->gallivm->builder, LLVMRealUGT, face,
+               lp_build_const_float(bb->gallivm, 0.0f),        "");
+       return LLVMBuildSelect(bb->gallivm->builder, is_front,
+               front_color, back_color, "");
 }
 
 static void llvm_load_input(
@@ -132,110 +124,59 @@ static void llvm_load_input(
        unsigned input_index,
        const struct tgsi_full_declaration *decl)
 {
+       const struct r600_shader_io * input = &ctx->r600_inputs[input_index];
        unsigned chan;
-
-       const char *intrinsics = "llvm.R600.load.input";
-       unsigned offset = 4 * ctx->reserved_reg_count;
-
-       if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->chip_class >= EVERGREEN) {
-               switch (decl->Interp.Interpolate) {
-               case TGSI_INTERPOLATE_COLOR:
-               case TGSI_INTERPOLATE_PERSPECTIVE:
-                       offset = 0;
-                       intrinsics = "llvm.R600.load.input.perspective";
-                       break;
-               case TGSI_INTERPOLATE_LINEAR:
-                       offset = 0;
-                       intrinsics = "llvm.R600.load.input.linear";
-                       break;
-               case TGSI_INTERPOLATE_CONSTANT:
-                       offset = 0;
-                       intrinsics = "llvm.R600.load.input.constant";
-                       break;
-               default:
-                       assert(0 && "Unknow Interpolate mode");
-               }
+       unsigned interp = 0;
+       int ij_index;
+       int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR);
+       LLVMValueRef v;
+
+       if (ctx->chip_class >= EVERGREEN && ctx->type == TGSI_PROCESSOR_FRAGMENT &&
+                       input->spi_sid) {
+               interp = 1;
+               ij_index = (input->interpolate > 0) ? input->ij_index : -1;
        }
 
        for (chan = 0; chan < 4; chan++) {
-               unsigned soa_index = radeon_llvm_reg_index_soa(input_index,
-                                                               chan);
-
-               switch (decl->Semantic.Name) {
-               case TGSI_SEMANTIC_FACE:
-                       ctx->inputs[soa_index] = llvm_load_input_helper(ctx,
-                               "llvm.R600.load.input",
-                               4 * ctx->face_input);
-                       break;
-               case TGSI_SEMANTIC_POSITION:
-                       if (ctx->type != TGSI_PROCESSOR_FRAGMENT || chan != 3) {
-                               ctx->inputs[soa_index] = llvm_load_input_helper(ctx,
-                                       "llvm.R600.load.input",
-                                       soa_index + (ctx->reserved_reg_count * 4));
-                       } else {
-                               LLVMValueRef w_coord = llvm_load_input_helper(ctx,
-                               "llvm.R600.load.input",
-                               soa_index + (ctx->reserved_reg_count * 4));
-                               ctx->inputs[soa_index] = LLVMBuildFDiv(ctx->gallivm.builder,
-                               lp_build_const_float(&(ctx->gallivm), 1.0f), w_coord, "");
-                       }
-                       break;
-               case TGSI_SEMANTIC_COLOR:
-                       if (ctx->two_side) {
-                               unsigned front_location, back_location;
-                               unsigned back_reg = ctx->r600_inputs[input_index]
-                                       .potential_back_facing_reg;
-                               if (ctx->chip_class >= EVERGREEN) {
-                                       front_location = 4 * ctx->r600_inputs[input_index].lds_pos + chan;
-                                       back_location = 4 * ctx->r600_inputs[back_reg].lds_pos + chan;
-                               } else {
-                                       front_location = soa_index + 4 * ctx->reserved_reg_count;
-                                       back_location = radeon_llvm_reg_index_soa(
-                                               ctx->r600_inputs[back_reg].gpr,
-                                               chan);
-                               }
-                               ctx->inputs[soa_index] = llvm_face_select_helper(ctx,
-                                       intrinsics,
-                                       4 * ctx->face_input, front_location, back_location);
-                               break;
-                       }
-               default:
-                       {
-                               unsigned location;
-                               if (ctx->chip_class >= EVERGREEN) {
-                                       location = 4 * ctx->r600_inputs[input_index].lds_pos + chan;
-                               } else {
-                                       location = soa_index + 4 * ctx->reserved_reg_count;
-                               }
-                               /* The * 4 is assuming that we are in soa mode. */
-                               ctx->inputs[soa_index] = llvm_load_input_helper(ctx,
-                                       intrinsics, location);
-                                       
-                       break;
-                       }
+               unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
+               int loc;
+
+               if (interp) {
+                       loc = 4 * input->lds_pos + chan;
+               } else {
+                       if (input->name == TGSI_SEMANTIC_FACE)
+                               loc = 4 * ctx->face_gpr;
+                       else
+                               loc = 4 * input->gpr + chan;
+               }
+
+               v = llvm_load_input_helper(ctx, loc, interp, ij_index);
+
+               if (two_side) {
+                       struct r600_shader_io * back_input =
+                                       &ctx->r600_inputs[input->back_color_input];
+                       int back_loc = interp ? back_input->lds_pos : back_input->gpr;
+                       LLVMValueRef v2;
+
+                       back_loc = 4 * back_loc + chan;
+                       v2 = llvm_load_input_helper(ctx, back_loc, interp, ij_index);
+                       v = llvm_face_select_helper(ctx, 4 * ctx->face_gpr, v, v2);
+               } else if (input->name == TGSI_SEMANTIC_POSITION &&
+                               ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) {
+                       /* RCP for fragcoord.w */
+                       v = LLVMBuildFDiv(ctx->gallivm.builder,
+                                       lp_build_const_float(&(ctx->gallivm), 1.0f),
+                                       v, "");
                }
+
+               ctx->inputs[soa_index] = v;
        }
 }
 
 static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
 {
        struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
-       struct lp_build_context * base = &bld_base->base;
-       unsigned i;
 
-       /* Reserve special input registers */
-       for (i = 0; i < ctx->reserved_reg_count; i++) {
-               unsigned chan;
-               for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                       LLVMValueRef reg_index = lp_build_const_int32(
-                                       base->gallivm,
-                                       radeon_llvm_reg_index_soa(i, chan));
-                       lp_build_intrinsic_unary(base->gallivm->builder,
-                               "llvm.AMDGPU.reserve.reg",
-                               LLVMVoidTypeInContext(base->gallivm->context),
-                               reg_index);
-               }
-       }
 }
 
 static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
index c16e467c853bbcb489aec1ca5b94f230c87ea359..e8992ba5bed1a8b8156cbe1cfb88880d910fe576 100644 (file)
@@ -650,19 +650,15 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
        return 0;
 }
 
-static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
+static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
+               int input)
 {
-       int i, r;
-       struct r600_bytecode_alu alu;
-       int gpr = 0, base_chan = 0;
        int ij_index = 0;
 
        if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) {
-               ij_index = 0;
                if (ctx->shader->input[input].centroid)
                        ij_index++;
        } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) {
-               ij_index = 0;
                /* if we have perspective add one */
                if (ctx->input_perspective)  {
                        ij_index++;
@@ -674,6 +670,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
                        ij_index++;
        }
 
+       ctx->shader->input[input].ij_index = ij_index;
+}
+
+static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
+{
+       int i, r;
+       struct r600_bytecode_alu alu;
+       int gpr = 0, base_chan = 0;
+       int ij_index = ctx->shader->input[input].ij_index;
+
        /* work out gpr and base_chan from index */
        gpr = ij_index / 2;
        base_chan = (2 * (ij_index % 2)) + 1;
@@ -806,12 +812,13 @@ static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
 
        if (ctx->shader->input[index].spi_sid) {
                ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
-               if (!ctx->use_llvm) {
-                       if (ctx->shader->input[index].interpolate > 0) {
+               if (ctx->shader->input[index].interpolate > 0) {
+                       evergreen_interp_assign_ij_index(ctx, index);
+                       if (!ctx->use_llvm)
                                r = evergreen_interp_alu(ctx, index);
-                       } else {
+               } else {
+                       if (!ctx->use_llvm)
                                r = evergreen_interp_flat(ctx, index);
-                       }
                }
        }
        return r;
@@ -857,11 +864,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                i = ctx->shader->ninput++;
                ctx->shader->input[i].name = d->Semantic.Name;
                ctx->shader->input[i].sid = d->Semantic.Index;
-               ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
                ctx->shader->input[i].interpolate = d->Interp.Interpolate;
                ctx->shader->input[i].centroid = d->Interp.Centroid;
                ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First;
                if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
+                       ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
                        switch (ctx->shader->input[i].name) {
                        case TGSI_SEMANTIC_FACE:
                                ctx->face_gpr = ctx->shader->input[i].gpr;
@@ -883,11 +890,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
                i = ctx->shader->noutput++;
                ctx->shader->output[i].name = d->Semantic.Name;
                ctx->shader->output[i].sid = d->Semantic.Index;
-               ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
                ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First;
                ctx->shader->output[i].interpolate = d->Interp.Interpolate;
                ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
                if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+                       ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
                        switch (d->Semantic.Name) {
                        case TGSI_SEMANTIC_CLIPDIST:
                                ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2);
@@ -1193,17 +1200,9 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
 
        for (i = 0; i < count; i++) {
                if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
-                       unsigned back_facing_reg = ctx->shader->input[i].potential_back_facing_reg;
-                       if (ctx->bc->chip_class >= EVERGREEN) {
-                               if ((r = evergreen_interp_input(ctx, back_facing_reg)))
-                                       return r;
-                       }
-                       
-                       if (!ctx->use_llvm) {
-                               r = select_twoside_color(ctx, i, back_facing_reg);
-                               if (r)
-                                       return r;
-                       }
+                       r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input);
+                       if (r)
+                               return r;
                }
        }
        return 0;
@@ -1396,7 +1395,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                // TGSI to LLVM needs to know the lds position of inputs.
                                // Non LLVM path computes it later (in process_twoside_color)
                                ctx.shader->input[ni].lds_pos = next_lds_loc++;
-                               ctx.shader->input[i].potential_back_facing_reg = ni;
+                               ctx.shader->input[i].back_color_input = ni;
+                               if (ctx.bc->chip_class >= EVERGREEN) {
+                                       if ((r = evergreen_interp_input(&ctx, ni)))
+                                               return r;
+                               }
                        }
                }
        }
@@ -1408,10 +1411,9 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                LLVMModuleRef mod;
                unsigned dump = 0;
                memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx));
-               radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT];
                radeon_llvm_ctx.type = ctx.type;
                radeon_llvm_ctx.two_side = shader->two_side;
-               radeon_llvm_ctx.face_input = ctx.face_gpr;
+               radeon_llvm_ctx.face_gpr = ctx.face_gpr;
                radeon_llvm_ctx.r600_inputs = ctx.shader->input;
                radeon_llvm_ctx.r600_outputs = ctx.shader->output;
                radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1);
@@ -1442,9 +1444,24 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
        if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN)
                shader->nr_ps_max_color_exports = 8;
 
-       if (ctx.fragcoord_input >= 0 && !use_llvm) {
-               if (ctx.bc->chip_class == CAYMAN) {
-                       for (j = 0 ; j < 4; j++) {
+       if (!use_llvm) {
+               if (ctx.fragcoord_input >= 0) {
+                       if (ctx.bc->chip_class == CAYMAN) {
+                               for (j = 0 ; j < 4; j++) {
+                                       struct r600_bytecode_alu alu;
+                                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+                                       alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
+                                       alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
+                                       alu.src[0].chan = 3;
+
+                                       alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
+                                       alu.dst.chan = j;
+                                       alu.dst.write = (j == 3);
+                                       alu.last = 1;
+                                       if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
+                                               return r;
+                               }
+                       } else {
                                struct r600_bytecode_alu alu;
                                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
                                alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
@@ -1452,65 +1469,49 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
                                alu.src[0].chan = 3;
 
                                alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
-                               alu.dst.chan = j;
-                               alu.dst.write = (j == 3);
+                               alu.dst.chan = 3;
+                               alu.dst.write = 1;
                                alu.last = 1;
                                if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
                                        return r;
                        }
-               } else {
-                       struct r600_bytecode_alu alu;
-                       memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-                       alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE);
-                       alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr;
-                       alu.src[0].chan = 3;
+               }
 
-                       alu.dst.sel = shader->input[ctx.fragcoord_input].gpr;
-                       alu.dst.chan = 3;
-                       alu.dst.write = 1;
-                       alu.last = 1;
-                       if ((r = r600_bytecode_add_alu(ctx.bc, &alu)))
+               if (shader->two_side && ctx.colors_used) {
+                       if ((r = process_twoside_color_inputs(&ctx)))
                                return r;
                }
-       }
-
-       if (shader->two_side && ctx.colors_used) {
-               if ((r = process_twoside_color_inputs(&ctx)))
-                       return r;
-       }
 
-       tgsi_parse_init(&ctx.parse, tokens);
-       while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
-               tgsi_parse_token(&ctx.parse);
-               switch (ctx.parse.FullToken.Token.Type) {
-               case TGSI_TOKEN_TYPE_INSTRUCTION:
-                       if (use_llvm) {
-                               continue;
+               tgsi_parse_init(&ctx.parse, tokens);
+               while (!tgsi_parse_end_of_tokens(&ctx.parse)) {
+                       tgsi_parse_token(&ctx.parse);
+                       switch (ctx.parse.FullToken.Token.Type) {
+                       case TGSI_TOKEN_TYPE_INSTRUCTION:
+                               r = tgsi_is_supported(&ctx);
+                               if (r)
+                                       goto out_err;
+                               ctx.max_driver_temp_used = 0;
+                               /* reserve first tmp for everyone */
+                               r600_get_temp(&ctx);
+
+                               opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
+                               if ((r = tgsi_split_constant(&ctx)))
+                                       goto out_err;
+                               if ((r = tgsi_split_literal_constant(&ctx)))
+                                       goto out_err;
+                               if (ctx.bc->chip_class == CAYMAN)
+                                       ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
+                               else if (ctx.bc->chip_class >= EVERGREEN)
+                                       ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
+                               else
+                                       ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
+                               r = ctx.inst_info->process(&ctx);
+                               if (r)
+                                       goto out_err;
+                               break;
+                       default:
+                               break;
                        }
-                       r = tgsi_is_supported(&ctx);
-                       if (r)
-                               goto out_err;
-                       ctx.max_driver_temp_used = 0;
-                       /* reserve first tmp for everyone */
-                       r600_get_temp(&ctx);
-
-                       opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode;
-                       if ((r = tgsi_split_constant(&ctx)))
-                               goto out_err;
-                       if ((r = tgsi_split_literal_constant(&ctx)))
-                               goto out_err;
-                       if (ctx.bc->chip_class == CAYMAN)
-                               ctx.inst_info = &cm_shader_tgsi_instruction[opcode];
-                       else if (ctx.bc->chip_class >= EVERGREEN)
-                               ctx.inst_info = &eg_shader_tgsi_instruction[opcode];
-                       else
-                               ctx.inst_info = &r600_shader_tgsi_instruction[opcode];
-                       r = ctx.inst_info->process(&ctx);
-                       if (r)
-                               goto out_err;
-                       break;
-               default:
-                       break;
                }
        }
 
index d61efcb1a726ebd874f2cb710c0a9b2e4c11081c..f55e002aea65874526fae8c6cbe5e411ef0d82ee 100644 (file)
@@ -32,9 +32,10 @@ struct r600_shader_io {
        int                     sid;
        int                     spi_sid;
        unsigned                interpolate;
+       unsigned                ij_index;
        boolean                 centroid;
        unsigned                lds_pos; /* for evergreen */
-       unsigned                potential_back_facing_reg;
+       unsigned                back_color_input;
        unsigned                write_mask;
 };
 
index 1edcbd46d721dc1f26ccab379682b77efe867a3d..64d838ab46bdfc3ac27402cc8970c5a310849fa6 100644 (file)
@@ -56,7 +56,7 @@ struct radeon_llvm_context {
 
        unsigned chip_class;
        unsigned type;
-       unsigned face_input;
+       unsigned face_gpr;
        unsigned two_side;
        unsigned clip_vertex;
        struct r600_shader_io * r600_inputs;
@@ -108,7 +108,6 @@ struct radeon_llvm_context {
 
        LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];
 
-       unsigned reserved_reg_count;
        /*=== Private Members ===*/
 
        struct radeon_llvm_branch branch[RADEON_LLVM_MAX_BRANCH_DEPTH];