r600g: add support for optionally using non-IEEE mul ops
author Ilia Mirkin <imirkin@alum.mit.edu>
Tue, 24 Jan 2017 02:02:28 +0000 (21:02 -0500)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Sun, 29 Jan 2017 04:59:43 +0000 (23:59 -0500)
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/r600/r600_pipe.c
src/gallium/drivers/r600/r600_shader.c

diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index 7e58feaeb44052d8621d2d117075961d5100ebf7..d48c56635feb315695c3ad75bbb728d683ed21a7 100644
@@ -286,6 +286,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
        case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
        case PIPE_CAP_CLEAR_TEXTURE:
+       case PIPE_CAP_TGSI_MUL_ZERO_WINS:
                return 1;
 
        case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
@@ -378,7 +379,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
        case PIPE_CAP_NATIVE_FENCE_FD:
        case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
        case PIPE_CAP_TGSI_FS_FBFETCH:
-       case PIPE_CAP_TGSI_MUL_ZERO_WINS:
        case PIPE_CAP_INT64:
                return 0;
 
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index eaabb042f971a04d1d8405048c8e2092d264d35a..d3f34ddf8f4ecbb54ef97894b6a2d872a7fc123f 100644
@@ -3906,6 +3906,11 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
        int i, j, r, lasti = tgsi_last_instruction(write_mask);
        /* use temp register if trans_only and more than one dst component */
        int use_tmp = trans_only && (write_mask ^ (1 << lasti));
+       unsigned op = ctx->inst_info->op;
+
+       if (op == ALU_OP2_MUL_IEEE &&
+           ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+               op = ALU_OP2_MUL;
 
        for (i = 0; i <= lasti; i++) {
                if (!(write_mask & (1 << i)))
@@ -3919,7 +3924,7 @@ static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only)
                } else
                        tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
 
-               alu.op = ctx->inst_info->op;
+               alu.op = op;
                if (!swap) {
                        for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
                                r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
@@ -6543,6 +6548,11 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
        int i, j, r;
        int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
        int temp_regs[4];
+       unsigned op = ctx->inst_info->op;
+
+       if (op == ALU_OP3_MULADD_IEEE &&
+           ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+               op = ALU_OP3_MULADD;
 
        for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
                temp_regs[j] = 0;
@@ -6554,7 +6564,7 @@ static int tgsi_op3(struct r600_shader_ctx *ctx)
                        continue;
 
                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-               alu.op = ctx->inst_info->op;
+               alu.op = op;
                for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
                        r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
                        if (r)
@@ -6580,10 +6590,14 @@ static int tgsi_dp(struct r600_shader_ctx *ctx)
        struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
        struct r600_bytecode_alu alu;
        int i, j, r;
+       unsigned op = ctx->inst_info->op;
+       if (op == ALU_OP2_DOT4_IEEE &&
+           ctx->info.properties[TGSI_PROPERTY_MUL_ZERO_WINS])
+               op = ALU_OP2_DOT4;
 
        for (i = 0; i < 4; i++) {
                memset(&alu, 0, sizeof(struct r600_bytecode_alu));
-               alu.op = ctx->inst_info->op;
+               alu.op = op;
                for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
                        r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
                }