r300/compiler: implement SEQ unwound for r3xx VS, natively for r5xx VS
author Marek Olšák <maraeo@gmail.com>
Sat, 5 Jun 2010 02:18:28 +0000 (04:18 +0200)
committer Marek Olšák <maraeo@gmail.com>
Sat, 5 Jun 2010 05:03:15 +0000 (07:03 +0200)
Fixes piglit/glsl-vs-vec4-indexing-4.

src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c

index 02bef5603f38d9e91884e312cb3e4f5f4ecfdc9c..755161cdc766ea5ec53e7e32c8c8977194db3877 100644 (file)
@@ -353,6 +353,8 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                        return;
                }
 
+               assert(compiler->Base.is_r500 || vpi->Opcode != RC_OPCODE_SEQ);
+
                switch (vpi->Opcode) {
                case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
                case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
@@ -372,6 +374,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
                case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
                case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+               case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
                case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
                case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
                default:
index 0240fc0522b41db593b1750732a67d606657741b..67e2cc0c82cc6b729fc31b6c1c01b445b9cdba43 100644 (file)
@@ -600,6 +600,33 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
        inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
 }
 
+static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
+       struct rc_instruction *inst)
+{
+       /* Expand the SEQ instruction `inst` into SGE + SGE + MUL and then
+        * remove `inst` itself. The dispatcher in r300_transform_vertex_alu
+        * only calls this when !c->is_r500.
+        *
+        * x = y  <==>  x >= y && y >= x
+        *
+        * tmp is the temporary register index that receives the first
+        * comparison result.
+        *
+        * NOTE(review): every emit2 below is handed inst->Prev, re-read on
+        * each call; presumably emit2 inserts after that instruction, so the
+        * three new instructions land in order just before inst -- confirm
+        * against emit2. */
+       int tmp = rc_find_free_temporary(c);

+       /* tmp = (src0 >= src1), i.e. x >= y, written with the original
+        * destination write mask */
+       emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+             dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+             inst->U.I.SrcReg[0],
+             inst->U.I.SrcReg[1]);

+       /* dst = (src1 >= src0), i.e. y >= x, written straight to the
+        * original destination register */
+       emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+             inst->U.I.DstReg,
+             inst->U.I.SrcReg[1],
+             inst->U.I.SrcReg[0]);

+       /* x && y  =  x * y, so dst = tmp * dst combines both comparisons.
+        * NOTE(review): this reads DstReg back as a source operand built
+        * only from its File and Index; confirm DstReg is always in a
+        * readable register file at this point. */
+       emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+             inst->U.I.DstReg,
+             srcreg(RC_FILE_TEMPORARY, tmp),
+             srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));

+       rc_remove_instruction(inst);
+}
+
 static void transform_r300_vertex_SGT(struct radeon_compiler* c,
        struct rc_instruction* inst)
 {
@@ -636,6 +663,12 @@ int r300_transform_vertex_alu(
        case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
        case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
        case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+       case RC_OPCODE_SEQ:
+               if (!c->is_r500) {
+                       transform_r300_vertex_SEQ(c, inst);
+                       return 1;
+               }
+               return 0;
        case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
        case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
        case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;