r300/compiler: implement SNE unwound for r3xx VS, natively for r5xx VS
author Marek Olšák <maraeo@gmail.com>
Sat, 5 Jun 2010 02:58:40 +0000 (04:58 +0200)
committer Marek Olšák <maraeo@gmail.com>
Sat, 5 Jun 2010 05:03:15 +0000 (07:03 +0200)
src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c

index 755161cdc766ea5ec53e7e32c8c8977194db3877..dd307856a77de667fc2cc547dd8593bdb0a27df9 100644 (file)
@@ -353,7 +353,9 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                        return;
                }
 
-               assert(compiler->Base.is_r500 || vpi->Opcode != RC_OPCODE_SEQ);
+               assert(compiler->Base.is_r500 ||
+                      (vpi->Opcode != RC_OPCODE_SEQ &&
+                       vpi->Opcode != RC_OPCODE_SNE));
 
                switch (vpi->Opcode) {
                case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
@@ -377,6 +379,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
                case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
                case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
                case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+               case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
                default:
                        rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
                        return;
index 67e2cc0c82cc6b729fc31b6c1c01b445b9cdba43..d14de79565d667a7b552bb02cd9527722ac5fa08 100644 (file)
@@ -627,6 +627,33 @@ static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
        rc_remove_instruction(inst);
 }
 
+static void transform_r300_vertex_SNE(struct radeon_compiler *c,
+       struct rc_instruction *inst)
+{
+       /* Replace SNE by SLT, SLT, MAX emitted after inst->Prev:  x != y  <==>  x < y || y < x  (x = SrcReg[0], y = SrcReg[1]) */
+       int tmp = rc_find_free_temporary(c);
+
+       /* x < y, written to temporary 'tmp' using the original DstReg.WriteMask */
+       emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+             dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+             inst->U.I.SrcReg[0],
+             inst->U.I.SrcReg[1]);
+
+       /* y < x (operands swapped), written directly to the original destination */
+       emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+             inst->U.I.DstReg,
+             inst->U.I.SrcReg[1],
+             inst->U.I.SrcReg[0]);
+
+       /* x || y  =  max(x, y); NOTE(review): DstReg is read back as a source here - confirm this is valid for every destination file */
+       emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+             inst->U.I.DstReg,
+             srcreg(RC_FILE_TEMPORARY, tmp),
+             srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+       rc_remove_instruction(inst);
+}
+
 static void transform_r300_vertex_SGT(struct radeon_compiler* c,
        struct rc_instruction* inst)
 {
@@ -672,6 +699,12 @@ int r300_transform_vertex_alu(
        case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
        case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
        case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
+       case RC_OPCODE_SNE:
+               if (!c->is_r500) {
+                       transform_r300_vertex_SNE(c, inst);
+                       return 1;
+               }
+               return 0;
        case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
        case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
        case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;