From: Marek Olšák Date: Sat, 5 Jun 2010 02:58:40 +0000 (+0200) Subject: r300/compiler: implement SNE unwound for r3xx VS, natively for r5xx VS X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=722db9fa77de16b92adf19fda00c9671d896749e;p=mesa.git r300/compiler: implement SNE unwound for r3xx VS, natively for r5xx VS --- diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 755161cdc76..dd307856a77 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -353,7 +353,9 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi return; } - assert(compiler->Base.is_r500 || vpi->Opcode != RC_OPCODE_SEQ); + assert(compiler->Base.is_r500 || + (vpi->Opcode != RC_OPCODE_SEQ && + vpi->Opcode != RC_OPCODE_SNE)); switch (vpi->Opcode) { case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break; @@ -377,6 +379,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break; case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break; case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; + case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; default: rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name); return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 67e2cc0c82c..d14de79565d 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -627,6 +627,33 @@ static void transform_r300_vertex_SEQ(struct radeon_compiler *c, rc_remove_instruction(inst); } +static void transform_r300_vertex_SNE(struct radeon_compiler *c, + struct rc_instruction *inst) +{ + /* x != y <==> x < y || y < x */ + int tmp = rc_find_free_temporary(c); + + /* x < y */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + inst->U.I.SrcReg[1]); + + /* y < x */ + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + inst->U.I.DstReg, + inst->U.I.SrcReg[1], + inst->U.I.SrcReg[0]); + + /* x || y = max(x, y) */ + emit2(c, inst->Prev, RC_OPCODE_MAX, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp), + srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); + + rc_remove_instruction(inst); +} + static void transform_r300_vertex_SGT(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -672,6 +699,12 @@ int r300_transform_vertex_alu( case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; + case RC_OPCODE_SNE: + if (!c->is_r500) { + transform_r300_vertex_SNE(c, inst); + return 1; + } + return 0; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;