From: Ben Skeggs Date: Sat, 28 May 2005 03:19:56 +0000 (+0000) Subject: Play with swizzling a bit. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=908dbc44c23ee96415ba758dc05050ae5fb0ca3c;p=mesa.git Play with swizzling a bit. --- diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 7090756d0b5..de6a36f50bc 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -40,6 +40,8 @@ * fglrx does (see r300_reg.h). * - Verify results of opcodes for accuracy, I've only checked them * in specific cases. + * - Learn more about interaction between xyz/w units.. A few bugs are + * caused by something I'm missing.. * - and more... */ @@ -170,10 +172,7 @@ const pfs_reg_t pfs_default_reg = { index: 0, v_swz: 0 /* matches XYZ in table */, s_swz: SWIZZLE_W, - vcross: 0, - scross: 0, negate: 0, - has_w: GL_FALSE, valid: GL_FALSE }; @@ -297,25 +296,26 @@ static int swz_native(struct r300_fragment_program *rp, { /* Native swizzle, nothing to see here */ *r = src; - r->has_w = GL_TRUE; return 3; } static int swz_emit_partial(struct r300_fragment_program *rp, - pfs_reg_t src, pfs_reg_t *r, int mask) + pfs_reg_t src, pfs_reg_t *r, int mask, int mc) { if (!r->valid) *r = get_temp_reg(rp); /* A partial match, src.v_swz/mask define what parts of the * desired swizzle we match */ - emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, src, pfs_one, pfs_zero, 0); - + if (mc + s_mask[mask].count == 3) + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask|WRITEMASK_W, src, pfs_one, pfs_zero, 0); + else + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, src, pfs_one, pfs_zero, 0); return s_mask[mask].count; } static int swz_special_case(struct r300_fragment_program *rp, - pfs_reg_t src, pfs_reg_t *r, int mask) + pfs_reg_t src, pfs_reg_t *r, int mask, int mc) { pfs_reg_t ssrc = pfs_default_reg; @@ -323,13 +323,18 @@ static int swz_special_case(struct r300_fragment_program *rp, case SWIZZLE_W: ssrc = get_temp_reg(rp); src.v_swz = SWIZZLE_WZY; - src.vcross = GL_TRUE; if (s_mask[mask].count == 3) { emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XW, src, pfs_one, pfs_zero, 0); *r = ssrc; r->v_swz = SWIZZLE_XXX; r->s_swz = SWIZZLE_W; - r->has_w = GL_TRUE; + } else if (mc + s_mask[mask].count == 3) { + if (!r->valid) + *r = get_temp_reg(rp); + emit_arith(rp, PFS_OP_MAD, ssrc, WRITEMASK_XW, src, pfs_one, pfs_zero, 0); + ssrc.v_swz = SWIZZLE_XXX; + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask|WRITEMASK_W, ssrc, pfs_one, pfs_zero, 0); + free_temp(rp, ssrc); } else { if (!r->valid) *r = get_temp_reg(rp); @@ -341,6 +346,13 @@ static int swz_special_case(struct r300_fragment_program *rp, break; case SWIZZLE_ONE: case SWIZZLE_ZERO: + if (!r->valid) + *r = get_temp_reg(rp); + if (mc + s_mask[mask].count == 3) + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask|WRITEMASK_W, src, pfs_one, pfs_zero, 0); + else + emit_arith(rp, PFS_OP_MAD, *r, s_mask[mask].mask, src, pfs_one, pfs_zero, 0); + break; default: ERROR("Unknown special-case swizzle! %d\n", src.v_swz); return 0; @@ -359,38 +371,27 @@ static pfs_reg_t swizzle(struct r300_fragment_program *rp, int v_matched = 0; src.v_swz = SWIZZLE_XYZ; src.s_swz = GET_SWZ(arbswz, 3); - if (src.s_swz >= SWIZZLE_X && src.s_swz <= SWIZZLE_Z) - src.scross = GL_TRUE; do { do { #define CUR_HASH (v_swiz[src.v_swz].hash & s_mask[c_mask].hash) if (CUR_HASH == (arbswz & s_mask[c_mask].hash)) { if (v_swiz[src.v_swz].native == GL_FALSE) - v_matched += swz_special_case(rp, src, &r, c_mask); + v_matched += swz_special_case(rp, src, &r, c_mask, v_matched); else if (s_mask[c_mask].count == 3) v_matched += swz_native(rp, src, &r); else - v_matched += swz_emit_partial(rp, src, &r, c_mask); - - if (v_matched == 3) { - if (!r.has_w) { - emit_arith(rp, PFS_OP_MAD, r, WRITEMASK_W, src, pfs_one, pfs_zero, 0); - r.s_swz = SWIZZLE_W; - } - - if (r.type != REG_TYPE_CONST) { - if (r.v_swz == SWIZZLE_WZY) - r.vcross = GL_TRUE; - if (r.s_swz >= SWIZZLE_X && r.s_swz <= SWIZZLE_Z) - r.scross = GL_TRUE; - } + v_matched += swz_emit_partial(rp, src, &r, c_mask, v_matched); + + if (v_matched == 3) return r; - } - - arbswz &= ~s_mask[c_mask].hash; + + /* Fill with something invalid.. all 0's was wrong before, matched + * SWIZZLE_X. So all 1's will be okay for now */ + arbswz |= (PFS_INVAL & s_mask[c_mask].hash); } } while(v_swiz[++src.v_swz].hash != PFS_INVAL); + src.v_swz = SWIZZLE_XYZ; } while (s_mask[++c_mask].hash != PFS_INVAL); ERROR("should NEVER get here\n"); @@ -612,8 +613,8 @@ static void emit_arith(struct r300_fragment_program *rp, int op, break; case REG_TYPE_TEMP: /* make sure insn ordering is right... */ - if ((src[i].vcross && v_idx < s_idx) || - (src[i].scross && s_idx < v_idx)) { + if ((v_swiz[src[i].v_swz].dep_sca && v_idx < s_idx) || + (s_swiz[src[i].s_swz].dep_vec && s_idx < v_idx)) { sync_streams(rp); v_idx = s_idx = rp->v_pos; } @@ -685,7 +686,8 @@ static void emit_arith(struct r300_fragment_program *rp, int op, rp->s_pos = s_idx + 1; } -// sync_streams(rp); +/* Force this for now */ + sync_streams(rp); return; }; @@ -776,6 +778,7 @@ static GLboolean parse_program(struct r300_fragment_program *rp) ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); break; case FP_OPCODE_MOV: + case FP_OPCODE_SWZ: emit_arith(rp, PFS_OP_MAD, t_dst(rp, fpi->DstReg), fpi->DstReg.WriteMask, t_src(rp, fpi->SrcReg[0]), pfs_one, pfs_zero, flags); @@ -826,9 +829,6 @@ static GLboolean parse_program(struct r300_fragment_program *rp) negate(t_src(rp, fpi->SrcReg[1])), flags); break; - case FP_OPCODE_SWZ: - ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); - break; case FP_OPCODE_TEX: emit_tex(rp, fpi, R300_FPITX_OP_TEX); break; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 26e4ae56a95..b60f6743352 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -20,10 +20,7 @@ typedef struct _pfs_reg_t { GLuint index:6; GLuint v_swz:5; GLuint s_swz:5; - GLboolean vcross:1; - GLboolean scross:1; GLuint negate:1; //XXX: we need to handle negate individually - GLboolean has_w:1; GLboolean valid:1; } pfs_reg_t;