X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi915%2Fi915_fragprog.c;h=1a5943c87fba4dc5ae64f62db266934d3638b944;hb=0febd0ecfd1e2a36381ab7793811b9c7891ed82f;hp=23f2f0f9da055948319a7f526c94b733d420afbb;hpb=8d418d16165624a59b2049d4097b4ab0dc82ffa9;p=mesa.git diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 23f2f0f9da0..1a5943c87fb 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2003 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -72,6 +72,22 @@ static const GLfloat cos_constants[4] = { 1.0, -1.0 / (6 * 5 * 4 * 3 * 2 * 1) }; +/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */ +#define TEXCOORD_TEX (0<<7) +#define TEXCOORD_VAR (1<<7) + +static unsigned +get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord) +{ + for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { + if (p->texcoord_mapping[i] == texcoord) + return i; + } + + /* blah */ + return p->ctx->Const.MaxTextureCoordUnits - 1; +} + /** * Retrieve a ureg for the given source register. Will emit * constants, apply swizzling and negation as needed. @@ -82,6 +98,7 @@ src_vector(struct i915_fragment_program *p, const struct gl_fragment_program *program) { GLuint src; + unsigned unit; switch (source->File) { @@ -97,43 +114,47 @@ src_vector(struct i915_fragment_program *p, break; case PROGRAM_INPUT: switch (source->Index) { - case FRAG_ATTRIB_WPOS: + case VARYING_SLOT_POS: src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); break; - case FRAG_ATTRIB_COL0: + case VARYING_SLOT_COL0: src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); break; - case FRAG_ATTRIB_COL1: + case VARYING_SLOT_COL1: src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); src = swizzle(src, X, Y, Z, ONE); break; - case FRAG_ATTRIB_FOGC: + case VARYING_SLOT_FOGC: src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); src = swizzle(src, W, ZERO, ZERO, ONE); break; - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: + case VARYING_SLOT_TEX0: + case VARYING_SLOT_TEX1: + case VARYING_SLOT_TEX2: + case VARYING_SLOT_TEX3: + case VARYING_SLOT_TEX4: + case VARYING_SLOT_TEX5: + case VARYING_SLOT_TEX6: + case VARYING_SLOT_TEX7: + unit = get_texcoord_mapping(p, (source->Index - + VARYING_SLOT_TEX0) | TEXCOORD_TEX); src = i915_emit_decl(p, REG_TYPE_T, - T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), + T_TEX0 + unit, D0_CHANNEL_ALL); break; - case FRAG_ATTRIB_VAR0: - case FRAG_ATTRIB_VAR0 + 1: - case FRAG_ATTRIB_VAR0 + 2: - case FRAG_ATTRIB_VAR0 + 3: - case FRAG_ATTRIB_VAR0 + 4: - case FRAG_ATTRIB_VAR0 + 5: - case FRAG_ATTRIB_VAR0 + 6: - case FRAG_ATTRIB_VAR0 + 7: + case VARYING_SLOT_VAR0: + case VARYING_SLOT_VAR0 + 1: + case VARYING_SLOT_VAR0 + 2: + case VARYING_SLOT_VAR0 + 3: + case VARYING_SLOT_VAR0 + 4: + case VARYING_SLOT_VAR0 + 5: + case VARYING_SLOT_VAR0 + 6: + case VARYING_SLOT_VAR0 + 7: + unit = get_texcoord_mapping(p, (source->Index - + VARYING_SLOT_VAR0) | TEXCOORD_VAR); src = i915_emit_decl(p, REG_TYPE_T, - T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0), + T_TEX0 + unit, D0_CHANNEL_ALL); break; @@ -146,6 +167,7 @@ src_vector(struct i915_fragment_program *p, case PROGRAM_OUTPUT: switch (source->Index) { case FRAG_RESULT_COLOR: + case FRAG_RESULT_DATA0: src = UREG(REG_TYPE_OC, 0); break; case FRAG_RESULT_DEPTH: @@ -160,17 +182,6 @@ src_vector(struct i915_fragment_program *p, /* Various paramters and env values. All emitted to * hardware as program constants. */ - case PROGRAM_LOCAL_PARAM: - src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]); - break; - - case PROGRAM_ENV_PARAM: - src = - i915_emit_param4fv(p, - p->ctx->FragmentProgram.Parameters[source-> - Index]); - break; - case PROGRAM_CONSTANT: case PROGRAM_STATE_VAR: case PROGRAM_UNIFORM: @@ -230,7 +241,7 @@ get_result_flags(const struct prog_instruction *inst) { GLuint flags = 0; - if (inst->SaturateMode == SATURATE_ZERO_ONE) + if (inst->Saturate) flags |= A0_DEST_SATURATE; if (inst->DstReg.WriteMask & WRITEMASK_X) flags |= A0_DEST_CHANNEL_X; @@ -681,21 +692,7 @@ upload_program(struct i915_fragment_program *p) break; case OPCODE_MIN: - src0 = src_vector(p, &inst->SrcReg[0], program); - src1 = src_vector(p, &inst->SrcReg[1], program); - tmp = i915_get_utemp(p); - flags = get_result_flags(inst); - - i915_emit_arith(p, - A0_MAX, - tmp, flags & A0_DEST_CHANNEL_ALL, 0, - negate(src0, 1, 1, 1, 1), - negate(src1, 1, 1, 1, 1), 0); - - i915_emit_arith(p, - A0_MOV, - get_result_vector(p, inst), - flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); + EMIT_2ARG_ARITH(A0_MIN); break; case OPCODE_MOV: @@ -817,23 +814,52 @@ upload_program(struct i915_fragment_program *p) flags = get_result_flags(inst); dst = get_result_vector(p, inst); + /* If both operands are uniforms or constants, we get 5 instructions + * like: + * + * U[1] = MOV CONST[1] + * U[0].xyz = SGE CONST[0].xxxx, U[1] + * U[1] = MOV CONST[1].-x-y-z-w + * R[0].xyz = SGE CONST[0].-x-x-x-x, U[1] + * R[0].xyz = MUL R[0], U[0] + * + * This code is stupid. Instead of having the individual calls to + * i915_emit_arith generate the moves to utemps, do it in the caller. + * This results in code like: + * + * U[1] = MOV CONST[1] + * U[0].xyz = SGE CONST[0].xxxx, U[1] + * R[0].xyz = SGE CONST[0].-x-x-x-x, U[1].-x-y-z-w + * R[0].xyz = MUL R[0], U[0] + */ + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST + && GET_UREG_TYPE(src1) == REG_TYPE_CONST) { + unsigned tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + src1, 0, 0); + + src1 = tmp; + } + /* tmp = src1 >= src2 */ i915_emit_arith(p, A0_SGE, tmp, flags, 0, - src_vector(p, &inst->SrcReg[0], program), - src_vector(p, &inst->SrcReg[1], program), + src0, + src1, 0); /* dst = src1 <= src2 */ i915_emit_arith(p, A0_SGE, dst, flags, 0, - negate(src_vector(p, &inst->SrcReg[0], program), - 1, 1, 1, 1), - negate(src_vector(p, &inst->SrcReg[1], program), - 1, 1, 1, 1), + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); /* dst = tmp && dst */ i915_emit_arith(p, @@ -966,23 +992,52 @@ upload_program(struct i915_fragment_program *p) flags = get_result_flags(inst); dst = get_result_vector(p, inst); + /* If both operands are uniforms or constants, we get 5 instructions + * like: + * + * U[1] = MOV CONST[1] + * U[0].xyz = SLT CONST[0].xxxx, U[1] + * U[1] = MOV CONST[1].-x-y-z-w + * R[0].xyz = SLT CONST[0].-x-x-x-x, U[1] + * R[0].xyz = MUL R[0], U[0] + * + * This code is stupid. Instead of having the individual calls to + * i915_emit_arith generate the moves to utemps, do it in the caller. + * This results in code like: + * + * U[1] = MOV CONST[1] + * U[0].xyz = SLT CONST[0].xxxx, U[1] + * R[0].xyz = SLT CONST[0].-x-x-x-x, U[1].-x-y-z-w + * R[0].xyz = MUL R[0], U[0] + */ + src0 = src_vector(p, &inst->SrcReg[0], program); + src1 = src_vector(p, &inst->SrcReg[1], program); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST + && GET_UREG_TYPE(src1) == REG_TYPE_CONST) { + unsigned tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + src1, 0, 0); + + src1 = tmp; + } + /* tmp = src1 < src2 */ i915_emit_arith(p, A0_SLT, tmp, flags, 0, - src_vector(p, &inst->SrcReg[0], program), - src_vector(p, &inst->SrcReg[1], program), + src0, + src1, 0); /* dst = src1 > src2 */ i915_emit_arith(p, A0_SLT, dst, flags, 0, - negate(src_vector(p, &inst->SrcReg[0], program), - 1, 1, 1, 1), - negate(src_vector(p, &inst->SrcReg[1], program), - 1, 1, 1, 1), + negate(src0, 1, 1, 1, 1), + negate(src1, 1, 1, 1, 1), 0); /* dst = tmp || dst */ i915_emit_arith(p, @@ -1142,27 +1197,54 @@ fixup_depth_write(struct i915_fragment_program *p) } } +static void +check_texcoord_mapping(struct i915_fragment_program *p) +{ + GLbitfield64 inputs = p->FragProg.Base.InputsRead; + unsigned unit = 0; + + for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { + if (inputs & VARYING_BIT_TEX(i)) { + if (unit >= p->ctx->Const.MaxTextureCoordUnits) { + unit++; + break; + } + p->texcoord_mapping[unit++] = i | TEXCOORD_TEX; + } + if (inputs & VARYING_BIT_VAR(i)) { + if (unit >= p->ctx->Const.MaxTextureCoordUnits) { + unit++; + break; + } + p->texcoord_mapping[unit++] = i | TEXCOORD_VAR; + } + } + + if (unit > p->ctx->Const.MaxTextureCoordUnits) + i915_program_error(p, "Too many texcoord units"); +} static void check_wpos(struct i915_fragment_program *p) { GLbitfield64 inputs = p->FragProg.Base.InputsRead; GLint i; + unsigned unit = 0; p->wpos_tex = -1; + if ((inputs & VARYING_BIT_POS) == 0) + return; + for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { - if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i))) - continue; - else if (inputs & FRAG_BIT_WPOS) { - p->wpos_tex = i; - inputs &= ~FRAG_BIT_WPOS; - } + unit += !!(inputs & VARYING_BIT_TEX(i)); + unit += !!(inputs & VARYING_BIT_VAR(i)); } - if (inputs & FRAG_BIT_WPOS) { + if (unit < p->ctx->Const.MaxTextureCoordUnits) + p->wpos_tex = unit; + else i915_program_error(p, "No free texcoord for wpos value"); - } } @@ -1178,6 +1260,7 @@ translate_program(struct i915_fragment_program *p) } i915_init_program(i915, p); + check_texcoord_mapping(p); check_wpos(p); upload_program(p); fixup_depth_write(p); @@ -1359,7 +1442,7 @@ i915ValidateFragmentProgram(struct i915_context *i915) intel->coloroffset = 0; intel->specoffset = 0; - if (inputsRead & FRAG_BITS_TEX_ANY || p->wpos_tex != -1) { + if (inputsRead & VARYING_BITS_TEX_ANY || p->wpos_tex != -1) { EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16); } else { @@ -1370,38 +1453,40 @@ i915ValidateFragmentProgram(struct i915_context *i915) if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled) EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4); - if (inputsRead & FRAG_BIT_COL0) { + if (inputsRead & VARYING_BIT_COL0) { intel->coloroffset = offset / 4; EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4); } - if (inputsRead & FRAG_BIT_COL1) { + if (inputsRead & VARYING_BIT_COL1) { intel->specoffset = offset / 4; EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4); } - if ((inputsRead & FRAG_BIT_FOGC)) { + if ((inputsRead & VARYING_BIT_FOGC)) { EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4); } for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { - if (inputsRead & FRAG_BIT_TEX(i)) { + if (inputsRead & VARYING_BIT_TEX(i)) { + int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX); int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size; - s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); - s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz)); EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4); } - else if (inputsRead & FRAG_BIT_VAR(i)) { + if (inputsRead & VARYING_BIT_VAR(i)) { + int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR); int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size; - s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); - s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz)); EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4); } - else if (i == p->wpos_tex) { + if (i == p->wpos_tex) { int wpos_size = 4 * sizeof(float); /* If WPOS is required, duplicate the XYZ position data in an * unused texture coordinate: @@ -1416,8 +1501,6 @@ i915ValidateFragmentProgram(struct i915_context *i915) if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] || s4 != i915->state.Ctx[I915_CTXREG_LIS4]) { - int k; - I915_STATECHANGE(i915, I915_UPLOAD_CTX); /* Must do this *after* statechange, so as not to affect @@ -1437,8 +1520,7 @@ i915ValidateFragmentProgram(struct i915_context *i915) i915->state.Ctx[I915_CTXREG_LIS2] = s2; i915->state.Ctx[I915_CTXREG_LIS4] = s4; - k = intel->vtbl.check_vertex_size(intel, intel->vertex_size); - assert(k); + assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size)); } if (!p->params_uptodate)