dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
}
+/**
+ * Per-row horizontal difference across the quad; this is the micro-op
+ * bound to TGSI_OPCODE_DDX_FINE.
+ *
+ * Elements 0 and 1 of dst receive TOP_RIGHT - TOP_LEFT, elements 2 and 3
+ * receive BOTTOM_RIGHT - BOTTOM_LEFT.
+ */
+static void
+micro_ddx_fine(union tgsi_exec_channel *dst,
+               const union tgsi_exec_channel *src)
+{
+   /* Top row: one difference shared by the first two elements. */
+   const float top_row = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
+   dst->f[1] = top_row;
+   dst->f[0] = top_row;
+
+   /* Bottom row: one difference shared by the last two elements. */
+   const float bottom_row = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
+   dst->f[3] = bottom_row;
+   dst->f[2] = bottom_row;
+}
+
+
static void
micro_ddy(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)
dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
}
+/**
+ * Per-column vertical difference across the quad; this is the micro-op
+ * bound to TGSI_OPCODE_DDY_FINE.
+ *
+ * Elements 0 and 2 of dst receive BOTTOM_LEFT - TOP_LEFT, elements 1 and 3
+ * receive BOTTOM_RIGHT - TOP_RIGHT.
+ */
+static void
+micro_ddy_fine(union tgsi_exec_channel *dst,
+               const union tgsi_exec_channel *src)
+{
+   /* Left column: one difference shared by elements 0 and 2. */
+   const float left_col = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
+   dst->f[2] = left_col;
+   dst->f[0] = left_col;
+
+   /* Right column: one difference shared by elements 1 and 3. */
+   const float right_col = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
+   dst->f[3] = right_col;
+   dst->f[1] = right_col;
+}
+
static void
micro_dmul(union tgsi_double_channel *dst,
const union tgsi_double_channel *src)
dst->d[3] = src->d[3] - floor(src->d[3]);
}
+/**
+ * Element-wise floor() on a double channel; this is the micro-op bound to
+ * TGSI_OPCODE_DFLR.
+ *
+ * Elements 0..3 are processed independently: dst->d[i] = floor(src->d[i]).
+ */
+static void
+micro_dflr(union tgsi_double_channel *dst,
+           const union tgsi_double_channel *src)
+{
+   for (unsigned i = 0; i < 4; i++) {
+      dst->d[i] = floor(src->d[i]);
+   }
+}
+
static void
micro_dldexp(union tgsi_double_channel *dst,
const union tgsi_double_channel *src0,
}
}
-
-/**
- * Check if there's a potential src/dst register data dependency when
- * using SOA execution.
- * Example:
- * MOV T, T.yxwz;
- * This would expand into:
- * MOV t0, t1;
- * MOV t1, t0;
- * MOV t2, t3;
- * MOV t3, t2;
- * The second instruction will have the wrong value for t0 if executed as-is.
- */
-boolean
-tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
-{
- uint i, chan;
-
- uint writemask = inst->Dst[0].Register.WriteMask;
- if (writemask == TGSI_WRITEMASK_X ||
- writemask == TGSI_WRITEMASK_Y ||
- writemask == TGSI_WRITEMASK_Z ||
- writemask == TGSI_WRITEMASK_W ||
- writemask == TGSI_WRITEMASK_NONE) {
- /* no chance of data dependency */
- return FALSE;
- }
-
- /* loop over src regs */
- for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
- if ((inst->Src[i].Register.File ==
- inst->Dst[0].Register.File) &&
- ((inst->Src[i].Register.Index ==
- inst->Dst[0].Register.Index) ||
- inst->Src[i].Register.Indirect ||
- inst->Dst[0].Register.Indirect)) {
- /* loop over dest channels */
- uint channelsWritten = 0x0;
- for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
- /* check if we're reading a channel that's been written */
- uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
- if (channelsWritten & (1 << swizzle)) {
- return TRUE;
- }
-
- channelsWritten |= (1 << chan);
- }
- }
- }
- }
- return FALSE;
-}
-
-
/**
* Initialize machine state by expanding tokens to full instructions,
* allocating temporary storage, setting up constants, etc.
* sizeof(struct tgsi_full_declaration));
maxDeclarations += 10;
}
- if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
- unsigned reg;
- for (reg = parse.FullToken.FullDeclaration.Range.First;
- reg <= parse.FullToken.FullDeclaration.Range.Last;
- ++reg) {
- ++mach->NumOutputs;
- }
- }
+ if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
+ mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1);
else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
tgsi_exec_machine_create(enum pipe_shader_type shader_type)
{
struct tgsi_exec_machine *mach;
- uint i;
mach = align_malloc( sizeof *mach, 16 );
if (!mach)
goto fail;
}
- /* Setup constants needed by the SSE2 executor. */
- for( i = 0; i < 4; i++ ) {
- mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
- mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
- mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
- mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */
- mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
- mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */
- mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
- mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
- mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
- mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
- }
-
#ifdef DEBUG
/* silence warnings */
(void) print_chan;
break;
case TGSI_FILE_SYSTEM_VALUE:
- /* XXX no swizzling at this point. Will be needed if we put
- * gl_FragCoord, for example, in a sys value register.
- */
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
}
case TGSI_INTERPOLATE_COLOR:
eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
+ interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
break;
default:
dst->i[3] = util_last_bit(src->u[3]) - 1;
}
+
+/**
+ * Execute TGSI_OPCODE_INTERP_SAMPLE.
+ *
+ * Src[0] must be an input register and Src[1] an immediate; the immediate
+ * component selected by Src[1]'s X swizzle is the sample.  For every
+ * channel enabled in the destination write mask the input is fetched as
+ * usual.  When the sample is non-zero the per-input
+ * InputSampleOffsetApply hook is invoked on the fetched value, currently
+ * with a (0, 0) offset, before the result is stored to Dst[0].
+ */
+static void
+exec_interp_at_sample(struct tgsi_exec_machine *mach,
+                      const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel index;
+   union tgsi_exec_channel index2D;
+   union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
+   const struct tgsi_full_src_register *reg = &inst->Src[0];
+
+   assert(reg->Register.File == TGSI_FILE_INPUT);
+   assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);
+
+   get_index_registers(mach, reg, &index, &index2D);
+   float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];
+
+   for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
+         continue;
+
+      fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
+                             &result[chan]);
+
+      /* Short cut: sample 0 is like a normal fetch, so the offset hook is
+       * only applied for other samples.
+       */
+      if (sample != 0.0f) {
+         /* TODO: define the samples > 0, but so far we only do fake MSAA */
+         float x = 0;
+         float y = 0;
+
+         /* NOTE(review): index/index2D are indexed with the channel number
+          * here, whereas exec_interp_at_offset uses element 0 -- confirm
+          * which one is intended.
+          */
+         unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];
+
+         /* pos is unsigned, so a lower-bound check would always be true;
+          * only the upper bound can be violated.
+          */
+         assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
+         mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);
+      }
+      store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+   }
+}
+
+
+/**
+ * Execute TGSI_OPCODE_INTERP_OFFSET.
+ *
+ * Src[0] must be an input register; the X and Y channels of Src[1] supply
+ * the offset.  Each channel enabled in the destination write mask is
+ * fetched from the input, passed through that input's
+ * InputSampleOffsetApply hook together with the offset, and stored to
+ * Dst[0].
+ */
+static void
+exec_interp_at_offset(struct tgsi_exec_machine *mach,
+                      const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *reg = &inst->Src[0];
+   union tgsi_exec_channel index;
+   union tgsi_exec_channel index2D;
+   union tgsi_exec_channel offset_x;
+   union tgsi_exec_channel offset_y;
+
+   assert(reg->Register.File == TGSI_FILE_INPUT);
+
+   get_index_registers(mach, reg, &index, &index2D);
+
+   /* Entry of this input in the InputSampleOffsetApply table, derived from
+    * element 0 of both index registers.
+    */
+   unsigned slot = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];
+
+   fetch_source(mach, &offset_x, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
+   fetch_source(mach, &offset_y, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
+
+   for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         union tgsi_exec_channel value;
+
+         fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &value);
+         mach->InputSampleOffsetApply[slot](mach, slot, chan,
+                                            offset_x.f[chan], offset_y.f[chan],
+                                            &value);
+         store_dest(mach, &value, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
+
+/**
+ * Execute TGSI_OPCODE_INTERP_CENTROID.
+ *
+ * Src[0] must be an input register.  Each channel enabled in the
+ * destination write mask is fetched from the input and stored to Dst[0]
+ * unchanged.
+ *
+ * Centroid interpolation is meant to sample at a location within the pixel
+ * that is covered by the primitive generating the fragment, and
+ * interpolateAtCentroid should behave as though the variable had been
+ * declared with the centroid qualifier (Section 15.2).  Since only one
+ * sample is supported currently, this is just a pass-through.
+ */
+static void
+exec_interp_at_centroid(struct tgsi_exec_machine *mach,
+                        const struct tgsi_full_instruction *inst)
+{
+   const struct tgsi_full_src_register *reg = &inst->Src[0];
+   union tgsi_exec_channel index;
+   union tgsi_exec_channel index2D;
+
+   assert(reg->Register.File == TGSI_FILE_INPUT);
+   get_index_registers(mach, reg, &index, &index2D);
+
+   for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+      if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+         /* Each channel is fetched and stored independently, so a single
+          * scoped temporary is enough.
+          */
+         union tgsi_exec_channel value;
+
+         fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
+                                &value);
+         store_dest(mach, &value, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
+      }
+   }
+}
+
+
/**
* Execute a TGSI instruction.
* Returns TRUE if a barrier instruction is hit,
exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
+ case TGSI_OPCODE_DDX_FINE:
+ exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
case TGSI_OPCODE_DDX:
exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
+ case TGSI_OPCODE_DDY_FINE:
+ exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
+ break;
+
case TGSI_OPCODE_DDY:
exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
break;
exec_double_unary(mach, inst, micro_dfrac);
break;
+ case TGSI_OPCODE_DFLR:
+ exec_double_unary(mach, inst, micro_dflr);
+ break;
+
case TGSI_OPCODE_DLDEXP:
exec_dldexp(mach, inst);
break;
case TGSI_OPCODE_I642D:
exec_double_unary(mach, inst, micro_i642d);
break;
-
+ case TGSI_OPCODE_INTERP_SAMPLE:
+ exec_interp_at_sample(mach, inst);
+ break;
+ case TGSI_OPCODE_INTERP_OFFSET:
+ exec_interp_at_offset(mach, inst);
+ break;
+ case TGSI_OPCODE_INTERP_CENTROID:
+ exec_interp_at_centroid(mach, inst);
+ break;
default:
assert( 0 );
}