X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Ftgsi%2Ftgsi_exec.c;h=ab17c1f9bc293cb971d9a4943cc45f2eddc79133;hp=18ee27502b443ff2a41446014f4de11fff5f49ef;hb=329dee14555b8c8da59e6b47a51050f2aa736596;hpb=850619117e35932e1a7ca290b0d0c2a0c9f1e7fa diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 18ee27502b4..ab17c1f9bc2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -217,6 +217,17 @@ micro_ddx(union tgsi_exec_channel *dst, dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; } +static void +micro_ddx_fine(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT]; + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + + static void micro_ddy(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) @@ -227,6 +238,16 @@ micro_ddy(union tgsi_exec_channel *dst, dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; } +static void +micro_ddy_fine(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = + dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; + dst->f[1] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT]; +} + static void micro_dmul(union tgsi_double_channel *dst, const union tgsi_double_channel *src) @@ -357,6 +378,16 @@ micro_dfrac(union tgsi_double_channel *dst, dst->d[3] = src->d[3] - floor(src->d[3]); } +static void +micro_dflr(union tgsi_double_channel *dst, + const union tgsi_double_channel *src) +{ + dst->d[0] = floor(src->d[0]); + dst->d[1] = floor(src->d[1]); + dst->d[2] = floor(src->d[2]); + dst->d[3] = floor(src->d[3]); +} + static void micro_dldexp(union tgsi_double_channel *dst, const union tgsi_double_channel *src0, @@ -1045,61 +1076,6 @@ tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, } } - -/** - * Check if there's a potential src/dst register data dependency when - * using SOA execution. - * Example: - * MOV T, T.yxwz; - * This would expand into: - * MOV t0, t1; - * MOV t1, t0; - * MOV t2, t3; - * MOV t3, t2; - * The second instruction will have the wrong value for t0 if executed as-is. - */ -boolean -tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) -{ - uint i, chan; - - uint writemask = inst->Dst[0].Register.WriteMask; - if (writemask == TGSI_WRITEMASK_X || - writemask == TGSI_WRITEMASK_Y || - writemask == TGSI_WRITEMASK_Z || - writemask == TGSI_WRITEMASK_W || - writemask == TGSI_WRITEMASK_NONE) { - /* no chance of data dependency */ - return FALSE; - } - - /* loop over src regs */ - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - if ((inst->Src[i].Register.File == - inst->Dst[0].Register.File) && - ((inst->Src[i].Register.Index == - inst->Dst[0].Register.Index) || - inst->Src[i].Register.Indirect || - inst->Dst[0].Register.Indirect)) { - /* loop over dest channels */ - uint channelsWritten = 0x0; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - /* check if we're reading a channel that's been written */ - uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); - if (channelsWritten & (1 << swizzle)) { - return TRUE; - } - - channelsWritten |= (1 << chan); - } - } - } - } - return FALSE; -} - - /** * Initialize machine state by expanding tokens to full instructions, * allocating temporary storage, setting up constants, etc. @@ -1215,14 +1191,8 @@ tgsi_exec_machine_bind_shader( * sizeof(struct tgsi_full_declaration)); maxDeclarations += 10; } - if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { - unsigned reg; - for (reg = parse.FullToken.FullDeclaration.Range.First; - reg <= parse.FullToken.FullDeclaration.Range.Last; - ++reg) { - ++mach->NumOutputs; - } - } + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) + mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1); else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; @@ -1305,7 +1275,6 @@ struct tgsi_exec_machine * tgsi_exec_machine_create(enum pipe_shader_type shader_type) { struct tgsi_exec_machine *mach; - uint i; mach = align_malloc( sizeof *mach, 16 ); if (!mach) @@ -1330,20 +1299,6 @@ tgsi_exec_machine_create(enum pipe_shader_type shader_type) goto fail; } - /* Setup constants needed by the SSE2 executor. */ - for( i = 0; i < 4; i++ ) { - mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; - mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; - mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; - mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ - mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; - mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ - mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; - mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; - mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; - mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; - } - #ifdef DEBUG /* silence warnings */ (void) print_chan; @@ -1567,9 +1522,6 @@ fetch_src_file_channel(const struct tgsi_exec_machine *mach, break; case TGSI_FILE_SYSTEM_VALUE: - /* XXX no swizzling at this point. Will be needed if we put - * gl_FragCoord, for example, in a sys value register. - */ for (i = 0; i < TGSI_QUAD_SIZE; i++) { chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; } @@ -5398,10 +5350,18 @@ exec_instruction( exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; + case TGSI_OPCODE_DDX_FINE: + exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_DDX: exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; + case TGSI_OPCODE_DDY_FINE: + exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); + break; + case TGSI_OPCODE_DDY: exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; @@ -6154,6 +6114,10 @@ exec_instruction( exec_double_unary(mach, inst, micro_dfrac); break; + case TGSI_OPCODE_DFLR: + exec_double_unary(mach, inst, micro_dflr); + break; + case TGSI_OPCODE_DLDEXP: exec_dldexp(mach, inst); break;