X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnv30%2Fnv30_fragprog.c;h=4ce16b8f0e39af5859f06955a834902a401454cc;hb=d9e396ce4a124529fa92ad967f2b3ff72534079b;hp=0ce702d6f8449d6ca8399d03ce0d7f0943df7b48;hpb=d0b7ff551ab25153e3023871af3daa65b394a828;p=mesa.git diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 0ce702d6f84..4ce16b8f0e3 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -1,7 +1,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_inlines.h" +#include "util/u_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_dump.h" @@ -19,19 +19,19 @@ #define MASK_Z 4 #define MASK_W 8 #define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) -#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X -#define DEF_CTEST NV30_FP_OP_COND_TR -#include "nv30_shader.h" +#define DEF_SCALE NVFX_FP_OP_DST_SCALE_1X +#define DEF_CTEST NVFX_FP_OP_COND_TR +#include "nvfx_shader.h" -#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) -#define neg(s) nv30_sr_neg((s)) -#define abs(s) nv30_sr_abs((s)) -#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) +#define swz(s,x,y,z,w) nvfx_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nvfx_sr_neg((s)) +#define abs(s) nvfx_sr_abs((s)) +#define scale(s,v) nvfx_sr_scale((s), NVFX_FP_OP_DST_SCALE_##v) #define MAX_CONSTS 128 #define MAX_IMM 32 struct nv30_fpc { - struct nv30_fragment_program *fp; + struct nvfx_fragment_program *fp; uint attrib_map[PIPE_MAX_SHADER_INPUTS]; @@ -50,21 +50,21 @@ struct nv30_fpc { } consts[MAX_CONSTS]; int nr_consts; - struct nv30_sreg imm[MAX_IMM]; + struct nvfx_sreg imm[MAX_IMM]; unsigned nr_imm; }; -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg temp(struct nv30_fpc *fpc) { int idx; idx = fpc->temp_temp_count++; idx += fpc->high_temp + 1; - return nv30_sr(NV30SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg constant(struct nv30_fpc *fpc, int pipe, float vals[4]) { int idx; @@ -76,49 +76,49 @@ constant(struct nv30_fpc *fpc, int pipe, float vals[4]) fpc->consts[idx].pipe = pipe; if (pipe == -1) memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); - return nv30_sr(NV30SR_CONST, idx); + return nvfx_sr(NVFXSR_CONST, idx); } #define arith(cc,s,o,d,m,s0,s1,s2) \ - nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ + nv30_fp_arith((cc), (s), NVFX_FP_OP_OPCODE_##o, \ (d), (m), (s0), (s1), (s2)) #define tex(cc,s,o,u,d,m,s0,s1,s2) \ - nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ + nv30_fp_tex((cc), (s), NVFX_FP_OP_OPCODE_##o, (u), \ (d), (m), (s0), none, none) static void grow_insns(struct nv30_fpc *fpc, int size) { - struct nv30_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; fp->insn_len += size; fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); } static void -emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) +emit_src(struct nv30_fpc *fpc, int pos, struct nvfx_sreg src) { - struct nv30_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; uint32_t sr = 0; switch (src.type) { - case NV30SR_INPUT: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); - hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); + case NVFXSR_INPUT: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NVFX_FP_OP_INPUT_SRC_SHIFT); break; - case NV30SR_OUTPUT: - sr |= NV30_FP_REG_SRC_HALF; + case NVFXSR_OUTPUT: + sr |= NVFX_FP_REG_SRC_HALF; /* fall-through */ - case NV30SR_TEMP: - sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); - sr |= (src.index << NV30_FP_REG_SRC_SHIFT); + case NVFXSR_TEMP: + sr |= (NVFX_FP_REG_TYPE_TEMP << NVFX_FP_REG_TYPE_SHIFT); + sr |= (src.index << NVFX_FP_REG_SRC_SHIFT); break; - case NV30SR_CONST: + case NVFXSR_CONST: grow_insns(fpc, 4); hw = &fp->insn[fpc->inst_offset]; if (fpc->consts[src.index].pipe >= 0) { - struct nv30_fragment_program_data *fpd; + struct nvfx_fragment_program_data *fpd; fp->consts = realloc(fp->consts, ++fp->nr_consts * sizeof(*fpd)); @@ -132,63 +132,63 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) sizeof(uint32_t) * 4); } - sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); + sr |= (NVFX_FP_REG_TYPE_CONST << NVFX_FP_REG_TYPE_SHIFT); break; - case NV30SR_NONE: - sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + case NVFXSR_NONE: + sr |= (NVFX_FP_REG_TYPE_INPUT << NVFX_FP_REG_TYPE_SHIFT); break; default: assert(0); } if (src.negate) - sr |= NV30_FP_REG_NEGATE; + sr |= NVFX_FP_REG_NEGATE; if (src.abs) hw[1] |= (1 << (29 + pos)); - sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | - (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | - (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | - (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); + sr |= ((src.swz[0] << NVFX_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NVFX_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NVFX_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NVFX_FP_REG_SWZ_W_SHIFT)); hw[pos + 1] |= sr; } static void -emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) +emit_dst(struct nv30_fpc *fpc, struct nvfx_sreg dst) { - struct nv30_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw = &fp->insn[fpc->inst_offset]; switch (dst.type) { - case NV30SR_TEMP: + case NVFXSR_TEMP: if (fpc->num_regs < (dst.index + 1)) fpc->num_regs = dst.index + 1; break; - case NV30SR_OUTPUT: + case NVFXSR_OUTPUT: if (dst.index == 1) { fp->fp_control |= 0xe; } else { - hw[0] |= NV30_FP_OP_OUT_REG_HALF; + hw[0] |= NVFX_FP_OP_OUT_REG_HALF; } break; - case NV30SR_NONE: + case NVFXSR_NONE: hw[0] |= (1 << 30); break; default: assert(0); } - hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); + hw[0] |= (dst.index << NVFX_FP_OP_OUT_REG_SHIFT); } static void nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { - struct nv30_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; uint32_t *hw; fpc->inst_offset = fp->insn_len; @@ -196,22 +196,22 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, hw = &fp->insn[fpc->inst_offset]; memset(hw, 0, sizeof(uint32_t) * 4); - if (op == NV30_FP_OP_OPCODE_KIL) + if (op == NVFX_FP_OP_OPCODE_KIL) fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; - hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); - hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); - hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); + hw[0] |= (op << NVFX_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NVFX_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NVFX_FP_OP_DST_SCALE_SHIFT); if (sat) - hw[0] |= NV30_FP_OP_OUT_SAT; + hw[0] |= NVFX_FP_OP_OUT_SAT; if (dst.cc_update) - hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; - hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); - hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | - (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | - (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | - (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); + hw[0] |= NVFX_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NVFX_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NVFX_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NVFX_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NVFX_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NVFX_FP_OP_COND_SWZ_W_SHIFT)); emit_dst(fpc, dst); emit_src(fpc, 0, s0); @@ -221,36 +221,36 @@ nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, static void nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, - struct nv30_sreg dst, int mask, - struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) + struct nvfx_sreg dst, int mask, + struct nvfx_sreg s0, struct nvfx_sreg s1, struct nvfx_sreg s2) { - struct nv30_fragment_program *fp = fpc->fp; + struct nvfx_fragment_program *fp = fpc->fp; nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); - fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); + fp->insn[fpc->inst_offset] |= (unit << NVFX_FP_OP_TEX_UNIT_SHIFT); fp->samplers |= (1 << unit); } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) { - struct nv30_sreg src; + struct nvfx_sreg src; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - src = nv30_sr(NV30SR_INPUT, - fpc->attrib_map[fsrc->SrcRegister.Index]); + src = nvfx_sr(NVFXSR_INPUT, + fpc->attrib_map[fsrc->Register.Index]); break; case TGSI_FILE_CONSTANT: - src = constant(fpc, fsrc->SrcRegister.Index, NULL); + src = constant(fpc, fsrc->Register.Index, NULL); break; case TGSI_FILE_IMMEDIATE: - assert(fsrc->SrcRegister.Index < fpc->nr_imm); - src = fpc->imm[fsrc->SrcRegister.Index]; + assert(fsrc->Register.Index < fpc->nr_imm); + src = fpc->imm[fsrc->Register.Index]; break; case TGSI_FILE_TEMPORARY: - src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); + src = nvfx_sr(NVFXSR_TEMP, fsrc->Register.Index + 1); if (fpc->high_temp < src.index) fpc->high_temp = src.index; break; @@ -258,46 +258,46 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) * Luckily fragprog results are just temp regs.. */ case TGSI_FILE_OUTPUT: - if (fsrc->SrcRegister.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); + if (fsrc->Register.Index == fpc->colour_id) + return nvfx_sr(NVFXSR_OUTPUT, 0); else - return nv30_sr(NV30SR_OUTPUT, 1); + return nvfx_sr(NVFXSR_OUTPUT, 1); break; default: NOUVEAU_ERR("bad src file\n"); break; } - src.abs = fsrc->SrcRegisterExtMod.Absolute; - src.negate = fsrc->SrcRegister.Negate; - src.swz[0] = fsrc->SrcRegister.SwizzleX; - src.swz[1] = fsrc->SrcRegister.SwizzleY; - src.swz[2] = fsrc->SrcRegister.SwizzleZ; - src.swz[3] = fsrc->SrcRegister.SwizzleW; + src.abs = fsrc->Register.Absolute; + src.negate = fsrc->Register.Negate; + src.swz[0] = fsrc->Register.SwizzleX; + src.swz[1] = fsrc->Register.SwizzleY; + src.swz[2] = fsrc->Register.SwizzleZ; + src.swz[3] = fsrc->Register.SwizzleW; return src; } -static INLINE struct nv30_sreg +static INLINE struct nvfx_sreg tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { int idx; - switch (fdst->DstRegister.File) { + switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: - if (fdst->DstRegister.Index == fpc->colour_id) - return nv30_sr(NV30SR_OUTPUT, 0); + if (fdst->Register.Index == fpc->colour_id) + return nvfx_sr(NVFXSR_OUTPUT, 0); else - return nv30_sr(NV30SR_OUTPUT, 1); + return nvfx_sr(NVFXSR_OUTPUT, 1); break; case TGSI_FILE_TEMPORARY: - idx = fdst->DstRegister.Index + 1; + idx = fdst->Register.Index + 1; if (fpc->high_temp < idx) fpc->high_temp = idx; - return nv30_sr(NV30SR_TEMP, idx); + return nvfx_sr(NVFXSR_TEMP, idx); case TGSI_FILE_NULL: - return nv30_sr(NV30SR_NONE, 0); + return nvfx_sr(NVFXSR_NONE, 0); default: - NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); - return nv30_sr(NV30SR_NONE, 0); + NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File); + return nvfx_sr(NVFXSR_NONE, 0); } } @@ -315,10 +315,10 @@ tgsi_mask(uint tgsi) static boolean src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, - struct nv30_sreg *src) + struct nvfx_sreg *src) { - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg tgsi = tgsi_src(fpc, fsrc); uint mask = 0; uint c; @@ -350,8 +350,8 @@ static boolean nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, const struct tgsi_full_instruction *finst) { - const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); - struct nv30_sreg src[3], dst, tmp; + const struct nvfx_sreg none = nvfx_sr(NVFXSR_NONE, 0); + struct nvfx_sreg src[3], dst, tmp; int mask, sat, unit = 0; int ai = -1, ci = -1; int i; @@ -363,8 +363,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; - if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + fsrc = &finst->Src[i]; + if (fsrc->Register.File == TGSI_FILE_TEMPORARY) { src[i] = tgsi_src(fpc, fsrc); } } @@ -372,9 +372,9 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *fsrc; - fsrc = &finst->FullSrcRegisters[i]; + fsrc = &finst->Src[i]; - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: case TGSI_FILE_CONSTANT: case TGSI_FILE_TEMPORARY: @@ -385,14 +385,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; } - switch (fsrc->SrcRegister.File) { + switch (fsrc->Register.File) { case TGSI_FILE_INPUT: - if (ai == -1 || ai == fsrc->SrcRegister.Index) { - ai = fsrc->SrcRegister.Index; + if (ai == -1 || ai == fsrc->Register.Index) { + ai = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { NOUVEAU_MSG("extra src attr %d\n", - fsrc->SrcRegister.Index); + fsrc->Register.Index); src[i] = temp(fpc); arith(fpc, 0, MOV, src[i], MASK_ALL, tgsi_src(fpc, fsrc), none, none); @@ -400,8 +400,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; case TGSI_FILE_CONSTANT: case TGSI_FILE_IMMEDIATE: - if (ci == -1 || ci == fsrc->SrcRegister.Index) { - ci = fsrc->SrcRegister.Index; + if (ci == -1 || ci == fsrc->Register.Index) { + ci = fsrc->Register.Index; src[i] = tgsi_src(fpc, fsrc); } else { src[i] = temp(fpc); @@ -413,7 +413,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, /* handled above */ break; case TGSI_FILE_SAMPLER: - unit = fsrc->SrcRegister.Index; + unit = fsrc->Register.Index; break; case TGSI_FILE_OUTPUT: break; @@ -423,8 +423,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, } } - dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); - mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + dst = tgsi_dst(fpc, &finst->Dst[0]); + mask = tgsi_mask(finst->Dst[0].Register.WriteMask); sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); switch (finst->Instruction.Opcode) { @@ -435,11 +435,12 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_CMP: - tmp = temp(fpc); - arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp = nvfx_sr(NVFXSR_NONE, 0); tmp.cc_update = 1; arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); - dst.cc_test = NV30_VP_INST_COND_LT; + dst.cc_test = NVFX_VP_INST_COND_GE; + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + dst.cc_test = NVFX_VP_INST_COND_LT; arith(fpc, sat, MOV, dst, mask, src[1], none, none); break; case TGSI_OPCODE_COS: @@ -473,10 +474,10 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, 0, KIL, none, 0, none, none, none); break; case TGSI_OPCODE_KIL: - dst = nv30_sr(NV30SR_NONE, 0); + dst = nvfx_sr(NVFXSR_NONE, 0); dst.cc_update = 1; arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); - dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; + dst.cc_update = 0; dst.cc_test = NVFX_FP_OP_COND_LT; arith(fpc, 0, KIL, dst, 0, none, none, none); break; case TGSI_OPCODE_LG2: @@ -484,7 +485,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, break; // case TGSI_OPCODE_LIT: case TGSI_OPCODE_LRP: - arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); + arith(fpc, sat, LRP_NV30, dst, mask, src[0], src[1], src[2]); break; case TGSI_OPCODE_MAD: arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); @@ -502,7 +503,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_POW: - arith(fpc, sat, POW, dst, mask, src[0], src[1], none); + arith(fpc, sat, POW_NV30, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_RCP: arith(fpc, sat, RCP, dst, mask, src[0], none, none); @@ -511,19 +512,34 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, assert(0); break; case TGSI_OPCODE_RFL: - arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); + arith(fpc, 0, RFL_NV30, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_RSQ: - arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, RSQ_NV30, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); break; case TGSI_OPCODE_SCS: - if (mask & MASK_X) { - arith(fpc, sat, COS, dst, MASK_X, - swz(src[0], X, X, X, X), none, none); + /* avoid overwriting the source */ + if(src[0].swz[SWZ_X] != SWZ_X) + { + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } } - if (mask & MASK_Y) { - arith(fpc, sat, SIN, dst, MASK_Y, - swz(src[0], X, X, X, X), none, none); + else + { + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } } break; case TGSI_OPCODE_SIN: @@ -572,28 +588,28 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, { int hw; - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - hw = NV30_FP_OP_INPUT_SRC_POSITION; + hw = NVFX_FP_OP_INPUT_SRC_POSITION; break; case TGSI_SEMANTIC_COLOR: - if (fdec->Semantic.SemanticIndex == 0) { - hw = NV30_FP_OP_INPUT_SRC_COL0; + if (fdec->Semantic.Index == 0) { + hw = NVFX_FP_OP_INPUT_SRC_COL0; } else - if (fdec->Semantic.SemanticIndex == 1) { - hw = NV30_FP_OP_INPUT_SRC_COL1; + if (fdec->Semantic.Index == 1) { + hw = NVFX_FP_OP_INPUT_SRC_COL1; } else { NOUVEAU_ERR("bad colour semantic index\n"); return FALSE; } break; case TGSI_SEMANTIC_FOG: - hw = NV30_FP_OP_INPUT_SRC_FOGC; + hw = NVFX_FP_OP_INPUT_SRC_FOGC; break; case TGSI_SEMANTIC_GENERIC: - if (fdec->Semantic.SemanticIndex <= 7) { - hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. - SemanticIndex); + if (fdec->Semantic.Index <= 7) { + hw = NVFX_FP_OP_INPUT_SRC_TC(fdec->Semantic. + Index); } else { NOUVEAU_ERR("bad generic semantic index\n"); return FALSE; @@ -604,7 +620,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, return FALSE; } - fpc->attrib_map[fdec->DeclarationRange.First] = hw; + fpc->attrib_map[fdec->Range.First] = hw; return TRUE; } @@ -612,12 +628,12 @@ static boolean nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, const struct tgsi_full_declaration *fdec) { - switch (fdec->Semantic.SemanticName) { + switch (fdec->Semantic.Name) { case TGSI_SEMANTIC_POSITION: - fpc->depth_id = fdec->DeclarationRange.First; + fpc->depth_id = fdec->Range.First; break; case TGSI_SEMANTIC_COLOR: - fpc->colour_id = fdec->DeclarationRange.First; + fpc->colour_id = fdec->Range.First; break; default: NOUVEAU_ERR("bad output semantic\n"); @@ -653,9 +669,9 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) goto out_err; break; /*case TGSI_FILE_TEMPORARY: - if (fdec->DeclarationRange.Last > high_temp) { + if (fdec->Range.Last > high_temp) { high_temp = - fdec->DeclarationRange.Last; + fdec->Range.Last; } break;*/ default: @@ -686,7 +702,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc) tgsi_parse_free(&p); /*if (++high_temp) { - fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); + fpc->r_temp = CALLOC(high_temp, sizeof(struct nvfx_sreg)); for (i = 0; i < high_temp; i++) fpc->r_temp[i] = temp(fpc); fpc->r_temps_discard = 0; @@ -702,8 +718,8 @@ out_err: } static void -nv30_fragprog_translate(struct nv30_context *nv30, - struct nv30_fragment_program *fp) +nv30_fragprog_translate(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { struct tgsi_parse_context parse; struct nv30_fpc *fpc = NULL; @@ -743,7 +759,6 @@ nv30_fragprog_translate(struct nv30_context *nv30, } fp->fp_control |= (fpc->num_regs-1)/2; - fp->fp_reg_control = (1<<16)|0x4; /* Terminate final instruction */ fp->insn[fpc->inst_offset] |= 0x00000001; @@ -757,17 +772,16 @@ nv30_fragprog_translate(struct nv30_context *nv30, fp->insn[fpc->inst_offset + 3] = 0x00000000; fp->translated = TRUE; - fp->on_hw = FALSE; out_err: tgsi_parse_free(&parse); FREE(fpc); } static void -nv30_fragprog_upload(struct nv30_context *nv30, - struct nv30_fragment_program *fp) +nv30_fragprog_upload(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { - struct pipe_screen *pscreen = nv30->pipe.screen; + struct pipe_screen *pscreen = nvfx->pipe.screen; const uint32_t le = 1; uint32_t *map; int i; @@ -798,12 +812,12 @@ nv30_fragprog_upload(struct nv30_context *nv30, } static boolean -nv30_fragprog_validate(struct nv30_context *nv30) +nv30_fragprog_validate(struct nvfx_context *nvfx) { - struct nv30_fragment_program *fp = nv30->fragprog; + struct nvfx_fragment_program *fp = nvfx->fragprog; struct pipe_buffer *constbuf = - nv30->constbuf[PIPE_SHADER_FRAGMENT]; - struct pipe_screen *pscreen = nv30->pipe.screen; + nvfx->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_screen *pscreen = nvfx->pipe.screen; struct nouveau_stateobj *so; boolean new_consts = FALSE; int i; @@ -811,27 +825,27 @@ nv30_fragprog_validate(struct nv30_context *nv30) if (fp->translated) goto update_constants; - /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ - nv30_fragprog_translate(nv30, fp); + /*nvfx->fallback_swrast &= ~NVFX_NEW_FRAGPROG;*/ + nv30_fragprog_translate(nvfx, fp); if (!fp->translated) { - /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ + /*nvfx->fallback_swrast |= NVFX_NEW_FRAGPROG;*/ return FALSE; } fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4); - nv30_fragprog_upload(nv30, fp); + nv30_fragprog_upload(nvfx, fp); - so = so_new(8, 1); - so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); + so = so_new(4, 4, 1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_ACTIVE_PROGRAM, 1); so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1); - so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_CONTROL, 1); so_data (so, fp->fp_control); - so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); - so_data (so, fp->fp_reg_control); - so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); + so_method(so, nvfx->screen->eng3d, NV34TCL_FP_REG_CONTROL, 1); + so_data (so, (1<<16)|0x4); + so_method(so, nvfx->screen->eng3d, NV34TCL_TX_UNITS_ENABLE, 1); so_data (so, fp->samplers); so_ref(so, &fp->so); so_ref(NULL, &so); @@ -843,7 +857,7 @@ update_constants: map = pipe_buffer_map(pscreen, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { - struct nv30_fragment_program_data *fpd = &fp->consts[i]; + struct nvfx_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; @@ -855,11 +869,11 @@ update_constants: pipe_buffer_unmap(pscreen, constbuf); if (new_consts) - nv30_fragprog_upload(nv30, fp); + nv30_fragprog_upload(nvfx, fp); } - if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { - so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); + if (new_consts || fp->so != nvfx->state.hw[NVFX_STATE_FRAGPROG]) { + so_ref(fp->so, &nvfx->state.hw[NVFX_STATE_FRAGPROG]); return TRUE; } @@ -867,17 +881,23 @@ update_constants: } void -nv30_fragprog_destroy(struct nv30_context *nv30, - struct nv30_fragment_program *fp) +nv30_fragprog_destroy(struct nvfx_context *nvfx, + struct nvfx_fragment_program *fp) { + if (fp->buffer) + pipe_buffer_reference(&fp->buffer, NULL); + + if (fp->so) + so_ref(NULL, &fp->so); + if (fp->insn_len) FREE(fp->insn); } -struct nv30_state_entry nv30_state_fragprog = { +struct nvfx_state_entry nv30_state_fragprog = { .validate = nv30_fragprog_validate, .dirty = { - .pipe = NV30_NEW_FRAGPROG, - .hw = NV30_STATE_FRAGPROG + .pipe = NVFX_NEW_FRAGPROG, + .hw = NVFX_STATE_FRAGPROG } };