From: Ben Skeggs Date: Mon, 27 Nov 2006 01:57:37 +0000 (+0000) Subject: - Add InitInstruction to hw shader backend, and remove SetUnusedSource. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=98818f159baeaeba45d656d612b64b2f22c63753;p=mesa.git - Add InitInstruction to hw shader backend, and remove SetUnusedSource. - NV30FP/NV40VP: Clear any fields before we OR new values into them - NV40VP: It seems that it might be possible to write a result reg at the same time a temp is written. In InitInstruction, initialise OUT_DEST to OUT_DEST_TEMP so result regs don't get clobbered by default. --- diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader.h b/src/mesa/drivers/dri/nouveau/nouveau_shader.h index fac8851a578..a1e7794487c 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader.h @@ -256,6 +256,7 @@ struct _nvsFunc { struct _op_xlat*(*GetOPTXRec) (nvsFunc *, int merged); struct _op_xlat*(*GetOPTXFromSOP) (nvsOpcode, int *id); + void (*InitInstruction) (nvsFunc *); int (*SupportsOpcode) (nvsFunc *, nvsOpcode); void (*SetOpcode) (nvsFunc *, unsigned int opcode, int slot); @@ -265,7 +266,6 @@ struct _nvsFunc { void (*SetResult) (nvsFunc *, nvsRegister *, unsigned int mask, int slot); void (*SetSource) (nvsFunc *, nvsRegister *, int pos); - void (*SetUnusedSource) (nvsFunc *, int pos); void (*SetTexImageUnit) (nvsFunc *, int unit); void (*SetSaturate) (nvsFunc *); void (*SetLastInst) (nvsFunc *); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c index 1cb0ca490e2..b39f4668b93 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_shader_2.c @@ -34,6 +34,8 @@ #include "macros.h" #include "enums.h" +#include "program.h" + #include "nouveau_shader.h" struct pass2_rec { @@ -100,7 +102,7 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, nvsSwzComp default_swz[4] = { NVS_SWZ_X, NVS_SWZ_Y, NVS_SWZ_Z, NVS_SWZ_W }; nvsFunc *shader = nvs->func; nvsRegister reg; - int i, srcpos_used = ~7; + int i; shader->SetOpcode(shader, op->NV, slot); if (inst->saturate ) shader->SetSaturate(shader); @@ -129,7 +131,6 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, if (reg.file == NVS_FILE_ATTRIB) nvs->inputs_read |= (1 << reg.index); shader->SetSource(shader, ®, op->srcpos[i]); - srcpos_used |= (1<srcpos[i]); if (reg.file == NVS_FILE_CONST && shader->GetSourceConstVal) { int idx_slot = nvs->params[reg.index].hw_index_cnt++; nvs->params[reg.index].hw_index = realloc( @@ -138,10 +139,6 @@ pass2_add_instruction(nvsPtr nvs, nvsInstruction *inst, } } } - for (i = 0; i < 3; i++) { - if (!(srcpos_used & (1<SetUnusedSource(shader, i); - } reg = pass2_mangle_reg(nvs, inst, inst->dest); if (reg.file == NVS_FILE_RESULT) @@ -153,9 +150,9 @@ static int pass2_assemble_instruction(nvsPtr nvs, nvsInstruction *inst, int last) { nvsFunc *shader = nvs->func; - struct _op_xlat *op, *op2; - unsigned int hw_inst[8] = {0,0,0,0,0,0,0,0,0}; - int slot, slot2; + struct _op_xlat *op; + unsigned int hw_inst[8]; + int slot; int instsz; int i; @@ -164,6 +161,7 @@ pass2_assemble_instruction(nvsPtr nvs, nvsInstruction *inst, int last) /* Assemble this instruction */ if (!(op = shader->GetOPTXFromSOP(inst->op, &slot))) return 0; + shader->InitInstruction(shader); pass2_add_instruction(nvs, inst, op, slot); if (last) shader->SetLastInst(shader); diff --git a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c index 46391eb911f..1c2664ec707 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_fragprog.c +++ b/src/mesa/drivers/dri/nouveau/nv30_fragprog.c @@ -61,7 +61,7 @@ static void NV30FPUpdateConst(GLcontext *ctx, nouveauShader *nvs, int id) { uint32_t *new = nvs->params[id].source_val ? - nvs->params[id].source_val : nvs->params[id].val; + (uint32_t*)nvs->params[id].source_val : (uint32_t*)nvs->params[id].val; uint32_t *current; int i; @@ -101,6 +101,7 @@ NV30FPSupportsOpcode(nvsFunc *shader, nvsOpcode op) static void NV30FPSetOpcode(nvsFunc *shader, unsigned int opcode, int slot) { + shader->inst[0] &= ~NV30_FP_OP_OPCODE_MASK; shader->inst[0] |= (opcode << NV30_FP_OP_OPCODE_SHIFT); } @@ -139,7 +140,10 @@ NV30FPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg, break; } + shader->inst[1] &= ~NV30_FP_OP_COND_MASK; shader->inst[1] |= (hwcond << NV30_FP_OP_COND_SHIFT); + + shader->inst[1] &= ~NV30_FP_OP_COND_SWZ_ALL_MASK; shader->inst[1] |= (swz[NVS_SWZ_X] << NV30_FP_OP_COND_SWZ_X_SHIFT); shader->inst[1] |= (swz[NVS_SWZ_Y] << NV30_FP_OP_COND_SWZ_Y_SHIFT); shader->inst[1] |= (swz[NVS_SWZ_Z] << NV30_FP_OP_COND_SWZ_Z_SHIFT); @@ -149,7 +153,7 @@ NV30FPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg, static void NV30FPSetResult(nvsFunc *shader, nvsRegister *reg, unsigned int mask, int slot) { - unsigned int hwreg, hwmask = 0; + unsigned int hwreg; if (mask & SMASK_X) shader->inst[0] |= NV30_FP_OP_OUT_X; if (mask & SMASK_Y) shader->inst[0] |= NV30_FP_OP_OUT_Y; @@ -160,8 +164,11 @@ NV30FPSetResult(nvsFunc *shader, nvsRegister *reg, unsigned int mask, int slot) hwreg = 0; /* FIXME: this is only fragment.color */ /* This is *not* correct, I have no idea what it is either */ shader->inst[0] |= NV30_FP_OP_UNK0_7; - } else + } else { + shader->inst[0] &= ~NV30_FP_OP_UNK0_7; hwreg = reg->index; + } + shader->inst[0] &= ~NV30_FP_OP_OUT_REG_SHIFT; shader->inst[0] |= (hwreg << NV30_FP_OP_OUT_REG_SHIFT); } @@ -197,6 +204,7 @@ NV30FPSetSource(nvsFunc *shader, nvsRegister *reg, int pos) hwin = NV30_FP_OP_INPUT_SRC_COL0; break; } + shader->inst[0] &= ~NV30_FP_OP_INPUT_SRC_MASK; shader->inst[0] |= (hwin << NV30_FP_OP_INPUT_SRC_SHIFT); hwsrc |= (hwin << NV30_FP_REG_SRC_SHIFT); } @@ -220,24 +228,14 @@ NV30FPSetSource(nvsFunc *shader, nvsRegister *reg, int pos) hwsrc |= (reg->swizzle[NVS_SWZ_Z] << NV30_FP_REG_SWZ_Z_SHIFT); hwsrc |= (reg->swizzle[NVS_SWZ_W] << NV30_FP_REG_SWZ_W_SHIFT); + shader->inst[pos+1] &= ~NV30_FP_REG_ALL_MASK; shader->inst[pos+1] |= hwsrc; } -static void -NV30FPSetUnusedSource(nvsFunc *shader, int pos) -{ - shader->inst[pos+1] |= ( - (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT) | - (NVS_SWZ_X << NV30_FP_REG_SWZ_X_SHIFT) | - (NVS_SWZ_Y << NV30_FP_REG_SWZ_Y_SHIFT) | - (NVS_SWZ_Z << NV30_FP_REG_SWZ_Z_SHIFT) | - (NVS_SWZ_W << NV30_FP_REG_SWZ_W_SHIFT) - ); -} - static void NV30FPSetTexImageUnit(nvsFunc *shader, int unit) { + shader->inst[0] &= ~NV30_FP_OP_TEX_UNIT_SHIFT; shader->inst[0] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); } @@ -247,11 +245,27 @@ NV30FPSetSaturate(nvsFunc *shader) shader->inst[0] |= NV30_FP_OP_OUT_SAT; } +static void +NV30FPInitInstruction(nvsFunc *shader) +{ + unsigned int hwsrc; + + shader->inst[0] = 0; + + hwsrc = (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT) | + (NVS_SWZ_X << NV30_FP_REG_SWZ_X_SHIFT) | + (NVS_SWZ_Y << NV30_FP_REG_SWZ_Y_SHIFT) | + (NVS_SWZ_Z << NV30_FP_REG_SWZ_Z_SHIFT) | + (NVS_SWZ_W << NV30_FP_REG_SWZ_W_SHIFT); + shader->inst[1] = hwsrc; + shader->inst[2] = hwsrc; + shader->inst[3] = hwsrc; +} + static void NV30FPSetLastInst(nvsFunc *shader) { - shader->inst[0] |= 1; - + shader->inst[0] |= 1; } /******************************************************************************* @@ -669,13 +683,13 @@ NV30FPInitShaderFuncs(nvsFunc * shader) shader->UploadToHW = NV30FPUploadToHW; shader->UpdateConst = NV30FPUpdateConst; + shader->InitInstruction = NV30FPInitInstruction; shader->SupportsOpcode = NV30FPSupportsOpcode; shader->SetOpcode = NV30FPSetOpcode; shader->SetCCUpdate = NV30FPSetCCUpdate; shader->SetCondition = NV30FPSetCondition; shader->SetResult = NV30FPSetResult; shader->SetSource = NV30FPSetSource; - shader->SetUnusedSource = NV30FPSetUnusedSource; shader->SetTexImageUnit = NV30FPSetTexImageUnit; shader->SetSaturate = NV30FPSetSaturate; shader->SetLastInst = NV30FPSetLastInst; diff --git a/src/mesa/drivers/dri/nouveau/nv30_shader.h b/src/mesa/drivers/dri/nouveau/nv30_shader.h index d0bf6399302..7a027dd4273 100644 --- a/src/mesa/drivers/dri/nouveau/nv30_shader.h +++ b/src/mesa/drivers/dri/nouveau/nv30_shader.h @@ -351,6 +351,7 @@ #define NV30_FP_OP_INDEX_INPUT (1 << 30) //== Register selection == +#define NV30_FP_REG_ALL_MASK (0x1FFFF<<0) #define NV30_FP_REG_TYPE_SHIFT 0 #define NV30_FP_REG_TYPE_MASK (3 << 0) # define NV30_FP_REG_TYPE_TEMP 0 diff --git a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c index 111c6de71b0..f2cb3fb166a 100644 --- a/src/mesa/drivers/dri/nouveau/nv40_vertprog.c +++ b/src/mesa/drivers/dri/nouveau/nv40_vertprog.c @@ -19,8 +19,13 @@ NV40VPSupportsOpcode(nvsFunc * shader, nvsOpcode op) static void NV40VPSetOpcode(nvsFunc *shader, unsigned int opcode, int slot) { - if (slot) shader->inst[1] |= (opcode << NV40_VP_INST_SCA_OPCODE_SHIFT); - else shader->inst[1] |= (opcode << NV40_VP_INST_VEC_OPCODE_SHIFT); + if (slot) { + shader->inst[1] &= ~NV40_VP_INST_SCA_OPCODE_MASK; + shader->inst[1] |= (opcode << NV40_VP_INST_SCA_OPCODE_SHIFT); + } else { + shader->inst[1] &= ~NV40_VP_INST_VEC_OPCODE_MASK; + shader->inst[1] |= (opcode << NV40_VP_INST_VEC_OPCODE_SHIFT); + } } static void @@ -36,7 +41,9 @@ NV40VPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg, unsigned int hwcond; if (on ) shader->inst[0] |= NV40_VP_INST_COND_TEST_ENABLE; + else shader->inst[0] &= ~NV40_VP_INST_COND_TEST_ENABLE; if (reg) shader->inst[0] |= NV40_VP_INST_COND_REG_SELECT_1; + else shader->inst[0] &= ~NV40_VP_INST_COND_REG_SELECT_1; switch (cond) { case NVS_COND_TR: hwcond = NV40_VP_INST_COND_TR; break; @@ -52,8 +59,10 @@ NV40VPSetCondition(nvsFunc *shader, int on, nvsCond cond, int reg, hwcond = NV40_VP_INST_COND_TR; break; } + shader->inst[0] &= ~NV40_VP_INST_COND_MASK; shader->inst[0] |= (hwcond << NV40_VP_INST_COND_SHIFT); + shader->inst[0] &= ~NV40_VP_INST_COND_SWZ_ALL_MASK; shader->inst[0] |= (swizzle[NVS_SWZ_X] << NV40_VP_INST_COND_SWZ_X_SHIFT); shader->inst[0] |= (swizzle[NVS_SWZ_Y] << NV40_VP_INST_COND_SWZ_Y_SHIFT); shader->inst[0] |= (swizzle[NVS_SWZ_Z] << NV40_VP_INST_COND_SWZ_Z_SHIFT); @@ -95,25 +104,31 @@ NV40VPSetResult(nvsFunc *shader, nvsRegister * dest, unsigned int mask, hwidx = 0; break; } + shader->inst[3] &= ~NV40_VP_INST_DEST_MASK; shader->inst[3] |= (hwidx << NV40_VP_INST_DEST_SHIFT); - if (slot) { - shader->inst[3] |= NV40_VP_INST_SCA_RESULT; - shader->inst[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; - } else { - shader->inst[0] |= NV40_VP_INST_VEC_RESULT; - shader->inst[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); - } + if (slot) shader->inst[3] |= NV40_VP_INST_SCA_RESULT; + else shader->inst[0] |= NV40_VP_INST_VEC_RESULT; } else { /* NVS_FILE_TEMP || NVS_FILE_ADDRESS */ - if (slot) + if (slot) { + shader->inst[3] &= ~NV40_VP_INST_SCA_RESULT; + shader->inst[3] &= ~NV40_VP_INST_SCA_DEST_TEMP_MASK; shader->inst[3] |= (dest->index << NV40_VP_INST_SCA_DEST_TEMP_SHIFT); - else + } else { + shader->inst[0] &= ~NV40_VP_INST_VEC_RESULT; + shader->inst[0] &= ~(NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20)); shader->inst[0] |= (dest->index << NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } } - if (slot) shader->inst[3] |= (hwmask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); - else shader->inst[3] |= (hwmask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + if (slot) { + shader->inst[3] &= ~NV40_VP_INST_SCA_WRITEMASK_MASK; + shader->inst[3] |= (hwmask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } else { + shader->inst[3] &= ~NV40_VP_INST_VEC_WRITEMASK_MASK; + shader->inst[3] |= (hwmask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } } static void @@ -121,6 +136,8 @@ NV40VPInsertSource(nvsFunc *shader, unsigned int hw, int pos) { switch (pos) { case 0: + shader->inst[1] &= ~NV40_VP_INST_SRC0H_MASK; + shader->inst[2] &= ~NV40_VP_INST_SRC0L_MASK; shader->inst[1] |= ((hw & NV40_VP_SRC0_HIGH_MASK) >> NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; @@ -128,10 +145,13 @@ NV40VPInsertSource(nvsFunc *shader, unsigned int hw, int pos) << NV40_VP_INST_SRC0L_SHIFT; break; case 1: + shader->inst[2] &= ~NV40_VP_INST_SRC1_MASK; shader->inst[2] |= hw << NV40_VP_INST_SRC1_SHIFT; break; case 2: + shader->inst[2] &= ~NV40_VP_INST_SRC2H_MASK; + shader->inst[3] &= ~NV40_VP_INST_SRC2L_MASK; shader->inst[2] |= ((hw & NV40_VP_SRC2_HIGH_MASK) >> NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; @@ -155,24 +175,34 @@ NV40VPSetSource(nvsFunc *shader, nvsRegister * src, int pos) case NVS_FILE_ATTRIB: hw |= (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT); + shader->inst[1] &= ~NV40_VP_INST_INPUT_SRC_MASK; shader->inst[1] |= (src->index << NV40_VP_INST_INPUT_SRC_SHIFT); if (src->indexed) { shader->inst[0] |= NV40_VP_INST_INDEX_INPUT; if (src->addr_reg) shader->inst[0] |= NV40_VP_INST_ADDR_REG_SELECT_1; + else + shader->inst[0] &= ~NV40_VP_INST_ADDR_REG_SELECT_1; + shader->inst[0] &= ~NV40_VP_INST_ADDR_SWZ_SHIFT; shader->inst[0] |= (src->addr_comp << NV40_VP_INST_ADDR_SWZ_SHIFT); - } + } else + shader->inst[0] &= ~NV40_VP_INST_INDEX_INPUT; break; case NVS_FILE_CONST: hw |= (NV40_VP_SRC_REG_TYPE_CONST << NV40_VP_SRC_REG_TYPE_SHIFT); + shader->inst[1] &= ~NV40_VP_INST_CONST_SRC_MASK; shader->inst[1] |= (src->index << NV40_VP_INST_CONST_SRC_SHIFT); if (src->indexed) { shader->inst[3] |= NV40_VP_INST_INDEX_CONST; if (src->addr_reg) shader->inst[0] |= NV40_VP_INST_ADDR_REG_SELECT_1; + else + shader->inst[0] &= ~NV40_VP_INST_ADDR_REG_SELECT_1; + shader->inst[0] &= ~NV40_VP_INST_ADDR_SWZ_MASK; shader->inst[0] |= (src->addr_comp << NV40_VP_INST_ADDR_SWZ_SHIFT); - } + } else + shader->inst[3] &= ~NV40_VP_INST_INDEX_CONST; break; case NVS_FILE_TEMP: hw |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); @@ -189,6 +219,8 @@ NV40VPSetSource(nvsFunc *shader, nvsRegister * src, int pos) hw |= NV40_VP_SRC_NEGATE; if (src->abs) shader->inst[0] |= (1 << (21 + pos)); + else + shader->inst[0] &= ~(1 << (21 + pos)); hw |= (src->swizzle[0] << NV40_VP_SRC_SWZ_X_SHIFT); hw |= (src->swizzle[1] << NV40_VP_SRC_SWZ_Y_SHIFT); hw |= (src->swizzle[2] << NV40_VP_SRC_SWZ_Z_SHIFT); @@ -199,21 +231,30 @@ NV40VPSetSource(nvsFunc *shader, nvsRegister * src, int pos) } static void -NV40VPSetUnusedSource(nvsFunc *shader, int pos) +NV40VPInitInstruction(nvsFunc *shader) { - unsigned int hw; - - hw = ((NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT) | - (NVS_SWZ_X << NV40_VP_SRC_SWZ_X_SHIFT) | - (NVS_SWZ_Y << NV40_VP_SRC_SWZ_Y_SHIFT) | - (NVS_SWZ_Z << NV40_VP_SRC_SWZ_Z_SHIFT) | - (NVS_SWZ_W << NV40_VP_SRC_SWZ_W_SHIFT)); - - NV40VPInsertSource(shader, hw, pos); + unsigned int hwsrc = 0; + + shader->inst[0] = /*NV40_VP_INST_VEC_RESULT | */ + NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + shader->inst[1] = 0; + shader->inst[2] = 0; + shader->inst[3] = NV40_VP_INST_SCA_RESULT | + NV40_VP_INST_SCA_DEST_TEMP_MASK | + NV40_VP_INST_DEST_MASK; + + hwsrc = (NV40_VP_SRC_REG_TYPE_INPUT << NV40_VP_SRC_REG_TYPE_SHIFT) | + (NVS_SWZ_X << NV40_VP_SRC_SWZ_X_SHIFT) | + (NVS_SWZ_Y << NV40_VP_SRC_SWZ_Y_SHIFT) | + (NVS_SWZ_Z << NV40_VP_SRC_SWZ_Z_SHIFT) | + (NVS_SWZ_W << NV40_VP_SRC_SWZ_W_SHIFT); + NV40VPInsertSource(shader, hwsrc, 0); + NV40VPInsertSource(shader, hwsrc, 1); + NV40VPInsertSource(shader, hwsrc, 2); } static void -NV40VPSetLastInst(nvsFunc *shader, int pos) +NV40VPSetLastInst(nvsFunc *shader) { shader->inst[3] |= 1; } @@ -611,13 +652,13 @@ NV40VPInitShaderFuncs(nvsFunc * shader) MOD_OPCODE(NVVP_TX_SOP, NV40_VP_INST_OP_PUSHA, NVS_OP_PUSHA, 3, -1, -1); MOD_OPCODE(NVVP_TX_SOP, NV40_VP_INST_OP_POPA , NVS_OP_POPA , -1, -1, -1); + shader->InitInstruction = NV40VPInitInstruction; shader->SupportsOpcode = NV40VPSupportsOpcode; shader->SetOpcode = NV40VPSetOpcode; shader->SetCCUpdate = NV40VPSetCCUpdate; shader->SetCondition = NV40VPSetCondition; shader->SetResult = NV40VPSetResult; shader->SetSource = NV40VPSetSource; - shader->SetUnusedSource = NV40VPSetUnusedSource; shader->SetLastInst = NV40VPSetLastInst; shader->HasMergedInst = NV40VPHasMergedInst;