From b946984e3bbd91da3111edd0d62f90cfd4967ad3 Mon Sep 17 00:00:00 2001 From: Luca Barbieri Date: Fri, 3 Sep 2010 18:31:18 +0200 Subject: [PATCH] nvfx: support indirect addressing in vps Negative or huge offsets not yet supported. --- src/gallium/drivers/nvfx/nv30_vertprog.h | 6 ++ src/gallium/drivers/nvfx/nvfx_screen.c | 11 ++- src/gallium/drivers/nvfx/nvfx_shader.h | 7 +- src/gallium/drivers/nvfx/nvfx_vertprog.c | 99 ++++++++++++++++++------ 4 files changed, 96 insertions(+), 27 deletions(-) diff --git a/src/gallium/drivers/nvfx/nv30_vertprog.h b/src/gallium/drivers/nvfx/nv30_vertprog.h index 9a68f5c1fb0..e8c16b0341a 100644 --- a/src/gallium/drivers/nvfx/nv30_vertprog.h +++ b/src/gallium/drivers/nvfx/nv30_vertprog.h @@ -60,6 +60,9 @@ /* DWORD 0 */ +/* guess that this is the same as nv40 */ +#define NV30_VP_INST_INDEX_INPUT (1 << 27) + #define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) #define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ #define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ @@ -136,6 +139,9 @@ # define NV30_VP_INST_DEST_TC(n) (8+(n)) # define NV30_VP_INST_DEST_CLP(n) (17 + (n)) +/* guess that this is the same as nv40 */ +#define NV30_VP_INST_INDEX_CONST (1 << 1) + /* Useful to split the source selection regs into their pieces */ #define NV30_VP_SRC0_HIGH_SHIFT 6 #define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c index affed961d46..42094227e1c 100644 --- a/src/gallium/drivers/nvfx/nvfx_screen.c +++ b/src/gallium/drivers/nvfx/nvfx_screen.c @@ -110,7 +110,8 @@ nvfx_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_MAX_VS_INPUTS: return 16; case PIPE_CAP_MAX_VS_CONSTS: - return 256; + /* XXX: currently more don't work, but it should be possible to make it work */ + return 212 - 6; case PIPE_CAP_MAX_VS_TEMPS: return screen->is_nv4x ? 
32 : 13; case PIPE_CAP_MAX_VS_ADDRS: @@ -487,7 +488,13 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev) /* Vtxprog resources */ if (nouveau_resource_init(&screen->vp_exec_heap, 0, screen->is_nv4x ? 512 : 256) || - nouveau_resource_init(&screen->vp_data_heap, 0, 256)) { + /* XXX: this should actually be 468 or 256, but apparently indirect addressing + * cannot read consts starting from 212 on nv40. + * It looks like 44 slots are reserved for something, and there is a "mode switch" + * from 256 slots to 512 slots that we are setting to "256 mode" on nv40, leading + * to 212 = 256 - 44 instead of 468 = 512 - 44 usable slots. + */ + nouveau_resource_init(&screen->vp_data_heap, 0, 212)) { nvfx_screen_destroy(pscreen); return NULL; } diff --git a/src/gallium/drivers/nvfx/nvfx_shader.h b/src/gallium/drivers/nvfx/nvfx_shader.h index 35006eec3d4..e642a27af86 100644 --- a/src/gallium/drivers/nvfx/nvfx_shader.h +++ b/src/gallium/drivers/nvfx/nvfx_shader.h @@ -414,14 +414,16 @@ #define abs(s) nvfx_src_abs((s)) struct nvfx_reg { - uint8_t type; + int8_t type; uint32_t index; }; struct nvfx_src { struct nvfx_reg reg; - /* src only */ + uint8_t indirect : 1; + uint8_t indirect_reg : 1; + uint8_t indirect_swz : 2; uint8_t negate : 1; uint8_t abs : 1; uint8_t swz[4]; @@ -483,6 +485,7 @@ nvfx_src(struct nvfx_reg reg) .abs = 0, .negate = 0, .swz = { 0, 1, 2, 3 }, + .indirect = 0, }; return temp; } diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index ea7e88c5613..838c3aa208b 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -46,6 +46,7 @@ struct nvfx_vpc { struct nvfx_reg r_result[PIPE_MAX_SHADER_OUTPUTS]; struct nvfx_reg *r_address; struct nvfx_reg *r_temp; + struct nvfx_reg *r_const; struct nvfx_reg *imm; unsigned nr_imm; @@ -152,6 +153,18 @@ emit_src(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, uint32_t *hw, int pos, (src.swz[2] << 
NVFX_VP(SRC_SWZ_Z_SHIFT)) | (src.swz[3] << NVFX_VP(SRC_SWZ_W_SHIFT))); + if(src.indirect) { + if(src.reg.type == NVFXSR_CONST) + hw[3] |= NVFX_VP(INST_INDEX_CONST); + else if(src.reg.type == NVFXSR_INPUT) + hw[0] |= NVFX_VP(INST_INDEX_INPUT); + else + assert(0); + if(src.indirect_reg) + hw[0] |= NVFX_VP(INST_ADDR_REG_SELECT_1); + hw[0] |= src.indirect_swz << NVFX_VP(INST_ADDR_SWZ_SHIFT); + } + switch (pos) { case 0: hw[1] |= ((sr & NVFX_VP(SRC0_HIGH_MASK)) >> @@ -317,6 +330,9 @@ nvfx_vp_emit(struct nvfx_vpc *vpc, struct nvfx_insn insn) emit_src(nvfx, vpc, hw, 0, insn.src[0]); emit_src(nvfx, vpc, hw, 1, insn.src[1]); emit_src(nvfx, vpc, hw, 2, insn.src[2]); + +// if(insn.src[0].indirect || op == NVFX_VP_INST_VEC_OP_ARL) +// hw[3] |= NV40_VP_INST_SCA_RESULT; } static inline struct nvfx_src @@ -328,7 +344,7 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { src.reg = nvfx_reg(NVFXSR_INPUT, fsrc->Register.Index); break; case TGSI_FILE_CONSTANT: - src.reg = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0); + src.reg = vpc->r_const[fsrc->Register.Index]; break; case TGSI_FILE_IMMEDIATE: src.reg = vpc->imm[fsrc->Register.Index]; @@ -339,7 +355,7 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { default: NOUVEAU_ERR("bad src file\n"); src.reg.index = 0; - src.reg.type = 0; + src.reg.type = -1; break; } @@ -349,6 +365,22 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { src.swz[1] = fsrc->Register.SwizzleY; src.swz[2] = fsrc->Register.SwizzleZ; src.swz[3] = fsrc->Register.SwizzleW; + src.indirect = 0; + + if(fsrc->Register.Indirect) { + if(fsrc->Indirect.File == TGSI_FILE_ADDRESS && + (fsrc->Register.File == TGSI_FILE_CONSTANT || fsrc->Register.File == TGSI_FILE_INPUT)) + { + src.indirect = 1; + src.indirect_reg = fsrc->Indirect.Index; + src.indirect_swz = fsrc->Indirect.SwizzleX; + } + else + { + src.reg.index = 0; + src.reg.type = -1; + } + } return src; } @@ -461,6 +493,15 @@ 
nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, } } + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + if(src[i].reg.type < 0) + return FALSE; + } + + if(finst->Dst[0].Register.File == TGSI_FILE_ADDRESS && + finst->Instruction.Opcode != TGSI_OPCODE_ARL) + return FALSE; + dst = tgsi_dst(vpc, &finst->Dst[0]); mask = tgsi_mask(finst->Dst[0].Register.WriteMask); @@ -761,7 +802,7 @@ static boolean nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) { struct tgsi_parse_context p; - int high_temp = -1, high_addr = -1, nr_imm = 0, i; + int high_const = -1, high_temp = -1, high_addr = -1, nr_imm = 0, i; struct util_semantic_set set; unsigned char sem_layout[8]; unsigned num_outputs; @@ -814,14 +855,18 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) fdec->Range.Last; } break; -#if 0 /* this would be nice.. except gallium doesn't track it */ case TGSI_FILE_ADDRESS: if (fdec->Range.Last > high_addr) { high_addr = fdec->Range.Last; } break; -#endif + case TGSI_FILE_CONSTANT: + if (fdec->Range.Last > high_const) { + high_const = + fdec->Range.Last; + } + break; case TGSI_FILE_OUTPUT: if (!nvfx_vertprog_parse_decl_output(nvfx, vpc, fdec)) return FALSE; @@ -831,23 +876,6 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) } } break; -#if 1 /* yay, parse instructions looking for address regs instead */ - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - const struct tgsi_full_instruction *finst; - const struct tgsi_full_dst_register *fdst; - - finst = &p.FullToken.FullInstruction; - fdst = &finst->Dst[0]; - - if (fdst->Register.File == TGSI_FILE_ADDRESS) { - if (fdst->Register.Index > high_addr) - high_addr = fdst->Register.Index; - } - - } - break; -#endif default: break; } @@ -868,7 +896,13 @@ nvfx_vertprog_prepare(struct nvfx_context* nvfx, struct nvfx_vpc *vpc) if (++high_addr) { vpc->r_address = CALLOC(high_addr, sizeof(struct nvfx_reg)); for (i = 0; i < high_addr; i++) - 
vpc->r_address[i] = temp(vpc); + vpc->r_address[i] = nvfx_reg(NVFXSR_TEMP, i); + } + + if(++high_const) { + vpc->r_const = CALLOC(high_const, sizeof(struct nvfx_reg)); + for (i = 0; i < high_const; i++) + vpc->r_const[i] = constant(vpc, i, 0, 0, 0, 0); } vpc->r_temps_discard = 0; @@ -1037,6 +1071,8 @@ out_err: FREE(vpc->r_temp); if (vpc->r_address) FREE(vpc->r_address); + if (vpc->r_const) + FREE(vpc->r_const); if (vpc->imm) FREE(vpc->imm); FREE(vpc); @@ -1116,6 +1152,8 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) } } + //printf("start at %u nc %u\n", vp->data->start, vp->nr_consts); + /*XXX: handle this some day */ assert(vp->data->start >= vp->data_start_min); @@ -1161,6 +1199,8 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) struct nvfx_relocation* reloc = (struct nvfx_relocation*)((char*)vp->const_relocs.data + i); struct nvfx_vertex_program_exec *vpi = &vp->insns[reloc->location]; + //printf("reloc %i to %i + %i\n", reloc->location, vp->data->start, reloc->target); + vpi->data[1] &= ~NVFX_VP(INST_CONST_SRC_MASK); vpi->data[1] |= (reloc->target + vp->data->start) << @@ -1178,6 +1218,16 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) if (constbuf) map = (float*)nvfx_buffer(constbuf)->data; + /* + for (i = 0; i < 512; i++) { + float v[4] = {0.1, 0.2, 0.3, 0.4}; + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (chan, i); + OUT_RINGp (chan, (uint32_t *)v, 4); + printf("frob %i\n", i); + } + */ + for (i = nvfx->use_vp_clipping ? 
6 : 0; i < vp->nr_consts; i++) { struct nvfx_vertex_program_data *vpd = &vp->consts[i]; @@ -1190,6 +1240,8 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) 4 * sizeof(float)); } + //printf("upload into %i + %i: %f %f %f %f\n", vp->data->start, i, vpd->value[0], vpd->value[1], vpd->value[2], vpd->value[3]); + BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_CONST_ID, 5); OUT_RING (chan, i + vp->data->start); OUT_RINGp (chan, (uint32_t *)vpd->value, 4); @@ -1202,6 +1254,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) OUT_RING (chan, vp->exec->start); for (i = 0; i < vp->nr_insns; i++) { BEGIN_RING(chan, eng3d, NV34TCL_VP_UPLOAD_INST(0), 4); + //printf("%08x %08x %08x %08x\n", vp->insns[i].data[0], vp->insns[i].data[1], vp->insns[i].data[2], vp->insns[i].data[3]); OUT_RINGp (chan, vp->insns[i].data, 4); } vp->clip_nr = -1; -- 2.30.2