X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnvfx%2Fnvfx_vertprog.c;h=939d2b83aee4ab3feda1e00aadd5afaabb1c2b4c;hb=8eb0fc430a8c1687627156a06faf5762144022f3;hp=2d243be16a35ff1ce245f0b0917292b2af308c4b;hpb=e5c7d1e1c8ccb493c63e33d017c28b5cf4a55829;p=mesa.git diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c index 2d243be16a3..939d2b83aee 100644 --- a/src/gallium/drivers/nvfx/nvfx_vertprog.c +++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c @@ -10,6 +10,7 @@ #include "nvfx_context.h" #include "nvfx_state.h" +#include "nvfx_resource.h" /* TODO (at least...): * 1. Indexed consts + ARL @@ -299,7 +300,13 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT))); if(!nvfx->is_nv4x) { - hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + if(slot == 0) + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); + else + { + hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT); + hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT); + } // hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK); // hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT)); @@ -334,7 +341,7 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op, static INLINE struct nvfx_sreg tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { - struct nvfx_sreg src; + struct nvfx_sreg src = { 0 }; switch (fsrc->Register.File) { case TGSI_FILE_INPUT: @@ -365,7 +372,7 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) { static INLINE struct nvfx_sreg tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) { - struct nvfx_sreg dst; + struct nvfx_sreg dst = { 0 }; switch (fdst->Register.File) { case TGSI_FILE_OUTPUT: @@ -479,6 +486,9 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, case TGSI_OPCODE_ARL: arith(vpc, VEC, ARL, dst, mask, src[0], none, none); break; + case TGSI_OPCODE_COS: + arith(vpc, SCA, COS, dst, mask, none, none, src[0]); + break; case TGSI_OPCODE_DP3: arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none); break; @@ -512,6 +522,11 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, case TGSI_OPCODE_LOG: arith(vpc, SCA, LOG, dst, mask, none, none, src[0]); break; + case TGSI_OPCODE_LRP: + tmp = temp(vpc); + arith(vpc, VEC, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(vpc, VEC, MAD, dst, mask, src[0], src[1], tmp); + break; case TGSI_OPCODE_MAD: arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]); break; @@ -544,15 +559,36 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, case TGSI_OPCODE_RSQ: arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0])); break; + case TGSI_OPCODE_SEQ: + arith(vpc, VEC, SEQ, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SFL: + arith(vpc, VEC, SFL, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SGE: arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none); break; case TGSI_OPCODE_SGT: arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_SIN: + arith(vpc, SCA, SIN, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SLE: + arith(vpc, VEC, SLE, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SLT: arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none); break; + case TGSI_OPCODE_SNE: + arith(vpc, VEC, SNE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SSG: + arith(vpc, VEC, SSG, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_STR: + arith(vpc, VEC, STR, dst, mask, src[0], src[1], none); + break; case TGSI_OPCODE_SUB: arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1])); break; @@ -830,15 +866,15 @@ out_err: FREE(vpc); } -static boolean +boolean nvfx_vertprog_validate(struct nvfx_context *nvfx) { - struct pipe_screen *pscreen = nvfx->pipe.screen; + struct pipe_context *pipe = &nvfx->pipe; struct nvfx_screen *screen = nvfx->screen; struct nouveau_channel *chan = screen->base.channel; struct nouveau_grobj *eng3d = screen->eng3d; struct nvfx_vertex_program *vp; - struct pipe_buffer *constbuf; + struct pipe_resource *constbuf; boolean upload_code = FALSE, upload_data = FALSE; int i; @@ -846,6 +882,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) vp = nvfx->vertprog; constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX]; + // TODO: ouch! can't we just use constant slots for these?! if ((nvfx->dirty & NVFX_NEW_UCP) || memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) { nvfx_vertprog_destroy(nvfx, vp); @@ -857,21 +894,19 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx) } /* Translate TGSI shader into hw bytecode */ - if (vp->translated) - goto check_gpu_resources; - - nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; + if (!vp->translated) + { + nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG; nvfx_vertprog_translate(nvfx, vp); - if (!vp->translated) { - nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; + if (!vp->translated) { + nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG; return FALSE; + } } -check_gpu_resources: /* Allocate hw vtxprog exec slots */ if (!vp->exec) { struct nouveau_resource *heap = nvfx->screen->vp_exec_heap; - struct nouveau_stateobj *so; uint vplen = vp->nr_insns; if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) { @@ -886,19 +921,6 @@ check_gpu_resources: assert(0); } - so = so_new(3, 4, 0); - so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1); - so_data (so, vp->exec->start); - if(nvfx->is_nv4x) { - so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2); - so_data (so, vp->ir); - so_data (so, vp->or); - } - so_method(so, eng3d, NV34TCL_VP_CLIP_PLANES_ENABLE, 1); - so_data (so, vp->clip_ctrl); - so_ref(so, &vp->so); - so_ref(NULL, &so); - upload_code = TRUE; } @@ -961,10 +983,8 @@ check_gpu_resources: if (vp->nr_consts) { float *map = NULL; - if (constbuf) { - map = pipe_buffer_map(pscreen, constbuf, - PIPE_BUFFER_USAGE_CPU_READ); - } + if (constbuf) + map = nvfx_buffer(constbuf)->data; for (i = 0; i < vp->nr_consts; i++) { struct nvfx_vertex_program_data *vpd = &vp->consts[i]; @@ -982,9 +1002,6 @@ check_gpu_resources: OUT_RING (chan, i + vp->data->start); OUT_RINGp (chan, (uint32_t *)vpd->value, 4); } - - if (constbuf) - pipe_buffer_unmap(pscreen, constbuf); } /* Upload vtxprog */ @@ -1005,12 +1022,21 @@ check_gpu_resources: } } - if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) { - so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]); - return TRUE; + if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP)) + { + WAIT_RING(chan, 7); + OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1)); + OUT_RING(chan, vp->exec->start); + if(nvfx->is_nv4x) { + OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 2)); + OUT_RING(chan, vp->ir); + OUT_RING(chan, vp->or); + } + OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1)); + OUT_RING(chan, vp->clip_ctrl); } - return FALSE; + return TRUE; } void @@ -1037,13 +1063,4 @@ nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp) vp->data_start_min = 0; vp->ir = vp->or = vp->clip_ctrl = 0; - so_ref(NULL, &vp->so); } - -struct nvfx_state_entry nvfx_state_vertprog = { - .validate = nvfx_vertprog_validate, - .dirty = { - .pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP, - .hw = NVFX_STATE_VERTPROG, - } -};