X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fnvfx%2Fnvfx_vertprog.c;h=939d2b83aee4ab3feda1e00aadd5afaabb1c2b4c;hb=8eb0fc430a8c1687627156a06faf5762144022f3;hp=2d243be16a35ff1ce245f0b0917292b2af308c4b;hpb=e5c7d1e1c8ccb493c63e33d017c28b5cf4a55829;p=mesa.git

diff --git a/src/gallium/drivers/nvfx/nvfx_vertprog.c b/src/gallium/drivers/nvfx/nvfx_vertprog.c
index 2d243be16a3..939d2b83aee 100644
--- a/src/gallium/drivers/nvfx/nvfx_vertprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_vertprog.c
@@ -10,6 +10,7 @@
 
 #include "nvfx_context.h"
 #include "nvfx_state.h"
+#include "nvfx_resource.h"
 
 /* TODO (at least...):
  *  1. Indexed consts  + ARL
@@ -299,7 +300,13 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op,
 		  (3 << NVFX_VP(INST_COND_SWZ_W_SHIFT)));
 
 	if(!nvfx->is_nv4x) {
-		hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+		if(slot == 0)
+			hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT);
+		else
+		{
+			hw[0] |= ((op >> 4) << NV30_VP_INST_SCA_OPCODEH_SHIFT);
+			hw[1] |= ((op & 0xf) << NV30_VP_INST_SCA_OPCODEL_SHIFT);
+		}
 //		hw[3] |= NVFX_VP(INST_SCA_DEST_TEMP_MASK);
 //		hw[3] |= (mask << NVFX_VP(INST_VEC_WRITEMASK_SHIFT));
 
@@ -334,7 +341,7 @@ nvfx_vp_arith(struct nvfx_context* nvfx, struct nvfx_vpc *vpc, int slot, int op,
 
 static INLINE struct nvfx_sreg
 tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
-	struct nvfx_sreg src;
+	struct nvfx_sreg src = { 0 };
 
 	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
@@ -365,7 +372,7 @@ tgsi_src(struct nvfx_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 
 static INLINE struct nvfx_sreg
 tgsi_dst(struct nvfx_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
-	struct nvfx_sreg dst;
+	struct nvfx_sreg dst = { 0 };
 
 	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
@@ -479,6 +486,9 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
 	case TGSI_OPCODE_ARL:
 		arith(vpc, VEC, ARL, dst, mask, src[0], none, none);
 		break;
+	case TGSI_OPCODE_COS:
+		arith(vpc, SCA, COS, dst, mask, none, none, src[0]);
+		break;
 	case TGSI_OPCODE_DP3:
 		arith(vpc, VEC, DP3, dst, mask, src[0], src[1], none);
 		break;
@@ -512,6 +522,11 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
 	case TGSI_OPCODE_LOG:
 		arith(vpc, SCA, LOG, dst, mask, none, none, src[0]);
 		break;
+	case TGSI_OPCODE_LRP:
+		tmp = temp(vpc);
+		arith(vpc, VEC, MAD, tmp, mask, neg(src[0]), src[2], src[2]);
+		arith(vpc, VEC, MAD, dst, mask, src[0], src[1], tmp);
+		break;
 	case TGSI_OPCODE_MAD:
 		arith(vpc, VEC, MAD, dst, mask, src[0], src[1], src[2]);
 		break;
@@ -544,15 +559,36 @@ nvfx_vertprog_parse_instruction(struct nvfx_context* nvfx, struct nvfx_vpc *vpc,
 	case TGSI_OPCODE_RSQ:
 		arith(vpc, SCA, RSQ, dst, mask, none, none, abs(src[0]));
 		break;
+	case TGSI_OPCODE_SEQ:
+		arith(vpc, VEC, SEQ, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SFL:
+		arith(vpc, VEC, SFL, dst, mask, src[0], src[1], none);
+		break;
 	case TGSI_OPCODE_SGE:
 		arith(vpc, VEC, SGE, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_SGT:
 		arith(vpc, VEC, SGT, dst, mask, src[0], src[1], none);
 		break;
+	case TGSI_OPCODE_SIN:
+		arith(vpc, SCA, SIN, dst, mask, none, none, src[0]);
+		break;
+	case TGSI_OPCODE_SLE:
+		arith(vpc, VEC, SLE, dst, mask, src[0], src[1], none);
+		break;
 	case TGSI_OPCODE_SLT:
 		arith(vpc, VEC, SLT, dst, mask, src[0], src[1], none);
 		break;
+	case TGSI_OPCODE_SNE:
+		arith(vpc, VEC, SNE, dst, mask, src[0], src[1], none);
+		break;
+	case TGSI_OPCODE_SSG:
+		arith(vpc, VEC, SSG, dst, mask, src[0], none, none); /* SSG is unary: only src[0] is meaningful, as for ARL */
+		break;
+	case TGSI_OPCODE_STR:
+		arith(vpc, VEC, STR, dst, mask, src[0], src[1], none);
+		break;
 	case TGSI_OPCODE_SUB:
 		arith(vpc, VEC, ADD, dst, mask, src[0], none, neg(src[1]));
 		break;
@@ -830,15 +866,15 @@ out_err:
 	FREE(vpc);
 }
 
-static boolean
+boolean
 nvfx_vertprog_validate(struct nvfx_context *nvfx)
 {
-	struct pipe_screen *pscreen = nvfx->pipe.screen;
+	struct pipe_context *pipe = &nvfx->pipe;
 	struct nvfx_screen *screen = nvfx->screen;
 	struct nouveau_channel *chan = screen->base.channel;
 	struct nouveau_grobj *eng3d = screen->eng3d;
 	struct nvfx_vertex_program *vp;
-	struct pipe_buffer *constbuf;
+	struct pipe_resource *constbuf;
 	boolean upload_code = FALSE, upload_data = FALSE;
 	int i;
 
@@ -846,6 +882,7 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 		vp = nvfx->vertprog;
 		constbuf = nvfx->constbuf[PIPE_SHADER_VERTEX];
 
+		// TODO: ouch! can't we just use constant slots for these?!
 		if ((nvfx->dirty & NVFX_NEW_UCP) ||
 		    memcmp(&nvfx->clip, &vp->ucp, sizeof(vp->ucp))) {
 			nvfx_vertprog_destroy(nvfx, vp);
@@ -857,21 +894,19 @@ nvfx_vertprog_validate(struct nvfx_context *nvfx)
 	}
 
 	/* Translate TGSI shader into hw bytecode */
-	if (vp->translated)
-		goto check_gpu_resources;
-
-	nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
+	if (!vp->translated)
+	{
+		nvfx->fallback_swtnl &= ~NVFX_NEW_VERTPROG;
 		nvfx_vertprog_translate(nvfx, vp);
-	if (!vp->translated) {
-		nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
+		if (!vp->translated) {
+			nvfx->fallback_swtnl |= NVFX_NEW_VERTPROG;
 			return FALSE;
+		}
 	}
 
-check_gpu_resources:
 	/* Allocate hw vtxprog exec slots */
 	if (!vp->exec) {
 		struct nouveau_resource *heap = nvfx->screen->vp_exec_heap;
-		struct nouveau_stateobj *so;
 		uint vplen = vp->nr_insns;
 
 		if (nouveau_resource_alloc(heap, vplen, vp, &vp->exec)) {
@@ -886,19 +921,6 @@ check_gpu_resources:
 				assert(0);
 		}
 
-		so = so_new(3, 4, 0);
-		so_method(so, eng3d, NV34TCL_VP_START_FROM_ID, 1);
-		so_data  (so, vp->exec->start);
-		if(nvfx->is_nv4x) {
-			so_method(so, eng3d, NV40TCL_VP_ATTRIB_EN, 2);
-			so_data  (so, vp->ir);
-			so_data  (so, vp->or);
-		}
-		so_method(so, eng3d,  NV34TCL_VP_CLIP_PLANES_ENABLE, 1);
-		so_data  (so, vp->clip_ctrl);
-		so_ref(so, &vp->so);
-		so_ref(NULL, &so);
-
 		upload_code = TRUE;
 	}
 
@@ -961,10 +983,8 @@ check_gpu_resources:
 	if (vp->nr_consts) {
 		float *map = NULL;
 
-		if (constbuf) {
-			map = pipe_buffer_map(pscreen, constbuf,
-					      PIPE_BUFFER_USAGE_CPU_READ);
-		}
+		if (constbuf)
+			map = nvfx_buffer(constbuf)->data;
 
 		for (i = 0; i < vp->nr_consts; i++) {
 			struct nvfx_vertex_program_data *vpd = &vp->consts[i];
@@ -982,9 +1002,6 @@ check_gpu_resources:
 			OUT_RING  (chan, i + vp->data->start);
 			OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
 		}
-
-		if (constbuf)
-			pipe_buffer_unmap(pscreen, constbuf);
 	}
 
 	/* Upload vtxprog */
@@ -1005,12 +1022,21 @@ check_gpu_resources:
 		}
 	}
 
-	if (vp->so != nvfx->state.hw[NVFX_STATE_VERTPROG]) {
-		so_ref(vp->so, &nvfx->state.hw[NVFX_STATE_VERTPROG]);
-		return TRUE;
+	if(nvfx->dirty & (NVFX_NEW_VERTPROG | NVFX_NEW_UCP))
+	{
+		WAIT_RING(chan, 7);
+		OUT_RING(chan, RING_3D(NV34TCL_VP_START_FROM_ID, 1));
+		OUT_RING(chan, vp->exec->start);
+		if(nvfx->is_nv4x) {
+			OUT_RING(chan, RING_3D(NV40TCL_VP_ATTRIB_EN, 2));
+			OUT_RING(chan, vp->ir);
+			OUT_RING(chan, vp->or);
+		}
+		OUT_RING(chan, RING_3D(NV34TCL_VP_CLIP_PLANES_ENABLE, 1));
+		OUT_RING(chan, vp->clip_ctrl);
 	}
 
-	return FALSE;
+	return TRUE;
 }
 
 void
@@ -1037,13 +1063,4 @@ nvfx_vertprog_destroy(struct nvfx_context *nvfx, struct nvfx_vertex_program *vp)
 	vp->data_start_min = 0;
 
 	vp->ir = vp->or = vp->clip_ctrl = 0;
-	so_ref(NULL, &vp->so);
 }
-
-struct nvfx_state_entry nvfx_state_vertprog = {
-	.validate = nvfx_vertprog_validate,
-	.dirty = {
-		.pipe = NVFX_NEW_VERTPROG | NVFX_NEW_UCP,
-		.hw = NVFX_STATE_VERTPROG,
-	}
-};