From 529dbbfcf7a674f2d82eed5e88ce92615721d5f2 Mon Sep 17 00:00:00 2001 From: Christoph Bumiller Date: Fri, 15 Mar 2013 23:39:01 +0100 Subject: [PATCH] nvc0: fix max varying count, move CLIPVERTEX,FOG out of the way The card spews an error if I use all 128 generic slots. Apparently the real limit isn't just dictated by the address space layout. --- .../nv50/codegen/nv50_ir_from_tgsi.cpp | 23 +++++++++++++++---- src/gallium/drivers/nvc0/nvc0_program.c | 13 +++++++---- src/gallium/drivers/nvc0/nvc0_screen.c | 12 ++++++++-- 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index 613187c0a12..b546429922d 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -1009,7 +1009,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) else info->out[dst.getIndex(0)].mask |= dst.getMask(); - if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE) + if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE || + info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID || + info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG) info->out[dst.getIndex(0)].mask &= 1; if (isEdgeFlagPassthrough(insn)) @@ -1040,14 +1042,25 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) for (unsigned i = 0; i < info->numInputs; ++i) info->in[i].mask = 0xf; } else { + const int i = src.getIndex(0); for (unsigned c = 0; c < 4; ++c) { if (!(mask & (1 << c))) continue; int k = src.getSwizzle(c); - int i = src.getIndex(0); - if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X) - if (k <= TGSI_SWIZZLE_W) - info->in[i].mask |= 1 << k; + if (k <= TGSI_SWIZZLE_W) + info->in[i].mask |= 1 << k; + } + switch (info->in[i].sn) { + case TGSI_SEMANTIC_PSIZE: + case TGSI_SEMANTIC_PRIMID: + case TGSI_SEMANTIC_FOG: + info->in[i].mask &= 0x1; + break; + case TGSI_SEMANTIC_PCOORD: + info->in[i].mask &= 0x3; + break; + default: + break; } } } diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c index 2f1b41704a5..22dfaf91aa2 100644 --- a/src/gallium/drivers/nvc0/nvc0_program.c +++ b/src/gallium/drivers/nvc0/nvc0_program.c @@ -27,6 +27,9 @@ #include "nv50/codegen/nv50_ir_driver.h" #include "nve4_compute.h" +/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than + * 124 scalar varying values. + */ static uint32_t nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) { @@ -36,12 +39,12 @@ nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) case TGSI_SEMANTIC_PSIZE: return 0x06c; case TGSI_SEMANTIC_POSITION: return 0x070; case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; - case TGSI_SEMANTIC_FOG: return 0x270; + case TGSI_SEMANTIC_FOG: return 0x2e8; case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; - case TGSI_SEMANTIC_CLIPVERTEX: return 0x260; + case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; case TGSI_SEMANTIC_PCOORD: return 0x2e0; case NV50_SEMANTIC_TESSCOORD: return 0x2f0; case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; @@ -66,12 +69,12 @@ nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase) case TGSI_SEMANTIC_PSIZE: return 0x06c; case TGSI_SEMANTIC_POSITION: return 0x070; case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; - case TGSI_SEMANTIC_FOG: return 0x270; + case TGSI_SEMANTIC_FOG: return 0x2e8; case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; - case TGSI_SEMANTIC_CLIPVERTEX: return 0x260; + case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; case TGSI_SEMANTIC_EDGEFLAG: return ~0; default: @@ -440,7 +443,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) } else if (info->in[i].slot[0] >= (0x2c0 / 4) && info->in[i].slot[0] <= (0x2fc / 4)) { - fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x03ff0000; + fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; } else { if (info->in[i].slot[c] < (0x040 / 4) || info->in[i].slot[c] > (0x380 / 4)) diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c index e77b819b651..73cf0af4d1d 100644 --- a/src/gallium/drivers/nvc0/nvc0_screen.c +++ b/src/gallium/drivers/nvc0/nvc0_screen.c @@ -221,9 +221,17 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_MAX_INPUTS: if (shader == PIPE_SHADER_VERTEX) return 32; + /* NOTE: These only count our slots for GENERIC varyings. + * The address space may be larger, but the actual hard limit seems to be + * less than what the address space layout permits, so don't add TEXCOORD, + * COLOR, etc. here. + */ if (shader == PIPE_SHADER_FRAGMENT) - return (0x200 + 0x20 + 0x80) / 16; /* generic + colors + TexCoords */ - return (0x200 + 0x40 + 0x80) / 16; /* without 0x60 for per-patch inputs */ + return 0x1f0 / 16; + /* Actually this counts CLIPVERTEX, which occupies the last generic slot, + * and excludes 0x60 per-patch inputs. + */ + return 0x200 / 16; case PIPE_SHADER_CAP_MAX_CONSTS: return 65536 / 16; case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: -- 2.30.2