nvc0: fix max varying count, move CLIPVERTEX,FOG out of the way
author Christoph Bumiller <e0425955@student.tuwien.ac.at>
Fri, 15 Mar 2013 22:39:01 +0000 (23:39 +0100)
committer Christoph Bumiller <e0425955@student.tuwien.ac.at>
Wed, 20 Mar 2013 11:25:21 +0000 (12:25 +0100)
The card spews an error if I use all 128 generic slots.
Apparently the real limit isn't just dictated by the address space
layout.

src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp
src/gallium/drivers/nvc0/nvc0_program.c
src/gallium/drivers/nvc0/nvc0_screen.c

index 613187c0a121ec0329143384a1a19b7347d12a46..b546429922d61dbb18fff12a239a118de247b1ef 100644 (file)
@@ -1009,7 +1009,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
          else
             info->out[dst.getIndex(0)].mask |= dst.getMask();
 
-         if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE)
+         if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE ||
+             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PRIMID ||
+             info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_FOG)
             info->out[dst.getIndex(0)].mask &= 1;
 
          if (isEdgeFlagPassthrough(insn))
@@ -1040,14 +1042,25 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
          for (unsigned i = 0; i < info->numInputs; ++i)
             info->in[i].mask = 0xf;
       } else {
+         const int i = src.getIndex(0);
          for (unsigned c = 0; c < 4; ++c) {
             if (!(mask & (1 << c)))
                continue;
             int k = src.getSwizzle(c);
-            int i = src.getIndex(0);
-            if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X)
-               if (k <= TGSI_SWIZZLE_W)
-                  info->in[i].mask |= 1 << k;
+            if (k <= TGSI_SWIZZLE_W)
+               info->in[i].mask |= 1 << k;
+         }
+         switch (info->in[i].sn) {
+         case TGSI_SEMANTIC_PSIZE:
+         case TGSI_SEMANTIC_PRIMID:
+         case TGSI_SEMANTIC_FOG:
+            info->in[i].mask &= 0x1;
+            break;
+         case TGSI_SEMANTIC_PCOORD:
+            info->in[i].mask &= 0x3;
+            break;
+         default:
+            break;
          }
       }
    }
index 2f1b41704a59e76624928d5ffe346b24bbfb94c7..22dfaf91aa2ad787893f782423288c8a49eca9d4 100644 (file)
@@ -27,6 +27,9 @@
 #include "nv50/codegen/nv50_ir_driver.h"
 #include "nve4_compute.h"
 
+/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
+ * 124 scalar varying values.
+ */
 static uint32_t
 nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
 {
@@ -36,12 +39,12 @@ nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
    case TGSI_SEMANTIC_PSIZE:        return 0x06c;
    case TGSI_SEMANTIC_POSITION:     return 0x070;
    case TGSI_SEMANTIC_GENERIC:      return ubase + si * 0x10;
-   case TGSI_SEMANTIC_FOG:          return 0x270;
+   case TGSI_SEMANTIC_FOG:          return 0x2e8;
    case TGSI_SEMANTIC_COLOR:        return 0x280 + si * 0x10;
    case TGSI_SEMANTIC_BCOLOR:       return 0x2a0 + si * 0x10;
    case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
    case TGSI_SEMANTIC_CLIPDIST:     return 0x2c0 + si * 0x10;
-   case TGSI_SEMANTIC_CLIPVERTEX:   return 0x260;
+   case TGSI_SEMANTIC_CLIPVERTEX:   return 0x270;
    case TGSI_SEMANTIC_PCOORD:       return 0x2e0;
    case NV50_SEMANTIC_TESSCOORD:    return 0x2f0;
    case TGSI_SEMANTIC_INSTANCEID:   return 0x2f8;
@@ -66,12 +69,12 @@ nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
    case TGSI_SEMANTIC_PSIZE:         return 0x06c;
    case TGSI_SEMANTIC_POSITION:      return 0x070;
    case TGSI_SEMANTIC_GENERIC:       return ubase + si * 0x10;
-   case TGSI_SEMANTIC_FOG:           return 0x270;
+   case TGSI_SEMANTIC_FOG:           return 0x2e8;
    case TGSI_SEMANTIC_COLOR:         return 0x280 + si * 0x10;
    case TGSI_SEMANTIC_BCOLOR:        return 0x2a0 + si * 0x10;
    case NV50_SEMANTIC_CLIPDISTANCE:  return 0x2c0 + si * 0x4;
    case TGSI_SEMANTIC_CLIPDIST:      return 0x2c0 + si * 0x10;
-   case TGSI_SEMANTIC_CLIPVERTEX:    return 0x260;
+   case TGSI_SEMANTIC_CLIPVERTEX:    return 0x270;
    case TGSI_SEMANTIC_TEXCOORD:      return 0x300 + si * 0x10;
    case TGSI_SEMANTIC_EDGEFLAG:      return ~0;
    default:
@@ -440,7 +443,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
          } else
          if (info->in[i].slot[0] >= (0x2c0 / 4) &&
              info->in[i].slot[0] <= (0x2fc / 4)) {
-            fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x03ff0000;
+            fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000;
          } else {
             if (info->in[i].slot[c] < (0x040 / 4) ||
                 info->in[i].slot[c] > (0x380 / 4))
index e77b819b6513c3c1fe6ef67872912ba7186bd5bf..73cf0af4d1d679371820eeb8de8cf73382bf9c74 100644 (file)
@@ -221,9 +221,17 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_MAX_INPUTS:
       if (shader == PIPE_SHADER_VERTEX)
          return 32;
+      /* NOTE: These only count our slots for GENERIC varyings.
+       * The address space may be larger, but the actual hard limit seems to be
+       * less than what the address space layout permits, so don't add TEXCOORD,
+       * COLOR, etc. here.
+       */
       if (shader == PIPE_SHADER_FRAGMENT)
-         return (0x200 + 0x20 + 0x80) / 16; /* generic + colors + TexCoords */
-      return (0x200 + 0x40 + 0x80) / 16; /* without 0x60 for per-patch inputs */
+         return 0x1f0 / 16;
+      /* Actually this counts CLIPVERTEX, which occupies the last generic slot,
+       * and excludes 0x60 per-patch inputs.
+       */
+      return 0x200 / 16;
    case PIPE_SHADER_CAP_MAX_CONSTS:
       return 65536 / 16;
    case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: