a4xx: add noperspective interpolation support
author: Ilia Mirkin <imirkin@alum.mit.edu>
Sun, 5 Jul 2020 02:13:32 +0000 (22:13 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Mon, 6 Jul 2020 17:35:56 +0000 (17:35 +0000)
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5753>

src/freedreno/registers/a4xx.xml
src/gallium/drivers/freedreno/a4xx/fd4_emit.c
src/gallium/drivers/freedreno/a4xx/fd4_program.c

index 454ee596de8eb05309a0afd24149f700e8be155a..596f722e94faf58beb0a2c794b7da55cdefe295c 100644 (file)
@@ -923,7 +923,13 @@ perhaps they should be taken with a grain of salt
                <bitfield name="SAMPLEID" pos="6" type="boolean"/>
                <bitfield name="MSAA_SAMPLES" low="7" high="9" type="uint"/>
                <bitfield name="SAMPLEID_HR" pos="11" type="boolean"/>
                <bitfield name="SAMPLEID" pos="6" type="boolean"/>
                <bitfield name="MSAA_SAMPLES" low="7" high="9" type="uint"/>
                <bitfield name="SAMPLEID_HR" pos="11" type="boolean"/>
-               <bitfield name="VARYING" pos="12" type="boolean"/>
+               <bitfield name="IJ_PERSP_PIXEL" pos="12" type="boolean"/>
+               <!-- the 2 below are just educated guesses -->
+               <bitfield name="IJ_PERSP_CENTROID" pos="13" type="boolean"/>
+               <bitfield name="IJ_PERSP_SAMPLE" pos="14" type="boolean"/>
+               <!-- needs to be enabled to get nopersp values,
+                    perhaps other cases too? -->
+               <bitfield name="SIZE" pos="15" type="boolean"/>
        </reg32>
        <array offset="0x20a4" name="RB_MRT" stride="5" length="8">
                <reg32 offset="0x0" name="CONTROL">
        </reg32>
        <array offset="0x20a4" name="RB_MRT" stride="5" length="8">
                <reg32 offset="0x0" name="CONTROL">
@@ -1944,10 +1950,9 @@ perhaps they should be taken with a grain of salt
                <bitfield name="ZFAR_CLIP_DISABLE" pos="17" type="boolean"/>
                <bitfield name="ZERO_GB_SCALE_Z" pos="22" type="boolean"/>
        </reg32>
                <bitfield name="ZFAR_CLIP_DISABLE" pos="17" type="boolean"/>
                <bitfield name="ZERO_GB_SCALE_Z" pos="22" type="boolean"/>
        </reg32>
-       <reg32 offset="0x2003" name="GRAS_CLEAR_CNTL">
-               <!-- probably not the right name, but.. -->
-               <!-- bit0 set for everything *but* fastclear -->
-               <bitfield name="NOT_FASTCLEAR" pos="0" type="boolean"/>
+       <reg32 offset="0x2003" name="GRAS_CNTL">
+               <bitfield name="IJ_PERSP" pos="0" type="boolean"/>
+               <bitfield name="IJ_LINEAR" pos="1" type="boolean"/>
        </reg32>
        <reg32 offset="0x2004" name="GRAS_CL_GB_CLIP_ADJ">
                <bitfield name="HORZ" low="0" high="9" type="uint"/>
        </reg32>
        <reg32 offset="0x2004" name="GRAS_CL_GB_CLIP_ADJ">
                <bitfield name="HORZ" low="0" high="9" type="uint"/>
@@ -2065,7 +2070,7 @@ perhaps they should be taken with a grain of salt
                <bitfield name="IJ_LINEAR_CENTROID" low="24" high="31" type="a3xx_regid"/>
        </reg32>
        <!-- 0x23c4 3 regids, lowest one goes to 0 when *not* per-sample shading -->
                <bitfield name="IJ_LINEAR_CENTROID" low="24" high="31" type="a3xx_regid"/>
        </reg32>
        <!-- 0x23c4 3 regids, lowest one goes to 0 when *not* per-sample shading -->
-       <reg32 offset="0xb985" name="HLSQ_CONTROL_4_REG">
+       <reg32 offset="0x23c4" name="HLSQ_CONTROL_4_REG">
                <bitfield name="IJ_PERSP_SAMPLE" low="0" high="7" type="a3xx_regid"/>
                <bitfield name="IJ_LINEAR_SAMPLE" low="8" high="15" type="a3xx_regid"/>
        </reg32>
                <bitfield name="IJ_PERSP_SAMPLE" low="0" high="7" type="a3xx_regid"/>
                <bitfield name="IJ_LINEAR_SAMPLE" low="8" high="15" type="a3xx_regid"/>
        </reg32>
index 95c2dc7d639aefa396ccc750b4cb5621cbb5b156..e6848a1aecc1e882378a73cb8d0d85282921ddcd 100644 (file)
@@ -924,9 +924,6 @@ fd4_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
        OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
        OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
 
        OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
        OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
 
-       OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
-       OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
-
        OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
        OUT_RING(ring, 0x0);
 
        OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
        OUT_RING(ring, 0x0);
 
index 4e98be45999f5fa8f2f5b7e8200e2f65f000842d..ef44ce57168eb66cb3123082a1a15aa7ea91196a 100644 (file)
@@ -164,7 +164,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
 {
        struct stage s[MAX_STAGES];
        uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
 {
        struct stage s[MAX_STAGES];
        uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
-       uint32_t face_regid, coord_regid, zwcoord_regid, vcoord_regid, lcoord_regid;
+       uint32_t face_regid, coord_regid, zwcoord_regid, ij_regid[IJ_COUNT];
        enum a3xx_threadsize fssz;
        int constmode;
        int i, j;
        enum a3xx_threadsize fssz;
        int constmode;
        int i, j;
@@ -209,17 +209,8 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
        face_regid      = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
        coord_regid     = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
        zwcoord_regid   = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
        face_regid      = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE);
        coord_regid     = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD);
        zwcoord_regid   = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
-       vcoord_regid    = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
-       lcoord_regid    = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
-
-       /* XXX since we don't know how to support noperspective varyings on a4xx,
-        * use this little hack to support u_blitter, which should be the only
-        * case with noperspective varyings on a4xx:
-        */
-       if (VALIDREG(lcoord_regid)) {
-               assert(!VALIDREG(vcoord_regid));
-               vcoord_regid = lcoord_regid;
-       }
+       for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
+               ij_regid[i] = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
 
        /* we could probably divide this up into things that need to be
         * emitted if frag-prog is dirty vs if vert-prog is dirty..
 
        /* we could probably divide this up into things that need to be
         * emitted if frag-prog is dirty vs if vert-prog is dirty..
@@ -245,8 +236,10 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
        OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
                        0x3f3f000 |           /* XXX */
                        A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
        OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
                        0x3f3f000 |           /* XXX */
                        A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
-       OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(vcoord_regid) |
-                       0xfcfcfc00);
+       /* XXX left out centroid/sample for now */
+       OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) |
+                       A4XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) |
+                       0xfcfc0000);
        OUT_RING(ring, 0x00fcfcfc);   /* XXX HLSQ_CONTROL_4 */
 
        OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
        OUT_RING(ring, 0x00fcfcfc);   /* XXX HLSQ_CONTROL_4 */
 
        OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
@@ -392,9 +385,17 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
        OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
                        A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));
 
        OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
                        A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));
 
+       OUT_PKT0(ring, REG_A4XX_GRAS_CNTL, 1);
+       OUT_RING(ring,
+                       CONDREG(face_regid, A4XX_GRAS_CNTL_IJ_PERSP) |
+                       CONDREG(zwcoord_regid, A4XX_GRAS_CNTL_IJ_PERSP) |
+                       CONDREG(ij_regid[IJ_PERSP_PIXEL], A4XX_GRAS_CNTL_IJ_PERSP) |
+                       CONDREG(ij_regid[IJ_LINEAR_PIXEL], A4XX_GRAS_CNTL_IJ_LINEAR));
+
        OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1);
        OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) |
        OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1);
        OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) |
-                       COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) |
+                       CONDREG(ij_regid[IJ_PERSP_PIXEL], A4XX_RB_RENDER_CONTROL2_IJ_PERSP_PIXEL) |
+                       CONDREG(ij_regid[IJ_LINEAR_PIXEL], A4XX_RB_RENDER_CONTROL2_SIZE) |
                        COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
                        COND(s[FS].v->fragcoord_compmask != 0,
                                        A4XX_RB_RENDER_CONTROL2_COORD_MASK(s[FS].v->fragcoord_compmask)));
                        COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
                        COND(s[FS].v->fragcoord_compmask != 0,
                                        A4XX_RB_RENDER_CONTROL2_COORD_MASK(s[FS].v->fragcoord_compmask)));