freedreno/a3xx: allow num_samplers != num_textures
[mesa.git] / src / gallium / drivers / freedreno / a3xx / fd3_program.c
index d84bbe9c36fad299bb42adf45f806a88d7463ba2..7bb96faa8999f5aacd16d5991e530cf65f341853 100644 (file)
@@ -98,15 +98,8 @@ create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
                tgsi_dump(cso->tokens, 0);
        }
 
-       if (type == SHADER_FRAGMENT) {
-               /* we seem to get wrong colors (maybe swap/endianess or hw issue?)
-                * with full precision color reg.  And blob driver only seems to
-                * use half precision register for color output (that I can find
-                * so far), even with highp precision.  So for force half precision
-                * for frag shader:
-                */
+       if ((type == SHADER_FRAGMENT) && (fd_mesa_debug & FD_DBG_FRAGHALF))
                so->half_precision = true;
-       }
 
        ret = fd3_compile_shader(so, cso->tokens);
        if (ret) {
@@ -186,7 +179,8 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
 {
        struct ir3_shader_info *si = &so->info;
        enum adreno_state_block sb;
-       uint32_t i, *bin;
+       enum adreno_state_src src;
+       uint32_t i, sz, *bin;
 
        if (so->type == SHADER_VERTEX) {
                sb = SB_VERT_SHADER;
@@ -194,17 +188,31 @@ emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
                sb = SB_FRAG_SHADER;
        }
 
-       // XXX use SS_INDIRECT
-       bin = fd_bo_map(so->bo);
-       OUT_PKT3(ring, CP_LOAD_STATE, 2 + si->sizedwords);
+       if (fd_mesa_debug & FD_DBG_DIRECT) {
+               sz = si->sizedwords;
+               src = SS_DIRECT;
+               bin = fd_bo_map(so->bo);
+       } else {
+               sz = 0;
+               src = SS_INDIRECT;
+               bin = NULL;
+       }
+
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
        OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
-                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_SRC(src) |
                        CP_LOAD_STATE_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
-       OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
-                       CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
-       for (i = 0; i < si->sizedwords; i++)
+       if (bin) {
+               OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
+       } else {
+               OUT_RELOC(ring, so->bo, 0,
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
+       }
+       for (i = 0; i < sz; i++) {
                OUT_RING(ring, bin[i]);
+       }
 }
 
 void
@@ -223,6 +231,10 @@ fd3_program_emit(struct fd_ringbuffer *ring,
 
        OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
        OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+                       /* NOTE: SHADERRESTART and CONSTFULLUPDATE presumably
+                        * flush some caches.  We probably only need to set these
+                        * bits when the consts or the shader have been updated.
+                        */
                        A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
                        A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
        OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |