r300: Set correct VAP_CNTL per vertex program.
authorMarkus Amsler <markus.amsler@oribi.org>
Sat, 3 May 2008 16:55:45 +0000 (12:55 -0400)
committerAlex Deucher <alex@botch2.com>
Sat, 3 May 2008 16:55:45 +0000 (12:55 -0400)
adapted from Markus' patch on bug 15386 with updates for non-TCL
and R500.

src/mesa/drivers/dri/r300/r300_ioctl.c
src/mesa/drivers/dri/r300/r300_state.c

index d38ee9003d27345013871b6a4c127f28ad8987fb..ffcde7ff35446a9cd7792b407f9969ae4189b9b8 100644 (file)
@@ -187,6 +187,7 @@ static void r300EmitClearState(GLcontext * ctx)
        drm_radeon_cmd_header_t *cmd = NULL;
        int has_tcl = 1;
        int is_r500 = 0;
+       GLuint vap_cntl;
 
        if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
                has_tcl = 0;
@@ -206,11 +207,9 @@ static void r300EmitClearState(GLcontext * ctx)
        R300_STATECHANGE(r300, vir[0]);
        reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0);
        if (!has_tcl)
-           /*e32(0x22030003);*/
            e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
                 ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
        else
-           /*e32(0x21030003);*/
            e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
                 ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
 
@@ -425,6 +424,35 @@ static void r300EmitClearState(GLcontext * ctx)
                    R500_ALU_RGBA_A_SWIZ_0);
        }
 
+       if (has_tcl) {
+           vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+                       (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+                       (12 << R300_VF_MAX_VTX_NUM_SHIFT));
+           if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+               vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
+       } else
+           vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+                       (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+                       (5 << R300_VF_MAX_VTX_NUM_SHIFT));
+
+       if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
+           vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+       else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
+                (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560))
+           vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+       else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)
+           vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+       else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+                (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) ||
+                (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+           vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+       else
+           vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+
+       R300_STATECHANGE(rmesa, vap_cntl);
+       reg_start(R300_VAP_CNTL, 0);
+       e32(vap_cntl);
+
        if (has_tcl) {
                R300_STATECHANGE(r300, pvs);
                reg_start(R300_VAP_PVS_CNTL_1, 2);
index 21b5ac969b3a2f7f02ff2a8fd43b0199f76fc34f..10002e3c4f8b8be5b850d4a0c13d9121837e0ff2 100644 (file)
@@ -1811,10 +1811,70 @@ static inline void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest,
        }
 }
 
+#define MIN3(a,b,c) ((a)<(b) ? MIN2(a, c): MIN2(b, c))
+
+static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count)
+{
+    int vtx_mem_size;
+    int cmd_reserved = 0;
+    int cmd_written = 0;
+    drm_radeon_cmd_header_t *cmd = NULL;
+    int pvs_num_slots;
+    int pvs_num_cntrls;
+
+    /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS.
+     * See r500 docs 6.5.2 */
+    reg_start(R300_VAP_PVS_WAITIDLE, 0);
+    e32(0x00000000);
+
+    /* avoid division by zero */
+    if (input_count == 0) input_count = 1;
+    if (output_count == 0) output_count = 1;
+    if (temp_count == 0) temp_count = 1;
+
+    if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+       vtx_mem_size = 128;
+    else
+       vtx_mem_size = 72;
+
+    pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count);
+    pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count);
+
+    R300_STATECHANGE(rmesa, vap_cntl);
+    if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
+       rmesa->hw.vap_cntl.cmd[1] = 
+           (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) |
+           (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) |
+           (12 << R300_VF_MAX_VTX_NUM_SHIFT);
+       if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+           rmesa->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION;
+    } else
+       /* not sure about non-tcl */
+       rmesa->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+                                   (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+                                   (5 << R300_VF_MAX_VTX_NUM_SHIFT));
+
+    if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
+       rmesa->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+    else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
+            (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560))
+       rmesa->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+    else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)
+       rmesa->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+    else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+            (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) ||
+            (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+       rmesa->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+    else
+       rmesa->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+
+}
+
 static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
 {
        struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader);
        GLuint o_reg = 0;
+       GLuint i_reg = 0;
        int i;
        int inst_count = 0;
        int param_count = 0;
@@ -1827,6 +1887,7 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
                        prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
                        prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
                        program_end += 4;
+                       i_reg++;
                }
        }
 
@@ -1836,6 +1897,8 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
                                       &(prog->program));
        inst_count = (prog->program.length / 4) - 1;
 
+       r300VapCntl(rmesa, i_reg, o_reg, 0);
+
        R300_STATECHANGE(rmesa, pvs);
        rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
            (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) |
@@ -1849,6 +1912,15 @@ static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa)
            (inst_count << R300_PVS_CNTL_3_PROGRAM_UNKNOWN2_SHIFT);
 }
 
+static int bit_count (int x)
+{
+    x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U);
+    x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U);
+    x = (x >> 16) + (x & 0xffff);
+    x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f);
+    return (x >> 8) + (x & 0x00ff);
+}
+
 static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
 {
        GLcontext *ctx = rmesa->radeon.glCtx;
@@ -1870,6 +1942,9 @@ static void r300SetupRealVertexProgram(r300ContextPtr rmesa)
        r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program));
        inst_count = (prog->program.length / 4) - 1;
 
+       r300VapCntl(rmesa, bit_count(prog->key.InputsRead),
+                   bit_count(prog->key.OutputsWritten), prog->num_temporaries);
+
        R300_STATECHANGE(rmesa, pvs);
        rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] =
          (0 << R300_PVS_CNTL_1_PROGRAM_START_SHIFT) |
@@ -1903,13 +1978,6 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa)
                r300SetupDefaultVertexProgram(rmesa);
        }
 
-
-       /* FIXME: This is done for vertex shader fragments, but also needs to be
-        * done for vap_pvs, so I leave it as a reminder. */
-#if 0
-       reg_start(R300_VAP_PVS_WAITIDLE, 0);
-       e32(0x00000000);
-#endif
 }
 
 /**
@@ -2011,35 +2079,6 @@ static void r300ResetHwState(r300ContextPtr r300)
        r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
        r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
 
-       /* setup the VAP */
-       /* for tcl, PVS_NUM_SLOTS, PVS_NUM_CNTLRS, VF_MAX_VTX_NUM need to be adjusted
-        * dynamically.  PVS_NUM_FPUS is fixed based on asic
-        */
-       if (has_tcl) {
-           r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-                                       (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-                                       (12 << R300_VF_MAX_VTX_NUM_SHIFT));
-           if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
-               r300->hw.vap_cntl.cmd[1] |= R500_TCL_STATE_OPTIMIZATION;
-       } else
-           r300->hw.vap_cntl.cmd[1] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-                                       (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-                                       (5 << R300_VF_MAX_VTX_NUM_SHIFT));
-
-       if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
-           r300->hw.vap_cntl.cmd[1] |= (2 << R300_PVS_NUM_FPUS_SHIFT);
-       else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
-                (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560))
-           r300->hw.vap_cntl.cmd[1] |= (5 << R300_PVS_NUM_FPUS_SHIFT);
-       else if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)
-           r300->hw.vap_cntl.cmd[1] |= (6 << R300_PVS_NUM_FPUS_SHIFT);
-       else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
-                (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) ||
-                (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
-           r300->hw.vap_cntl.cmd[1] |= (8 << R300_PVS_NUM_FPUS_SHIFT);
-       else
-           r300->hw.vap_cntl.cmd[1] |= (4 << R300_PVS_NUM_FPUS_SHIFT);
-
        r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
            | R300_VPORT_X_OFFSET_ENA
            | R300_VPORT_Y_SCALE_ENA