i965: Refactor SIMD16-to-2xSIMD8 checks.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_curbe.c
index 1a828edeb969501b35ea7b580f5c629e90b94987..d0ec8595c45ce21006af94262bcd5d5ae57feaea 100644 (file)
 static void calculate_curbe_offsets( struct brw_context *brw )
 {
    struct gl_context *ctx = &brw->ctx;
-   /* CACHE_NEW_WM_PROG */
+   /* BRW_NEW_FS_PROG_DATA */
    const GLuint nr_fp_regs = (brw->wm.prog_data->base.nr_params + 15) / 16;
 
-   /* CACHE_NEW_VS_PROG */
+   /* BRW_NEW_VS_PROG_DATA */
    const GLuint nr_vp_regs = (brw->vs.prog_data->base.base.nr_params + 15) / 16;
    GLuint nr_clip_regs = 0;
    GLuint total_regs;
@@ -142,8 +142,9 @@ static void calculate_curbe_offsets( struct brw_context *brw )
 const struct brw_tracked_state brw_curbe_offsets = {
    .dirty = {
       .mesa = _NEW_TRANSFORM,
-      .brw  = BRW_NEW_CONTEXT,
-      .cache = CACHE_NEW_VS_PROG | CACHE_NEW_WM_PROG
+      .brw  = BRW_NEW_CONTEXT |
+              BRW_NEW_FS_PROG_DATA |
+              BRW_NEW_VS_PROG_DATA,
    },
    .emit = calculate_curbe_offsets
 };
@@ -211,10 +212,12 @@ brw_upload_constant_buffer(struct brw_context *brw)
 
    /* fragment shader constants */
    if (brw->curbe.wm_size) {
+      _mesa_load_state_parameters(ctx, brw->fragment_program->Base.Parameters);
+
       /* BRW_NEW_CURBE_OFFSETS */
       GLuint offset = brw->curbe.wm_start * 16;
 
-      /* CACHE_NEW_WM_PROG | _NEW_PROGRAM_CONSTANTS: copy uniform values */
+      /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
       for (i = 0; i < brw->wm.prog_data->base.nr_params; i++) {
         buf[offset + i] = *brw->wm.prog_data->base.param[i];
       }
@@ -251,9 +254,11 @@ brw_upload_constant_buffer(struct brw_context *brw)
 
    /* vertex shader constants */
    if (brw->curbe.vs_size) {
+      _mesa_load_state_parameters(ctx, brw->vertex_program->Base.Parameters);
+
       GLuint offset = brw->curbe.vs_start * 16;
 
-      /* CACHE_NEW_VS_PROG | _NEW_PROGRAM_CONSTANTS: copy uniform values */
+      /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
       for (i = 0; i < brw->vs.prog_data->base.base.nr_params; i++) {
          buf[offset + i] = *brw->vs.prog_data->base.base.param[i];
       }
@@ -280,6 +285,19 @@ brw_upload_constant_buffer(struct brw_context *brw)
     */
 
 emit:
+   /* Work around mysterious 965 hangs that appear to happen if you do
+    * two 3DPRIMITIVEs with only a CONSTANT_BUFFER in between.  If we
+    * haven't already flushed for some other reason, explicitly do so.
+    *
+    * We've found no documented reason why this should be necessary.
+    */
+   if (brw->gen == 4 && !brw->is_g4x &&
+       (brw->state.dirty.brw & (BRW_NEW_BATCH | BRW_NEW_PSP)) == 0) {
+      BEGIN_BATCH(1);
+      OUT_BATCH(MI_FLUSH);
+      ADVANCE_BATCH();
+   }
+
    /* BRW_NEW_URB_FENCE: From the gen4 PRM, volume 1, section 3.9.8
     * (CONSTANT_BUFFER (CURBE Load)):
     *
@@ -304,12 +322,12 @@ emit:
 const struct brw_tracked_state brw_constant_buffer = {
    .dirty = {
       .mesa = _NEW_PROGRAM_CONSTANTS,
-      .brw  = (BRW_NEW_URB_FENCE |
-              BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
-              BRW_NEW_CURBE_OFFSETS |
-              BRW_NEW_BATCH),
-      .cache = (CACHE_NEW_VS_PROG |
-                CACHE_NEW_WM_PROG)
+      .brw  = BRW_NEW_BATCH |
+              BRW_NEW_CURBE_OFFSETS |
+              BRW_NEW_FS_PROG_DATA |
+              BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
+              BRW_NEW_URB_FENCE |
+              BRW_NEW_VS_PROG_DATA,
    },
    .emit = brw_upload_constant_buffer,
 };