i965/fs: Lower 32x32 bit multiplication on BXT.
[mesa.git] / src / mesa / drivers / dri / i965 / gen7_urb.c
index c37c110fa8e92d4b3997df258c3be585e4b08dfe..69162171c4ec184ad52952f8f29e98c26beab78f 100644 (file)
@@ -61,7 +61,8 @@ static void
 gen7_allocate_push_constants(struct brw_context *brw)
 {
    unsigned avail_size = 16;
-   unsigned multiplier = (brw->is_haswell && brw->gt == 3) ? 2 : 1;
+   unsigned multiplier =
+      (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 2 : 1;
 
    /* BRW_NEW_GEOMETRY_PROGRAM */
    bool gs_present = brw->geometry_program;
@@ -93,7 +94,7 @@ gen7_allocate_push_constants(struct brw_context *brw)
     * Similar text exists for the other 3DSTATE_PUSH_CONSTANT_ALLOC_*
     * commands.
     */
-   brw->state.dirty.brw |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
+   brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
 }
 
 void
@@ -120,9 +121,9 @@ gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
     *     A PIPE_CONTROL command with the CS Stall bit set must be programmed
     *     in the ring after this instruction.
     *
-    * No such restriction exists for Haswell.
+    * No such restriction exists for Haswell or Baytrail.
     */
-   if (brw->gen < 8 && !brw->is_haswell)
+   if (brw->gen < 8 && !brw->is_haswell && !brw->is_baytrail)
       gen7_emit_cs_stall_flush(brw);
 }
 
@@ -130,7 +131,6 @@ const struct brw_tracked_state gen7_push_constant_space = {
    .dirty = {
       .mesa = 0,
       .brw = BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM,
-      .cache = 0,
    },
    .emit = gen7_allocate_push_constants,
 };
@@ -138,16 +138,30 @@ const struct brw_tracked_state gen7_push_constant_space = {
 static void
 gen7_upload_urb(struct brw_context *brw)
 {
-   const int push_size_kB = brw->is_haswell && brw->gt == 3 ? 32 : 16;
+   const int push_size_kB =
+      (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16;
 
-   /* CACHE_NEW_VS_PROG */
+   /* BRW_NEW_VS_PROG_DATA */
    unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
    unsigned vs_entry_size_bytes = vs_size * 64;
-   /* BRW_NEW_GEOMETRY_PROGRAM, CACHE_NEW_GS_PROG */
+   /* BRW_NEW_GEOMETRY_PROGRAM, BRW_NEW_GS_PROG_DATA */
    bool gs_present = brw->geometry_program;
    unsigned gs_size = gs_present ? brw->gs.prog_data->base.urb_entry_size : 1;
    unsigned gs_entry_size_bytes = gs_size * 64;
 
+   /* If we're just switching between programs with the same URB requirements,
+    * skip the rest of the logic.
+    */
+   if (!(brw->ctx.NewDriverState & BRW_NEW_CONTEXT) &&
+       brw->urb.vsize == vs_size &&
+       brw->urb.gs_present == gs_present &&
+       brw->urb.gsize == gs_size) {
+      return;
+   }
+   brw->urb.vsize = vs_size;
+   brw->urb.gs_present = gs_present;
+   brw->urb.gsize = gs_size;
+
    /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
     *
     *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
@@ -214,7 +228,7 @@ gen7_upload_urb(struct brw_context *brw)
       remaining_space = total_wants;
    if (remaining_space > 0) {
       unsigned vs_additional = (unsigned)
-         round(vs_wants * (((double) remaining_space) / total_wants));
+         roundf(vs_wants * (((float) remaining_space) / total_wants));
       vs_chunks += vs_additional;
       remaining_space -= vs_additional;
       gs_chunks += remaining_space;
@@ -261,7 +275,8 @@ gen7_upload_urb(struct brw_context *brw)
    brw->urb.vs_start = push_constant_chunks;
    brw->urb.gs_start = push_constant_chunks + vs_chunks;
 
-   gen7_emit_vs_workaround_flush(brw);
+   if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail)
+      gen7_emit_vs_workaround_flush(brw);
    gen7_emit_urb_state(brw,
                        brw->urb.nr_vs_entries, vs_size, brw->urb.vs_start,
                        brw->urb.nr_gs_entries, gs_size, brw->urb.gs_start);
@@ -298,8 +313,10 @@ gen7_emit_urb_state(struct brw_context *brw,
 const struct brw_tracked_state gen7_urb = {
    .dirty = {
       .mesa = 0,
-      .brw = BRW_NEW_CONTEXT | BRW_NEW_GEOMETRY_PROGRAM,
-      .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG),
+      .brw = BRW_NEW_CONTEXT |
+             BRW_NEW_GEOMETRY_PROGRAM |
+             BRW_NEW_GS_PROG_DATA |
+             BRW_NEW_VS_PROG_DATA,
    },
    .emit = gen7_upload_urb,
 };