i965: Always use the pre-computed offset for the relocation entry
author: Chris Wilson <chris@chris-wilson.co.uk>
Fri, 21 Jul 2017 15:36:45 +0000 (16:36 +0100)
committer: Kenneth Graunke <kenneth@whitecape.org>
Fri, 4 Aug 2017 17:26:37 +0000 (10:26 -0700)
We must be careful to compute the address only once, based on the
per-context information (rather than accessing the unlocked global
bo->offset64), so that the value in the batch matches the
reloc.presumed_offset we declare to the kernel. Otherwise, although it
is highly unlikely, we may see GPU hangs in multithreaded users.

The only real complication here is isl_surf_fill_state(), which needs to
adjust the reloc.delta both to generate a tile offset and to encode state
into the lower 12 bits.

(Rebased on ISL changes by Ken.)

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_wm_surface_state.c
src/mesa/drivers/dri/i965/genX_state_upload.c

index a0ca6ddf9855d3e08beddc65eb24a36f1b291bf9..5a3eccfe04288e05410721a9b78504ea6e6e6930 100644 (file)
@@ -154,13 +154,13 @@ brw_emit_surface_state(struct brw_context *brw,
    case ISL_AUX_USAGE_CCS_E:
       aux_surf = &mt->mcs_buf->surf;
       aux_bo = mt->mcs_buf->bo;
-      aux_offset = mt->mcs_buf->bo->offset64 + mt->mcs_buf->offset;
+      aux_offset = mt->mcs_buf->offset;
       break;
 
    case ISL_AUX_USAGE_HIZ:
       aux_surf = &mt->hiz_buf->surf;
       aux_bo = mt->hiz_buf->bo;
-      aux_offset = mt->hiz_buf->bo->offset64;
+      aux_offset = 0;
       break;
 
    case ISL_AUX_USAGE_NONE:
@@ -180,28 +180,29 @@ brw_emit_surface_state(struct brw_context *brw,
                                  surf_offset);
 
    isl_surf_fill_state(&brw->isl_dev, state, .surf = &mt->surf, .view = &view,
-                       .address = mt->bo->offset64 + offset,
+                       .address = brw_emit_reloc(&brw->batch,
+                                                 *surf_offset + brw->isl_dev.ss.addr_offset,
+                                                 mt->bo, offset, read_domains, write_domains),
                        .aux_surf = aux_surf, .aux_usage = aux_usage,
                        .aux_address = aux_offset,
                        .mocs = mocs, .clear_color = clear_color,
                        .x_offset_sa = tile_x, .y_offset_sa = tile_y);
-
-   brw_emit_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset,
-                  mt->bo, offset, read_domains, write_domains);
-
    if (aux_surf) {
       /* On gen7 and prior, the upper 20 bits of surface state DWORD 6 are the
        * upper 20 bits of the GPU address of the MCS buffer; the lower 12 bits
        * contain other control information.  Since buffer addresses are always
        * on 4k boundaries (and thus have their lower 12 bits zero), we can use
        * an ordinary reloc to do the necessary address translation.
+       *
+       * FIXME: move to the point of assignment.
        */
       assert((aux_offset & 0xfff) == 0);
       uint32_t *aux_addr = state + brw->isl_dev.ss.aux_addr_offset;
-      brw_emit_reloc(&brw->batch,
-                     *surf_offset + brw->isl_dev.ss.aux_addr_offset,
-                     aux_bo, *aux_addr - aux_bo->offset64,
-                     read_domains, write_domains);
+      *aux_addr = brw_emit_reloc(&brw->batch,
+                                 *surf_offset +
+                                 brw->isl_dev.ss.aux_addr_offset,
+                                 aux_bo, *aux_addr,
+                                 read_domains, write_domains);
    }
 }
 
@@ -611,18 +612,16 @@ brw_emit_buffer_surface_state(struct brw_context *brw,
                                   out_offset);
 
    isl_buffer_fill_state(&brw->isl_dev, dw,
-                         .address = (bo ? bo->offset64 : 0) + buffer_offset,
+                         .address = !bo ? buffer_offset :
+                                    brw_emit_reloc(&brw->batch,
+                                                   *out_offset + brw->isl_dev.ss.addr_offset,
+                                                   bo, buffer_offset,
+                                                   I915_GEM_DOMAIN_SAMPLER,
+                                                   (rw ? I915_GEM_DOMAIN_SAMPLER : 0)),
                          .size = buffer_size,
                          .format = surface_format,
                          .stride = pitch,
                          .mocs = tex_mocs[brw->gen]);
-
-   if (bo) {
-      brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset,
-                     bo, buffer_offset,
-                     I915_GEM_DOMAIN_SAMPLER,
-                     (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
-   }
 }
 
 void
@@ -785,17 +784,15 @@ brw_update_sol_surface(struct brw_context *brw,
       BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
       surface_format << BRW_SURFACE_FORMAT_SHIFT |
       BRW_SURFACE_RC_READ_WRITE;
-   surf[1] = bo->offset64 + offset_bytes; /* reloc */
+   surf[1] = brw_emit_reloc(&brw->batch,
+                            *out_offset + 4, bo, offset_bytes,
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
    surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
    surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
    surf[4] = 0;
    surf[5] = 0;
-
-   /* Emit relocation to surface contents. */
-   brw_emit_reloc(&brw->batch, *out_offset + 4, bo, offset_bytes,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 }
 
 /* Creates a new WM constant buffer reflecting the current fragment program's
@@ -903,7 +900,9 @@ brw_emit_null_surface_state(struct brw_context *brw,
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
    }
-   surf[1] = bo ? bo->offset64 : 0;
+   surf[1] = !bo ? 0 :
+             brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
    surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
               (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
 
@@ -916,11 +915,6 @@ brw_emit_null_surface_state(struct brw_context *brw,
               pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
    surf[4] = multisampling_state;
    surf[5] = 0;
-
-   if (bo) {
-      brw_emit_reloc(&brw->batch, *out_offset + 4, bo, 0,
-                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-   }
 }
 
 /**
@@ -977,8 +971,12 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
 
    /* reloc */
    assert(mt->offset % mt->cpp == 0);
-   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
-             mt->bo->offset64 + mt->offset);
+   surf[1] = brw_emit_reloc(&brw->batch, offset + 4, mt->bo,
+                            mt->offset +
+                            intel_renderbuffer_get_tile_offsets(irb,
+                                                                &tile_x,
+                                                                &tile_y),
+                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 
    surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
@@ -1021,9 +1019,6 @@ gen4_update_renderbuffer_surface(struct brw_context *brw,
       }
    }
 
-   brw_emit_reloc(&brw->batch, offset + 4, mt->bo, surf[1] - mt->bo->offset64,
-                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-
    return offset;
 }
 
index dacccf78f57a463948dc62fe13fd952133e24202..4f4dd6f993bcfcff10c3f18cc4bed7083c4ef41f 100644 (file)
@@ -5053,14 +5053,13 @@ genX(update_sampler_state)(struct brw_context *brw,
                                  texObj->StencilSampling,
                                  &border_color_offset);
    }
-
-   samp_st.BorderColorPointer = border_color_offset;
-
    if (GEN_GEN < 6) {
-      samp_st.BorderColorPointer += brw->batch.bo->offset64; /* reloc */
-      brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
-                     brw->batch.bo, border_color_offset,
-                     I915_GEM_DOMAIN_SAMPLER, 0);
+      samp_st.BorderColorPointer =
+         brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
+                        brw->batch.bo, border_color_offset,
+                        I915_GEM_DOMAIN_SAMPLER, 0);
+   } else {
+      samp_st.BorderColorPointer = border_color_offset;
    }
 
 #if GEN_GEN >= 8