/**
* Optional scratch buffer used to store spilled register values and
* variably-indexed GRF arrays.
+ *
+ * The contents of this buffer are short-lived, so the same memory can be
+ * re-used at will for multiple shader programs (executed by the same fixed
+ * function).  However, reusing a scratch BO while shader invocations that
+ * were dispatched with a per-thread scratch slot size other than the
+ * current one are still in flight can cause threads with different slot
+ * sizes and FFTIDs (which may be executed in parallel depending on the
+ * shader stage and hardware generation) to map to overlapping regions of
+ * the scratch space, potentially corrupting each other's scratch data.
+ * For that reason, if you borrow this scratch buffer you should only use
+ * the slot size given by the \c per_thread_scratch member below, unless
+ * you take additional measures to synchronize thread execution across
+ * slot size changes.
*/
drm_intel_bo *scratch_bo;
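As a concrete illustration of the rule above, here is a minimal sketch of a
stage scratch allocator that keeps \c per_thread_scratch in agreement with
the BO it describes, so the state emission below always encodes the slot
size the buffer was actually sized with. The helper name and the
thread_count parameter are assumptions made for this sketch, not part of
the patch:

```c
/* Hypothetical sketch: grow the scratch BO only when the requested
 * per-thread slot size exceeds the one it was allocated for, and record
 * that size in per_thread_scratch for later state emission. */
static void
alloc_stage_scratch(struct brw_context *brw,
                    struct brw_stage_state *stage_state,
                    unsigned per_thread_size, unsigned thread_count)
{
   if (stage_state->per_thread_scratch >= per_thread_size)
      return;

   stage_state->per_thread_scratch = per_thread_size;

   if (stage_state->scratch_bo)
      drm_intel_bo_unreference(stage_state->scratch_bo);

   stage_state->scratch_bo =
      drm_intel_bo_alloc(brw->bufmgr, "shader scratch space",
                         per_thread_size * thread_count, 4096);
}
```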
vs->thread2.scratch_space_base_pointer =
stage_state->scratch_bo->offset64 >> 10; /* reloc */
vs->thread2.per_thread_scratch_space =
- ffs(brw->vs.prog_data->base.base.total_scratch) - 11;
+ ffs(stage_state->per_thread_scratch) - 11;
} else {
vs->thread2.scratch_space_base_pointer = 0;
vs->thread2.per_thread_scratch_space = 0;
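The `ffs(x) - 11` expression above (and in the hunks that follow) converts
a power-of-two per-thread slot size into the log2-encoded hardware field,
where 0 = 1kB, 1 = 2kB, 2 = 4kB, and so on. A self-contained sanity check
of that arithmetic (standalone example, not part of the patch):

```c
#include <assert.h>
#include <strings.h> /* ffs() */

int main(void)
{
   /* ffs() returns the 1-based index of the lowest set bit, so for a
    * power-of-two size in bytes, ffs(size) - 11 == log2(size / 1024). */
   assert(ffs(1024) - 11 == 0);             /* 1kB  -> 0  */
   assert(ffs(2048) - 11 == 1);             /* 2kB  -> 1  */
   assert(ffs(64 * 1024) - 11 == 6);        /* 64kB -> 6  */
   assert(ffs(2 * 1024 * 1024) - 11 == 11); /* 2MB  -> 11 */
   return 0;
}
```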
wm->thread2.scratch_space_base_pointer =
brw->wm.base.scratch_bo->offset64 >> 10; /* reloc */
wm->thread2.per_thread_scratch_space =
- ffs(prog_data->base.total_scratch) - 11;
+ ffs(brw->wm.base.per_thread_scratch) - 11;
} else {
wm->thread2.scratch_space_base_pointer = 0;
wm->thread2.per_thread_scratch_space = 0;
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0); /* no scratch space */
}
if (brw->vs.prog_data->base.base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(brw->vs.prog_data->base.base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
if (brw->gen >= 8) {
   /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
    * where 0 = 1kB, 1 = 2kB, 2 = 4kB, ..., 11 = 2MB.
    */
OUT_RELOC64(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else if (brw->is_haswell) {
/* Haswell's Per Thread Scratch Space is in the range [0, 10]
* where 0 = 2kB, 1 = 4kB, 2 = 8kB, ..., 10 = 2MB.
*/
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->total_scratch) - 12);
+ ffs(stage_state->per_thread_scratch) - 12);
} else {
/* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
* where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
*/
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- prog_data->total_scratch / 1024 - 1);
+ stage_state->per_thread_scratch / 1024 - 1);
}
} else {
OUT_BATCH(0);
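The three branches above are the only place in the patch where the
encoding varies by hardware generation. Factored out for clarity, they
amount to the following sketch; the helper and its signature are
hypothetical, with the generation and Haswell flag passed explicitly so
the example stands alone:

```c
#include <stdbool.h>
#include <stdint.h>
#include <strings.h> /* ffs() */

/* Hypothetical helper collecting the per-thread scratch encodings used
 * in the hunks above; not part of the patch itself. */
static uint32_t
encode_per_thread_scratch(int gen, bool is_haswell, unsigned size)
{
   if (gen >= 8)
      return ffs(size) - 11;  /* log2 encoding: 0 = 1kB, ..., 11 = 2MB    */
   else if (is_haswell)
      return ffs(size) - 12;  /* log2 encoding: 0 = 2kB, ..., 10 = 2MB    */
   else
      return size / 1024 - 1; /* linear encoding: 0 = 1kB, ..., 11 = 12kB */
}
```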
if (prog_data->total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(brw->gs.prog_data->base.base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
if (prog_data->base.total_scratch) {
OUT_RELOC(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
if (prog_data->base.total_scratch) {
OUT_RELOC(brw->wm.base.scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
}
if (prog_data->total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
OUT_BATCH(0);
if (brw->gs.prog_data->base.base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(brw->gs.prog_data->base.base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
OUT_BATCH(0);
if (prog_data->base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
OUT_BATCH(0);
if (prog_data->base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
OUT_BATCH(0);
if (prog_data->base.total_scratch) {
OUT_RELOC64(stage_state->scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(prog_data->base.total_scratch) - 11);
+ ffs(stage_state->per_thread_scratch) - 11);
} else {
OUT_BATCH(0);
OUT_BATCH(0);