gen6_3DSTATE_WM(struct ilo_builder *builder,
const struct ilo_state_raster *rs,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
dw[1] = kernel_offset;
dw[6] = rs->wm[2] | ps->ps[4];
dw[7] = 0; /* kernel 1 */
dw[8] = 0; /* kernel 2 */
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc(builder, pos + 2, scratch_bo,
+ ps->ps[0], 0);
+ }
}
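
One possible caller shape (a sketch, not part of this change): allocate the scratch BO from the size the state object now records, then emit. emit_gen6_wm() is a hypothetical helper, and intel_winsys_alloc_bo() is assumed to have its usual (winsys, name, size, cpu_init) signature; a real caller would cache and unreference the BO rather than allocate per emit.

static void
emit_gen6_wm(struct ilo_builder *builder,
             struct intel_winsys *winsys,
             const struct ilo_state_raster *rs,
             const struct ilo_state_ps *ps,
             uint32_t kernel_offset)
{
   struct intel_bo *scratch_bo = NULL;
   const uint32_t size = ilo_state_ps_get_scratch_size(ps);

   /* total size is the rounded per-thread size times the PS thread count */
   if (size)
      scratch_bo = intel_winsys_alloc_bo(winsys, "ps scratch", size, false);

   gen6_3DSTATE_WM(builder, rs, ps, kernel_offset, scratch_bo);
}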
static inline void
gen7_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 8;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
dw[5] = ps->ps[5];
dw[6] = 0; /* kernel 1 */
dw[7] = 0; /* kernel 2 */
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ ps->ps[3], 0);
+ }
}
static inline void
gen8_3DSTATE_PS(struct ilo_builder *builder,
const struct ilo_state_ps *ps,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 12;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2);
dw[1] = kernel_offset;
dw[9] = 0;
dw[10] = 0; /* kernel 2 */
dw[11] = 0;
+
+ if (ilo_state_ps_get_scratch_size(ps)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ ps->ps[1], 0);
+ }
}
static inline void
gen6_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
dw[3] = vs->vs[1];
dw[4] = vs->vs[2];
dw[5] = vs->vs[3];
+
+ if (ilo_state_vs_get_scratch_size(vs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ vs->vs[1], 0);
+ }
}
static inline void
gen8_3DSTATE_VS(struct ilo_builder *builder,
const struct ilo_state_vs *vs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
dw[1] = kernel_offset;
dw[6] = vs->vs[2];
dw[7] = vs->vs[3];
dw[8] = vs->vs[4];
+
+ if (ilo_state_vs_get_scratch_size(vs)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ vs->vs[1], 0);
+ }
}
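
The Gen8 variants record the relocation with ilo_builder_batch_reloc64() because the scratch base pointer widens to 64 bits and spans two DWords. Conceptually, the resolved write looks like this sketch (illustrative only, not the builder's actual internals):

#include <stdint.h>

/* Sketch of what a 64-bit scratch relocation resolves to at submit time:
 * the delta (here vs->vs[1], carrying the per-thread space encoding in its
 * low bits) is added to the BO's final GPU address, and the sum fills two
 * consecutive DWords. */
static void
apply_reloc64(uint32_t *dw, unsigned pos, uint64_t bo_addr, uint32_t delta)
{
   const uint64_t value = bo_addr + delta;

   dw[pos] = (uint32_t)value;
   dw[pos + 1] = (uint32_t)(value >> 32);
}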
static inline void
gen7_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
dw[4] = hs->hs[2];
dw[5] = hs->hs[3];
dw[6] = 0;
+
+ if (ilo_state_hs_get_scratch_size(hs)) {
+ ilo_builder_batch_reloc(builder, pos + 4, scratch_bo,
+ hs->hs[2], 0);
+ }
}
static inline void
gen8_3DSTATE_HS(struct ilo_builder *builder,
const struct ilo_state_hs *hs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
/* see hs_set_gen7_3DSTATE_HS() */
dw[6] = 0;
dw[7] = hs->hs[3];
dw[8] = 0;
+
+ if (ilo_state_hs_get_scratch_size(hs)) {
+ ilo_builder_batch_reloc64(builder, pos + 5, scratch_bo,
+ hs->hs[2], 0);
+ }
}
static inline void
gen7_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 6;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
dw[3] = ds->ds[1];
dw[4] = ds->ds[2];
dw[5] = ds->ds[3];
+
+ if (ilo_state_ds_get_scratch_size(ds)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ ds->ds[1], 0);
+ }
}
static inline void
gen8_3DSTATE_DS(struct ilo_builder *builder,
const struct ilo_state_ds *ds,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 9;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
/* see ds_set_gen7_3DSTATE_DS() */
dw[6] = ds->ds[2];
dw[7] = ds->ds[3];
dw[8] = ds->ds[4];
+
+ if (ilo_state_ds_get_scratch_size(ds)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ ds->ds[1], 0);
+ }
}
static inline void
gen6_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 6, 6);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = gs->gs[4];
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
gen7_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 7;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 7, 7.5);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
dw[4] = gs->gs[2];
dw[5] = gs->gs[3];
dw[6] = 0;
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc(builder, pos + 3, scratch_bo,
+ gs->gs[1], 0);
+ }
}
static inline void
gen8_3DSTATE_GS(struct ilo_builder *builder,
const struct ilo_state_gs *gs,
- uint32_t kernel_offset)
+ uint32_t kernel_offset,
+ struct intel_bo *scratch_bo)
{
const uint8_t cmd_len = 10;
uint32_t *dw;
+ unsigned pos;
ILO_DEV_ASSERT(builder->dev, 8, 8);
- ilo_builder_batch_pointer(builder, cmd_len, &dw);
+ pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
dw[1] = kernel_offset;
dw[7] = gs->gs[3];
dw[8] = 0;
dw[9] = gs->gs[4];
+
+ if (ilo_state_gs_get_scratch_size(gs)) {
+ ilo_builder_batch_reloc64(builder, pos + 4, scratch_bo,
+ gs->gs[1], 0);
+ }
}
*/
assert(per_thread_read <= 63);
- /* From the Haswell PRM, volume 2d, page 199:
+ /*
+ * From the Haswell PRM, volume 2d, page 199:
*
* "(Cross-Thread Constant Data Read Length) [0,127]"
*/
return true;
}
-static uint8_t
-compute_get_gen6_scratch_space(const struct ilo_dev *dev,
- const struct ilo_state_compute_info *info)
+static uint32_t
+compute_get_gen6_per_thread_scratch_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ uint8_t *per_thread_space)
{
- uint32_t scratch_size = 0;
- uint8_t i;
- ILO_DEV_ASSERT(dev, 6, 8);
+ ILO_DEV_ASSERT(dev, 6, 7);
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 2, page 30:
+ *
+ * "(Per Thread Scratch Space)
+ * Range = [0,11] indicating [1k bytes, 12k bytes] [DevSNB]"
+ */
+ assert(info->per_thread_scratch_size <= 12 * 1024);
- for (i = 0; i < info->interface_count; i++) {
- if (scratch_size < info->interfaces[i].scratch_size)
- scratch_size = info->interfaces[i].scratch_size;
+ if (!info->per_thread_scratch_size) {
+ *per_thread_space = 0;
+ return 0;
}
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- assert(scratch_size <= 2 * 1024 * 1024);
+ *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+ (info->per_thread_scratch_size - 1) / 1024 : 0;
+
+ return 1024 * (1 + *per_thread_space);
+}
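
On Sandy Bridge the VFE field is linear in 1KB steps and stores one less than the KB count, so the rounding above is plain integer arithmetic. A standalone worked example with an illustrative request size:

#include <assert.h>

int main(void)
{
   const unsigned request = 3000; /* bytes wanted per thread (illustrative) */
   const unsigned space = (request > 1024) ? (request - 1) / 1024 : 0;

   assert(space == 2);                 /* field value programmed into DW1 */
   assert(1024 * (1 + space) == 3072); /* actual per-thread allocation: 3KB */
   return 0;
}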
- /* next power of two, starting from 1KB */
- return (scratch_size > 1024) ?
- (util_last_bit(scratch_size - 1) - 10): 0;
- } else if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
- assert(scratch_size <= 2 * 1024 * 1024);
+static uint32_t
+compute_get_gen75_per_thread_scratch_size(const struct ilo_dev *dev,
+ const struct ilo_state_compute_info *info,
+ uint8_t *per_thread_space)
+{
+ ILO_DEV_ASSERT(dev, 7.5, 8);
- /* next power of two, starting from 2KB */
- return (scratch_size > 2048) ?
- (util_last_bit(scratch_size - 1) - 11): 0;
- } else {
- assert(scratch_size <= 12 * 1024);
+ /*
+ * From the Haswell PRM, volume 2b, page 407:
+ *
+ * "(Per Thread Scratch Space)
+ * [0,10] Indicating [2k bytes, 2 Mbytes]"
+ *
+ * "Note: The scratch space should be declared as 2x the desired
+ * scratch space. The stack will start at the half-way point instead
+ * of the end. The upper half of scratch space will not be accessed
+ * and so does not have to be allocated in memory."
+ *
+ * From the Broadwell PRM, volume 2a, page 450:
+ *
+ * "(Per Thread Scratch Space)
+ * [0,11] indicating [1k bytes, 2 Mbytes]"
+ */
+ assert(info->per_thread_scratch_size <=
+ ((ilo_dev_gen(dev) >= ILO_GEN(8)) ? 2 : 1) * 1024 * 1024);
- return (scratch_size > 1024) ?
- (scratch_size - 1) / 1024 : 0;
+ if (!info->per_thread_scratch_size) {
+ *per_thread_space = 0;
+ return 0;
}
+
+ /* next power of two, starting from 1KB */
+ *per_thread_space = (info->per_thread_scratch_size > 1024) ?
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+
+ return 1 << (10 + *per_thread_space);
}
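
From Haswell on the field is a power-of-two exponent relative to 1KB, so requests round up to the next power of two. The example below mirrors the computation; last_bit() stands in for Mesa's util_last_bit() (one plus the index of the most significant set bit, 0 for 0):

#include <assert.h>

static unsigned
last_bit(unsigned v)
{
   unsigned n = 0;

   while (v) {
      n++;
      v >>= 1;
   }

   return n;
}

int main(void)
{
   const unsigned request = 5000; /* bytes wanted per thread (illustrative) */
   const unsigned space = (request > 1024) ? last_bit(request - 1) - 10 : 0;

   assert(space == 3);                   /* 1KB << 3 = 8KB */
   assert((1u << (10 + space)) == 8192); /* rounded-up per-thread size */
   return 0;
}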
static bool
const struct ilo_state_compute_info *info)
{
struct compute_urb_configuration urb;
- uint8_t scratch_space;
+ uint32_t per_thread_size;
+ uint8_t per_thread_space;
uint32_t dw1, dw2, dw4;
!compute_validate_gen6(dev, info, &urb))
return false;
- scratch_space = compute_get_gen6_scratch_space(dev, info);
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ per_thread_size = compute_get_gen75_per_thread_scratch_size(dev,
+ info, &per_thread_space);
+ } else {
+ per_thread_size = compute_get_gen6_per_thread_scratch_size(dev,
+ info, &per_thread_space);
+ }
+
- dw1 = scratch_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw1 = per_thread_space << GEN6_VFE_DW1_SCRATCH_SPACE_PER_THREAD__SHIFT;
dw2 = (dev->thread_count - 1) << GEN6_VFE_DW2_MAX_THREADS__SHIFT |
urb.urb_entry_count << GEN6_VFE_DW2_URB_ENTRY_COUNT__SHIFT |
GEN6_VFE_DW2_RESET_GATEWAY_TIMER |
compute->vfe[1] = dw2;
compute->vfe[2] = dw4;
+ compute->scratch_size = per_thread_size * dev->thread_count;
+
return true;
}
/* usually 0 unless there are multiple interfaces */
uint32_t kernel_offset;
- uint32_t scratch_size;
-
uint8_t sampler_count;
uint8_t surface_count;
const struct ilo_state_compute_interface_info *interfaces;
uint8_t interface_count;
+ uint32_t per_thread_scratch_size;
+
uint32_t cv_urb_alloc_size;
uint32_t curbe_alloc_size;
};
uint32_t (*idrt)[6];
uint8_t idrt_count;
+
+ uint32_t scratch_size;
};
static inline size_t
const struct ilo_dev *dev,
const struct ilo_state_compute_info *info);
+static inline uint32_t
+ilo_state_compute_get_scratch_size(const struct ilo_state_compute *compute)
+{
+ return compute->scratch_size;
+}
+
#endif /* ILO_STATE_COMPUTE_H */
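
Since compute->scratch_size already folds in dev->thread_count, a caller can size the scratch BO directly from the accessor. A minimal sketch (hypothetical helper name; intel_winsys_alloc_bo() assumed to have its usual signature):

static struct intel_bo *
alloc_compute_scratch_bo(struct intel_winsys *winsys,
                         const struct ilo_state_compute *compute)
{
   const uint32_t size = ilo_state_compute_get_scratch_size(compute);

   /* zero means the kernels use no scratch and no BO is needed */
   return size ? intel_winsys_alloc_bo(winsys, "cs scratch", size, false) :
                 NULL;
}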
struct vertex_ff {
uint8_t grf_start;
- uint8_t scratch_space;
+
+ uint8_t per_thread_scratch_space;
+ uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
* others.
*/
const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 134:
- *
- * "(Per-Thread Scratch Space)
- * Range [0,11] indicating [1K Bytes, 2M Bytes]"
- */
- const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
assert(!kernel->offset);
assert(kernel->grf_start < max_grf_start);
- assert(kernel->scratch_size <= max_scratch_size);
return true;
}
const struct ilo_state_shader_kernel_info *kernel,
const struct ilo_state_shader_resource_info *resource,
const struct ilo_state_shader_urb_info *urb,
+ uint32_t per_thread_scratch_size,
struct vertex_ff *ff)
{
ILO_DEV_ASSERT(dev, 6, 8);
+ memset(ff, 0, sizeof(*ff));
+
if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
!vertex_validate_gen6_urb(dev, stage, urb))
return false;
ff->grf_start = kernel->grf_start;
- /* next power of two, starting from 1KB */
- ff->scratch_space = (kernel->scratch_size > 1024) ?
- (util_last_bit(kernel->scratch_size - 1) - 10): 0;
+
+ if (per_thread_scratch_size) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 134:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1K Bytes, 2M Bytes]"
+ */
+ assert(per_thread_scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
+ (util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
+ ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+ }
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
ILO_DEV_ASSERT(dev, 6, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = vs_get_gen6_thread_count(dev, info);
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
if (ilo_dev_gen(dev) >= ILO_GEN(8))
vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
+ vs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
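
The stored total is the rounded per-thread size times the stage's maximum thread count, which is exactly what the scratch BO must hold. With illustrative numbers:

#include <assert.h>

int main(void)
{
   const unsigned per_thread = 1 << 14; /* 12000-byte request rounds to 16KB */
   const unsigned thread_count = 36;    /* hypothetical VS thread limit */

   assert(per_thread * thread_count == 576 * 1024); /* 576KB scratch BO */
   return 0;
}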
ILO_DEV_ASSERT(dev, 7, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = hs_get_gen7_thread_count(dev, info);
if (info->stats_enable)
dw2 |= GEN7_HS_DW2_STATISTICS;
- dw4 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw4 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
hs->hs[2] = dw4;
hs->hs[3] = dw5;
+ hs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
ILO_DEV_ASSERT(dev, 7, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel,
- &info->resource, &info->urb, &ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, &ff))
return false;
thread_count = ds_get_gen7_thread_count(dev, info);
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
if (ilo_dev_gen(dev) >= ILO_GEN(8))
ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
+ ds->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
ILO_DEV_ASSERT(dev, 6, 8);
- if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel,
- &info->resource, &info->urb, ff))
+ if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
+ &info->urb, info->per_thread_scratch_size, ff))
return false;
/*
ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
gs->gs[3] = dw5;
gs->gs[4] = dw6;
+ gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
dw2 |= GEN75_THREADDISP_ACCESS_UAV;
- dw3 = ff.scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff.per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
if (ilo_dev_gen(dev) >= ILO_GEN(8))
gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
+ gs->scratch_size = ff.per_thread_scratch_size * thread_count;
+
return true;
}
uint8_t grf_start;
uint8_t pcb_attr_count;
-
- uint32_t scratch_size;
};
/**
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
struct ilo_state_shader_resource_info resource;
struct ilo_state_shader_urb_info urb;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
struct ilo_state_gs_sol_info sol;
+ uint32_t per_thread_scratch_size;
bool dispatch_enable;
bool stats_enable;
};
struct ilo_state_ps_io_info io;
struct ilo_state_ps_params_info params;
+ uint32_t per_thread_scratch_size;
+
/* bitmask of GEN6_PS_DISPATCH_x */
uint8_t valid_kernels;
bool per_sample_dispatch;
struct ilo_state_vs {
uint32_t vs[5];
+ uint32_t scratch_size;
};
struct ilo_state_hs {
uint32_t hs[4];
+ uint32_t scratch_size;
};
struct ilo_state_ds {
uint32_t te[3];
uint32_t ds[5];
+ uint32_t scratch_size;
};
struct ilo_state_gs {
uint32_t gs[5];
+ uint32_t scratch_size;
};
struct ilo_state_ps {
uint32_t ps[8];
+ uint32_t scratch_size;
struct ilo_state_ps_dispatch_conds {
bool ps_valid;
ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_vs_get_scratch_size(const struct ilo_state_vs *vs)
+{
+ return vs->scratch_size;
+}
+
bool
ilo_state_hs_init(struct ilo_state_hs *hs,
const struct ilo_dev *dev,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_hs_get_scratch_size(const struct ilo_state_hs *hs)
+{
+ return hs->scratch_size;
+}
+
bool
ilo_state_ds_init(struct ilo_state_ds *ds,
const struct ilo_dev *dev,
ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_ds_get_scratch_size(const struct ilo_state_ds *ds)
+{
+ return ds->scratch_size;
+}
+
bool
ilo_state_gs_init(struct ilo_state_gs *gs,
const struct ilo_dev *dev,
ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
const struct ilo_dev *dev);
+static inline uint32_t
+ilo_state_gs_get_scratch_size(const struct ilo_state_gs *gs)
+{
+ return gs->scratch_size;
+}
+
bool
ilo_state_ps_init(struct ilo_state_ps *ps,
const struct ilo_dev *dev,
const struct ilo_dev *dev,
const struct ilo_state_ps_params_info *params);
+static inline uint32_t
+ilo_state_ps_get_scratch_size(const struct ilo_state_ps *ps)
+{
+ return ps->scratch_size;
+}
+
#endif /* ILO_STATE_SHADER_H */
uint32_t kernel_offsets[3];
uint8_t grf_starts[3];
bool pcb_enable;
- uint8_t scratch_space;
+ uint8_t per_thread_scratch_space;
+ uint32_t per_thread_scratch_size;
uint8_t sampler_count;
uint8_t surface_count;
{
/* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
const uint8_t max_grf_start = 128;
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 271:
- *
- * "(Per-Thread Scratch Space)
- * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
- */
- const uint32_t max_scratch_size = 2 * 1024 * 1024;
ILO_DEV_ASSERT(dev, 6, 8);
assert(kernel->offset % 64 == 0);
assert(kernel->grf_start < max_grf_start);
- assert(kernel->scratch_size <= max_scratch_size);
return true;
}
const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
- uint32_t scratch_size;
ILO_DEV_ASSERT(dev, 6, 8);
((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
kernel_32->pcb_attr_count));
- scratch_size = 0;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
- scratch_size < kernel_8->scratch_size)
- scratch_size = kernel_8->scratch_size;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
- scratch_size < kernel_16->scratch_size)
- scratch_size = kernel_16->scratch_size;
- if ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
- scratch_size < kernel_32->scratch_size)
- scratch_size = kernel_32->scratch_size;
-
- /* next power of two, starting from 1KB */
- ff->scratch_space = (scratch_size > 1024) ?
- (util_last_bit(scratch_size - 1) - 10): 0;
-
/* GPU hangs on Haswell if none of the dispatch mode bits is set */
if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
return false;
+ if (info->per_thread_scratch_size) {
+ /*
+ * From the Sandy Bridge PRM, volume 2 part 1, page 271:
+ *
+ * "(Per-Thread Scratch Space)
+ * Range [0,11] indicating [1k bytes, 2M bytes] in powers of two"
+ */
+ assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
+
+ /* next power of two, starting from 1KB */
+ ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
+ (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
+ ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
+ }
+
ff->sampler_count = (resource->sampler_count <= 12) ?
(resource->sampler_count + 3) / 4 : 4;
ff->surface_count = resource->surface_count;
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
if (false)
dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw3 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw3 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
if (false)
dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
- dw4 = ff->scratch_space << GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
+ dw4 = ff->per_thread_scratch_space <<
+ GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
}
+ ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
/* save conditions */
ps->conds = ff.conds;
if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO))
- gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs_sol.vs, kernel_offset, NULL);
else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
}
}
cso = ilo_shader_get_kernel_cso(vec->gs);
kernel_offset = ilo_shader_get_kernel_offset(vec->gs);
- gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset);
+ gen6_3DSTATE_GS(r->builder, &cso->gs, kernel_offset, NULL);
} else if (ilo_dev_gen(r->dev) == ILO_GEN(6) &&
ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
const int verts_per_prim =
kernel_offset = ilo_shader_get_kernel_offset(vec->vs) +
ilo_shader_get_kernel_param(vec->vs, param);
- gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset);
+ gen6_3DSTATE_GS(r->builder, &cso->vs_sol.sol, kernel_offset, NULL);
} else {
- gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0);
+ gen6_3DSTATE_GS(r->builder, &vec->disabled_gs, 0, NULL);
}
}
}
gen6_wa_pre_3dstate_wm_max_threads(r);
gen6_3DSTATE_WM(r->builder, &vec->rasterizer->rs,
- &cso->ps, kernel_offset);
+ &cso->ps, kernel_offset, NULL);
}
}
gen6_wa_post_3dstate_constant_vs(r);
gen6_wa_pre_3dstate_vs_toggle(r);
- gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen6_3DSTATE_GS(r->builder, &blitter->gs, 0);
+ gen6_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
gen6_3DSTATE_SF(r->builder, &blitter->fb.rs, &blitter->sbe);
gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen6_wa_pre_3dstate_wm_max_threads(r);
- gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0);
+ gen6_3DSTATE_WM(r->builder, &blitter->fb.rs, &blitter->ps, 0, NULL);
}
static void
const uint32_t kernel_offset = ilo_shader_get_kernel_offset(vec->vs);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen8_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
else
- gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset);
+ gen6_3DSTATE_VS(r->builder, &cso->vs, kernel_offset, NULL);
}
}
gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_HS(r->builder, hs, kernel_offset);
+ gen8_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
else
- gen7_3DSTATE_HS(r->builder, hs, kernel_offset);
+ gen7_3DSTATE_HS(r->builder, hs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_HS */
gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_DS(r->builder, ds, kernel_offset);
+ gen8_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
else
- gen7_3DSTATE_DS(r->builder, ds, kernel_offset);
+ gen7_3DSTATE_DS(r->builder, ds, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_DS */
gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
- gen8_3DSTATE_GS(r->builder, gs, kernel_offset);
+ gen8_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
else
- gen7_3DSTATE_GS(r->builder, gs, kernel_offset);
+ gen7_3DSTATE_GS(r->builder, gs, kernel_offset, NULL);
}
/* 3DSTATE_BINDING_TABLE_POINTERS_GS */
if (r->hw_ctx_changed)
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+ gen7_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
}
/* 3DSTATE_SCISSOR_STATE_POINTERS */
const struct ilo_blitter *blitter)
{
gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
- gen6_3DSTATE_VS(r->builder, &blitter->vs, 0);
+ gen6_3DSTATE_VS(r->builder, &blitter->vs, 0, NULL);
gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_HS(r->builder, &blitter->hs, 0);
+ gen7_3DSTATE_HS(r->builder, &blitter->hs, 0, NULL);
gen7_3DSTATE_TE(r->builder, &blitter->ds);
gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_DS(r->builder, &blitter->ds, 0);
+ gen7_3DSTATE_DS(r->builder, &blitter->ds, 0, NULL);
gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
- gen7_3DSTATE_GS(r->builder, &blitter->gs, 0);
+ gen7_3DSTATE_GS(r->builder, &blitter->gs, 0, NULL);
gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
gen7_wa_pre_3dstate_ps_max_threads(r);
- gen7_3DSTATE_PS(r->builder, &blitter->ps, 0);
+ gen7_3DSTATE_PS(r->builder, &blitter->ps, 0, NULL);
}
static void
/* 3DSTATE_PS */
if (DIRTY(FS) || r->instruction_bo_changed)
- gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset);
+ gen8_3DSTATE_PS(r->builder, &cso->ps, kernel_offset, NULL);
/* 3DSTATE_PS_EXTRA */
if (DIRTY(FS))
kern->grf_start = kernel->in.start_grf;
kern->pcb_attr_count =
(kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
- kern->scratch_size = 0;
}
static void
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.dispatch_enable = true;
info.stats_enable = true;
init_shader_urb(kernel, state, &info.urb);
init_shader_kernel(kernel, state, &info.kernel);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.dispatch_enable = true;
info.stats_enable = true;
init_shader_kernel(kernel, state, &info.kernel_8);
init_shader_resource(kernel, state, &info.resource);
+ info.per_thread_scratch_size = 0;
info.io.has_rt_write = true;
info.io.posoffset = GEN6_POSOFFSET_NONE;
info.io.attr_count = kernel->in.count;