From: Anuj Phogat Date: Tue, 5 Jan 2016 16:41:39 +0000 (-0800) Subject: i965/cnl: Make URB {VS, GS, HS, DS} sizes non multiple of 3 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f9e31a26d4cf075e236e92aea63bb69eb9fad533;p=mesa.git i965/cnl: Make URB {VS, GS, HS, DS} sizes non multiple of 3 v1: By Ben Widawsky v2: v1 had an assert only for VS. Add the restriction for GS, HS and DS as well and make sure the allocated sizes are not multiple of 3. v3: Move the entry_size checks in to compiler code (Ken) Signed-off-by: Anuj Phogat Reviewed-by: Kenneth Graunke --- diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 269b8a099a4..53d0742d2e8 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -1197,6 +1197,14 @@ brw_compile_tes(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. + */ + if (devinfo->gen == 10 && + prog_data->base.urb_entry_size % 3 == 0) + prog_data->base.urb_entry_size++; + prog_data->base.urb_read_length = 0; STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index a641ebfbba9..b443effca9a 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -2839,10 +2839,17 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, const unsigned vue_entries = MAX2(nr_attribute_slots, (unsigned)prog_data->base.vue_map.num_slots); - if (compiler->devinfo->gen == 6) + if (compiler->devinfo->gen == 6) { prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8); - else + } else { prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4); + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. + */ + if (compiler->devinfo->gen == 10 && + prog_data->base.urb_entry_size % 3 == 0) + prog_data->base.urb_entry_size++; + } if (INTEL_DEBUG & DEBUG_VS) { fprintf(stderr, "VS Output "); diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index d0236df0912..f763f482365 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -817,10 +817,17 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and * a multiple of 128 bytes in gen6. */ - if (compiler->devinfo->gen >= 7) + if (compiler->devinfo->gen >= 7) { prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; - else + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. + */ + if (compiler->devinfo->gen == 10 && + prog_data->base.urb_entry_size % 3 == 0) + prog_data->base.urb_entry_size++; + } else { prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; + } assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim)); prog_data->output_topology = diff --git a/src/intel/compiler/brw_vec4_tcs.cpp b/src/intel/compiler/brw_vec4_tcs.cpp index 96597b8f2ad..c4d9f89a91b 100644 --- a/src/intel/compiler/brw_vec4_tcs.cpp +++ b/src/intel/compiler/brw_vec4_tcs.cpp @@ -441,6 +441,13 @@ brw_compile_tcs(const struct brw_compiler *compiler, /* URB entry sizes are stored as a multiple of 64 bytes. */ vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + /* On Cannonlake software shall not program an allocation size that + * specifies a size that is a multiple of 3 64B (512-bit) cachelines. + */ + if (devinfo->gen == 10 && + vue_prog_data->urb_entry_size % 3 == 0) + vue_prog_data->urb_entry_size++; + /* HS does not use the usual payload pushing from URB to GRFs, * because we don't have enough registers for a full-size payload, and * the hardware is broken on Haswell anyway. diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c index 028161df395..525c9c4d3da 100644 --- a/src/mesa/drivers/dri/i965/gen7_urb.c +++ b/src/mesa/drivers/dri/i965/gen7_urb.c @@ -224,6 +224,7 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size, BEGIN_BATCH(8); for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + assert(brw->gen != 10 || entry_size[i] % 3); OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2)); OUT_BATCH(entries[i] | ((entry_size[i] - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |