X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcommon%2Fgen_urb_config.c;h=0b819f4c937190846c791f412758a8e5fefd4b2b;hb=9458b017a946778ef5d065bfd61c47dafdfe3e94;hp=c925a0dbb3647bd65b50c890bf10f51c38d3070c;hpb=9ef2b9277d3bead6dbfa47e95794ca61e8be4e84;p=mesa.git diff --git a/src/intel/common/gen_urb_config.c b/src/intel/common/gen_urb_config.c index c925a0dbb36..0b819f4c937 100644 --- a/src/intel/common/gen_urb_config.c +++ b/src/intel/common/gen_urb_config.c @@ -26,6 +26,7 @@ #include "util/macros.h" #include "main/macros.h" +#include "compiler/shader_enums.h" #include "gen_l3_config.h" @@ -58,19 +59,42 @@ */ void gen_get_urb_config(const struct gen_device_info *devinfo, - unsigned push_constant_bytes, unsigned urb_size_bytes, + const struct gen_l3_config *l3_cfg, bool tess_present, bool gs_present, const unsigned entry_size[4], - unsigned entries[4], unsigned start[4]) + unsigned entries[4], unsigned start[4], + enum gen_urb_deref_block_size *deref_block_size) { + unsigned urb_size_kB = gen_get_l3_config_urb_size(devinfo, l3_cfg); + + /* RCU_MODE register for Gen12+ in BSpec says: + * + * "HW reserves 4KB of URB space per bank for Compute Engine out of the + * total storage available in L3. SW must consider that 4KB of storage + * per bank will be reduced from what is programmed for the URB space + * in L3 for Render Engine executed workloads. + * + * Example: When URB space programmed is 64KB (per bank) for Render + * Engine, the actual URB space available for operation is only 60KB + * (per bank). Similarly when URB space programmed is 128KB (per bank) + * for render engine, the actual URB space available for operation is + * only 124KB (per bank). More detailed descripton available in "L3 + * Cache" section of the B-Spec." + */ + if (devinfo->gen >= 12) + urb_size_kB -= 4 * devinfo->l3_banks; + + const unsigned push_constant_kB = + (devinfo->gen >= 8 || (devinfo->is_haswell && devinfo->gt == 3)) ? 32 : 16; + const bool active[4] = { true, tess_present, tess_present, gs_present }; /* URB allocations must be done in 8k chunks. */ - const unsigned chunk_size_bytes = 8192; + const unsigned chunk_size_kB = 8; + const unsigned chunk_size_bytes = chunk_size_kB * 1024; - const unsigned push_constant_chunks = - push_constant_bytes / chunk_size_bytes; - const unsigned urb_chunks = urb_size_bytes / chunk_size_bytes; + const unsigned push_constant_chunks = push_constant_kB / chunk_size_kB; + const unsigned urb_chunks = urb_size_kB / chunk_size_kB; /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): * @@ -160,7 +184,7 @@ gen_get_urb_config(const struct gen_device_info *devinfo, roundf(wants[i] * (((float) remaining_space) / total_wants)); chunks[i] += additional; remaining_space -= additional; - total_wants -= additional; + total_wants -= wants[i]; } chunks[MESA_SHADER_GEOMETRY] += remaining_space; @@ -194,8 +218,53 @@ gen_get_urb_config(const struct gen_device_info *devinfo, } /* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */ - start[0] = push_constant_chunks; - for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) { - start[i] = start[i - 1] + chunks[i - 1]; + int next = push_constant_chunks; + for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { + if (entries[i]) { + start[i] = next; + next += chunks[i]; + } else { + /* Just put disabled stages at the beginning. */ + start[i] = 0; + } + } + + if (deref_block_size) { + if (devinfo->gen >= 12) { + /* From the Gen12 BSpec: + * + * "Deref Block size depends on the last enabled shader and number + * of handles programmed for that shader + * + * 1) For GS last shader enabled cases, the deref block is + * always set to a per poly(within hardware) + * + * If the last enabled shader is VS or DS. + * + * 1) If DS is last enabled shader then if the number of DS + * handles is less than 324, need to set per poly deref. + * + * 2) If VS is last enabled shader then if the number of VS + * handles is less than 192, need to set per poly deref" + * + * The default is 32 so we assume that's the right choice if we're + * not in one of the explicit cases listed above. + */ + if (gs_present) { + *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY; + } else if (tess_present) { + if (entries[MESA_SHADER_TESS_EVAL] < 324) + *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY; + else + *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_32; + } else { + if (entries[MESA_SHADER_VERTEX] < 192) + *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY; + else + *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_32; + } + } else { + *deref_block_size = 0; + } } }