*/
void
gen_get_urb_config(const struct gen_device_info *devinfo,
- unsigned push_constant_bytes, unsigned urb_size_bytes,
+ const struct gen_l3_config *l3_cfg,
bool tess_present, bool gs_present,
const unsigned entry_size[4],
- unsigned entries[4], unsigned start[4])
+ unsigned entries[4], unsigned start[4],
+ enum gen_urb_deref_block_size *deref_block_size)
{
+ unsigned urb_size_kB = gen_get_l3_config_urb_size(devinfo, l3_cfg);
+
+ /* RCU_MODE register for Gen12+ in BSpec says:
+ *
+ * "HW reserves 4KB of URB space per bank for Compute Engine out of the
+ * total storage available in L3. SW must consider that 4KB of storage
+ * per bank will be reduced from what is programmed for the URB space
+ * in L3 for Render Engine executed workloads.
+ *
+ * Example: When URB space programmed is 64KB (per bank) for Render
+ * Engine, the actual URB space available for operation is only 60KB
+ * (per bank). Similarly when URB space programmed is 128KB (per bank)
+ * for render engine, the actual URB space available for operation is
+ * only 124KB (per bank). More detailed descripton available in "L3
+ * Cache" section of the B-Spec."
+ */
+ if (devinfo->gen >= 12)
+ urb_size_kB -= 4 * devinfo->l3_banks;
+
+ const unsigned push_constant_kB =
+ (devinfo->gen >= 8 || (devinfo->is_haswell && devinfo->gt == 3)) ? 32 : 16;
+
const bool active[4] = { true, tess_present, tess_present, gs_present };
/* URB allocations must be done in 8k chunks. */
- const unsigned chunk_size_bytes = 8192;
+ const unsigned chunk_size_kB = 8;
+ const unsigned chunk_size_bytes = chunk_size_kB * 1024;
- const unsigned push_constant_chunks =
- push_constant_bytes / chunk_size_bytes;
- const unsigned urb_chunks = urb_size_bytes / chunk_size_bytes;
+ const unsigned push_constant_chunks = push_constant_kB / chunk_size_kB;
+ const unsigned urb_chunks = urb_size_kB / chunk_size_kB;
/* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
*
}
/* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */
- start[0] = push_constant_chunks;
- for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) {
- start[i] = start[i - 1] + chunks[i - 1];
+ int next = push_constant_chunks;
+ for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+ if (entries[i]) {
+ start[i] = next;
+ next += chunks[i];
+ } else {
+ /* Just put disabled stages at the beginning. */
+ start[i] = 0;
+ }
+ }
+
+ if (deref_block_size) {
+ if (devinfo->gen >= 12) {
+ /* From the Gen12 BSpec:
+ *
+ * "Deref Block size depends on the last enabled shader and number
+ * of handles programmed for that shader
+ *
+ * 1) For GS last shader enabled cases, the deref block is
+ * always set to a per poly(within hardware)
+ *
+ * If the last enabled shader is VS or DS.
+ *
+ * 1) If DS is last enabled shader then if the number of DS
+ * handles is less than 324, need to set per poly deref.
+ *
+ * 2) If VS is last enabled shader then if the number of VS
+ * handles is less than 192, need to set per poly deref"
+ *
+ * The default is 32 so we assume that's the right choice if we're
+ * not in one of the explicit cases listed above.
+ */
+ if (gs_present) {
+ *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY;
+ } else if (tess_present) {
+ if (entries[MESA_SHADER_TESS_EVAL] < 324)
+ *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY;
+ else
+ *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_32;
+ } else {
+ if (entries[MESA_SHADER_VERTEX] < 192)
+ *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_PER_POLY;
+ else
+ *deref_block_size = GEN_URB_DEREF_BLOCK_SIZE_32;
+ }
+ } else {
+ *deref_block_size = 0;
+ }
}
}