case SHADER_OPCODE_MEMORY_FENCE:
case SHADER_OPCODE_INTERLOCK:
- if (devinfo->gen >= 7)
- return calculate_desc(info, unit_dp_dc, 2, 0, 0, 30 /* XXX */, 0,
- 10 /* XXX */, 100 /* XXX */, 0, 0, 0, 0);
- else
+ switch (info.sfid) {
+ case GEN6_SFID_DATAPORT_RENDER_CACHE:
+ if (devinfo->gen >= 7)
+ return calculate_desc(info, unit_dp_rc, 2, 0, 0, 30 /* XXX */, 0,
+ 10 /* XXX */, 300 /* XXX */, 0, 0, 0, 0);
+ else
+ abort();
+
+ case GEN7_SFID_DATAPORT_DATA_CACHE:
+ case HSW_SFID_DATAPORT_DATA_CACHE_1:
+ if (devinfo->gen >= 7)
+ return calculate_desc(info, unit_dp_dc, 2, 0, 0, 30 /* XXX */, 0,
+ 10 /* XXX */, 100 /* XXX */, 0, 0, 0, 0);
+ else
+ abort();
+
+ default:
abort();
+ }
case SHADER_OPCODE_GEN4_SCRATCH_READ:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
* difference is the worst-case scenario branch_weight used for
* SIMD32 which accounts for the possibility of a dynamically
* uniform branch becoming divergent in SIMD32.
+ *
+ * Note that we provide slightly more pessimistic weights on
+ * Gen12+ for SIMD32, since the effective warp size on that
+ * platform is 2x the SIMD width due to EU fusion.  This increases
+ * the likelihood of divergent control flow in comparison to
+ * previous generations, giving narrower SIMD modes a performance
+ * advantage in several test cases with non-uniform discard jumps.
*/
const float branch_weight = (dispatch_width > 16 ? 1.0 : 0.5);
+ const float discard_weight = (dispatch_width > 16 || s->devinfo->gen < 12 ?
+ 1.0 : 0.5);
const float loop_weight = 10;
+ unsigned discard_count = 0;
unsigned elapsed = 0;
state st;
if (inst->opcode == BRW_OPCODE_ENDIF)
st.weight /= branch_weight;
+ else if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT && discard_count)
+ st.weight /= discard_weight;
elapsed += (st.unit_ready[unit_fe] - clock0) * st.weight;
st.weight *= loop_weight;
else if (inst->opcode == BRW_OPCODE_WHILE)
st.weight /= loop_weight;
+ else if (inst->opcode == FS_OPCODE_DISCARD_JUMP && !discard_count++)
+ st.weight *= discard_weight;
}
p.block_latency[block->num] = elapsed - elapsed0;