From 6041a31e77680597614776e59edb12709ec2e019 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 Oct 2017 14:45:12 -0700 Subject: [PATCH] intel/eu: Fix broadcast instruction for 64-bit values on little-core We're not using broadcast for any 64-bit types right now since we mostly use it for emit_uniformize on 32-bit buffer indices. However, SPIR-V subgroups are going to need it for 64-bit so let's make it work. Reviewed-by: Iago Toral Quiroga --- src/intel/compiler/brw_eu_emit.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index a18cfa4239f..fae74cf80ab 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3430,8 +3430,30 @@ brw_broadcast(struct brw_codegen *p, brw_pop_insn_state(p); /* Use indirect addressing to fetch the specified component. */ - brw_MOV(p, dst, - retype(brw_vec1_indirect(addr.subnr, offset), src.type)); + if (type_sz(src.type) > 4 && + (devinfo->is_cherryview || gen_device_info_is_9lp(devinfo))) { + /* From the Cherryview PRM Vol 7. "Register Region Restrictions": + * + * "When source or destination datatype is 64b or operation is + * integer DWord multiply, indirect addressing must not be + * used." + * + * To work around this issue, we do two integer MOVs + * instead of one 64-bit MOV. Because no double value should ever + * cross a register boundary, it's safe to use the immediate + * offset in the indirect here to handle adding 4 bytes to the + * offset and avoid the extra ADD to the register file. 
+ */ + brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 0), + retype(brw_vec1_indirect(addr.subnr, offset), + BRW_REGISTER_TYPE_D)); + brw_MOV(p, subscript(dst, BRW_REGISTER_TYPE_D, 1), + retype(brw_vec1_indirect(addr.subnr, offset + 4), + BRW_REGISTER_TYPE_D)); + } else { + brw_MOV(p, dst, + retype(brw_vec1_indirect(addr.subnr, offset), src.type)); + } } else { /* In SIMD4x2 mode the index can be either zero or one, replicate it * to all bits of a flag register, -- 2.30.2