From 49d4d0268bc03fbf2a0688563c5d89a7c9eb1e8e Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Fri, 1 Jul 2016 09:22:34 +0200 Subject: [PATCH] i965/vec4/tes: fix setup_payload() for 64bit data types Use a width of 2 with 64-bit attributes. Also, if we have a dvec3/4 attribute that gets split across two registers such that components XY are stored in the second half of a register and components ZW are stored in the first half of the next, we need to fix regioning for any instruction that reads components Z/W of the attribute. Notice this also means that we can't support sources that read cross-dvec2 swizzles (like XZ for example). v2: don't assert that we have a single channel swizzle in the case that we have to fix up Z/W access on the first half of the next register. We can handle any swizzle that does not cross dvec2 boundaries, which the double scalarization pass should have prevented anyway. Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp index aa546fef530..bb81ad3a155 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -82,14 +82,33 @@ vec4_tes_visitor::setup_payload() if (inst->src[i].file != ATTR) continue; + bool is_64bit = type_sz(inst->src[i].type) == 8; + struct brw_reg grf = brw_vec4_grf(reg + inst->src[i].nr / 2, 4 * (inst->src[i].nr % 2)); - grf = stride(grf, 0, 4, 1); + grf = stride(grf, 0, is_64bit ? 2 : 4, 1); grf.swizzle = inst->src[i].swizzle; grf.type = inst->src[i].type; grf.abs = inst->src[i].abs; grf.negate = inst->src[i].negate; + /* For 64-bit attributes we can end up with components XY in the + * second half of a register and components ZW in the first half + * of the next. Fix it up here. + */ + if (is_64bit && grf.subnr > 0) { + /* We can't do swizzles that mix XY and ZW channels in this case. + * Such cases should have been handled by the scalarization pass. + */ + assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^ + (brw_mask_for_swizzle(grf.swizzle) & 0xc)); + if (brw_mask_for_swizzle(grf.swizzle) & 0xc) { + grf.subnr = 0; + grf.nr++; + grf.swizzle -= BRW_SWIZZLE_ZZZZ; + } + } + inst->src[i] = grf; } } -- 2.30.2