From 630b84cdc80594d912a64f64aa75ac498e6f1248 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 12 Jan 2017 18:05:58 -0800 Subject: [PATCH] i965: Use source region <1,2,0> when converting to DF. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Doing so allows us to use a single MOV in VEC4_OPCODE_TO_DOUBLE instead of two. Reviewed-by: Samuel Iglesias Gonsálvez --- src/intel/compiler/brw_eu_emit.c | 28 ++++++++++++++++++++++- src/intel/compiler/brw_vec4_generator.cpp | 13 +---------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 058742d4f6e..8637310a35a 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -1089,7 +1089,6 @@ void brw_##OP(struct brw_codegen *p, \ } -ALU1(MOV) ALU2(SEL) ALU1(NOT) ALU2(AND) @@ -1123,6 +1122,33 @@ ALU2(SUBB) ROUND(RNDZ) ROUND(RNDE) +brw_inst * +brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0) +{ + const struct gen_device_info *devinfo = p->devinfo; + + /* When converting F/D/UD->DF in Align1 mode on IVB/BYT, every odd source + * channel is ignored. To avoid the problems that causes, we rewrite the + * <4,4,1> source region to <1,2,0>, which reads each element twice. 
+ */ + if (devinfo->gen == 7 && !devinfo->is_haswell && + brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 && + dest.type == BRW_REGISTER_TYPE_DF && + (src0.type == BRW_REGISTER_TYPE_F || + src0.type == BRW_REGISTER_TYPE_D || + src0.type == BRW_REGISTER_TYPE_UD) && + !has_scalar_region(src0)) { + assert(src0.vstride == BRW_VERTICAL_STRIDE_4 && + src0.width == BRW_WIDTH_4 && + src0.hstride == BRW_HORIZONTAL_STRIDE_1); + + src0.vstride = BRW_VERTICAL_STRIDE_1; + src0.width = BRW_WIDTH_2; + src0.hstride = BRW_HORIZONTAL_STRIDE_0; + } + + return brw_alu1(p, BRW_OPCODE_MOV, dest, src0); +} brw_inst * brw_ADD(struct brw_codegen *p, struct brw_reg dest, diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index 2ac287f17fa..d3192ab7db3 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1958,18 +1958,7 @@ generate_code(struct brw_codegen *p, brw_set_default_access_mode(p, BRW_ALIGN_1); - struct brw_reg tmp = retype(dst, src[0].type); - tmp.hstride = BRW_HORIZONTAL_STRIDE_2; - tmp.width = BRW_WIDTH_4; - src[0].vstride = BRW_VERTICAL_STRIDE_4; - src[0].hstride = BRW_HORIZONTAL_STRIDE_1; - src[0].width = BRW_WIDTH_4; - brw_MOV(p, tmp, src[0]); - - tmp.vstride = BRW_VERTICAL_STRIDE_8; - tmp.hstride = BRW_HORIZONTAL_STRIDE_2; - tmp.width = BRW_WIDTH_4; - brw_MOV(p, dst, tmp); + brw_MOV(p, dst, src[0]); brw_set_default_access_mode(p, BRW_ALIGN_16); break; -- 2.30.2