From c301f447ea8449804208e414f189c0571e4339a8 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 7 Dec 2018 14:26:23 -0800 Subject: [PATCH] intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass. Currently the visitor attempts to enforce the regioning restrictions that apply to double-precision instructions on CHV/BXT at NIR-to-i965 translation time. It is possible though for the copy propagation pass to violate this restriction if a strided move is propagated into one of the affected instructions. I've only reproduced this issue on a future platform but it could affect CHV/BXT too under the right conditions. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Iago Toral Quiroga --- .../compiler/brw_fs_copy_propagation.cpp | 10 +++++++ src/intel/compiler/brw_ir_fs.h | 28 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index a8ec1c34630..c23ce1ef426 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -315,6 +315,16 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride, if (stride > 4) return false; + /* Bail if the channels of the source need to be aligned to the byte offset + * of the corresponding channel of the destination, and the provided stride + * would break this restriction. + */ + if (has_dst_aligned_region_restriction(devinfo, inst) && + !(type_sz(inst->src[arg].type) * stride == + type_sz(inst->dst.type) * inst->dst.stride || + stride == 0)) + return false; + /* 3-source instructions can only be Align16, which restricts what strides * they can take. They can only take a stride of 1 (the usual case), or 0 * with a special "repctrl" bit. But the repctrl bit doesn't work for diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 07e7224e0f8..95b069a2e02 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -486,4 +486,32 @@ get_exec_type_size(const fs_inst *inst) return type_sz(get_exec_type(inst)); } +/** + * Return whether the following regioning restriction applies to the specified + * instruction. From the Cherryview PRM Vol 7. "Register Region + * Restrictions": + * + * "When source or destination datatype is 64b or operation is integer DWord + * multiply, regioning in Align1 must follow these rules: + * + * 1. Source and Destination horizontal stride must be aligned to the same qword. + * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. + * 3. Source and Destination offset must be the same, except the case of + * scalar source." + */ +static inline bool +has_dst_aligned_region_restriction(const gen_device_info *devinfo, + const fs_inst *inst) +{ + const brw_reg_type exec_type = get_exec_type(inst); + const bool is_int_multiply = !brw_reg_type_is_floating_point(exec_type) && + (inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD); + + if (type_sz(inst->dst.type) > 4 || type_sz(exec_type) > 4 || + (type_sz(exec_type) == 4 && is_int_multiply)) + return devinfo->is_cherryview || gen_device_info_is_9lp(devinfo); + else + return false; +} + #endif -- 2.30.2