From c1107cec44ab030c7fcc97c67baa12df1cc9d7b5 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 27 May 2016 23:29:14 -0700 Subject: [PATCH] i965/fs: Allow scalar source regions on SNB math instructions. I haven't found any evidence that this isn't supported by the hardware, in fact according to the SNB hardware spec: "The supported regioning modes for math instructions are align16, align1 with the following restrictions: - Scalar source is supported. [...] - Source and destination offset must be the same, except the case of scalar source." Cc: "12.0" Reviewed-by: Jason Ekstrand Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 6 ++++-- src/mesa/drivers/dri/i965/brw_fs_builder.h | 10 ++-------- src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 9 ++------- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 1c2ccb4e163..2538f0dab85 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2000,8 +2000,10 @@ void gen6_math(struct brw_codegen *p, assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); if (devinfo->gen == 6) { - assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); - assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(has_scalar_region(src0) || + src0.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(has_scalar_region(src1) || + src1.hstride == BRW_HORIZONTAL_STRIDE_1); } if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT || diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index f22903e5234..aef35f33239 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -621,20 +621,14 @@ namespace brw { src_reg fix_math_operand(const src_reg &src) const { - /* Can't do hstride == 0 args on gen6 math, so expand it out. We - * might be able to do better by doing execsize = 1 math and then - * expanding that result out, but we would need to be careful with - * masking. - * - * Gen6 hardware ignores source modifiers (negate and abs) on math + /* Gen6 hardware ignores source modifiers (negate and abs) on math * instructions, so we also move to a temp to set those up. * * Gen7 relaxes most of the above restrictions, but still can't use IMM * operands to math */ if ((shader->devinfo->gen == 6 && - (src.file == IMM || src.file == UNIFORM || - src.abs || src.negate)) || + (src.file == IMM || src.abs || src.negate)) || (shader->devinfo->gen == 7 && src.file == IMM)) { const dst_reg tmp = vgrf(src.type); MOV(tmp, src); diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index d88d62ba685..2a83eb9c73c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -578,14 +578,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry) break; /* fallthrough */ case SHADER_OPCODE_POW: - /* Allow constant propagation into src1 (except on Gen 6), and let - * constant combining promote the constant on Gen < 8. - * - * While Gen 6 MATH can take a scalar source, its source and - * destination offsets must be equal and we cannot ensure that. + /* Allow constant propagation into src1, and let constant combining + * promote the constant on Gen < 8. */ - if (devinfo->gen == 6) - break; /* fallthrough */ case BRW_OPCODE_BFI1: case BRW_OPCODE_ASR: -- 2.30.2