From d9ddf5076d13ccfdc0ef690ed64f9fabfc0b313a Mon Sep 17 00:00:00 2001
From: Paulo Zanoni
Date: Fri, 9 Aug 2019 15:40:33 -0700
Subject: [PATCH] intel/fs: make scan/reduce work with SIMD32 when it fits 2 registers

When dealing with uint16_t and uint8_t on SIMD32 we can do all the
operations using just 2 registers, so we don't hit the recursion at
the beginning of emit_scan(). Because of that, we need to actually
compute scan/reduce for channels 31:16.

v2: Still missed instructions (Jason).

Reviewed-by: Jason Ekstrand
Signed-off-by: Paulo Zanoni
---
 src/intel/compiler/brw_fs_builder.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 0c1b6f5d6c7..1000d956d6f 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -514,6 +514,16 @@ namespace brw {
                right = horiz_offset(tmp, 8 + 4);
                set_condmod(mod, ubld.emit(opcode, right, left, right));
             }
+
+            if (dispatch_width() > 16) {
+               left = component(tmp, 16 + 3);
+               right = horiz_offset(tmp, 16 + 4);
+               set_condmod(mod, ubld.emit(opcode, right, left, right));
+
+               left = component(tmp, 24 + 3);
+               right = horiz_offset(tmp, 24 + 4);
+               set_condmod(mod, ubld.emit(opcode, right, left, right));
+            }
          }
 
          if (cluster_size > 8 && dispatch_width() > 8) {
@@ -521,6 +531,19 @@ namespace brw {
             src_reg left = component(tmp, 7);
             dst_reg right = horiz_offset(tmp, 8);
             set_condmod(mod, ubld.emit(opcode, right, left, right));
+
+            if (dispatch_width() > 16) {
+               left = component(tmp, 16 + 7);
+               right = horiz_offset(tmp, 16 + 8);
+               set_condmod(mod, ubld.emit(opcode, right, left, right));
+            }
+         }
+
+         if (cluster_size > 16 && dispatch_width() > 16) {
+            const fs_builder ubld = exec_all().group(16, 0);
+            src_reg left = component(tmp, 15);
+            dst_reg right = horiz_offset(tmp, 16);
+            set_condmod(mod, ubld.emit(opcode, right, left, right));
          }
       }
 
-- 
2.30.2