From: Tim Rowley
Date: Wed, 9 Aug 2017 22:32:28 +0000 (-0500)
Subject: swr/rast: Allow gather of floats from fetch shader with 2-4GB offsets
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4475583f5ea44c3585e0ffea6118ba3a32fddd72;p=mesa.git

swr/rast: Allow gather of floats from fetch shader with 2-4GB offsets

Reviewed-by: Bruce Cherniak
---

diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 2ed2b2f61eb..025d38ab336 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -45,6 +45,7 @@ intrinsics = [
 ['VGATHERPD', 'x86_avx2_gather_d_pd_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
 ['VGATHERPS', 'x86_avx2_gather_d_ps_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
 ['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
+ ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']],
 ['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']],
 ['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']],
 ['VRCPPS', 'x86_avx_rcp_ps_256', ['a']],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index dcfe8970f5c..761c58ca27e 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1005,7 +1005,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
 Value *vMask = vGatherMask;

 // Gather a SIMD of vertices
- vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vOffsets, vMask, C((char)1));
+ // APIs allow a 4GB range for offsets
+ // However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
+ // But, we know that elements must be aligned for FETCH. :)
+ // Right shift the offset by a bit and then scale by 2 to remove the sign extension.
+ Value* vShiftedOffsets = VPSRLI(vOffsets, C(1));
+ vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vMask, C((char)2));
 }
 else
 {