From: Paul Berry Date: Wed, 16 Oct 2013 20:18:11 +0000 (-0700) Subject: i965/gs: Fix up gl_PointSize input swizzling for DUAL_INSTANCED gs. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=03ac2c7223f7645e3;p=mesa.git i965/gs: Fix up gl_PointSize input swizzling for DUAL_INSTANCED gs. Geometry shaders that run in "DUAL_INSTANCED" mode store their inputs in vec4's. This means that when compiling gl_PointSize input swizzling (a MOV instruction which uses a geometry shader input as both source and destination), we need to do two things: - Set force_writemask_all to ensure that the MOV happens regardless of which channels are enabled. - Set the source register region to <4;4,1> (instead of <0;4,1>) to satisfy register region restrictions. v2: move the source register region fixup to the top of vec4_generator::generate_vec4_instruction(), so that it applies to all instructions rather than just MOV. Reviewed-by: Eric Anholt --- diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 1b597b55861..5196feb28e5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -856,6 +856,31 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, { vec4_instruction *inst = (vec4_instruction *) instruction; + if (dst.width == BRW_WIDTH_4) { + /* This happens in attribute fixups for "dual instanced" geometry + * shaders, since they use attributes that are vec4's. Since the exec + * width is only 4, it's essential that the caller set + * force_writemask_all in order to make sure the instruction is executed + * regardless of which channels are enabled. 
+ */ + assert(inst->force_writemask_all); + + /* Fix up any <8;8,1> or <0;4,1> source registers to <4;4,1> to satisfy + * the following register region restrictions (from Graphics BSpec: + * 3D-Media-GPGPU Engine > EU Overview > Registers and Register Regions + * > Register Region Restrictions) + * + * 1. ExecSize must be greater than or equal to Width. + * + * 2. If ExecSize = Width and HorzStride != 0, VertStride must be set + * to Width * HorzStride." + */ + for (int i = 0; i < 3; i++) { + if (src[i].file == BRW_GENERAL_REGISTER_FILE) + src[i] = stride(src[i], 4, 4, 1); + } + } + switch (inst->opcode) { case BRW_OPCODE_MOV: brw_MOV(p, dst, src[0]); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 8af7a3cf6de..b135f616ae1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -181,7 +181,13 @@ vec4_gs_visitor::emit_prolog() src_reg src(dst); dst.writemask = WRITEMASK_X; src.swizzle = BRW_SWIZZLE_WWWW; - emit(MOV(dst, src)); + inst = emit(MOV(dst, src)); + + /* In dual instanced dispatch mode, dst has a width of 4, so we need + * to make sure the MOV happens regardless of which channels are + * enabled. + */ + inst->force_writemask_all = true; } }