From 8bb15813e3047820a95724e4257aa2c862eeb31a Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Wed, 16 Oct 2013 11:40:41 -0700 Subject: [PATCH] i965/vec4: Add the ability for attributes to be interleaved. When geometry shaders are operated in "single" or "dual instanced" mode, a single set of geometry shader inputs is interleaved into the thread payload (with each payload register containing a pair of inputs) in order to save register space. This patch modifies vec4_visitor::lower_attributes_to_hw_regs so that it can handle the interleaved format. Reviewed-by: Eric Anholt --- src/mesa/drivers/dri/i965/brw_vec4.cpp | 28 ++++++++++++++++--- src/mesa/drivers/dri/i965/brw_vec4.h | 3 +- .../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index ba7490053b1..589de48f33a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1184,12 +1184,32 @@ vec4_visitor::dump_instruction(backend_instruction *be_inst) printf("\n"); } + +static inline struct brw_reg +attribute_to_hw_reg(int attr, bool interleaved) +{ + if (interleaved) + return stride(brw_vec4_grf(attr / 2, (attr % 2) * 4), 0, 4, 1); + else + return brw_vec8_grf(attr, 0); +} + + /** * Replace each register of type ATTR in this->instructions with a reference * to a fixed HW register. + * + * If interleaved is true, then each attribute takes up half a register, with + * register N containing attribute 2*N in its first half and attribute 2*N+1 + * in its second half (this corresponds to the payload setup used by geometry + * shaders in "single" or "dual instanced" dispatch mode). If interleaved is + * false, then each attribute takes up a whole register, with register N + * containing attribute N (this corresponds to the payload setup used by + * vertex shaders, and by geometry shaders in "dual object" dispatch mode). */ void -vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map) +vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map, + bool interleaved) { foreach_list(node, &this->instructions) { vec4_instruction *inst = (vec4_instruction *)node; @@ -1203,7 +1223,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map) */ assert(grf != 0); - struct brw_reg reg = brw_vec8_grf(grf, 0); + struct brw_reg reg = attribute_to_hw_reg(grf, interleaved); reg.type = inst->dst.type; reg.dw1.bits.writemask = inst->dst.writemask; @@ -1222,7 +1242,7 @@ vec4_visitor::lower_attributes_to_hw_regs(const int *attribute_map) */ assert(grf != 0); - struct brw_reg reg = brw_vec8_grf(grf, 0); + struct brw_reg reg = attribute_to_hw_reg(grf, interleaved); reg.dw1.bits.swizzle = inst->src[i].swizzle; reg.type = inst->src[i].type; if (inst->src[i].abs) @@ -1260,7 +1280,7 @@ vec4_vs_visitor::setup_attributes(int payload_reg) nr_attributes++; } - lower_attributes_to_hw_regs(attribute_map); + lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); /* The BSpec says we always have to read at least one thing from * the VF, and it appears that the hardware wedges otherwise. diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 854478ce06e..23e004ef397 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -515,7 +515,8 @@ public: protected: void emit_vertex(); - void lower_attributes_to_hw_regs(const int *attribute_map); + void lower_attributes_to_hw_regs(const int *attribute_map, + bool interleaved); void setup_payload_interference(struct ra_graph *g, int first_payload_node, int reg_node_count); virtual dst_reg *make_reg_for_system_value(ir_variable *ir) = 0; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index 0893c95b8fa..08a55a3fab9 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -110,7 +110,7 @@ vec4_gs_visitor::setup_payload() reg = setup_varying_inputs(reg, attribute_map); - lower_attributes_to_hw_regs(attribute_map); + lower_attributes_to_hw_regs(attribute_map, false /* interleaved */); this->first_non_payload_grf = reg; } -- 2.30.2