From 16512ba70d5a9b4645508030acf1572ee7c660a3 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Tue, 19 Feb 2013 07:31:16 -0800 Subject: [PATCH] i965/gs: add GS visitors. This patch introduces the vec4_gs_visitor class, which translates geometry shaders from GLSL IR to back-end opcodes. This class is derived from vec4_visitor (which is also the base class for vec4_vs_visitor), so as a result most of the back end code is shared. The only parts that differ are: - Geometry shaders use a different input payload organization, since the inputs need to match up with the outputs of the previous pipeline stage (vec4_gs_visitor::setup_payload() and vec4_gs_visitor::setup_varying_inputs()). - Geometry shader input array dereferences need a special stride computation, since all geometry shader inputs are interleaved into one giant array (vec4_gs_visitor::compute_array_stride()). - There are no geometry shader system values (vec4_gs_visitor::make_reg_for_system_value()). - At the beginning of a geometry shader, extra data in R0 needs to be zeroed out, and a vertex counter needs to be initialized (vec4_gs_visitor::emit_prolog()). - When EmitVertex() appears in the shader, the current contents of output variables need to be emitted to the URB, and the vertex counter needs to be incremented (vec4_gs_visitor::visit(ir_emit_vertex *)). - When generating a URB_WRITE message to output vertex data, the current state of the vertex counter needs to be used to store a write offset in the message header (vec4_gs_visitor::emit_urb_write_header()). - The URB_WRITE message that outputs vertex data needs to be sent using GS_OPCODE_URB_WRITE, since VS_OPCODE_URB_WRITE would overwrite the offsets in the message header (vec4_gs_visitor::emit_urb_write_opcode()). - At the end of a geometry shader, the final vertex count needs to be delivered using a URB WRITE message (vec4_gs_visitor::emit_thread_end()). - EndPrimitive() functionality is not implemented yet (vec4_gs_visitor::visit(ir_end_primitive *)). - There is no support for assembly shaders (vec4_gs_visitor::emit_program_code()). v2: Make num_input_vertices const. Refer to registers as rN rather than gN, for consistency with the PRM. Fix misspelling. Improve comment in the ir_emit_vertex visitor explaining why we emit vertices inside a conditional. Enclose the conditional code in the ir_emit_vertex visitor between curly braces. Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + .../drivers/dri/i965/brw_vec4_gs_visitor.cpp | 259 ++++++++++++++++++ .../drivers/dri/i965/brw_vec4_gs_visitor.h | 91 ++++++ 3 files changed, 351 insertions(+) create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 60cd6e0f440..290cd93a107 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -87,6 +87,7 @@ i965_FILES = \ brw_vec4.cpp \ brw_vec4_copy_propagation.cpp \ brw_vec4_emit.cpp \ + brw_vec4_gs_visitor.cpp \ brw_vec4_live_variables.cpp \ brw_vec4_reg_allocate.cpp \ brw_vec4_visitor.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp new file mode 100644 index 00000000000..3549d5d7865 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -0,0 +1,259 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file brw_vec4_gs_visitor.cpp + * + * Geometry-shader-specific code derived from the vec4_visitor class. + */ + +#include "brw_vec4_gs_visitor.h" + +const unsigned MAX_GS_INPUT_VERTICES = 6; + +namespace brw { + +vec4_gs_visitor::vec4_gs_visitor(struct brw_context *brw, + struct brw_vec4_gs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader, + void *mem_ctx) + : vec4_visitor(brw, &c->base, &c->gp->program.Base, &c->key.base, + &c->prog_data.base, prog, shader, mem_ctx, + INTEL_DEBUG & DEBUG_GS), + c(c) +{ +} + + +dst_reg * +vec4_gs_visitor::make_reg_for_system_value(ir_variable *ir) +{ + /* Geometry shaders don't use any system values. */ + assert(!"Unreached"); + return NULL; +} + + +int +vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map) +{ + /* For geometry shaders there are N copies of the input attributes, where N + * is the number of input vertices. attribute_map[BRW_VARYING_SLOT_COUNT * + * i + j] represents attribute j for vertex i. + * + * Note that GS inputs are read from the VUE 256 bits (2 vec4's) at a time, + * so the total number of input slots that will be delivered to the GS (and + * thus the stride of the input arrays) is urb_read_length * 2. + */ + const unsigned num_input_vertices = c->gp->program.VerticesIn; + assert(num_input_vertices <= MAX_GS_INPUT_VERTICES); + unsigned input_array_stride = c->prog_data.base.urb_read_length * 2; + + for (int slot = 0; slot < c->key.input_vue_map.num_slots; slot++) { + int varying = c->key.input_vue_map.slot_to_varying[slot]; + for (unsigned vertex = 0; vertex < num_input_vertices; vertex++) { + attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] = + payload_reg + input_array_stride * vertex + slot; + } + } + + return payload_reg + input_array_stride * num_input_vertices; +} + + +void +vec4_gs_visitor::setup_payload() +{ + int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES]; + + /* If a geometry shader tries to read from an input that wasn't written by + * the vertex shader, that produces undefined results, but it shouldn't + * crash anything. So initialize attribute_map to zeros--that ensures that + * these undefined results are read from r0. + */ + memset(attribute_map, 0, sizeof(attribute_map)); + + int reg = 0; + + /* The payload always contains important data in r0, which contains + * the URB handles that are passed on to the URB write at the end + * of the thread. + */ + reg++; + + reg = setup_uniforms(reg); + + reg = setup_varying_inputs(reg, attribute_map); + + lower_attributes_to_hw_regs(attribute_map); + + this->first_non_payload_grf = reg; +} + + +void +vec4_gs_visitor::emit_prolog() +{ + /* In vertex shaders, r0.2 is guaranteed to be initialized to zero. In + * geometry shaders, it isn't (it contains a bunch of information we don't + * need, like the input primitive type). We need r0.2 to be zero in order + * to build scratch read/write messages correctly (otherwise this value + * will be interpreted as a global offset, causing us to do our scratch + * reads/writes to garbage memory). So just set it to zero at the top of + * the shader. + */ + this->current_annotation = "clear r0.2"; + dst_reg r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD)); + emit(GS_OPCODE_SET_DWORD_2_IMMED, r0, 0u); + + /* Create a virtual register to hold the vertex count */ + this->vertex_count = src_reg(this, glsl_type::uint_type); + + /* Initialize the vertex_count register to 0 */ + this->current_annotation = "initialize vertex_count"; + vec4_instruction *inst = emit(MOV(dst_reg(this->vertex_count), 0u)); + inst->force_writemask_all = true; + + this->current_annotation = NULL; +} + + +void +vec4_gs_visitor::emit_program_code() +{ + /* We don't support NV_geometry_program4. */ + assert(!"Unreached"); +} + + +void +vec4_gs_visitor::emit_thread_end() +{ + /* MRF 0 is reserved for the debugger, so start with message header + * in MRF 1. + */ + int base_mrf = 1; + + current_annotation = "thread end"; + dst_reg mrf_reg(MRF, base_mrf); + src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + inst->force_writemask_all = true; + emit(GS_OPCODE_SET_VERTEX_COUNT, mrf_reg, this->vertex_count); + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + emit_shader_time_end(); + inst = emit(GS_OPCODE_THREAD_END); + inst->base_mrf = base_mrf; + inst->mlen = 1; +} + + +void +vec4_gs_visitor::emit_urb_write_header(int mrf) +{ + /* The SEND instruction that writes the vertex data to the VUE will use + * per_slot_offset=true, which means that DWORDs 3 and 4 of the message + * header specify an offset (in multiples of 256 bits) into the URB entry + * at which the write should take place. + * + * So we have to prepare a message header with the appropriate offset + * values. + */ + dst_reg mrf_reg(MRF, mrf); + src_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + this->current_annotation = "URB write header"; + vec4_instruction *inst = emit(MOV(mrf_reg, r0)); + inst->force_writemask_all = true; + emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count, + (uint32_t) c->prog_data.output_vertex_size_hwords); +} + + +vec4_instruction * +vec4_gs_visitor::emit_urb_write_opcode(bool complete) +{ + /* We don't care whether the vertex is complete, because in general + * geometry shaders output multiple vertices, and we don't terminate the + * thread until all vertices are complete. + */ + (void) complete; + + vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE); + inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + return inst; +} + + +int +vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir) +{ + /* Geometry shader inputs are arrays, but they use an unusual array layout: + * instead of all array elements for a given geometry shader input being + * stored consecutively, all geometry shader inputs are interleaved into + * one giant array. At this stage of compilation, we assume that the + * stride of the array is BRW_VARYING_SLOT_COUNT. Later, + * setup_attributes() will remap our accesses to the actual input array. + */ + ir_dereference_variable *deref_var = ir->array->as_dereference_variable(); + if (deref_var && deref_var->var->mode == ir_var_shader_in) + return BRW_VARYING_SLOT_COUNT; + else + return vec4_visitor::compute_array_stride(ir); +} + + +void +vec4_gs_visitor::visit(ir_emit_vertex *) +{ + this->current_annotation = "emit vertex: safety check"; + + /* To ensure that we don't output more vertices than the shader specified + * using max_vertices, do the logic inside a conditional of the form "if + * (vertex_count < MAX)" + */ + unsigned num_output_vertices = c->gp->program.VerticesOut; + emit(CMP(dst_null_d(), this->vertex_count, + src_reg(num_output_vertices), BRW_CONDITIONAL_L)); + emit(IF(BRW_PREDICATE_NORMAL)); + { + this->current_annotation = "emit vertex: vertex data"; + emit_vertex(); + + this->current_annotation = "emit vertex: increment vertex count"; + emit(ADD(dst_reg(this->vertex_count), this->vertex_count, + src_reg(1u))); + } + emit(BRW_OPCODE_ENDIF); + + this->current_annotation = NULL; +} + +void +vec4_gs_visitor::visit(ir_end_primitive *) +{ + assert(!"Not implemented yet"); +} + + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h new file mode 100644 index 00000000000..e998666a6b4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -0,0 +1,91 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file brw_vec4_gs_visitor.h + * + * Geometry-shader-specific code derived from the vec4_visitor class. + */ + +#ifndef BRW_VEC4_GS_VISITOR_H +#define BRW_VEC4_GS_VISITOR_H + +#include "brw_vec4.h" + + +struct brw_vec4_gs_prog_key +{ + struct brw_vec4_prog_key base; + + struct brw_vue_map input_vue_map; +}; + + +/** + * Scratch data used when compiling a GLSL geometry shader. + */ +struct brw_vec4_gs_compile +{ + struct brw_vec4_compile base; + struct brw_vec4_gs_prog_key key; + struct brw_vec4_gs_prog_data prog_data; + + struct brw_geometry_program *gp; +}; + + +#ifdef __cplusplus +namespace brw { + +class vec4_gs_visitor : public vec4_visitor +{ +public: + vec4_gs_visitor(struct brw_context *brw, + struct brw_vec4_gs_compile *c, + struct gl_shader_program *prog, + struct brw_shader *shader, + void *mem_ctx); + +protected: + virtual dst_reg *make_reg_for_system_value(ir_variable *ir); + virtual void setup_payload(); + virtual void emit_prolog(); + virtual void emit_program_code(); + virtual void emit_thread_end(); + virtual void emit_urb_write_header(int mrf); + virtual vec4_instruction *emit_urb_write_opcode(bool complete); + virtual int compute_array_stride(ir_dereference_array *ir); + virtual void visit(ir_emit_vertex *); + virtual void visit(ir_end_primitive *); + +private: + int setup_varying_inputs(int payload_reg, int *attribute_map); + + src_reg vertex_count; + const struct brw_vec4_gs_compile * const c; +}; + +} /* namespace brw */ +#endif /* __cplusplus */ + +#endif /* BRW_VEC4_GS_VISITOR_H */ -- 2.30.2