From 1245724f728915694ecb9c318a68107c01ccc808 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 17 Nov 2015 01:30:35 -0800 Subject: [PATCH] i965: Port tessellation evaluation shaders to vec4 mode. This can be used on Broadwell by setting INTEL_SCALAR_TES=0. More importantly, it will be used for Ivybridge and Haswell. Signed-off-by: Kenneth Graunke Reviewed-by: Edward O'Callaghan Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_defines.h | 4 + src/mesa/drivers/dri/i965/brw_shader.cpp | 25 ++- src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 + .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 2 + .../drivers/dri/i965/brw_vec4_generator.cpp | 61 ++++++ src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 204 ++++++++++++++++++ src/mesa/drivers/dri/i965/brw_vec4_tes.h | 69 ++++++ 8 files changed, 365 insertions(+), 2 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_tes.cpp create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_tes.h diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 0b706de69a0..05c49ee9a12 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -76,6 +76,7 @@ i965_compiler_FILES = \ brw_vec4_surface_builder.cpp \ brw_vec4_surface_builder.h \ brw_vec4_tcs.cpp \ + brw_vec4_tes.cpp \ brw_vec4_visitor.cpp \ brw_vec4_vs_visitor.cpp \ brw_vue_map.c \ diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index cc19c06f162..61bcebdbc4b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1313,6 +1313,10 @@ enum opcode { TCS_OPCODE_SET_OUTPUT_URB_OFFSETS, TCS_OPCODE_GET_PRIMITIVE_ID, TCS_OPCODE_CREATE_BARRIER_HEADER, + + TES_OPCODE_GET_PRIMITIVE_ID, + TES_OPCODE_CREATE_INPUT_READ_HEADER, + TES_OPCODE_ADD_INDIRECT_URB_OFFSET, }; enum brw_urb_write_flags { diff --git 
a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 5140cfb7bc6..3a36678e8d5 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -26,6 +26,7 @@ #include "brw_eu.h" #include "brw_fs.h" #include "brw_nir.h" +#include "brw_vec4_tes.h" #include "glsl/glsl_parser_extras.h" #include "main/shaderobj.h" #include "main/uniforms.h" @@ -86,7 +87,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->scalar_stage[MESA_SHADER_VERTEX] = devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); compiler->scalar_stage[MESA_SHADER_TESS_CTRL] = false; - compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true; + compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = + devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_TES", true); compiler->scalar_stage[MESA_SHADER_GEOMETRY] = devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; @@ -566,6 +568,12 @@ brw_instruction_name(enum opcode op) return "tcs_get_primitive_id"; case TCS_OPCODE_CREATE_BARRIER_HEADER: return "tcs_create_barrier_header"; + case TES_OPCODE_CREATE_INPUT_READ_HEADER: + return "tes_create_input_read_header"; + case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: + return "tes_add_indirect_urb_offset"; + case TES_OPCODE_GET_PRIMITIVE_ID: + return "tes_get_primitive_id"; } unreachable("not reached"); @@ -1400,6 +1408,19 @@ brw_compile_tes(const struct brw_compiler *compiler, return g.get_assembly(final_assembly_size); } else { - unreachable("XXX: vec4 tessellation evalation shaders not merged yet."); + brw::vec4_tes_visitor v(compiler, log_data, key, prog_data, + nir, mem_ctx, shader_time_index); + if (!v.run()) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); + return NULL; + } + + if (unlikely(INTEL_DEBUG & DEBUG_TES)) + v.dump_instructions(); + + return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, + &prog_data->base, 
v.cfg, + final_assembly_size); } } diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 0cded0c87c6..116dd353016 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -189,6 +189,7 @@ vec4_instruction::has_source_and_destination_hazard() const switch (opcode) { case TCS_OPCODE_SET_INPUT_URB_OFFSETS: case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: + case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: return true; default: return false; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp index c31e72def67..166bc17e1e1 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp @@ -47,6 +47,8 @@ can_do_writemask(const struct brw_device_info *devinfo, case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: case TCS_OPCODE_SET_INPUT_URB_OFFSETS: case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: + case TES_OPCODE_CREATE_INPUT_READ_HEADER: + case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: case VEC4_OPCODE_URB_READ: return false; default: diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 6325569956f..2541c25c6b8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -864,6 +864,46 @@ generate_tcs_output_urb_offsets(struct brw_codegen *p, brw_pop_insn_state(p); } +static void +generate_tes_create_input_read_header(struct brw_codegen *p, + struct brw_reg dst) +{ + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + /* Initialize the register to 0 */ + brw_MOV(p, dst, brw_imm_ud(0)); + + /* Enable all the channels in m0.5 bits 15:8 */ + brw_MOV(p, get_element_ud(dst, 5), brw_imm_ud(0xff00)); + + /* Copy g1.3 (the patch URB handle) to m0.0 and m0.1. 
For safety, + * mask out irrelevant "Reserved" bits, as they're not marked MBZ. + */ + brw_AND(p, vec2(get_element_ud(dst, 0)), + retype(brw_vec1_grf(1, 3), BRW_REGISTER_TYPE_UD), + brw_imm_ud(0x1fff)); + brw_pop_insn_state(p); +} + +static void +generate_tes_add_indirect_urb_offset(struct brw_codegen *p, + struct brw_reg dst, + struct brw_reg header, + struct brw_reg offset) +{ + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, dst, header); + /* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */ + brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0)); + + brw_pop_insn_state(p); +} + static void generate_vec4_urb_read(struct brw_codegen *p, vec4_instruction *inst, @@ -889,6 +929,15 @@ generate_vec4_urb_read(struct brw_codegen *p, brw_inst_set_urb_global_offset(devinfo, send, inst->offset); } +static void +generate_tes_get_primitive_id(struct brw_codegen *p, struct brw_reg dst) +{ + brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, dst, retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_D)); + brw_pop_insn_state(p); +} + static void generate_tcs_get_primitive_id(struct brw_codegen *p, struct brw_reg dst) { @@ -1780,6 +1829,18 @@ generate_code(struct brw_codegen *p, generate_tcs_create_barrier_header(p, prog_data, dst); break; + case TES_OPCODE_CREATE_INPUT_READ_HEADER: + generate_tes_create_input_read_header(p, dst); + break; + + case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: + generate_tes_add_indirect_urb_offset(p, dst, src[0], src[1]); + break; + + case TES_OPCODE_GET_PRIMITIVE_ID: + generate_tes_get_primitive_id(p, dst); + break; + case SHADER_OPCODE_BARRIER: brw_barrier(p, src[0]); brw_WAIT(p); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp new file mode 100644 index 00000000000..ce5fefc75a9 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -0,0 +1,204 
@@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file brw_vec4_tes.cpp + * + * Tessellation evaluation shader specific code derived from the vec4_visitor class. 
+ */ + +#include "brw_vec4_tes.h" + +namespace brw { + +vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler, + void *log_data, + const struct brw_tes_prog_key *key, + struct brw_tes_prog_data *prog_data, + const nir_shader *shader, + void *mem_ctx, + int shader_time_index) + : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base, + shader, mem_ctx, false, shader_time_index) +{ +} + + +dst_reg * +vec4_tes_visitor::make_reg_for_system_value(int location, const glsl_type *type) +{ + return NULL; +} + +void +vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) +{ + const struct brw_tes_prog_data *tes_prog_data = + (const struct brw_tes_prog_data *) prog_data; + + switch (instr->intrinsic) { + case nir_intrinsic_load_tess_level_outer: { + dst_reg dst(this, glsl_type::vec4_type); + nir_system_values[SYSTEM_VALUE_TESS_LEVEL_OUTER] = dst; + + dst_reg temp(this, glsl_type::vec4_type); + vec4_instruction *read = + emit(VEC4_OPCODE_URB_READ, temp, input_read_header); + read->offset = 1; + read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX))); + break; + } + case nir_intrinsic_load_tess_level_inner: { + dst_reg dst(this, glsl_type::vec2_type); + nir_system_values[SYSTEM_VALUE_TESS_LEVEL_INNER] = dst; + + /* Set up the message header to reference the proper parts of the URB */ + dst_reg temp(this, glsl_type::vec4_type); + vec4_instruction *read = + emit(VEC4_OPCODE_URB_READ, temp, input_read_header); + read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { + emit(MOV(dst, swizzle(src_reg(temp), BRW_SWIZZLE_WZYX))); + } else { + read->offset = 1; + emit(MOV(dst, src_reg(temp))); + } + break; + } + default: + vec4_visitor::nir_setup_system_value_intrinsic(instr); + } +} + + +void +vec4_tes_visitor::setup_payload() +{ + int reg = 0; + + /* The payload always contains important data in r0 and r1, which contains + * 
the URB handles that are passed on to the URB write at the end + * of the thread. + */ + reg += 2; + + reg = setup_uniforms(reg); + + this->first_non_payload_grf = reg; +} + + +void +vec4_tes_visitor::emit_prolog() +{ + input_read_header = src_reg(this, glsl_type::uvec4_type); + emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header)); + + this->current_annotation = NULL; +} + + +void +vec4_tes_visitor::emit_urb_write_header(int mrf) +{ + /* No need to do anything for DS; an implied write to this MRF will be + * performed by VS_OPCODE_URB_WRITE. + */ + (void) mrf; +} + + +vec4_instruction * +vec4_tes_visitor::emit_urb_write_opcode(bool complete) +{ + /* For DS, the URB writes end the thread. */ + if (complete) { + if (INTEL_DEBUG & DEBUG_SHADER_TIME) + emit_shader_time_end(); + } + + vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); + inst->urb_write_flags = complete ? + BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS; + + return inst; +} + +void +vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_tess_coord: + /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. 
*/ + emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), + src_reg(brw_vec8_grf(1, 0)))); + break; + case nir_intrinsic_load_primitive_id: + emit(TES_OPCODE_GET_PRIMITIVE_ID, + get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); + break; + + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: { + src_reg indirect_offset = get_indirect_offset(instr); + unsigned imm_offset = instr->const_index[0]; + src_reg header = input_read_header; + + if (indirect_offset.file != BAD_FILE) { + header = src_reg(this, glsl_type::uvec4_type); + emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), + input_read_header, indirect_offset); + } + + dst_reg temp(this, glsl_type::ivec4_type); + vec4_instruction *read = + emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); + read->offset = imm_offset; + read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + + /* Copy to target. We might end up with some funky writemasks landing + * in here, but we really don't want them in the above pseudo-ops. + */ + dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); + dst.writemask = brw_writemask_for_size(instr->num_components); + emit(MOV(dst, src_reg(temp))); + break; + } + default: + vec4_visitor::nir_emit_intrinsic(instr); + } +} + + +void +vec4_tes_visitor::emit_thread_end() +{ + /* For DS, we always end the thread by emitting a single vertex. + * emit_urb_write_opcode() will take care of setting the eot flag on the + * SEND instruction. 
+ */ + emit_vertex(); +} + +} /* namespace brw */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.h b/src/mesa/drivers/dri/i965/brw_vec4_tes.h new file mode 100644 index 00000000000..4b697aa592f --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.h @@ -0,0 +1,69 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file brw_vec4_tes.h + * + * The vec4 mode tessellation evaluation shader compiler backend. 
+ */ + +#ifndef BRW_VEC4_TES_H +#define BRW_VEC4_TES_H + +#include "brw_vec4.h" + +#ifdef __cplusplus +namespace brw { + +class vec4_tes_visitor : public vec4_visitor +{ +public: + vec4_tes_visitor(const struct brw_compiler *compiler, + void *log_data, + const struct brw_tes_prog_key *key, + struct brw_tes_prog_data *prog_data, + const nir_shader *nir, + void *mem_ctx, + int shader_time_index); + +protected: + virtual dst_reg *make_reg_for_system_value(int location, + const glsl_type *type); + virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr); + virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); + + virtual void setup_payload(); + virtual void emit_prolog(); + virtual void emit_thread_end(); + + virtual void emit_urb_write_header(int mrf); + virtual vec4_instruction *emit_urb_write_opcode(bool complete); + +private: + src_reg input_read_header; +}; + +} /* namespace brw */ +#endif /* __cplusplus */ + +#endif /* BRW_VEC4_TES_H */ -- 2.30.2