From a5038427c3624e559f954124d77304f9ae9b884c Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Tue, 10 Nov 2015 14:35:27 -0800 Subject: [PATCH] i965: Add tessellation evaluation shaders The TES is essentially a post-tessellator VS, which has access to the entire TCS output patch, and a special gl_TessCoord input. Otherwise, they're very straightforward. This patch implements SIMD8 tessellation evaluation shaders for Gen8+. The tessellator can generate a lot of geometry, so operating in SIMD8 mode (8 vertices per thread) is more efficient than SIMD4x2 mode (only 2 vertices per thread). I have another patch which implements SIMD4x2 mode for older hardware (or via an environment variable override). We currently handle all inputs via the pull model. v2: Improve comments (suggested by Jordan Justen). Signed-off-by: Kenneth Graunke Reviewed-by: Jordan Justen --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_compiler.h | 24 ++ src/mesa/drivers/dri/i965/brw_context.h | 6 + src/mesa/drivers/dri/i965/brw_fs.cpp | 49 +++ src/mesa/drivers/dri/i965/brw_fs.h | 10 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 122 ++++++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 12 +- src/mesa/drivers/dri/i965/brw_link.cpp | 4 + src/mesa/drivers/dri/i965/brw_program.h | 2 + src/mesa/drivers/dri/i965/brw_shader.cpp | 94 ++++++ src/mesa/drivers/dri/i965/brw_shader.h | 3 + src/mesa/drivers/dri/i965/brw_state_upload.c | 3 + src/mesa/drivers/dri/i965/brw_tes.c | 300 +++++++++++++++++++ 13 files changed, 627 insertions(+), 3 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_tes.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index d147a731f3e..7354aafbd39 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -151,6 +151,7 @@ i965_FILES = \ brw_state_upload.c \ brw_structs.h \ brw_tcs_surface_state.c \ + brw_tes.c \ brw_tes_surface_state.c \ 
brw_tex.c \ brw_tex_layout.c \ diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index c9e03175010..64d831d4e91 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -191,6 +191,14 @@ struct brw_vs_prog_key { struct brw_sampler_prog_key_data tex; }; +/** The program key for Tessellation Evaluation Shaders. */ +struct brw_tes_prog_key +{ + unsigned program_string_id; + + struct brw_sampler_prog_key_data tex; +}; + /** The program key for Geometry Shaders. */ struct brw_gs_prog_key { @@ -668,6 +676,22 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, unsigned *final_assembly_size, char **error_str); +/** + * Compile a tessellation evaluation shader. + * + * Returns the final assembly and the program's size. + */ +const unsigned * +brw_compile_tes(const struct brw_compiler *compiler, void *log_data, + void *mem_ctx, + const struct brw_tes_prog_key *key, + struct brw_tes_prog_data *prog_data, + const struct nir_shader *shader, + struct gl_shader_program *shader_prog, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str); + /** * Compile a vertex shader. 
* diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 69bc04ceb08..5e840d18920 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1704,6 +1704,12 @@ brw_vertex_program_const(const struct gl_vertex_program *p) return (const struct brw_vertex_program *) p; } +static inline struct brw_tess_eval_program * +brw_tess_eval_program(struct gl_tess_eval_program *p) +{ + return (struct brw_tess_eval_program *) p; +} + static inline struct brw_geometry_program * brw_geometry_program(struct gl_geometry_program *p) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c833ef0be3b..6ac2f857fee 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1685,6 +1685,21 @@ fs_visitor::assign_vs_urb_setup() } } +void +fs_visitor::assign_tes_urb_setup() +{ + assert(stage == MESA_SHADER_TESS_EVAL); + + brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data; + + first_non_payload_grf += 8 * vue_prog_data->urb_read_length; + + /* Rewrite all ATTR file references to HW_REGs. 
*/ + foreach_block_and_inst(block, fs_inst, inst, cfg) { + convert_attr_sources_to_hw_regs(inst); + } +} + void fs_visitor::assign_gs_urb_setup() { @@ -5231,6 +5246,40 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) return !failed; } +bool +fs_visitor::run_tes() +{ + assert(stage == MESA_SHADER_TESS_EVAL); + + /* R0: thread header, R1-3: gl_TessCoord.xyz, R4: URB handles */ + payload.num_regs = 5; + + if (shader_time_index >= 0) + emit_shader_time_begin(); + + emit_nir_code(); + + if (failed) + return false; + + emit_urb_writes(); + + if (shader_time_index >= 0) + emit_shader_time_end(); + + calculate_cfg(); + + optimize(); + + assign_curb_setup(); + assign_tes_urb_setup(); + + fixup_3src_null_dest(); + allocate_registers(); + + return !failed; +} + bool fs_visitor::run_gs() { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index f2e384129cb..372f7606cef 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -81,7 +81,8 @@ public: struct gl_program *prog, const nir_shader *shader, unsigned dispatch_width, - int shader_time_index); + int shader_time_index, + const struct brw_vue_map *input_vue_map = NULL); fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, struct brw_gs_compile *gs_compile, @@ -109,6 +110,7 @@ public: bool run_fs(bool do_rep_send); bool run_vs(gl_clip_plane *clip_planes); + bool run_tes(); bool run_gs(); bool run_cs(); void optimize(); @@ -124,6 +126,7 @@ public: void assign_urb_setup(); void convert_attr_sources_to_hw_regs(fs_inst *inst); void assign_vs_urb_setup(); + void assign_tes_urb_setup(); void assign_gs_urb_setup(); bool assign_regs(bool allow_spilling); void assign_regs_trivial(); @@ -249,6 +252,8 @@ public: nir_intrinsic_instr *instr); void nir_emit_intrinsic(const brw::fs_builder &bld, nir_intrinsic_instr *instr); + void nir_emit_tes_intrinsic(const brw::fs_builder &bld, + nir_intrinsic_instr *instr); void nir_emit_ssbo_atomic(const 
brw::fs_builder &bld, int op, nir_intrinsic_instr *instr); void nir_emit_shared_atomic(const brw::fs_builder &bld, @@ -260,6 +265,7 @@ public: fs_reg get_nir_src(nir_src src); fs_reg get_nir_dest(nir_dest dest); fs_reg get_nir_image_deref(const nir_deref_var *deref); + fs_reg get_indirect_offset(nir_intrinsic_instr *instr); void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst, unsigned wr_mask); @@ -313,6 +319,8 @@ public: struct brw_stage_prog_data *prog_data; struct gl_program *prog; + const struct brw_vue_map *input_vue_map; + int *param_size; int *virtual_grf_start; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index faa26704b2c..9728e2a2ad8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -123,6 +123,7 @@ fs_visitor::nir_setup_outputs() switch (stage) { case MESA_SHADER_VERTEX: + case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: { unsigned location = var->data.location; nir_setup_single_output_varying(&reg, var->type, &location); @@ -443,6 +444,9 @@ fs_visitor::nir_emit_instr(nir_instr *instr) case MESA_SHADER_VERTEX: nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; + case MESA_SHADER_TESS_EVAL: + nir_emit_tes_intrinsic(abld, nir_instr_as_intrinsic(instr)); + break; case MESA_SHADER_GEOMETRY: nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; @@ -1709,6 +1713,24 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst, } } +fs_reg +fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr) +{ + nir_src *offset_src = nir_get_io_offset_src(instr); + nir_const_value *const_value = nir_src_as_const_value(*offset_src); + + if (const_value) { + /* The only constant offset we should find is 0. brw_nir.c's + * add_const_offset_to_base() will fold other constant offsets + * into instr->const_index[0]. 
+ */ + assert(const_value->u[0] == 0); + return fs_reg(); + } + + return get_nir_src(*offset_src); +} + void fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) @@ -1740,6 +1762,106 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld, } } +void +fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld, + nir_intrinsic_instr *instr) +{ + assert(stage == MESA_SHADER_TESS_EVAL); + struct brw_tes_prog_data *tes_prog_data = (struct brw_tes_prog_data *) prog_data; + + fs_reg dest; + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + dest = get_nir_dest(instr->dest); + + switch (instr->intrinsic) { + case nir_intrinsic_load_primitive_id: + bld.MOV(dest, fs_reg(brw_vec1_grf(0, 1))); + break; + case nir_intrinsic_load_tess_coord: + /* gl_TessCoord is part of the payload in g1-3 */ + for (unsigned i = 0; i < 3; i++) { + bld.MOV(offset(dest, bld, i), fs_reg(brw_vec8_grf(1 + i, 0))); + } + break; + + case nir_intrinsic_load_tess_level_outer: + /* When the TES reads gl_TessLevelOuter, we ensure that the patch header + * appears as a push-model input. So, we can simply use the ATTR file + * rather than issuing URB read messages. The data is stored in the + * high DWords in reverse order - DWord 7 contains .x, DWord 6 contains + * .y, and so on. + */ + switch (tes_prog_data->domain) { + case BRW_TESS_DOMAIN_QUAD: + for (unsigned i = 0; i < 4; i++) + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i)); + break; + case BRW_TESS_DOMAIN_TRI: + for (unsigned i = 0; i < 3; i++) + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i)); + break; + case BRW_TESS_DOMAIN_ISOLINE: + for (unsigned i = 0; i < 2; i++) + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i)); + break; + } + break; + + case nir_intrinsic_load_tess_level_inner: + /* When the TES reads gl_TessLevelInner, we ensure that the patch header + * appears as a push-model input. 
So, we can simply use the ATTR file + * rather than issuing URB read messages. + */ + switch (tes_prog_data->domain) { + case BRW_TESS_DOMAIN_QUAD: + bld.MOV(dest, component(fs_reg(ATTR, 0), 3)); + bld.MOV(offset(dest, bld, 1), component(fs_reg(ATTR, 0), 2)); + break; + case BRW_TESS_DOMAIN_TRI: + bld.MOV(dest, component(fs_reg(ATTR, 0), 4)); + break; + case BRW_TESS_DOMAIN_ISOLINE: + /* ignore - value is undefined */ + break; + } + break; + + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: { + fs_reg indirect_offset = get_indirect_offset(instr); + unsigned imm_offset = instr->const_index[0]; + + fs_inst *inst; + if (indirect_offset.file == BAD_FILE) { + /* Replicate the patch handle to all enabled channels */ + fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1); + bld.MOV(patch_handle, retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD)); + + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, patch_handle); + inst->mlen = 1; + } else { + /* Indirect indexing - use per-slot offsets as well. 
*/ + const fs_reg srcs[] = { + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD), + indirect_offset + }; + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0); + + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, payload); + inst->mlen = 2; + } + inst->offset = imm_offset; + inst->base_mrf = -1; + inst->regs_written = instr->num_components; + break; + } + default: + nir_emit_intrinsic(bld, instr); + break; + } +} + void fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 0582e7831de..b6405cd5f0d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -700,7 +700,10 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count) fs_reg sources[8]; fs_reg urb_handle; - urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); + if (stage == MESA_SHADER_TESS_EVAL) + urb_handle = fs_reg(retype(brw_vec8_grf(4, 0), BRW_REGISTER_TYPE_UD)); + else + urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); /* If we don't have any valid slots to write, just do a minimal urb write * send to terminate the shader. 
This includes 1 slot of undefined data, @@ -934,9 +937,11 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, struct gl_program *prog, const nir_shader *shader, unsigned dispatch_width, - int shader_time_index) + int shader_time_index, + const struct brw_vue_map *input_vue_map) : backend_shader(compiler, log_data, mem_ctx, shader, prog_data), key(key), gs_compile(NULL), prog_data(prog_data), prog(prog), + input_vue_map(input_vue_map), dispatch_width(dispatch_width), shader_time_index(shader_time_index), bld(fs_builder(this, dispatch_width).at_end()) @@ -972,6 +977,9 @@ fs_visitor::init() case MESA_SHADER_VERTEX: key_tex = &((const brw_vs_prog_key *) key)->tex; break; + case MESA_SHADER_TESS_EVAL: + key_tex = &((const brw_tes_prog_key *) key)->tex; + break; case MESA_SHADER_GEOMETRY: key_tex = &((const brw_gs_prog_key *) key)->tex; break; diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 31d29ec9045..f5a7d204b3d 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -42,6 +42,7 @@ brw_shader_precompile(struct gl_context *ctx, struct gl_shader_program *sh_prog) { struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX]; + struct gl_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE]; @@ -52,6 +53,9 @@ brw_shader_precompile(struct gl_context *ctx, if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program)) return false; + if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program)) + return false; + if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program)) return false; diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index 339b8e19ec5..1cdab97a82a 100644 --- 
a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -56,6 +56,8 @@ void brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog, struct gl_shader *shader, struct gl_program *prog); +void brw_upload_tes_prog(struct brw_context *brw); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 7a6751bc71b..d9545685b1b 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -24,6 +24,7 @@ #include "brw_context.h" #include "brw_cfg.h" #include "brw_eu.h" +#include "brw_fs.h" #include "brw_nir.h" #include "glsl/glsl_parser_extras.h" #include "main/shaderobj.h" @@ -84,6 +85,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->scalar_stage[MESA_SHADER_VERTEX] = devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS); + compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true; compiler->scalar_stage[MESA_SHADER_GEOMETRY] = devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false); compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true; @@ -135,6 +137,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true; } + compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false; + if (compiler->scalar_stage[MESA_SHADER_GEOMETRY]) compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false; @@ -1289,3 +1293,93 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx) } } +extern "C" const unsigned * +brw_compile_tes(const struct brw_compiler *compiler, + void *log_data, + void *mem_ctx, + const struct brw_tes_prog_key *key, + struct brw_tes_prog_data *prog_data, + const nir_shader *src_shader, + struct gl_shader_program *shader_prog, + int shader_time_index, + unsigned *final_assembly_size, + char **error_str) +{ + const 
struct brw_device_info *devinfo = compiler->devinfo; + struct gl_shader *shader = + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; + const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; + + nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); + nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar); + nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar); + + brw_compute_vue_map(devinfo, &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; + + assert(output_size_bytes >= 1); + if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); + return NULL; + } + + /* URB entry sizes are stored as a multiple of 64 bytes. */ + prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; + + struct brw_vue_map input_vue_map; + brw_compute_tess_vue_map(&input_vue_map, + nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID, + nir->info.patch_inputs_read); + + bool need_patch_header = nir->info.system_values_read & + (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) | + BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER)); + + /* The TES will pull most inputs using URB read messages. + * + * However, we push the patch header for TessLevel factors when required, + * as it's a tiny amount of extra data. + */ + prog_data->base.urb_read_length = need_patch_header ? 
1 : 0; + + if (unlikely(INTEL_DEBUG & DEBUG_TES)) { + fprintf(stderr, "TES Input "); + brw_print_vue_map(stderr, &input_vue_map); + fprintf(stderr, "TES Output "); + brw_print_vue_map(stderr, &prog_data->base.vue_map); + } + + if (is_scalar) { + fs_visitor v(compiler, log_data, mem_ctx, (void *) key, + &prog_data->base.base, shader->Program, nir, 8, + shader_time_index, &input_vue_map); + if (!v.run_tes()) { + if (error_str) + *error_str = ralloc_strdup(mem_ctx, v.fail_msg); + return NULL; + } + + prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; + + fs_generator g(compiler, log_data, mem_ctx, (void *) key, + &prog_data->base.base, v.promoted_constants, false, + "TES"); + if (unlikely(INTEL_DEBUG & DEBUG_TES)) { + g.enable_debug(ralloc_asprintf(mem_ctx, + "%s tessellation evaluation shader %s", + nir->info.label ? nir->info.label + : "unnamed", + nir->info.name)); + } + + g.generate_code(v.cfg, 8); + + return g.get_assembly(final_assembly_size); + } else { + unreachable("XXX: vec4 tessellation evalation shaders not merged yet."); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 8c5778f9048..2e73f123082 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -273,6 +273,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage, bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *shader_prog, struct gl_program *prog); +bool brw_tes_precompile(struct gl_context *ctx, + struct gl_shader_program *shader_prog, + struct gl_program *prog); bool brw_gs_precompile(struct gl_context *ctx, struct gl_shader_program *shader_prog, struct gl_program *prog); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index cf3cf97daea..c657b254f04 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -678,6 +678,7 @@ brw_upload_programs(struct 
brw_context *brw, { if (pipeline == BRW_RENDER_PIPELINE) { brw_upload_vs_prog(brw); + brw_upload_tes_prog(brw); if (brw->gen < 6) brw_upload_ff_gs_prog(brw); @@ -691,6 +692,8 @@ brw_upload_programs(struct brw_context *brw, bool old_separate = brw->vue_map_geom_out.separate; if (brw->geometry_program) brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map; + else if (brw->tess_eval_program) + brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map; else brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map; diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c new file mode 100644 index 00000000000..3c1270679f9 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_tes.c @@ -0,0 +1,300 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file brw_tes.c + * + * Tessellation evaluation shader state upload code. 
+ */ + +#include "brw_context.h" +#include "brw_nir.h" +#include "brw_program.h" +#include "brw_shader.h" +#include "brw_state.h" +#include "program/prog_parameter.h" + +static void +brw_tes_debug_recompile(struct brw_context *brw, + struct gl_shader_program *shader_prog, + const struct brw_tes_prog_key *key) +{ + struct brw_cache_item *c = NULL; + const struct brw_tes_prog_key *old_key = NULL; + bool found = false; + + perf_debug("Recompiling tessellation evaluation shader for program %d\n", + shader_prog->Name); + + for (unsigned int i = 0; i < brw->cache.size; i++) { + for (c = brw->cache.items[i]; c; c = c->next) { + if (c->cache_id == BRW_CACHE_TES_PROG) { + old_key = c->key; + + if (old_key->program_string_id == key->program_string_id) + break; + } + } + if (c) + break; + } + + if (!c) { + perf_debug(" Didn't find previous compile in the shader cache for " + "debug\n"); + return; + } + + found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); + + if (!found) { + perf_debug(" Something else\n"); + } +} + +static bool +brw_codegen_tes_prog(struct brw_context *brw, + struct gl_shader_program *shader_prog, + struct brw_tess_eval_program *tep, + struct brw_tes_prog_key *key) +{ + const struct brw_compiler *compiler = brw->intelScreen->compiler; + const struct brw_device_info *devinfo = brw->intelScreen->devinfo; + struct brw_stage_state *stage_state = &brw->tes.base; + nir_shader *nir = tep->program.Base.nir; + struct brw_tes_prog_data prog_data; + bool start_busy = false; + double start_time = 0; + + memset(&prog_data, 0, sizeof(prog_data)); + + brw_assign_common_binding_table_offsets(MESA_SHADER_TESS_EVAL, devinfo, + shader_prog, &tep->program.Base, + &prog_data.base.base, 0); + + switch (tep->program.Spacing) { + case GL_EQUAL: + prog_data.partitioning = BRW_TESS_PARTITIONING_INTEGER; + break; + case GL_FRACTIONAL_ODD: + prog_data.partitioning = BRW_TESS_PARTITIONING_ODD_FRACTIONAL; + break; + case GL_FRACTIONAL_EVEN: + prog_data.partitioning = 
BRW_TESS_PARTITIONING_EVEN_FRACTIONAL; + break; + default: + unreachable("invalid domain shader spacing"); + } + + switch (tep->program.PrimitiveMode) { + case GL_QUADS: + prog_data.domain = BRW_TESS_DOMAIN_QUAD; + break; + case GL_TRIANGLES: + prog_data.domain = BRW_TESS_DOMAIN_TRI; + break; + case GL_ISOLINES: + prog_data.domain = BRW_TESS_DOMAIN_ISOLINE; + break; + default: + unreachable("invalid domain shader primitive mode"); + } + + if (tep->program.PointMode) { + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; + } else if (tep->program.PrimitiveMode == GL_ISOLINES) { + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; + } else { + /* Hardware winding order is backwards from OpenGL */ + switch (tep->program.VertexOrder) { + case GL_CCW: + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW; + break; + case GL_CW: + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; + break; + default: + unreachable("invalid domain shader vertex order"); + } + } + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + * + * Note: param_count needs to be num_uniform_components * 4, since we add + * padding around uniform values below vec4 size, so the worst case is that + * every uniform is a float which gets padded to the size of a vec4. 
+ */ + struct gl_shader *tes = shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL]; + int param_count = nir->num_uniforms; + if (!compiler->scalar_stage[MESA_SHADER_TESS_EVAL]) + param_count *= 4; + + prog_data.base.base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.base.image_param = + rzalloc_array(NULL, struct brw_image_param, tes->NumImages); + prog_data.base.base.nr_params = param_count; + prog_data.base.base.nr_image_params = tes->NumImages; + + brw_nir_setup_glsl_uniforms(nir, shader_prog, &tep->program.Base, + &prog_data.base.base, + compiler->scalar_stage[MESA_SHADER_TESS_EVAL]); + + if (unlikely(INTEL_DEBUG & DEBUG_TES)) + brw_dump_ir("tessellation evaluation", shader_prog, tes, NULL); + + int st_index = -1; + if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME)) + st_index = brw_get_shader_time_index(brw, shader_prog, NULL, ST_TES); + + if (unlikely(brw->perf_debug)) { + start_busy = brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo); + start_time = get_time(); + } + + void *mem_ctx = ralloc_context(NULL); + unsigned program_size; + char *error_str; + const unsigned *program = + brw_compile_tes(compiler, brw, mem_ctx, key, &prog_data, nir, + shader_prog, st_index, &program_size, &error_str); + if (program == NULL) { + if (shader_prog) { + shader_prog->LinkStatus = false; + ralloc_strcat(&shader_prog->InfoLog, error_str); + } + + _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: " + "%s\n", error_str); + + ralloc_free(mem_ctx); + return false; + } + + if (unlikely(brw->perf_debug)) { + struct brw_shader *btes = (struct brw_shader *) tes; + if (btes->compiled_once) { + brw_tes_debug_recompile(brw, shader_prog, key); + } + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { + perf_debug("TES compile took %.03f ms and stalled the GPU\n", + (get_time() - start_time) * 1000); + } + 
btes->compiled_once = true; + } + + /* Scratch space is used for register spilling */ + if (prog_data.base.base.total_scratch) { + brw_get_scratch_bo(brw, &stage_state->scratch_bo, + prog_data.base.base.total_scratch * + brw->max_ds_threads); + } + + brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG, + key, sizeof(*key), + program, program_size, + &prog_data, sizeof(prog_data), + &stage_state->prog_offset, &brw->tes.prog_data); + ralloc_free(mem_ctx); + + return true; +} + + +void +brw_upload_tes_prog(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + struct gl_shader_program **current = ctx->_Shader->CurrentProgram; + struct brw_stage_state *stage_state = &brw->tes.base; + struct brw_tes_prog_key key; + /* BRW_NEW_TESS_EVAL_PROGRAM */ + struct brw_tess_eval_program *tep = + (struct brw_tess_eval_program *) brw->tess_eval_program; + + if (!brw_state_dirty(brw, + _NEW_TEXTURE, + BRW_NEW_TESS_EVAL_PROGRAM)) + return; + + if (tep == NULL) { + /* Other state atoms had better not try to access prog_data, since + * there's no TES program. 
+ */ + brw->tes.prog_data = NULL; + brw->tes.base.prog_data = NULL; + return; + } + + struct gl_program *prog = &tep->program.Base; + + memset(&key, 0, sizeof(key)); + + key.program_string_id = tep->id; + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, + &key.tex); + + if (!brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG, + &key, sizeof(key), + &stage_state->prog_offset, &brw->tes.prog_data)) { + bool success = brw_codegen_tes_prog(brw, current[MESA_SHADER_TESS_EVAL], + tep, &key); + assert(success); + (void)success; + } + brw->tes.base.prog_data = &brw->tes.prog_data->base.base; +} + + +bool +brw_tes_precompile(struct gl_context *ctx, + struct gl_shader_program *shader_prog, + struct gl_program *prog) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_tes_prog_key key; + uint32_t old_prog_offset = brw->tes.base.prog_offset; + struct brw_tes_prog_data *old_prog_data = brw->tes.prog_data; + bool success; + + struct gl_tess_eval_program *tep = (struct gl_tess_eval_program *)prog; + struct brw_tess_eval_program *btep = brw_tess_eval_program(tep); + + memset(&key, 0, sizeof(key)); + + key.program_string_id = btep->id; + brw_setup_tex_for_precompile(brw, &key.tex, prog); + + success = brw_codegen_tes_prog(brw, shader_prog, btep, &key); + + brw->tes.base.prog_offset = old_prog_offset; + brw->tes.prog_data = old_prog_data; + + return success; +} -- 2.30.2