From 8bc073d6014d1e5d8b4a23020573abbd038f893a Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Fri, 18 Dec 2015 02:23:39 -0800
Subject: [PATCH] i965: Automatically create a passthrough TCS when needed.

Signed-off-by: Kenneth Graunke
Reviewed-by: Jordan Justen
---
 src/mesa/drivers/dri/i965/brw_tcs.c        | 73 +++++++++++++++++++---
 src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 57 ++++++++++++++++++--
 src/mesa/drivers/dri/i965/brw_vec4_tcs.h   |  6 ++-
 3 files changed, 124 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 1da87eedeec..ecb6fd0c8ba 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -82,13 +82,31 @@ brw_codegen_tcs_prog(struct brw_context *brw,
                      struct brw_tess_ctrl_program *tcp,
                      struct brw_tcs_prog_key *key)
 {
+   struct gl_context *ctx = &brw->ctx;
    const struct brw_compiler *compiler = brw->intelScreen->compiler;
    struct brw_stage_state *stage_state = &brw->tcs.base;
-   nir_shader *nir = tcp->program.Base.nir;
+   nir_shader *nir;
    struct brw_tcs_prog_data prog_data;
    bool start_busy = false;
    double start_time = 0;

+   if (tcp) {
+      nir = tcp->program.Base.nir;
+   } else {
+      /* Create a dummy nir_shader.  We won't actually use NIR code to
+       * generate assembly (it's easier to generate assembly directly),
+       * but the whole compiler assumes one of these exists.
+       */
+      const nir_shader_compiler_options *options =
+         ctx->Const.ShaderCompilerOptions[MESA_SHADER_TESS_CTRL].NirOptions;
+      nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, options);
+      nir->num_uniforms = 2; /* both halves of the patch header */
+      nir->info.outputs_written = key->outputs_written;
+      nir->info.inputs_read = key->outputs_written;
+      nir->info.tcs.vertices_out = key->input_vertices;
+      nir->info.name = ralloc_strdup(nir, "passthrough");
+   }
+
    memset(&prog_data, 0, sizeof(prog_data));

    /* Allocate the references to the uniforms that will end up in the
@@ -99,7 +117,8 @@ brw_codegen_tcs_prog(struct brw_context *brw,
     * padding around uniform values below vec4 size, so the worst case is that
     * every uniform is a float which gets padded to the size of a vec4.
     */
-   struct gl_shader *tcs = shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+   struct gl_shader *tcs = shader_prog ?
+      shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL] : NULL;
    int param_count = nir->num_uniforms;
    if (!compiler->scalar_stage[MESA_SHADER_TESS_CTRL])
       param_count *= 4;
@@ -108,15 +127,49 @@ brw_codegen_tcs_prog(struct brw_context *brw,
       rzalloc_array(NULL, const gl_constant_value *, param_count);
    prog_data.base.base.pull_param =
       rzalloc_array(NULL, const gl_constant_value *, param_count);
-   prog_data.base.base.image_param =
-      rzalloc_array(NULL, struct brw_image_param, tcs->NumImages);
    prog_data.base.base.nr_params = param_count;
-   prog_data.base.base.nr_image_params = tcs->NumImages;

-   brw_nir_setup_glsl_uniforms(nir, shader_prog, &tcp->program.Base,
-                               &prog_data.base.base, false);
+   if (tcs) {
+      prog_data.base.base.image_param =
+         rzalloc_array(NULL, struct brw_image_param, tcs->NumImages);
+      prog_data.base.base.nr_image_params = tcs->NumImages;
+
+      brw_nir_setup_glsl_uniforms(nir, shader_prog, &tcp->program.Base,
+                                  &prog_data.base.base, false);
+   } else {
+      /* Upload the Patch URB Header as the first two uniforms.
+       * Do the annoying scrambling so the shader doesn't have to.
+       */
+      const float **param = (const float **) prog_data.base.base.param;
+      static float zero = 0.0f;
+      for (int i = 0; i < 4; i++) {
+         param[7 - i] = &ctx->TessCtrlProgram.patch_default_outer_level[i];
+      }
+
+      if (key->tes_primitive_mode == GL_QUADS) {
+         param[3] = &ctx->TessCtrlProgram.patch_default_inner_level[0];
+         param[2] = &ctx->TessCtrlProgram.patch_default_inner_level[1];
+         param[1] = &zero;
+         param[0] = &zero;
+      } else if (key->tes_primitive_mode == GL_TRIANGLES) {
+         param[4] = &ctx->TessCtrlProgram.patch_default_inner_level[0];
+         for (int i = 0; i < 4; i++)
+            param[i] = &zero;
+      } else {
+         /* GL_ISOLINES: only gl_TessLevelOuter[0..1] are meaningful, and
+          * the hardware expects them in DWords 6-7 of the patch header (in
+          * that order), i.e. the reverse of what the loop above stored.
+          * The unused DWords must still point at valid storage: param[]
+          * comes from rzalloc_array(), so untouched entries would be NULL.
+          */
+         param[7] = &ctx->TessCtrlProgram.patch_default_outer_level[1];
+         param[6] = &ctx->TessCtrlProgram.patch_default_outer_level[0];
+         for (int i = 0; i < 6; i++)
+            param[i] = &zero;
+      }
+   }

-   if (unlikely(INTEL_DEBUG & DEBUG_TCS))
+   if (unlikely(INTEL_DEBUG & DEBUG_TCS) && tcs)
       brw_dump_ir("tessellation control", shader_prog, tcs, NULL);

    int st_index = -1;
@@ -138,6 +191,8 @@ brw_codegen_tcs_prog(struct brw_context *brw,
       if (shader_prog) {
          shader_prog->LinkStatus = false;
          ralloc_strcat(&shader_prog->InfoLog, error_str);
+      } else {
+         ralloc_free(nir);
       }

       _mesa_problem(NULL, "Failed to compile tessellation control shader: "
@@ -172,6 +227,8 @@ brw_codegen_tcs_prog(struct brw_context *brw,
                     &prog_data, sizeof(prog_data),
                     &stage_state->prog_offset, &brw->tcs.prog_data);
    ralloc_free(mem_ctx);
+   if (!tcs)
+      ralloc_free(nir);

    return true;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index bd985598f65..4bcf99eaad1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -38,14 +38,65 @@ vec4_tcs_visitor::vec4_tcs_visitor(const struct brw_compiler *compiler,
                                    struct brw_tcs_prog_data *prog_data,
                                    const nir_shader *nir,
                                    void *mem_ctx,
-                                   int shader_time_index)
+                                   int shader_time_index,
+                                   const struct brw_vue_map *input_vue_map)
    : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base,
                   nir, mem_ctx, false, shader_time_index),
-     key(key)
+     input_vue_map(input_vue_map), key(key)
 {
 }


+void
+vec4_tcs_visitor::emit_nir_code()
+{
+   if (key->program_string_id != 0) {
+      /* We have a real application-supplied TCS, emit real code. */
+      vec4_visitor::emit_nir_code();
+   } else {
+      /* There is no TCS; automatically generate a passthrough shader
+       * that writes the API-specified default tessellation levels and
+       * copies VS outputs to TES inputs.
+       */
+      uniforms = 2;
+      uniform_size[0] = 1;
+      uniform_size[1] = 1;
+
+      uint64_t varyings = key->outputs_written;
+
+      src_reg vertex_offset(this, glsl_type::uint_type);
+      emit(MUL(dst_reg(vertex_offset), invocation_id,
+               brw_imm_ud(prog_data->vue_map.num_per_vertex_slots)));
+
+      while (varyings != 0) {
+         const int varying = ffsll(varyings) - 1;
+
+         unsigned in_offset = input_vue_map->varying_to_slot[varying];
+         unsigned out_offset = prog_data->vue_map.varying_to_slot[varying];
+         assert(out_offset >= 2);
+
+         dst_reg val(this, glsl_type::vec4_type);
+         emit_input_urb_read(val, invocation_id, in_offset, src_reg());
+         emit_urb_write(src_reg(val), WRITEMASK_XYZW, out_offset,
+                        vertex_offset);
+
+         varyings &= ~BITFIELD64_BIT(varying);
+      }
+
+      /* Only write the tessellation factors from invocation 0.
+       * There's no point in making other threads do redundant work.
+       */
+      emit(CMP(dst_null_d(), invocation_id, brw_imm_ud(0),
+               BRW_CONDITIONAL_EQ));
+      emit(IF(BRW_PREDICATE_NORMAL));
+      emit_urb_write(src_reg(UNIFORM, 0, glsl_type::vec4_type),
+                     WRITEMASK_XYZW, 0, src_reg());
+      emit_urb_write(src_reg(UNIFORM, 1, glsl_type::vec4_type),
+                     WRITEMASK_XYZW, 1, src_reg());
+      emit(BRW_OPCODE_ENDIF);
+   }
+}
+
 void
 vec4_tcs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr)
 {
@@ -478,7 +529,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
    }

    vec4_tcs_visitor v(compiler, log_data, key, prog_data,
-                      nir, mem_ctx, shader_time_index);
+                      nir, mem_ctx, shader_time_index, &input_vue_map);
    if (!v.run()) {
       if (error_str)
          *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
index 2bf4885a560..2c6801b2ae3 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.h
@@ -45,9 +45,11 @@ public:
                     struct brw_tcs_prog_data *prog_data,
                     const nir_shader *nir,
                     void *mem_ctx,
-                    int shader_time_index);
+                    int shader_time_index,
+                    const struct brw_vue_map *input_vue_map);

 protected:
+   virtual void emit_nir_code();
    virtual dst_reg *make_reg_for_system_value(int location,
                                               const glsl_type *type);
    virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
@@ -74,6 +76,8 @@ protected:
    virtual void emit_urb_write_header(int mrf) {}
    virtual vec4_instruction *emit_urb_write_opcode(bool complete) { return NULL; }

+   const struct brw_vue_map *input_vue_map;
+
    const struct brw_tcs_prog_key *key;
    src_reg invocation_id;
 };
-- 
2.30.2