X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_vec4_gs_visitor.cpp;h=05cbab26620669494d23d212e0c586f55e27b51e;hb=e5f735a986576a7634dfb8fed23c969bf4260f45;hp=ca59927cd3ab75de88104f8a0f6aa1b500f76a7e;hpb=95c917668ca887432b8a7a299555c6c2ca449e04;p=mesa.git diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index ca59927cd3a..05cbab26620 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -32,7 +32,7 @@ #include "brw_cfg.h" #include "brw_fs.h" #include "brw_nir.h" -#include "common/gen_debug.h" +#include "dev/gen_debug.h" namespace brw { @@ -44,7 +44,7 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, void *mem_ctx, bool no_spills, int shader_time_index) - : vec4_visitor(compiler, log_data, &c->key.tex, + : vec4_visitor(compiler, log_data, &c->key.base.tex, &prog_data->base, shader, mem_ctx, no_spills, shader_time_index), c(c), @@ -610,15 +610,17 @@ static const GLuint gl_prim_to_hw_prim[GL_TRIANGLE_STRIP_ADJACENCY+1] = { [GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, }; +} /* namespace brw */ + extern "C" const unsigned * brw_compile_gs(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, const struct brw_gs_prog_key *key, struct brw_gs_prog_data *prog_data, - const nir_shader *src_shader, + nir_shader *nir, struct gl_program *prog, int shader_time_index, - unsigned *final_assembly_size, + struct brw_compile_stats *stats, char **error_str) { struct brw_gs_compile c; @@ -626,7 +628,6 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, c.key = *key; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY]; - nir_shader *shader = nir_shader_clone(mem_ctx, src_shader); /* The GLSL linker will have already matched up GS inputs and the outputs * of prior stages. The driver does extend VS outputs in some cases, but @@ -636,40 +637,40 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * For SSO pipelines, we use a fixed VUE map layout based on variable * locations, so we can rely on rendezvous-by-location making this work. */ - GLbitfield64 inputs_read = shader->info.inputs_read; + GLbitfield64 inputs_read = nir->info.inputs_read; brw_compute_vue_map(compiler->devinfo, &c.input_vue_map, inputs_read, - shader->info.separate_shader); + nir->info.separate_shader, 1); - shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar); - brw_nir_lower_vue_inputs(shader, &c.input_vue_map); - brw_nir_lower_vue_outputs(shader, is_scalar); - shader = brw_postprocess_nir(shader, compiler, is_scalar); + brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); + brw_nir_lower_vue_inputs(nir, &c.input_vue_map); + brw_nir_lower_vue_outputs(nir); + brw_postprocess_nir(nir, compiler, is_scalar); prog_data->base.clip_distance_mask = - ((1 << shader->info.clip_distance_array_size) - 1); + ((1 << nir->info.clip_distance_array_size) - 1); prog_data->base.cull_distance_mask = - ((1 << shader->info.cull_distance_array_size) - 1) << - shader->info.clip_distance_array_size; + ((1 << nir->info.cull_distance_array_size) - 1) << + nir->info.clip_distance_array_size; prog_data->include_primitive_id = - (shader->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 0; + (nir->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 0; - prog_data->invocations = shader->info.gs.invocations; + prog_data->invocations = nir->info.gs.invocations; if (compiler->devinfo->gen >= 8) - prog_data->static_vertex_count = nir_gs_count_vertices(shader); + prog_data->static_vertex_count = nir_gs_count_vertices(nir); if (compiler->devinfo->gen >= 7) { - if (shader->info.gs.output_primitive == GL_POINTS) { + if (nir->info.gs.output_primitive == GL_POINTS) { /* When the output type is points, the geometry shader may output data * to multiple streams, and EndPrimitive() has no effect. So we * configure the hardware to interpret the control data as stream ID. */ prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID; - /* We only have to emit control bits if we are using streams */ - if (prog && prog->info.gs.uses_streams) + /* We only have to emit control bits if we are using non-zero streams */ + if (nir->info.gs.active_stream_mask != (1 << 0)) c.control_data_bits_per_vertex = 2; else c.control_data_bits_per_vertex = 0; @@ -686,14 +687,14 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * EndPrimitive(). */ c.control_data_bits_per_vertex = - shader->info.gs.uses_end_primitive ? 1 : 0; + nir->info.gs.uses_end_primitive ? 1 : 0; } } else { /* There are no control data bits in gen6. */ c.control_data_bits_per_vertex = 0; } c.control_data_header_size_bits = - shader->info.gs.vertices_out * c.control_data_bits_per_vertex; + nir->info.gs.vertices_out * c.control_data_bits_per_vertex; /* 1 HWORD = 32 bytes = 256 bits */ prog_data->control_data_header_size_hwords = @@ -788,7 +789,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, unsigned output_size_bytes; if (compiler->devinfo->gen >= 7) { output_size_bytes = - prog_data->output_vertex_size_hwords * 32 * shader->info.gs.vertices_out; + prog_data->output_vertex_size_hwords * 32 * nir->info.gs.vertices_out; output_size_bytes += 32 * prog_data->control_data_header_size_hwords; } else { output_size_bytes = prog_data->output_vertex_size_hwords * 32; @@ -829,11 +830,11 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; } - assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim)); + assert(nir->info.gs.output_primitive < ARRAY_SIZE(brw::gl_prim_to_hw_prim)); prog_data->output_topology = - gl_prim_to_hw_prim[shader->info.gs.output_primitive]; + brw::gl_prim_to_hw_prim[nir->info.gs.output_primitive]; - prog_data->vertices_in = shader->info.gs.vertices_in; + prog_data->vertices_in = nir->info.gs.vertices_in; /* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we * need to program a URB read length of ceiling(num_slots / 2). @@ -851,24 +852,25 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, } if (is_scalar) { - fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader, + fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, nir, shader_time_index); if (v.run_gs()) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; - fs_generator g(compiler, log_data, mem_ctx, &c.key, - &prog_data->base.base, v.promoted_constants, - false, MESA_SHADER_GEOMETRY); + fs_generator g(compiler, log_data, mem_ctx, + &prog_data->base.base, false, MESA_SHADER_GEOMETRY); if (unlikely(INTEL_DEBUG & DEBUG_GS)) { const char *label = - shader->info.label ? shader->info.label : "unnamed"; + nir->info.label ? nir->info.label : "unnamed"; char *name = ralloc_asprintf(mem_ctx, "%s geometry shader %s", - label, shader->info.name); + label, nir->info.name); g.enable_debug(name); } - g.generate_code(v.cfg, 8); - return g.get_assembly(final_assembly_size); + g.generate_code(v.cfg, 8, v.shader_stats, + v.performance_analysis.require(), stats); + g.add_const_data(nir->constant_data, nir->constant_data_size); + return g.get_assembly(); } } @@ -881,7 +883,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) { prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; - vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader, + brw::vec4_gs_visitor v(compiler, log_data, &c, prog_data, nir, mem_ctx, true /* no_spills */, shader_time_index); /* Backup 'nr_params' and 'param' as they can be modified by the @@ -890,17 +892,18 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * values. */ const unsigned param_count = prog_data->base.base.nr_params; - gl_constant_value **param = ralloc_array(NULL, gl_constant_value*, - param_count); + uint32_t *param = ralloc_array(NULL, uint32_t, param_count); memcpy(param, prog_data->base.base.param, - sizeof(gl_constant_value*) * param_count); + sizeof(uint32_t) * param_count); if (v.run()) { /* Success! Backup is not needed */ ralloc_free(param); return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, - shader, &prog_data->base, v.cfg, - final_assembly_size); + nir, &prog_data->base, + v.cfg, + v.performance_analysis.require(), + stats); } else { /* These variables could be modified by the execution of the GS * visitor if it packed the uniforms in the push constant buffer. @@ -910,8 +913,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * FIXME: Could more variables be modified by this execution? */ memcpy(prog_data->base.base.param, param, - sizeof(gl_constant_value*) * param_count); + sizeof(uint32_t) * param_count); prog_data->base.base.nr_params = param_count; + prog_data->base.base.nr_pull_params = 0; ralloc_free(param); } } @@ -945,30 +949,28 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, else prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE; - vec4_gs_visitor *gs = NULL; + brw::vec4_gs_visitor *gs = NULL; const unsigned *ret = NULL; if (compiler->devinfo->gen >= 7) - gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data, - shader, mem_ctx, false /* no_spills */, + gs = new brw::vec4_gs_visitor(compiler, log_data, &c, prog_data, + nir, mem_ctx, false /* no_spills */, shader_time_index); else - gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, prog, - shader, mem_ctx, false /* no_spills */, + gs = new brw::gen6_gs_visitor(compiler, log_data, &c, prog_data, prog, + nir, mem_ctx, false /* no_spills */, shader_time_index); if (!gs->run()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, gs->fail_msg); } else { - ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader, + ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, &prog_data->base, gs->cfg, - final_assembly_size); + gs->performance_analysis.require(), + stats); } delete gs; return ret; } - - -} /* namespace brw */