#include "brw_cfg.h"
#include "brw_fs.h"
#include "brw_nir.h"
-#include "common/gen_debug.h"
+#include "dev/gen_debug.h"
namespace brw {
void *mem_ctx,
bool no_spills,
int shader_time_index)
- : vec4_visitor(compiler, log_data, &c->key.tex,
+ : vec4_visitor(compiler, log_data, &c->key.base.tex,
&prog_data->base, shader, mem_ctx,
no_spills, shader_time_index),
c(c),
assert(c->control_data_bits_per_vertex == 2);
/* Must be a valid stream */
- assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);
+ assert(stream_id < MAX_VERTEX_STREAMS);
/* Control data bits are initialized to 0 so we don't have to set any
* bits when sending vertices to stream 0.
[GL_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};
+} /* namespace brw */
+
extern "C" const unsigned *
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_gs_prog_key *key,
struct brw_gs_prog_data *prog_data,
- const nir_shader *src_shader,
+ nir_shader *nir,
struct gl_program *prog,
int shader_time_index,
- unsigned *final_assembly_size,
+ struct brw_compile_stats *stats,
char **error_str)
{
struct brw_gs_compile c;
c.key = *key;
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_GEOMETRY];
- nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
/* The GLSL linker will have already matched up GS inputs and the outputs
* of prior stages. The driver does extend VS outputs in some cases, but
* For SSO pipelines, we use a fixed VUE map layout based on variable
* locations, so we can rely on rendezvous-by-location making this work.
*/
- GLbitfield64 inputs_read = shader->info.inputs_read;
+ GLbitfield64 inputs_read = nir->info.inputs_read;
brw_compute_vue_map(compiler->devinfo,
&c.input_vue_map, inputs_read,
- shader->info.separate_shader);
+ nir->info.separate_shader, 1);
- shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar);
- brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
- brw_nir_lower_vue_outputs(shader, is_scalar);
- shader = brw_postprocess_nir(shader, compiler, is_scalar);
+ brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
+ brw_nir_lower_vue_inputs(nir, &c.input_vue_map);
+ brw_nir_lower_vue_outputs(nir);
+ brw_postprocess_nir(nir, compiler, is_scalar);
prog_data->base.clip_distance_mask =
- ((1 << shader->info.clip_distance_array_size) - 1);
+ ((1 << nir->info.clip_distance_array_size) - 1);
prog_data->base.cull_distance_mask =
- ((1 << shader->info.cull_distance_array_size) - 1) <<
- shader->info.clip_distance_array_size;
+ ((1 << nir->info.cull_distance_array_size) - 1) <<
+ nir->info.clip_distance_array_size;
prog_data->include_primitive_id =
- (shader->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 0;
+ (nir->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID)) != 0;
- prog_data->invocations = shader->info.gs.invocations;
+ prog_data->invocations = nir->info.gs.invocations;
if (compiler->devinfo->gen >= 8)
- prog_data->static_vertex_count = nir_gs_count_vertices(shader);
+ prog_data->static_vertex_count = nir_gs_count_vertices(nir);
if (compiler->devinfo->gen >= 7) {
- if (shader->info.gs.output_primitive == GL_POINTS) {
+ if (nir->info.gs.output_primitive == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
* to multiple streams, and EndPrimitive() has no effect. So we
* configure the hardware to interpret the control data as stream ID.
*/
prog_data->control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
- /* We only have to emit control bits if we are using streams */
- if (prog && prog->info.gs.uses_streams)
+ /* We only have to emit control bits if we are using non-zero streams */
+ if (nir->info.gs.active_stream_mask != (1 << 0))
c.control_data_bits_per_vertex = 2;
else
c.control_data_bits_per_vertex = 0;
* EndPrimitive().
*/
c.control_data_bits_per_vertex =
- shader->info.gs.uses_end_primitive ? 1 : 0;
+ nir->info.gs.uses_end_primitive ? 1 : 0;
}
} else {
/* There are no control data bits in gen6. */
c.control_data_bits_per_vertex = 0;
}
c.control_data_header_size_bits =
- shader->info.gs.vertices_out * c.control_data_bits_per_vertex;
+ nir->info.gs.vertices_out * c.control_data_bits_per_vertex;
/* 1 HWORD = 32 bytes = 256 bits */
prog_data->control_data_header_size_hwords =
unsigned output_size_bytes;
if (compiler->devinfo->gen >= 7) {
output_size_bytes =
- prog_data->output_vertex_size_hwords * 32 * shader->info.gs.vertices_out;
+ prog_data->output_vertex_size_hwords * 32 * nir->info.gs.vertices_out;
output_size_bytes += 32 * prog_data->control_data_header_size_hwords;
} else {
output_size_bytes = prog_data->output_vertex_size_hwords * 32;
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
* a multiple of 128 bytes in gen6.
*/
- if (compiler->devinfo->gen >= 7)
+ if (compiler->devinfo->gen >= 7) {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
- else
+ /* On Cannonlake software shall not program an allocation size that
+ * specifies a size that is a multiple of 3 64B (512-bit) cachelines.
+ */
+ if (compiler->devinfo->gen == 10 &&
+ prog_data->base.urb_entry_size % 3 == 0)
+ prog_data->base.urb_entry_size++;
+ } else {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
+ }
- assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
+ assert(nir->info.gs.output_primitive < ARRAY_SIZE(brw::gl_prim_to_hw_prim));
prog_data->output_topology =
- gl_prim_to_hw_prim[shader->info.gs.output_primitive];
+ brw::gl_prim_to_hw_prim[nir->info.gs.output_primitive];
- prog_data->vertices_in = shader->info.gs.vertices_in;
+ prog_data->vertices_in = nir->info.gs.vertices_in;
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
* need to program a URB read length of ceiling(num_slots / 2).
}
if (is_scalar) {
- fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, shader,
+ fs_visitor v(compiler, log_data, mem_ctx, &c, prog_data, nir,
shader_time_index);
if (v.run_gs()) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
- fs_generator g(compiler, log_data, mem_ctx, &c.key,
- &prog_data->base.base, v.promoted_constants,
- false, MESA_SHADER_GEOMETRY);
+ fs_generator g(compiler, log_data, mem_ctx,
+ &prog_data->base.base, false, MESA_SHADER_GEOMETRY);
if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
const char *label =
- shader->info.label ? shader->info.label : "unnamed";
+ nir->info.label ? nir->info.label : "unnamed";
char *name = ralloc_asprintf(mem_ctx, "%s geometry shader %s",
- label, shader->info.name);
+ label, nir->info.name);
g.enable_debug(name);
}
- g.generate_code(v.cfg, 8);
- return g.get_assembly(final_assembly_size);
+ g.generate_code(v.cfg, 8, v.shader_stats,
+ v.performance_analysis.require(), stats);
+ g.add_const_data(nir->constant_data, nir->constant_data_size);
+ return g.get_assembly();
}
}
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
+ brw::vec4_gs_visitor v(compiler, log_data, &c, prog_data, nir,
mem_ctx, true /* no_spills */, shader_time_index);
+
+ /* Backup 'nr_params' and 'param' as they can be modified by the
+ * DUAL_OBJECT visitor. If it fails, we will run the fallback
+ * (DUAL_INSTANCED or SINGLE mode) and we need to restore original
+ * values.
+ */
+ const unsigned param_count = prog_data->base.base.nr_params;
+ uint32_t *param = ralloc_array(NULL, uint32_t, param_count);
+ memcpy(param, prog_data->base.base.param,
+ sizeof(uint32_t) * param_count);
+
if (v.run()) {
+ /* Success! Backup is not needed */
+ ralloc_free(param);
return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
- shader, &prog_data->base, v.cfg,
- final_assembly_size);
+ nir, &prog_data->base,
+ v.cfg,
+ v.performance_analysis.require(),
+ stats);
+ } else {
+ /* These variables could be modified by the execution of the GS
+ * visitor if it packed the uniforms in the push constant buffer.
+ * As it failed, we need to restore them so we can start again with
+ * DUAL_INSTANCED or SINGLE mode.
+ *
+ * FIXME: Could more variables be modified by this execution?
+ */
+ memcpy(prog_data->base.base.param, param,
+ sizeof(uint32_t) * param_count);
+ prog_data->base.base.nr_params = param_count;
+ prog_data->base.base.nr_pull_params = 0;
+ ralloc_free(param);
}
}
}
else
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
- vec4_gs_visitor *gs = NULL;
+ brw::vec4_gs_visitor *gs = NULL;
const unsigned *ret = NULL;
if (compiler->devinfo->gen >= 7)
- gs = new vec4_gs_visitor(compiler, log_data, &c, prog_data,
- shader, mem_ctx, false /* no_spills */,
+ gs = new brw::vec4_gs_visitor(compiler, log_data, &c, prog_data,
+ nir, mem_ctx, false /* no_spills */,
shader_time_index);
else
- gs = new gen6_gs_visitor(compiler, log_data, &c, prog_data, prog,
- shader, mem_ctx, false /* no_spills */,
+ gs = new brw::gen6_gs_visitor(compiler, log_data, &c, prog_data, prog,
+ nir, mem_ctx, false /* no_spills */,
shader_time_index);
if (!gs->run()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
- ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
+ ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
&prog_data->base, gs->cfg,
- final_assembly_size);
+ gs->performance_analysis.require(),
+ stats);
}
delete gs;
return ret;
}
-
-
-} /* namespace brw */