struct brw_gs_compile
{
struct brw_gs_prog_key key;
- struct brw_gs_prog_data prog_data;
struct brw_vue_map input_vue_map;
unsigned control_data_bits_per_vertex;
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
const struct nir_shader *shader,
struct gl_shader_program *shader_prog,
int shader_time_index,
{
struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct brw_stage_state *stage_state = &brw->gs.base;
+ struct brw_gs_prog_data prog_data;
struct brw_gs_compile c;
+ memset(&prog_data, 0, sizeof(prog_data));
memset(&c, 0, sizeof(c));
c.key = *key;
- c.prog_data.include_primitive_id =
+ prog_data.include_primitive_id =
(gp->program.Base.InputsRead & VARYING_BIT_PRIMITIVE_ID) != 0;
- c.prog_data.invocations = gp->program.Invocations;
+ prog_data.invocations = gp->program.Invocations;
assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog,
- &gp->program.Base, &c.prog_data);
+ &gp->program.Base, &prog_data);
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
int param_count = gp->program.Base.nir->num_uniforms * 4;
- c.prog_data.base.base.param =
+ prog_data.base.base.param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
- c.prog_data.base.base.pull_param =
+ prog_data.base.base.pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
- c.prog_data.base.base.image_param =
+ prog_data.base.base.image_param =
rzalloc_array(NULL, struct brw_image_param, gs->NumImages);
- c.prog_data.base.base.nr_params = param_count;
- c.prog_data.base.base.nr_image_params = gs->NumImages;
+ prog_data.base.base.nr_params = param_count;
+ prog_data.base.base.nr_image_params = gs->NumImages;
brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
- &c.prog_data.base.base, false);
+ &prog_data.base.base, false);
if (brw->gen >= 8) {
- c.prog_data.static_vertex_count =
+ prog_data.static_vertex_count =
nir_gs_count_vertices(gp->program.Base.nir);
}
* to multiple streams, and EndPrimitive() has no effect. So we
* configure the hardware to interpret the control data as stream ID.
*/
- c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+ prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
/* We only have to emit control bits if we are using streams */
if (prog->Geom.UsesStreams)
* streams is not supported. So we configure the hardware to interpret
* the control data as EndPrimitive information (a.k.a. "cut bits").
*/
- c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+ prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
/* We only need to output control data if the shader actually calls
* EndPrimitive().
/* If it is using transform feedback, enable it */
if (prog->TransformFeedback.NumVarying)
- c.prog_data.gen6_xfb_enabled = true;
+ prog_data.gen6_xfb_enabled = true;
else
- c.prog_data.gen6_xfb_enabled = false;
+ prog_data.gen6_xfb_enabled = false;
}
c.control_data_header_size_bits =
gp->program.VerticesOut * c.control_data_bits_per_vertex;
/* 1 HWORD = 32 bytes = 256 bits */
- c.prog_data.control_data_header_size_hwords =
+ prog_data.control_data_header_size_hwords =
ALIGN(c.control_data_header_size_bits, 256) / 256;
GLbitfield64 outputs_written = gp->program.Base.OutputsWritten;
brw_compute_vue_map(brw->intelScreen->devinfo,
- &c.prog_data.base.vue_map, outputs_written,
+ &prog_data.base.vue_map, outputs_written,
prog ? prog->SeparateShader : false);
/* Compute the output vertex size.
* per interpolation type, so this is plenty.
*
*/
- unsigned output_vertex_size_bytes = c.prog_data.base.vue_map.num_slots * 16;
+ unsigned output_vertex_size_bytes = prog_data.base.vue_map.num_slots * 16;
assert(brw->gen == 6 ||
output_vertex_size_bytes <= GEN7_MAX_GS_OUTPUT_VERTEX_SIZE_BYTES);
- c.prog_data.output_vertex_size_hwords =
+ prog_data.output_vertex_size_hwords =
ALIGN(output_vertex_size_bytes, 32) / 32;
/* Compute URB entry size. The maximum allowed URB entry size is 32k.
unsigned output_size_bytes;
if (brw->gen >= 7) {
output_size_bytes =
- c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
- output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
+ prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
+ output_size_bytes += 32 * prog_data.control_data_header_size_hwords;
} else {
- output_size_bytes = c.prog_data.output_vertex_size_hwords * 32;
+ output_size_bytes = prog_data.output_vertex_size_hwords * 32;
}
/* Broadwell stores "Vertex Count" as a full 8 DWord (32 byte) URB output,
* a multiple of 128 bytes in gen6.
*/
if (brw->gen >= 7)
- c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+ prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
else
- c.prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
+ prog_data.base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
- c.prog_data.output_topology =
+ prog_data.output_topology =
get_hw_prim_for_gl_prim(gp->program.OutputType);
/* The GLSL linker will have already matched up GS inputs and the outputs
/* GS inputs are read from the VUE 256 bits (2 vec4's) at a time, so we
* need to program a URB read length of ceiling(num_slots / 2).
*/
- c.prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
+ prog_data.base.urb_read_length = (c.input_vue_map.num_slots + 1) / 2;
if (unlikely(INTEL_DEBUG & DEBUG_GS))
brw_dump_ir("geometry", prog, gs, NULL);
char *error_str;
const unsigned *program =
brw_compile_gs(brw->intelScreen->compiler, brw, mem_ctx, &c,
- shader->Program->nir, prog,
+ &prog_data, shader->Program->nir, prog,
st_index, &program_size, &error_str);
if (program == NULL) {
ralloc_free(mem_ctx);
}
/* Scratch space is used for register spilling */
- if (c.prog_data.base.base.total_scratch) {
+ if (prog_data.base.base.total_scratch) {
brw_get_scratch_bo(brw, &stage_state->scratch_bo,
- c.prog_data.base.base.total_scratch *
+ prog_data.base.base.total_scratch *
brw->max_gs_threads);
}
brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
&c.key, sizeof(c.key),
program, program_size,
- &c.prog_data, sizeof(c.prog_data),
+ &prog_data, sizeof(prog_data),
&stage_state->prog_offset, &brw->gs.prog_data);
ralloc_free(mem_ctx);
break;
case nir_intrinsic_load_primitive_id:
- assert(c->prog_data.include_primitive_id);
+ assert(gs_prog_data->include_primitive_id);
dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
break;
vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
: vec4_visitor(compiler, log_data, &c->key.tex,
- &c->prog_data.base, shader, mem_ctx,
+ &prog_data->base, shader, mem_ctx,
no_spills, shader_time_index),
- c(c)
+ c(c),
+ gs_prog_data(prog_data)
{
}
*/
const unsigned num_input_vertices = nir->info.gs.vertices_in;
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
- unsigned input_array_stride = c->prog_data.base.urb_read_length * 2;
+ unsigned input_array_stride = prog_data->urb_read_length * 2;
for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
int varying = c->input_vue_map.slot_to_varying[slot];
* to be interleaved, so one register contains two attribute slots.
*/
int attributes_per_reg =
- c->prog_data.base.dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
+ prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
/* If a geometry shader tries to read from an input that wasn't written by
* the vertex shader, that produces undefined results, but it shouldn't
reg++;
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
- if (c->prog_data.include_primitive_id)
+ if (gs_prog_data->include_primitive_id)
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++;
reg = setup_uniforms(reg);
*/
int base_mrf = 1;
- bool static_vertex_count = c->prog_data.static_vertex_count != -1;
+ bool static_vertex_count = gs_prog_data->static_vertex_count != -1;
/* If the previous instruction was a URB write, we don't need to issue
* a second one - we can just set the EOT bit on the previous write.
vec4_instruction *inst = emit(MOV(mrf_reg, r0));
inst->force_writemask_all = true;
emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, this->vertex_count,
- (uint32_t) c->prog_data.output_vertex_size_hwords);
+ (uint32_t) gs_prog_data->output_vertex_size_hwords);
}
(void) complete;
vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
- inst->offset = c->prog_data.control_data_header_size_hwords;
+ inst->offset = gs_prog_data->control_data_header_size_hwords;
/* We need to increment Global Offset by 1 to make room for Broadwell's
* extra "Vertex Count" payload at the beginning of the URB entry.
*/
- if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
+ if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset++;
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
* URB entry. Since this is an OWord message, Global Offset is counted
* in 128-bit units, so we must set it to 2.
*/
- if (devinfo->gen >= 8 && c->prog_data.static_vertex_count == -1)
+ if (devinfo->gen >= 8 && gs_prog_data->static_vertex_count == -1)
inst->offset = 2;
inst->base_mrf = base_mrf;
inst->mlen = 2;
* do for GL_POINTS outputs that don't use streams).
*/
if (c->control_data_header_size_bits > 0 &&
- c->prog_data.control_data_format ==
+ gs_prog_data->control_data_format ==
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
this->current_annotation = "emit vertex: Stream control data bits";
set_stream_control_data_bits(stream_id);
* consists of cut bits. Fortunately, the only time it isn't is when the
* output type is points, in which case EndPrimitive() is a no-op.
*/
- if (c->prog_data.control_data_format !=
+ if (gs_prog_data->control_data_format !=
GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT) {
return;
}
brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
struct gl_shader_program *shader_prog,
int shader_time_index,
* so without spilling. If the GS invocations count > 1, then we can't use
* dual object mode.
*/
- if (c->prog_data.invocations <= 1 &&
+ if (prog_data->invocations <= 1 &&
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
- c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
+ prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
- vec4_gs_visitor v(compiler, log_data, c, shader,
+ vec4_gs_visitor v(compiler, log_data, c, prog_data, shader,
mem_ctx, true /* no_spills */, shader_time_index);
if (v.run()) {
- vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
+ vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
return g.generate_assembly(v.cfg, final_assembly_size, shader);
}
* mode is more performant when invocations > 1. Gen6 only supports
* SINGLE mode.
*/
- if (c->prog_data.invocations <= 1 || compiler->devinfo->gen < 7)
- c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
+ if (prog_data->invocations <= 1 || compiler->devinfo->gen < 7)
+ prog_data->base.dispatch_mode = DISPATCH_MODE_4X1_SINGLE;
else
- c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
+ prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_INSTANCE;
vec4_gs_visitor *gs = NULL;
const unsigned *ret = NULL;
if (compiler->devinfo->gen >= 7)
- gs = new vec4_gs_visitor(compiler, log_data, c, shader,
- mem_ctx, false /* no_spills */,
+ gs = new vec4_gs_visitor(compiler, log_data, c, prog_data,
+ shader, mem_ctx, false /* no_spills */,
shader_time_index);
else
- gs = new gen6_gs_visitor(compiler, log_data, c, shader_prog, shader,
- mem_ctx, false /* no_spills */,
+ gs = new gen6_gs_visitor(compiler, log_data, c, prog_data, shader_prog,
+ shader, mem_ctx, false /* no_spills */,
shader_time_index);
if (!gs->run()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
- vec4_generator g(compiler, log_data, &c->prog_data.base, mem_ctx,
+ vec4_generator g(compiler, log_data, &prog_data->base, mem_ctx,
INTEL_DEBUG & DEBUG_GS, "geometry", "GS");
ret = g.generate_assembly(gs->cfg, final_assembly_size, shader);
}
vec4_gs_visitor(const struct brw_compiler *compiler,
void *log_data,
struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
src_reg vertex_count;
src_reg control_data_bits;
const struct brw_gs_compile * const c;
+ struct brw_gs_prog_data * const gs_prog_data;
};
} /* namespace brw */
this->prim_count = src_reg(this, glsl_type::uint_type);
emit(MOV(dst_reg(this->prim_count), 0u));
- if (c->prog_data.gen6_xfb_enabled) {
+ if (gs_prog_data->gen6_xfb_enabled) {
/* Create a virtual register to hold destination indices in SOL */
this->destination_indices = src_reg(this, glsl_type::uvec4_type);
/* Create a virtual register to hold number of written primitives */
* in the 3DSTATE_GS state packet. That information can be obtained by other
* means though, so we can safely use r1 for this purpose.
*/
- if (c->prog_data.include_primitive_id) {
+ if (gs_prog_data->include_primitive_id) {
this->primitive_id =
src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
* vertex.
*/
emit(OR(dst, this->first_vertex,
- (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
+ (gs_prog_data->output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
emit(MOV(dst_reg(this->first_vertex), 0u));
}
emit(ADD(dst_reg(this->vertex_output_offset),
this->current_annotation = "gen6 thread end: ff_sync";
vec4_instruction *inst;
- if (c->prog_data.gen6_xfb_enabled) {
+ if (gs_prog_data->gen6_xfb_enabled) {
src_reg sol_temp(this, glsl_type::uvec4_type);
emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
dst_reg(this->svbi),
}
emit(BRW_OPCODE_WHILE);
- if (c->prog_data.gen6_xfb_enabled)
+ if (gs_prog_data->gen6_xfb_enabled)
xfb_write();
}
emit(BRW_OPCODE_ENDIF);
*/
this->current_annotation = "gen6 thread end: EOT";
- if (c->prog_data.gen6_xfb_enabled) {
+ if (gs_prog_data->gen6_xfb_enabled) {
/* When emitting EOT, set SONumPrimsWritten Increment Value. */
src_reg data(this, glsl_type::uint_type);
emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
* information (and move the original value to a virtual register if
* necessary).
*/
- if (c->prog_data.include_primitive_id)
+ if (gs_prog_data->include_primitive_id)
attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg;
reg++;
BRW_SWIZZLE4(3, 3, 3, 3)
};
- struct brw_gs_prog_data *prog_data =
- (struct brw_gs_prog_data *) &c->prog_data;
-
const struct gl_transform_feedback_info *linked_xfb_info =
&this->shader_prog->LinkedTransformFeedback;
int i;
*/
assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
- prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
- for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) {
- prog_data->transform_feedback_bindings[i] =
+ gs_prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+ for (i = 0; i < gs_prog_data->num_transform_feedback_bindings; i++) {
+ gs_prog_data->transform_feedback_bindings[i] =
linked_xfb_info->Outputs[i].OutputRegister;
- prog_data->transform_feedback_swizzles[i] =
+ gs_prog_data->transform_feedback_swizzles[i] =
swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
}
}
gen6_gs_visitor::xfb_write()
{
unsigned num_verts;
- struct brw_gs_prog_data *prog_data =
- (struct brw_gs_prog_data *) &c->prog_data;
- if (!prog_data->num_transform_feedback_bindings)
+ if (!gs_prog_data->num_transform_feedback_bindings)
return;
- switch (c->prog_data.output_topology) {
+ switch (gs_prog_data->output_topology) {
case _3DPRIM_POINTLIST:
num_verts = 1;
break;
void
gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
{
- struct brw_gs_prog_data *prog_data =
- (struct brw_gs_prog_data *) &c->prog_data;
unsigned binding;
- unsigned num_bindings = prog_data->num_transform_feedback_bindings;
+ unsigned num_bindings = gs_prog_data->num_transform_feedback_bindings;
src_reg sol_temp(this, glsl_type::uvec4_type);
/* Check for buffer overflow: we need room to write the complete primitive
*/
for (binding = 0; binding < num_bindings; ++binding) {
unsigned char varying =
- prog_data->transform_feedback_bindings[binding];
+ gs_prog_data->transform_feedback_bindings[binding];
/* Set up the correct destination index for this vertex */
vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
else if (varying == VARYING_SLOT_VIEWPORT)
data.swizzle = BRW_SWIZZLE_ZZZZ;
else
- data.swizzle = prog_data->transform_feedback_swizzles[binding];
+ data.swizzle = gs_prog_data->transform_feedback_swizzles[binding];
/* Write data */
inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
gen6_gs_visitor(const struct brw_compiler *comp,
void *log_data,
struct brw_gs_compile *c,
+ struct brw_gs_prog_data *prog_data,
struct gl_shader_program *prog,
const nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index) :
- vec4_gs_visitor(comp, log_data, c, shader, mem_ctx, no_spills,
+ vec4_gs_visitor(comp, log_data, c, prog_data, shader, mem_ctx, no_spills,
shader_time_index),
shader_prog(prog)
{