#include "brw_vec4_gs_visitor.h"
#include "gen6_gs_visitor.h"
+#include "brw_cfg.h"
#include "brw_fs.h"
#include "brw_nir.h"
#include "common/gen_debug.h"
}
-dst_reg *
-vec4_gs_visitor::make_reg_for_system_value(int location)
+static inline struct brw_reg
+attribute_to_hw_reg(int attr, brw_reg_type type, bool interleaved)
{
- dst_reg *reg = new(mem_ctx) dst_reg(this, glsl_type::int_type);
-
- switch (location) {
- case SYSTEM_VALUE_INVOCATION_ID:
- this->current_annotation = "initialize gl_InvocationID";
- if (gs_prog_data->invocations > 1)
- emit(GS_OPCODE_GET_INSTANCE_ID, *reg);
- else
- emit(MOV(*reg, brw_imm_ud(0)));
- break;
- default:
- unreachable("not reached");
+ struct brw_reg reg;
+
+ unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(type));
+ if (interleaved) {
+ reg = stride(brw_vecn_grf(width, attr / 2, (attr % 2) * 4), 0, width, 1);
+ } else {
+ reg = brw_vecn_grf(width, attr, 0);
}
+ reg.type = type;
return reg;
}
-
+/**
+ * Replace each source register of type ATTR in the shader's instructions with
+ * a reference to a fixed HW register.
+ *
+ * If attributes_per_reg is greater than 1, the attributes are interleaved:
+ * each attribute takes up half a register, with register N containing
+ * attribute 2*N in its first half and attribute 2*N+1 in its second half
+ * (this corresponds to the payload setup used by geometry shaders in
+ * "single" or "dual instanced" dispatch mode). Otherwise each attribute
+ * takes up a whole register, with register N containing attribute N (this
+ * corresponds to the payload setup used by vertex shaders, and by geometry
+ * shaders in "dual object" dispatch mode).
+ */
int
-vec4_gs_visitor::setup_varying_inputs(int payload_reg, int *attribute_map,
+vec4_gs_visitor::setup_varying_inputs(int payload_reg,
int attributes_per_reg)
{
/* For geometry shaders there are N copies of the input attributes, where N
assert(num_input_vertices <= MAX_GS_INPUT_VERTICES);
unsigned input_array_stride = prog_data->urb_read_length * 2;
- for (int slot = 0; slot < c->input_vue_map.num_slots; slot++) {
- int varying = c->input_vue_map.slot_to_varying[slot];
- for (unsigned vertex = 0; vertex < num_input_vertices; vertex++) {
- attribute_map[BRW_VARYING_SLOT_COUNT * vertex + varying] =
- attributes_per_reg * payload_reg + input_array_stride * vertex +
- slot;
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file != ATTR)
+ continue;
+
+ assert(inst->src[i].offset % REG_SIZE == 0);
+ int grf = payload_reg * attributes_per_reg +
+ inst->src[i].nr + inst->src[i].offset / REG_SIZE;
+
+ struct brw_reg reg =
+ attribute_to_hw_reg(grf, inst->src[i].type, attributes_per_reg > 1);
+ reg.swizzle = inst->src[i].swizzle;
+ if (inst->src[i].abs)
+ reg = brw_abs(reg);
+ if (inst->src[i].negate)
+ reg = negate(reg);
+
+ inst->src[i] = reg;
}
}
return payload_reg + regs_used;
}
-
void
vec4_gs_visitor::setup_payload()
{
- int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
-
/* If we are in dual instanced or single mode, then attributes are going
* to be interleaved, so one register contains two attribute slots.
*/
int attributes_per_reg =
prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
- /* If a geometry shader tries to read from an input that wasn't written by
- * the vertex shader, that produces undefined results, but it shouldn't
- * crash anything. So initialize attribute_map to zeros--that ensures that
- * these undefined results are read from r0.
- */
- memset(attribute_map, 0, sizeof(attribute_map));
-
int reg = 0;
/* The payload always contains important data in r0, which contains
/* If the shader uses gl_PrimitiveIDIn, that goes in r1. */
if (gs_prog_data->include_primitive_id)
- attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg++;
+ reg++;
reg = setup_uniforms(reg);
- reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
-
- lower_attributes_to_hw_regs(attribute_map, attributes_per_reg > 1);
+ reg = setup_varying_inputs(reg, attributes_per_reg);
this->first_non_payload_grf = reg;
}
assert(c->control_data_bits_per_vertex == 2);
/* Must be a valid stream */
- assert(stream_id >= 0 && stream_id < MAX_VERTEX_STREAMS);
+ assert(stream_id < MAX_VERTEX_STREAMS);
/* Control data bits are initialized to 0 so we don't have to set any
* bits when sending vertices to stream 0.
const nir_shader *src_shader,
struct gl_program *prog,
int shader_time_index,
- unsigned *final_assembly_size,
char **error_str)
{
struct brw_gs_compile c;
shader->info.separate_shader);
shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, is_scalar);
- brw_nir_lower_vue_inputs(shader, is_scalar, &c.input_vue_map);
+ brw_nir_lower_vue_inputs(shader, &c.input_vue_map);
brw_nir_lower_vue_outputs(shader, is_scalar);
shader = brw_postprocess_nir(shader, compiler, is_scalar);
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
* a multiple of 128 bytes in gen6.
*/
- if (compiler->devinfo->gen >= 7)
+ if (compiler->devinfo->gen >= 7) {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
- else
+ /* On Cannonlake software shall not program an allocation size that
+ * specifies a size that is a multiple of 3 64B (512-bit) cachelines.
+ */
+ if (compiler->devinfo->gen == 10 &&
+ prog_data->base.urb_entry_size % 3 == 0)
+ prog_data->base.urb_entry_size++;
+ } else {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
+ }
assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
prog_data->output_topology =
g.enable_debug(name);
}
g.generate_code(v.cfg, 8);
- return g.get_assembly(final_assembly_size);
+ return g.get_assembly();
}
}
vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
mem_ctx, true /* no_spills */, shader_time_index);
+
+ /* Back up 'nr_params' and 'param' as they can be modified by the
+ * DUAL_OBJECT visitor. If it fails, we will run the fallback
+ * (DUAL_INSTANCED or SINGLE mode) and we need to restore the original
+ * values.
+ */
+ const unsigned param_count = prog_data->base.base.nr_params;
+ uint32_t *param = ralloc_array(NULL, uint32_t, param_count);
+ memcpy(param, prog_data->base.base.param,
+ sizeof(uint32_t) * param_count);
+
if (v.run()) {
+ /* Success! Backup is not needed */
+ ralloc_free(param);
return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
- shader, &prog_data->base, v.cfg,
- final_assembly_size);
+ shader, &prog_data->base, v.cfg);
+ } else {
+ /* These variables could be modified by the execution of the GS
+ * visitor if it packed the uniforms in the push constant buffer.
+ * As it failed, we need to restore them so we can start again with
+ * DUAL_INSTANCED or SINGLE mode.
+ *
+ * FIXME: Could more variables be modified by this execution?
+ */
+ memcpy(prog_data->base.base.param, param,
+ sizeof(uint32_t) * param_count);
+ prog_data->base.base.nr_params = param_count;
+ prog_data->base.base.nr_pull_params = 0;
+ ralloc_free(param);
}
}
}
*error_str = ralloc_strdup(mem_ctx, gs->fail_msg);
} else {
ret = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, shader,
- &prog_data->base, gs->cfg,
- final_assembly_size);
+ &prog_data->base, gs->cfg);
}
delete gs;