} /* end of anonymous namespace */
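+/* Remap vertex input locations to slots: each input is pushed up by one
+ * location for every dual-slot (64-bit) input recorded below it in
+ * double_inputs_read, so that dvec3/dvec4 attributes end up occupying two
+ * consecutive slots. */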
+static void
+nir_remap_attributes(nir_shader *shader)
+{
+ nir_foreach_variable(var, &shader->inputs) {
+ var->data.location += _mesa_bitcount_64(shader->info->double_inputs_read &
+ BITFIELD64_MASK(var->data.location));
+ }
+
+ /* Once the remap is done, reset double_inputs_read so that it can be
+ * refilled later with the remapped locations/slots that are occupied by
+ * doubles. */
+ shader->info->double_inputs_read = 0;
+}
+
nir_shader *
glsl_to_nir(const struct gl_shader_program *shader_prog,
gl_shader_stage stage,
nir_lower_constant_initializers(shader, (nir_variable_mode)~0);
+ /* Remap the locations to slots so that attributes requiring two slots
+ * occupy two locations. For instance, if the IR has a dvec3 attr0 at
+ * location 0 and a vec4 attr1 at location 1, then in NIR attr0 will use
+ * locations/slots 0 and 1, and attr1 will use location/slot 2. */
+ if (shader->stage == MESA_SHADER_VERTEX)
+ nir_remap_attributes(shader);
+
shader->info->name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
if (shader_prog->Label)
shader->info->label = ralloc_strdup(shader, shader_prog->Label);
var->data.compact = ir->type->without_array()->is_scalar();
}
}
+
+ /* Mark all the locations that require two slots */
+ if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
+ for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, true); i++) {
+ uint64_t bitfield = BITFIELD64_BIT(var->data.location + i);
+ shader->info->double_inputs_read |= bitfield;
+ }
+ }
break;
case ir_var_shader_out:
else
shader->info->inputs_read |= bitfield;
- /* double inputs read is only for vertex inputs */
- if (shader->stage == MESA_SHADER_VERTEX &&
- glsl_type_is_dual_slot(glsl_without_array(var->type)))
- shader->info->double_inputs_read |= bitfield;
-
if (shader->stage == MESA_SHADER_FRAGMENT) {
shader->info->fs.uses_sample_qualifier |= var->data.sample;
}
mark_whole_variable(nir_shader *shader, nir_variable *var)
{
const struct glsl_type *type = var->type;
- bool is_vertex_input = false;
if (nir_is_per_vertex_io(var, shader->stage)) {
assert(glsl_type_is_array(type));
type = glsl_get_array_element(type);
}
- if (shader->stage == MESA_SHADER_VERTEX &&
- var->data.mode == nir_var_shader_in)
- is_vertex_input = true;
-
const unsigned slots =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
- : glsl_count_attribute_slots(type, is_vertex_input);
+ : glsl_count_attribute_slots(type, false);
set_io_mask(shader, var, 0, slots);
}
static unsigned
-get_io_offset(nir_deref_var *deref, bool is_vertex_input)
+get_io_offset(nir_deref_var *deref)
{
unsigned offset = 0;
return -1;
}
- offset += glsl_count_attribute_slots(tail->type, is_vertex_input) *
+ offset += glsl_count_attribute_slots(tail->type, false) *
deref_array->base_offset;
}
- /* TODO: we can get the offset for structs here see nir_lower_io() */
+ /* TODO: we can get the offset for structs here; see nir_lower_io(). */
return false;
}
- bool is_vertex_input = false;
- if (shader->stage == MESA_SHADER_VERTEX &&
- var->data.mode == nir_var_shader_in)
- is_vertex_input = true;
-
- unsigned offset = get_io_offset(deref, is_vertex_input);
+ unsigned offset = get_io_offset(deref);
if (offset == -1)
return false;
}
- /* double element width for double types that takes two slots */
+ /* Double the element width for double types that take two slots. */
- if (!is_vertex_input &&
- glsl_type_is_dual_slot(glsl_without_array(type))) {
+ if (glsl_type_is_dual_slot(glsl_without_array(type))) {
elem_width *= 2;
}
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset:
case nir_intrinsic_load_var:
- case nir_intrinsic_store_var:
- if (instr->variables[0]->var->data.mode == nir_var_shader_in ||
- instr->variables[0]->var->data.mode == nir_var_shader_out) {
+ case nir_intrinsic_store_var: {
+ nir_variable *var = instr->variables[0]->var;
+
+ if (var->data.mode == nir_var_shader_in ||
+ var->data.mode == nir_var_shader_out) {
if (!try_mask_partial_io(shader, instr->variables[0]))
- mark_whole_variable(shader, instr->variables[0]->var);
+ mark_whole_variable(shader, var);
+
+ /* We need to track which inputs_read bits correspond to a
+ * dvec3/dvec4 input attribute. */
+ if (shader->stage == MESA_SHADER_VERTEX &&
+ var->data.mode == nir_var_shader_in &&
+ glsl_type_is_dual_slot(glsl_without_array(var->type))) {
+ for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {
+ int idx = var->data.location + i;
+ shader->info->double_inputs_read |= BITFIELD64_BIT(idx);
+ }
+ }
}
break;
+ }
case nir_intrinsic_load_draw_id:
case nir_intrinsic_load_front_face:
default: unreachable("Invalid component");
}
+ /*
+ * Take into account hardware restrictions when dealing with 64-bit floats.
+ *
+ * From Broadwell spec, Command Reference: Structures, page 586:
+ * "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
+ * 64-bit components are stored in the URB without any conversion. In
+ * this case, vertex elements must be written as 128 or 256 bits, with
+ * VFCOMP_STORE_0 being used to pad the output as required. E.g., if
+ * R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
+ * Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
+ * set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
+ * Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
+ * a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
+ * Component 3 to be specified as VFCOMP_STORE_0 in order to output a
+ * 256-bit vertex element."
+ */
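+ /* For instance, R64_PASSTHRU resolves below to STORE_SRC, STORE_0,
+ * NOSTORE, NOSTORE (a 128-bit element), while R64G64B64_PASSTHRU resolves
+ * to STORE_SRC, STORE_SRC, STORE_SRC, STORE_0 (a 256-bit element). */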
if (bits) {
return VFCOMP_STORE_SRC;
- } else if (comp < 3) {
+ } else if (comp >= 2 &&
+ !isl_format_layouts[format].channels.b.bits &&
+ isl_format_layouts[format].channels.r.type == ISL_RAW) {
+ /* When emitting 64-bit attributes we need to write either 128- or
+ * 256-bit chunks, using VFCOMP_NOSTORE when not writing a chunk and
+ * VFCOMP_STORE_0 to pad the written chunk out as required. */
+ return VFCOMP_NOSTORE;
+ } else if (comp < 3 ||
+ isl_format_layouts[format].channels.r.type == ISL_RAW) {
+ /* Note that we need to pad with value 0, not 1, due to hardware
+ * restrictions (see the comment above). */
return VFCOMP_STORE_0;
} else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
isl_format_layouts[format].channels.r.type == ISL_SINT) {
/* Pull inputs_read out of the VS prog data */
const uint64_t inputs_read = vs_prog_data->inputs_read;
+ const uint64_t double_inputs_read = vs_prog_data->double_inputs_read;
assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
+ const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
#if GEN_GEN >= 8
/* On BDW+, we only need to allocate space for base ids. Setting up
vs_prog_data->uses_baseinstance;
#endif
- uint32_t elem_count = __builtin_popcount(elements) + needs_svgs_elem;
- if (elem_count == 0)
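+ /* Each dual-slot (64-bit) attribute sets two bits in elements_double but
+ * is programmed as a single vertex element, so halve that popcount. */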
+ uint32_t elem_count = __builtin_popcount(elements) -
+ __builtin_popcount(elements_double) / 2;
+
+ uint32_t total_elems = elem_count + needs_svgs_elem;
+ if (total_elems == 0)
return;
uint32_t *p;
- const uint32_t num_dwords = 1 + elem_count * 2;
+ const uint32_t num_dwords = 1 + total_elems * 2;
p = anv_batch_emitn(&pipeline->batch, num_dwords,
GENX(3DSTATE_VERTEX_ELEMENTS));
memset(p + 1, 0, (num_dwords - 1) * 4);
if ((elements & (1 << desc->location)) == 0)
continue; /* Binding unused */
- uint32_t slot = __builtin_popcount(elements & ((1 << desc->location) - 1));
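+ /* 'elements' is slot-based: each 64-bit attribute below this location
+ * has two bits set in elements_double but occupies only one vertex
+ * element, so compact the slot index accordingly. */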
+ uint32_t slot =
+ __builtin_popcount(elements & ((1 << desc->location) - 1)) -
+ DIV_ROUND_UP(__builtin_popcount(elements_double &
+ ((1 << desc->location) - 1)), 2);
struct GENX(VERTEX_ELEMENT_STATE) element = {
.VertexBufferIndex = desc->binding,
#endif
}
- const uint32_t id_slot = __builtin_popcount(elements);
+ const uint32_t id_slot = elem_count;
if (needs_svgs_elem) {
/* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
* "Within a VERTEX_ELEMENT_STATE structure, if a Component
/* Accumulate the list of enabled arrays. */
brw->vb.nr_enabled = 0;
while (vs_inputs) {
- GLuint index = ffsll(vs_inputs) - 1;
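+ /* vs_inputs and 'first' are slot-based; each 64-bit attribute below this
+ * slot sets two bits in double_inputs_read but maps to a single
+ * brw_vertex_element, so compact the index before the lookup. */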
+ GLuint first = ffsll(vs_inputs) - 1;
+ GLuint index =
+ first - DIV_ROUND_UP(_mesa_bitcount_64(vs_prog_data->double_inputs_read &
+ BITFIELD64_MASK(first)), 2);
struct brw_vertex_element *input = &brw->vb.inputs[index];
input->is_dual_slot = brw->gen >= 8 &&
- (vs_prog_data->double_inputs_read & BITFIELD64_BIT(index)) != 0;
- vs_inputs &= ~BITFIELD64_BIT(index);
+ (vs_prog_data->double_inputs_read & BITFIELD64_BIT(first)) != 0;
+ vs_inputs &= ~BITFIELD64_BIT(first);
+ if (input->is_dual_slot)
+ vs_inputs &= ~BITFIELD64_BIT(first + 1);
brw->vb.enabled[brw->vb.nr_enabled++] = input;
}
return 0;
}
-/* Attribute arrays are loaded as one vec4 per element (or matrix column),
- * except for double-precision types, which are loaded as one dvec4.
- */
-extern "C" int
-type_size_vs_input(const struct glsl_type *type)
-{
- if (type->is_double()) {
- return type_size_dvec4(type);
- } else {
- return type_size_vec4(type);
- }
-}
-
/**
* Create a MOV to read the timestamp register.
*
fs_visitor::emit_vs_system_value(int location)
{
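+   /* inputs_read is slot-based after the NIR attribute remap, so it
+    * already accounts for dual-slot (64-bit) inputs. */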
fs_reg *reg = new(this->mem_ctx)
- fs_reg(ATTR, 4 * (_mesa_bitcount_64(nir->info->inputs_read) +
- _mesa_bitcount_64(nir->info->double_inputs_read)),
+ fs_reg(ATTR, 4 * _mesa_bitcount_64(nir->info->inputs_read),
BRW_REGISTER_TYPE_D);
struct brw_vs_prog_data *vs_prog_data = brw_vs_prog_data(prog_data);
int attr = intrin->const_index[0];
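+      /* With slot-based inputs_read, the remapped slot alone determines
+       * the ATTR offset; no separate double-slot correction is needed. */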
int slot = _mesa_bitcount_64(nir_info->inputs_read &
BITFIELD64_MASK(attr));
- int dslot = _mesa_bitcount_64(nir_info->double_inputs_read &
- BITFIELD64_MASK(attr));
- intrin->const_index[0] = 4 * (slot + dslot);
+ intrin->const_index[0] = 4 * slot;
}
}
return true;
- * loaded as one vec4 or dvec4 per element (or matrix column), depending on
- * whether it is a double-precision type or not.
+ * loaded as one vec4 per element (or matrix column). Attributes have
+ * already been remapped to a slot-based layout, so double-precision types
+ * simply occupy two vec4 slots.
 */
-   nir_lower_io(nir, nir_var_shader_in, type_size_vs_input, 0);
+   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);
/* This pass needs actual constants */
nir_opt_constant_folding(nir);
int type_size_scalar(const struct glsl_type *type);
int type_size_vec4(const struct glsl_type *type);
int type_size_dvec4(const struct glsl_type *type);
-int type_size_vs_input(const struct glsl_type *type);
static inline int
type_size_scalar_bytes(const struct glsl_type *type)
((1 << shader->info->cull_distance_array_size) - 1) <<
shader->info->clip_distance_array_size;
- unsigned nr_attributes = _mesa_bitcount_64(prog_data->inputs_read);
+ unsigned nr_attribute_slots = _mesa_bitcount_64(prog_data->inputs_read);
/* gl_VertexID and gl_InstanceID are system values, but arrive via an
* incoming vertex attribute. So, add an extra slot.
BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
- nr_attributes++;
+ nr_attribute_slots++;
}
/* gl_DrawID has its very own vec4 */
if (shader->info->system_values_read &
BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
- nr_attributes++;
+ nr_attribute_slots++;
}
- unsigned nr_attribute_slots =
- nr_attributes +
- _mesa_bitcount_64(shader->info->double_inputs_read);
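+   /* Each 64-bit input occupies two slots and has two bits set in
+    * double_inputs_read, but still counts as a single vertex attribute. */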
+ unsigned nr_attributes = nr_attribute_slots -
+ DIV_ROUND_UP(_mesa_bitcount_64(shader->info->double_inputs_read), 2);
/* The 3DSTATE_VS documentation lists the lower bound on "Vertex URB Entry
* Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. Empirically, in