case GLSL_TYPE_BOOL:
return type->components();
case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_UINT64:
+ case GLSL_TYPE_INT64:
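+ /* 64-bit scalars take two 32-bit components each */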
return type->components() * 2;
case GLSL_TYPE_ARRAY:
return type_size_scalar(type->fields.array) * type->length;
return 0;
}
-/* Attribute arrays are loaded as one vec4 per element (or matrix column),
- * except for double-precision types, which are loaded as one dvec4.
- */
-extern "C" int
-type_size_vs_input(const struct glsl_type *type)
-{
- if (type->is_double()) {
- return type_size_dvec4(type);
- } else {
- return type_size_vec4(type);
- }
-}
-
/**
* Create a MOV to read the timestamp register.
*
}
static void
-set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
+set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
+ unsigned *max_chunk_bitsize,
+ bool contiguous, unsigned bitsize,
+ const unsigned target_bitsize,
int *push_constant_loc, int *pull_constant_loc,
unsigned *num_push_constants,
unsigned *num_pull_constants,
if (*chunk_start < 0)
*chunk_start = uniform;
+ /* Keep track of the maximum bit-size accessed in the contiguous run of uniforms */
+ *max_chunk_bitsize = MAX2(*max_chunk_bitsize, bitsize);
+
/* If this element does not need to be contiguous with the next, we
* split at this point and everything between chunk_start and u forms a
* single chunk.
*/
if (!contiguous) {
+ /* If the bit-size of this chunk doesn't match the target one, skip it;
+  * the pass with the matching target bit-size will place it instead.
+  */
+ if (*max_chunk_bitsize != target_bitsize) {
+ /* FIXME: right now we only support 32 and 64-bit accesses
+  * (type_sz() values of 4 and 8 bytes)
+  */
+ assert(*max_chunk_bitsize == 4 || *max_chunk_bitsize == 8);
+ *max_chunk_bitsize = 0;
+ *chunk_start = -1;
+ return;
+ }
+
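+ /* Size of the chunk, in 32-bit uniform slots */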
unsigned chunk_size = uniform - *chunk_start + 1;
/* Decide whether we should push or pull this parameter. In the
pull_constant_loc[j] = (*num_pull_constants)++;
}
+ *max_chunk_bitsize = 0;
*chunk_start = -1;
}
}
bool is_live[uniforms];
memset(is_live, 0, sizeof(is_live));
- bool is_live_64bit[uniforms];
- memset(is_live_64bit, 0, sizeof(is_live_64bit));
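+ /* Widest access size (in bytes, from type_sz()) seen for each uniform slot */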
+ unsigned bitsize_access[uniforms];
+ memset(bitsize_access, 0, sizeof(bitsize_access));
/* For each uniform slot, a value of true indicates that the given slot and
* the next slot must remain contiguous. This is used to keep us from
for (unsigned j = constant_nr; j < last; j++) {
is_live[j] = true;
contiguous[j] = true;
- if (type_sz(inst->src[i].type) == 8) {
- is_live_64bit[j] = true;
- }
+ bitsize_access[j] = MAX2(bitsize_access[j], type_sz(inst->src[i].type));
}
is_live[last] = true;
+ bitsize_access[last] = MAX2(bitsize_access[last], type_sz(inst->src[i].type));
} else {
if (constant_nr >= 0 && constant_nr < (int) uniforms) {
int regs_read = inst->components_read(i) *
type_sz(inst->src[i].type) / 4;
for (int j = 0; j < regs_read; j++) {
is_live[constant_nr + j] = true;
- if (type_sz(inst->src[i].type) == 8) {
- is_live_64bit[constant_nr + j] = true;
- }
+ bitsize_access[constant_nr + j] =
+ MAX2(bitsize_access[constant_nr + j], type_sz(inst->src[i].type));
}
}
}
memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
int chunk_start = -1;
+ unsigned max_chunk_bitsize = 0;
/* First push 64-bit uniforms to ensure they are properly aligned */
+ const unsigned uniform_64_bit_size = type_sz(BRW_REGISTER_TYPE_DF);
for (unsigned u = 0; u < uniforms; u++) {
- if (!is_live[u] || !is_live_64bit[u])
+ if (!is_live[u])
continue;
- set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+ set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+ contiguous[u], bitsize_access[u],
+ uniform_64_bit_size,
push_constant_loc, pull_constant_loc,
&num_push_constants, &num_pull_constants,
max_push_components, max_chunk_size,
}
/* Then push the rest of the uniforms */
+ const unsigned uniform_32_bit_size = type_sz(BRW_REGISTER_TYPE_F);
for (unsigned u = 0; u < uniforms; u++) {
- if (!is_live[u] || is_live_64bit[u])
+ if (!is_live[u])
continue;
/* Skip thread_local_id_index to put it in the last push register. */
if (thread_local_id_index == (int)u)
continue;
- set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+ set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+ contiguous[u], bitsize_access[u],
+ uniform_32_bit_size,
push_constant_loc, pull_constant_loc,
&num_push_constants, &num_pull_constants,
max_push_components, max_chunk_size,
if (pull_index == -1)
continue;
- const unsigned index = stage_prog_data->binding_table.pull_constants_start;
- fs_reg dst;
-
- if (type_sz(inst->src[i].type) <= 4)
- dst = vgrf(glsl_type::float_type);
- else
- dst = vgrf(glsl_type::double_type);
-
assert(inst->src[i].stride == 0);
- const fs_builder ubld = ibld.exec_all().group(4, 0);
- struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
+ const unsigned index = stage_prog_data->binding_table.pull_constants_start;
+ const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
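+ /* 16 UD channels cover the full 64-byte block (two GRFs) */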
+ const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0);
+ const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
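+ /* pull_index counts 32-bit constants, so the byte offset is pull_index * 4 */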
+ const unsigned base = pull_index * 4;
+
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- dst, brw_imm_ud(index), offset);
+ dst, brw_imm_ud(index), brw_imm_ud(base & ~(block_sz - 1)));
/* Rewrite the instruction to use the temporary VGRF. */
inst->src[i].file = VGRF;
inst->src[i].nr = dst.nr;
- inst->src[i].offset = (pull_index & 3) * 4 + inst->src[i].offset % 4;
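+ /* Re-base the offset so it is relative to the start of the fetched block */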
+ inst->src[i].offset = (base & (block_sz - 1)) +
+ inst->src[i].offset % 4;
brw_mark_surface_used(prog_data, index);
}
}
/* Fix the dispatch mask */
- if (nir->info->tcs.vertices_out % 8) {
+ if (nir->info->tess.tcs_vertices_out % 8) {
bld.CMP(bld.null_reg_ud(), invocation_id,
- brw_imm_ud(nir->info->tcs.vertices_out), BRW_CONDITIONAL_L);
+ brw_imm_ud(nir->info->tess.tcs_vertices_out), BRW_CONDITIONAL_L);
bld.IF(BRW_PREDICATE_NORMAL);
}
emit_nir_code();
- if (nir->info->tcs.vertices_out % 8) {
+ if (nir->info->tess.tcs_vertices_out % 8) {
bld.emit(BRW_OPCODE_ENDIF);
}
unsigned *final_assembly_size,
char **error_str)
{
+ const struct gen_device_info *devinfo = compiler->devinfo;
+
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
- shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
- true);
- brw_nir_lower_fs_inputs(shader, vue_map, prog, compiler->devinfo, key);
+ shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
+ brw_nir_lower_fs_inputs(shader, devinfo, key);
brw_nir_lower_fs_outputs(shader);
+
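+ /* On gen4-5, set up the interpolation map from the VUE layout */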
+ if (devinfo->gen < 6) {
+ brw_setup_vue_interpolation(vue_map, shader, prog_data, devinfo);
+ }
+
if (!key->multisample_fbo)
NIR_PASS_V(shader, demote_sample_qualifiers);
NIR_PASS_V(shader, move_interpolation_to_top);
- shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+ shader = brw_postprocess_nir(shader, compiler, true);
/* key->alpha_test_func means simulating alpha testing via discards,
* so the shader definitely kills pixels.
char **error_str)
{
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
- shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
- true);
+ shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex, true);
brw_nir_lower_cs_shared(shader);
prog_data->base.total_shared += shader->num_shared;
(unsigned)4 * (prog_data->thread_local_id_index + 1));
brw_nir_lower_intrinsics(shader, &prog_data->base);
- shader = brw_postprocess_nir(shader, compiler->devinfo, true);
+ shader = brw_postprocess_nir(shader, compiler, true);
prog_data->local_size[0] = shader->info->cs.local_size[0];
prog_data->local_size[1] = shader->info->cs.local_size[1];