}
static void
-set_push_pull_constant_loc(unsigned uniform, int *chunk_start, bool contiguous,
+set_push_pull_constant_loc(unsigned uniform, int *chunk_start,
+ unsigned *max_chunk_bitsize,
+ bool contiguous, unsigned bitsize,
+ const unsigned target_bitsize,
int *push_constant_loc, int *pull_constant_loc,
unsigned *num_push_constants,
unsigned *num_pull_constants,
if (*chunk_start < 0)
*chunk_start = uniform;
+ /* Keep track of the maximum bit size access in contiguous uniforms */
+ *max_chunk_bitsize = MAX2(*max_chunk_bitsize, bitsize);
+
/* If this element does not need to be contiguous with the next, we
* split at this point and everything between chunk_start and u forms a
* single chunk.
*/
if (!contiguous) {
+ /* If bitsize doesn't match the target one, skip it */
+ if (*max_chunk_bitsize != target_bitsize) {
+ /* FIXME: right now we only support 32 and 64-bit accesses */
+ assert(*max_chunk_bitsize == 4 || *max_chunk_bitsize == 8);
+ *max_chunk_bitsize = 0;
+ *chunk_start = -1;
+ return;
+ }
+
unsigned chunk_size = uniform - *chunk_start + 1;
/* Decide whether we should push or pull this parameter. In the
pull_constant_loc[j] = (*num_pull_constants)++;
}
+ *max_chunk_bitsize = 0;
*chunk_start = -1;
}
}
bool is_live[uniforms];
memset(is_live, 0, sizeof(is_live));
- bool is_live_64bit[uniforms];
- memset(is_live_64bit, 0, sizeof(is_live_64bit));
+ unsigned bitsize_access[uniforms];
+ memset(bitsize_access, 0, sizeof(bitsize_access));
/* For each uniform slot, a value of true indicates that the given slot and
* the next slot must remain contiguous. This is used to keep us from
for (unsigned j = constant_nr; j < last; j++) {
is_live[j] = true;
contiguous[j] = true;
- if (type_sz(inst->src[i].type) == 8) {
- is_live_64bit[j] = true;
- }
+ bitsize_access[j] = MAX2(bitsize_access[j], type_sz(inst->src[i].type));
}
is_live[last] = true;
- if (type_sz(inst->src[i].type) == 8) {
- is_live_64bit[last] = true;
- }
+ bitsize_access[last] = MAX2(bitsize_access[last], type_sz(inst->src[i].type));
} else {
if (constant_nr >= 0 && constant_nr < (int) uniforms) {
int regs_read = inst->components_read(i) *
type_sz(inst->src[i].type) / 4;
for (int j = 0; j < regs_read; j++) {
is_live[constant_nr + j] = true;
- if (type_sz(inst->src[i].type) == 8) {
- is_live_64bit[constant_nr + j] = true;
- }
+ bitsize_access[constant_nr + j] =
+ MAX2(bitsize_access[constant_nr + j], type_sz(inst->src[i].type));
}
}
}
memset(pull_constant_loc, -1, uniforms * sizeof(*pull_constant_loc));
int chunk_start = -1;
+ unsigned max_chunk_bitsize = 0;
/* First push 64-bit uniforms to ensure they are properly aligned */
+ const unsigned uniform_64_bit_size = type_sz(BRW_REGISTER_TYPE_DF);
for (unsigned u = 0; u < uniforms; u++) {
- if (!is_live[u] || !is_live_64bit[u])
+ if (!is_live[u])
continue;
- set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+ set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+ contiguous[u], bitsize_access[u],
+ uniform_64_bit_size,
push_constant_loc, pull_constant_loc,
&num_push_constants, &num_pull_constants,
max_push_components, max_chunk_size,
}
/* Then push the rest of uniforms */
+ const unsigned uniform_32_bit_size = type_sz(BRW_REGISTER_TYPE_F);
for (unsigned u = 0; u < uniforms; u++) {
- if (!is_live[u] || is_live_64bit[u])
+ if (!is_live[u])
continue;
/* Skip thread_local_id_index to put it in the last push register. */
if (thread_local_id_index == (int)u)
continue;
- set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
+ set_push_pull_constant_loc(u, &chunk_start, &max_chunk_bitsize,
+ contiguous[u], bitsize_access[u],
+ uniform_32_bit_size,
push_constant_loc, pull_constant_loc,
&num_push_constants, &num_pull_constants,
max_push_components, max_chunk_size,