unsigned shader_userdata_index,
unsigned element_dw_size,
unsigned num_elements,
- const uint32_t *null_descriptor,
unsigned *ce_offset)
{
- int i;
-
assert(num_elements <= sizeof(desc->dirty_mask)*8);
desc->list = CALLOC(num_elements, element_dw_size * 4);
desc->element_dw_size = element_dw_size;
desc->num_elements = num_elements;
- desc->dirty_mask = num_elements == 32 ? ~0u : (1u << num_elements) - 1;
+ desc->dirty_mask = u_bit_consecutive64(0, num_elements);
desc->shader_userdata_offset = shader_userdata_index * 4;
if (ce_offset) {
/* make sure that ce_offset stays 32-byte aligned */
*ce_offset += align(element_dw_size * num_elements * 4, 32);
}
-
- /* Initialize the array to NULL descriptors if the element size is 8. */
- if (null_descriptor) {
- assert(element_dw_size % 8 == 0);
- for (i = 0; i < num_elements * element_dw_size / 8; i++)
- memcpy(desc->list + i * 8, null_descriptor,
- 8 * 4);
- }
}
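
Widening dirty_mask to 64 bits is what forces the helper change above: the merged sampler+image list can exceed 32 elements, and the removed expression special-cased num_elements == 32 because `1u << 32` is undefined in C. A minimal sketch of what u_bit_consecutive64() computes, consistent with Mesa's u_math.h helpers:

	#include <assert.h>
	#include <stdint.h>

	/* Returns a mask with `count` consecutive bits set starting at `start`.
	 * The count == 64 case avoids the undefined full-width shift, just as
	 * the removed ternary avoided 1u << 32 for the old 32-bit mask.
	 */
	static inline uint64_t
	u_bit_consecutive64(unsigned start, unsigned count)
	{
		assert(start + count <= 64);
		if (count == 64)
			return ~(uint64_t)0;
		return (((uint64_t)1 << count) - 1) << start;
	}
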
static void si_release_descriptors(struct si_descriptors *desc)
while (desc->dirty_mask) {
int begin, count;
- u_bit_scan_consecutive_range(&desc->dirty_mask, &begin,
- &count);
+ u_bit_scan_consecutive_range64(&desc->dirty_mask, &begin,
+ &count);
begin *= desc->element_dw_size;
count *= desc->element_dw_size;
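
The upload path now scans the 64-bit mask in runs of consecutive dirty elements and converts element indices to dword offsets (the two multiplications above). A sketch of u_bit_scan_consecutive_range64(), consistent with Mesa's u_math.h; __builtin_ffsll is the GCC/Clang find-first-set builtin, which Mesa wraps portably:

	#include <stdint.h>

	/* Pops the lowest run of consecutive set bits out of *mask, returning
	 * its first bit in *start and its length in *count.
	 * Example: *mask = 0x3c (bits 2..5) -> *start = 2, *count = 4, *mask = 0.
	 */
	static inline void
	u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count)
	{
		if (*mask == ~0ull) {
			*start = 0;
			*count = 64;
			*mask = 0;
			return;
		}
		*start = __builtin_ffsll(*mask) - 1;
		*count = __builtin_ffsll(~(*mask >> *start)) - 1;
		*mask &= ~((((uint64_t)1 << *count) - 1) << *start);
	}
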
/* SAMPLER VIEWS */
static unsigned
-si_sampler_descriptors_idx(unsigned shader)
+si_sampler_and_image_descriptors_idx(unsigned shader)
{
return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
- SI_SHADER_DESCS_SAMPLERS;
+ SI_SHADER_DESCS_SAMPLERS_AND_IMAGES;
}
static struct si_descriptors *
-si_sampler_descriptors(struct si_context *sctx, unsigned shader)
+si_sampler_and_image_descriptors(struct si_context *sctx, unsigned shader)
{
- return &sctx->descriptors[si_sampler_descriptors_idx(shader)];
+ return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
}
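
The new indexing relies on si_get_sampler_slot() and si_get_image_slot(), which are not shown in these hunks. The sketch below is hypothetical; only the constraints are taken from this diff: sampler elements are addressed as list + desc_slot * 16, images as list + desc_slot * 8 packed two per 16-dword element, and the initialization loop further down places all image sub-slots before the sampler elements. The ascending image order is an assumption.

	/* Hypothetical helpers, not part of this diff; they must only satisfy
	 * the layout the surrounding code implies.
	 */
	static inline unsigned si_get_image_slot(unsigned slot)
	{
		/* 8-dword sub-slot in [0, SI_NUM_IMAGES); ascending order assumed */
		return slot;
	}

	static inline unsigned si_get_sampler_slot(unsigned slot)
	{
		/* 16-dword element; samplers start after the packed image half */
		return SI_NUM_IMAGES / 2 + slot;
	}
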
static void si_release_sampler_views(struct si_sampler_views *views)
{
struct si_sampler_views *views = &sctx->samplers[shader].views;
struct si_sampler_view *rview = (struct si_sampler_view*)view;
- struct si_descriptors *descs = si_sampler_descriptors(sctx, shader);
- uint32_t *desc = descs->list + slot * 16;
+ struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
+ unsigned desc_slot = si_get_sampler_slot(slot);
+ uint32_t *desc = descs->list + desc_slot * 16;
if (views->views[slot] == view && !disallow_early_out)
return;
views->enabled_mask &= ~(1u << slot);
}
- descs->dirty_mask |= 1u << slot;
- sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
+ descs->dirty_mask |= 1ull << desc_slot;
+ sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}
static bool is_compressed_colortex(struct r600_texture *rtex)
/* IMAGE VIEWS */
-static unsigned
-si_image_descriptors_idx(unsigned shader)
-{
- return SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS +
- SI_SHADER_DESCS_IMAGES;
-}
-
-static struct si_descriptors*
-si_image_descriptors(struct si_context *sctx, unsigned shader)
-{
- return &sctx->descriptors[si_image_descriptors_idx(shader)];
-}
-
static void
si_release_image_views(struct si_images_info *images)
{
struct si_images_info *images = &ctx->images[shader];
if (images->enabled_mask & (1u << slot)) {
- struct si_descriptors *descs = si_image_descriptors(ctx, shader);
+ struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
+ unsigned desc_slot = si_get_image_slot(slot);
pipe_resource_reference(&images->views[slot].resource, NULL);
images->compressed_colortex_mask &= ~(1 << slot);
- memcpy(descs->list + slot*8, null_image_descriptor, 8*4);
+ memcpy(descs->list + desc_slot*8, null_image_descriptor, 8*4);
images->enabled_mask &= ~(1u << slot);
- descs->dirty_mask |= 1u << slot;
- ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
+ /* two 8-dword image descriptors share one 16-dword element */
+ descs->dirty_mask |= 1u << (desc_slot / 2);
+ ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}
}
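
dirty_mask is tracked per 16-dword element, while image desc_slots count 8-dword sub-slots, so both images of a pair map to one bit; that is the desc_slot / 2 above. Since desc_slot / 2 stays below SI_NUM_IMAGES / 2, the 32-bit 1u shift is safe even though the mask is 64-bit. A small illustration, with SI_NUM_IMAGES = 16 assumed:

	#include <assert.h>
	#include <stdint.h>

	#define SI_NUM_IMAGES 16 /* assumed value of the radeonsi limit */

	/* Maps an 8-dword image sub-slot to its 16-dword element's dirty bit:
	 * sub-slots 0 and 1 -> bit 0, 2 and 3 -> bit 1, ..., 14 and 15 -> bit 7.
	 */
	static inline uint64_t image_dirty_bit(unsigned desc_slot)
	{
		assert(desc_slot < SI_NUM_IMAGES);
		return 1u << (desc_slot / 2); /* desc_slot / 2 <= 7, so 1u is safe */
	}
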
{
struct si_screen *screen = ctx->screen;
struct si_images_info *images = &ctx->images[shader];
- struct si_descriptors *descs = si_image_descriptors(ctx, shader);
+ struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
struct r600_resource *res;
- uint32_t *desc = descs->list + slot * 8;
+ unsigned desc_slot = si_get_image_slot(slot);
+ uint32_t *desc = descs->list + desc_slot * 8;
if (!view || !view->resource) {
si_disable_shader_image(ctx, shader, slot);
}
images->enabled_mask |= 1u << slot;
- descs->dirty_mask |= 1u << slot;
- ctx->descriptors_dirty |= 1u << si_image_descriptors_idx(shader);
+ /* two 8-dword image descriptors share one 16-dword element */
+ descs->dirty_mask |= 1u << (desc_slot / 2);
+ ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
/* Since this can flush, it must be done after enabled_mask is updated. */
si_sampler_view_add_buffer(ctx, &res->b.b,
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_textures_info *samplers = &sctx->samplers[shader];
- struct si_descriptors *desc = si_sampler_descriptors(sctx, shader);
+ struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
struct si_sampler_state **sstates = (struct si_sampler_state**)states;
int i;
for (i = 0; i < count; i++) {
unsigned slot = start + i;
+ unsigned desc_slot = si_get_sampler_slot(slot);
if (!sstates[i] ||
sstates[i] == samplers->views.sampler_states[slot])
((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
continue;
- memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
- desc->dirty_mask |= 1u << slot;
- sctx->descriptors_dirty |= 1u << si_sampler_descriptors_idx(shader);
+ memcpy(desc->list + desc_slot * 16 + 12, sstates[i]->val, 4*4);
+ desc->dirty_mask |= 1ull << desc_slot;
+ sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}
}
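
For reference, the dword offsets used on sampler elements in this diff: the sampler state is written at desc_slot * 16 + 12 (4 dwords), and si_desc_reset_buffer_offset() later patches desc_slot * 16 + 4 for buffer textures. A sketch with hypothetical names; only the numeric offsets appear in the hunks:

	/* Offsets within one 16-dword sampler element, as implied by this diff.
	 * The names are illustrative; dwords [0..7] hold the resource
	 * descriptor, and the use of [8..11] is not visible in these hunks.
	 */
	enum {
		SI_ELEM_DW_RESOURCE = 0,  /* resource descriptor, 8 dwords */
		SI_ELEM_DW_BUF_ADDR = 4,  /* words patched when a buffer moves */
		SI_ELEM_DW_SAMPLER  = 12, /* sampler state, 4 dwords */
	};
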
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
si_init_descriptors(descs, shader_userdata_index, 4,
- num_buffers, NULL, ce_offset);
+ num_buffers, ce_offset);
}
static void si_release_buffer_resources(struct si_buffer_resources *buffers,
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
struct si_sampler_views *views = &sctx->samplers[shader].views;
struct si_descriptors *descs =
- si_sampler_descriptors(sctx, shader);
+ si_sampler_and_image_descriptors(sctx, shader);
unsigned mask = views->enabled_mask;
while (mask) {
unsigned i = u_bit_scan(&mask);
if (views->views[i]->texture == buf) {
+ unsigned desc_slot = si_get_sampler_slot(i);
+
si_desc_reset_buffer_offset(ctx,
descs->list +
- i * 16 + 4,
+ desc_slot * 16 + 4,
old_va, buf);
- descs->dirty_mask |= 1u << i;
+ descs->dirty_mask |= 1ull << desc_slot;
sctx->descriptors_dirty |=
- 1u << si_sampler_descriptors_idx(shader);
+ 1u << si_sampler_and_image_descriptors_idx(shader);
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
rbuffer, RADEON_USAGE_READ,
for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
struct si_images_info *images = &sctx->images[shader];
struct si_descriptors *descs =
- si_image_descriptors(sctx, shader);
+ si_sampler_and_image_descriptors(sctx, shader);
unsigned mask = images->enabled_mask;
while (mask) {
unsigned i = u_bit_scan(&mask);
if (images->views[i].resource == buf) {
+ unsigned desc_slot = si_get_image_slot(i);
+
if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
si_mark_image_range_valid(&images->views[i]);
si_desc_reset_buffer_offset(
- ctx, descs->list + i * 8 + 4,
+ ctx, descs->list + desc_slot * 8 + 4,
old_va, buf);
- descs->dirty_mask |= 1u << i;
+ /* two 8-dword image descriptors share one 16-dword element */
+ descs->dirty_mask |= 1u << (desc_slot / 2);
sctx->descriptors_dirty |=
- 1u << si_image_descriptors_idx(shader);
+ 1u << si_sampler_and_image_descriptors_idx(shader);
radeon_add_to_buffer_list_check_mem(
&sctx->b, &sctx->b.gfx, rbuffer,
* Rarely used descriptors don't use CE RAM.
*/
bool big_ce = sctx->b.chip_class <= VI;
- bool images_use_ce = big_ce;
bool const_and_shaderbufs_use_ce = big_ce ||
i == PIPE_SHADER_VERTEX ||
i == PIPE_SHADER_FRAGMENT;
- bool samplers_use_ce = big_ce ||
- i == PIPE_SHADER_FRAGMENT;
+ bool samplers_and_images_use_ce = big_ce ||
+ i == PIPE_SHADER_FRAGMENT;
si_init_buffer_resources(&sctx->const_and_shader_buffers[i],
si_const_and_shader_buffer_descriptors(sctx, i),
RADEON_PRIO_CONST_BUFFER,
const_and_shaderbufs_use_ce ? &ce_offset : NULL);
- si_init_descriptors(si_sampler_descriptors(sctx, i),
- gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS :
- gfx9_gs ? GFX9_SGPR_GS_SAMPLERS :
- SI_SGPR_SAMPLERS,
- 16, SI_NUM_SAMPLERS,
- null_texture_descriptor,
- samplers_use_ce ? &ce_offset : NULL);
-
- si_init_descriptors(si_image_descriptors(sctx, i),
- gfx9_tcs ? GFX9_SGPR_TCS_IMAGES :
- gfx9_gs ? GFX9_SGPR_GS_IMAGES :
- SI_SGPR_IMAGES,
- 8, SI_NUM_IMAGES,
- null_image_descriptor,
- images_use_ce ? &ce_offset : NULL);
+ struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, i);
+ si_init_descriptors(desc,
+ gfx9_tcs ? GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES :
+ gfx9_gs ? GFX9_SGPR_GS_SAMPLERS_AND_IMAGES :
+ SI_SGPR_SAMPLERS_AND_IMAGES,
+ 16, SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS,
+ samplers_and_images_use_ce ? &ce_offset : NULL);
+
+ int j;
+ for (j = 0; j < SI_NUM_IMAGES; j++)
+ memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
+ for (; j < SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2; j++)
+ memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
}
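
The element count above works out under the usual radeonsi limits (assumed here: SI_NUM_IMAGES = 16, SI_NUM_SAMPLERS = 32): 16 / 2 = 8 image elements plus 32 sampler elements gives 40, which passes the `num_elements <= sizeof(desc->dirty_mask) * 8` assert only because dirty_mask is now 64 bits wide. A compile-time restatement of that bound:

	#define SI_NUM_IMAGES   16 /* assumed radeonsi limits, not part of this diff */
	#define SI_NUM_SAMPLERS 32

	/* 8 packed image elements + 32 sampler elements = 40 <= 64 */
	_Static_assert(SI_NUM_IMAGES / 2 + SI_NUM_SAMPLERS <= 64,
		       "merged sampler+image list must fit the 64-bit dirty_mask");
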
si_init_buffer_resources(&sctx->rw_buffers,
RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER,
&ce_offset);
si_init_descriptors(&sctx->vertex_buffers, SI_SGPR_VERTEX_BUFFERS,
- 4, SI_NUM_VERTEX_BUFFERS, NULL, NULL);
+ 4, SI_NUM_VERTEX_BUFFERS, NULL);
sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
8 + GFX9_SGPR_TCS_CONST_AND_SHADER_BUFFERS);
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
- 8 + GFX9_SGPR_TCS_SAMPLERS);
- ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
- 8 + GFX9_SGPR_TCS_IMAGES);
+ 8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
ret = si_insert_input_ret_float(ctx, ret,
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param,
8 + GFX9_SGPR_GS_CONST_AND_SHADER_BUFFERS);
ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 1,
- 8 + GFX9_SGPR_GS_SAMPLERS);
- ret = si_insert_input_ptr_as_2xi32(ctx, ret, desc_param + 2,
- 8 + GFX9_SGPR_GS_IMAGES);
+ 8 + GFX9_SGPR_GS_SAMPLERS_AND_IMAGES);
unsigned vgpr = 8 + GFX9_GS_NUM_USER_SGPR;
for (unsigned i = 0; i < 5; i++) {
{
params[(*num_params)++] = si_const_array(ctx->v4i32,
SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS);
- params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_SAMPLERS);
- params[(*num_params)++] = si_const_array(ctx->v8i32, SI_NUM_IMAGES);
+ params[(*num_params)++] = si_const_array(ctx->v8i32,
+ SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2);
if (assign_params) {
- ctx->param_const_and_shader_buffers = *num_params - 3;
- ctx->param_samplers = *num_params - 2;
- ctx->param_images = *num_params - 1;
+ ctx->param_const_and_shader_buffers = *num_params - 2;
+ ctx->param_samplers_and_images = *num_params - 1;
}
}
params[num_params++] = ctx->i64;
params[num_params++] = ctx->i64;
params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
params[num_params++] = ctx->i32;
params[num_params++] = ctx->i32;
params[num_params++] = ctx->i32;
params[num_params++] = ctx->i64;
params[num_params++] = ctx->i64;
params[num_params++] = ctx->i64;
- params[num_params++] = ctx->i64;
params[ctx->param_tcs_offchip_layout = num_params++] = ctx->i32;
params[num_params++] = ctx->i32;
params[num_params++] = ctx->i32;
/* Declare input SGPRs. */
params[ctx->param_rw_buffers = num_params++] = ctx->i64;
params[ctx->param_const_and_shader_buffers = num_params++] = ctx->i64;
- params[ctx->param_samplers = num_params++] = ctx->i64;
- params[ctx->param_images = num_params++] = ctx->i64;
+ params[ctx->param_samplers_and_images = num_params++] = ctx->i64;
assert(num_params == SI_PARAM_ALPHA_REF);
params[SI_PARAM_ALPHA_REF] = ctx->f32;
last_sgpr = SI_PARAM_ALPHA_REF;