radeonsi: allow 64 descriptors per array
author: Marek Olšák <marek.olsak@amd.com>
Sat, 31 Jan 2015 16:22:35 +0000 (17:22 +0100)
committer: Marek Olšák <marek.olsak@amd.com>
Wed, 4 Feb 2015 13:34:13 +0000 (14:34 +0100)
We need a slot for the stipple texture, and the pixel shader already uses
32 textures (16 API slots + 16 FMASK slots), so the 32-bit dirty/enabled
masks are no longer wide enough. Widen them to 64 bits.

Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_state.h

index 3452be3de928526ce1e8d3b6259c918773f91988..bbfd36dcbebe8e272d41b13dee5ecc44e998f3ba 100644 (file)
@@ -181,7 +181,7 @@ static void si_update_descriptors(struct si_context *sctx,
        if (desc->dirty_mask) {
                desc->atom.num_dw =
                        7 + /* copy */
-                       (4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask) + /* update */
+                       (4 + desc->element_dw_size) * util_bitcount64(desc->dirty_mask) + /* update */
                        4; /* pointer update */
 
                if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
@@ -241,7 +241,7 @@ static void si_emit_descriptors(struct si_context *sctx,
        int packet_start = 0;
        int packet_size = 0;
        int last_index = desc->num_elements; /* point to a non-existing element */
-       unsigned dirty_mask = desc->dirty_mask;
+       uint64_t dirty_mask = desc->dirty_mask;
        unsigned new_context_id = (desc->current_context_id + 1) % SI_NUM_CONTEXTS;
 
        assert(dirty_mask);
@@ -263,7 +263,7 @@ static void si_emit_descriptors(struct si_context *sctx,
         *     with CP DMA instead of emitting zeros.
         */
        while (dirty_mask) {
-               int i = u_bit_scan(&dirty_mask);
+               int i = u_bit_scan64(&dirty_mask);
 
                assert(i < desc->num_elements);
 
@@ -366,11 +366,11 @@ static enum radeon_bo_priority si_get_resource_ro_priority(struct r600_resource
 static void si_sampler_views_begin_new_cs(struct si_context *sctx,
                                          struct si_sampler_views *views)
 {
-       unsigned mask = views->desc.enabled_mask;
+       uint64_t mask = views->desc.enabled_mask;
 
        /* Add relocations to the CS. */
        while (mask) {
-               int i = u_bit_scan(&mask);
+               int i = u_bit_scan64(&mask);
                struct si_sampler_view *rview =
                        (struct si_sampler_view*)views->views[i];
 
@@ -409,14 +409,14 @@ static void si_set_sampler_view(struct si_context *sctx, unsigned shader,
 
                pipe_sampler_view_reference(&views->views[slot], view);
                views->desc_data[slot] = view_desc;
-               views->desc.enabled_mask |= 1 << slot;
+               views->desc.enabled_mask |= 1llu << slot;
        } else {
                pipe_sampler_view_reference(&views->views[slot], NULL);
                views->desc_data[slot] = null_descriptor;
-               views->desc.enabled_mask &= ~(1 << slot);
+               views->desc.enabled_mask &= ~(1llu << slot);
        }
 
-       views->desc.dirty_mask |= 1 << slot;
+       views->desc.dirty_mask |= 1llu << slot;
 }
 
 static void si_set_sampler_views(struct pipe_context *ctx,
@@ -514,12 +514,12 @@ void si_set_sampler_descriptors(struct si_context *sctx, unsigned shader,
                unsigned slot = start + i;
 
                if (!sstates[i]) {
-                       samplers->desc.dirty_mask &= ~(1 << slot);
+                       samplers->desc.dirty_mask &= ~(1llu << slot);
                        continue;
                }
 
                samplers->desc_data[slot] = sstates[i]->val;
-               samplers->desc.dirty_mask |= 1 << slot;
+               samplers->desc.dirty_mask |= 1llu << slot;
        }
 
        si_update_descriptors(sctx, &samplers->desc);
@@ -579,11 +579,11 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers)
 static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
                                             struct si_buffer_resources *buffers)
 {
-       unsigned mask = buffers->desc.enabled_mask;
+       uint64_t mask = buffers->desc.enabled_mask;
 
        /* Add relocations to the CS. */
        while (mask) {
-               int i = u_bit_scan(&mask);
+               int i = u_bit_scan64(&mask);
 
                r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)buffers->buffers[i],
@@ -767,14 +767,14 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
                r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)buffer,
                                      buffers->shader_usage, buffers->priority);
-               buffers->desc.enabled_mask |= 1 << slot;
+               buffers->desc.enabled_mask |= 1llu << slot;
        } else {
                /* Clear the descriptor. */
                memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
-               buffers->desc.enabled_mask &= ~(1 << slot);
+               buffers->desc.enabled_mask &= ~(1llu << slot);
        }
 
-       buffers->desc.dirty_mask |= 1 << slot;
+       buffers->desc.dirty_mask |= 1llu << slot;
        si_update_descriptors(sctx, &buffers->desc);
 }
 
@@ -860,14 +860,14 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
                r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                                      (struct r600_resource*)buffer,
                                      buffers->shader_usage, buffers->priority);
-               buffers->desc.enabled_mask |= 1 << slot;
+               buffers->desc.enabled_mask |= 1llu << slot;
        } else {
                /* Clear the descriptor. */
                memset(buffers->desc_data[slot], 0, sizeof(uint32_t) * 4);
-               buffers->desc.enabled_mask &= ~(1 << slot);
+               buffers->desc.enabled_mask &= ~(1llu << slot);
        }
 
-       buffers->desc.dirty_mask |= 1 << slot;
+       buffers->desc.dirty_mask |= 1llu << slot;
        si_update_descriptors(sctx, &buffers->desc);
 }
 
@@ -945,24 +945,24 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
                        r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                                              (struct r600_resource*)buffer,
                                              buffers->shader_usage, buffers->priority);
-                       buffers->desc.enabled_mask |= 1 << bufidx;
+                       buffers->desc.enabled_mask |= 1llu << bufidx;
                } else {
                        /* Clear the descriptor and unset the resource. */
                        memset(buffers->desc_data[bufidx], 0,
                               sizeof(uint32_t) * 4);
                        pipe_resource_reference(&buffers->buffers[bufidx],
                                                NULL);
-                       buffers->desc.enabled_mask &= ~(1 << bufidx);
+                       buffers->desc.enabled_mask &= ~(1llu << bufidx);
                }
-               buffers->desc.dirty_mask |= 1 << bufidx;
+               buffers->desc.dirty_mask |= 1llu << bufidx;
        }
        for (; i < old_num_targets; i++) {
                bufidx = SI_SO_BUF_OFFSET + i;
                /* Clear the descriptor and unset the resource. */
                memset(buffers->desc_data[bufidx], 0, sizeof(uint32_t) * 4);
                pipe_resource_reference(&buffers->buffers[bufidx], NULL);
-               buffers->desc.enabled_mask &= ~(1 << bufidx);
-               buffers->desc.dirty_mask |= 1 << bufidx;
+               buffers->desc.enabled_mask &= ~(1llu << bufidx);
+               buffers->desc.dirty_mask |= 1llu << bufidx;
        }
 
        si_update_descriptors(sctx, &buffers->desc);
@@ -1035,10 +1035,10 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
        for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
                struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
                bool found = false;
-               uint32_t mask = buffers->desc.enabled_mask;
+               uint64_t mask = buffers->desc.enabled_mask;
 
                while (mask) {
-                       i = u_bit_scan(&mask);
+                       i = u_bit_scan64(&mask);
                        if (buffers->buffers[i] == buf) {
                                si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
                                                            old_va, buf);
@@ -1047,7 +1047,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
                                                      rbuffer, buffers->shader_usage,
                                                      buffers->priority);
 
-                               buffers->desc.dirty_mask |= 1 << i;
+                               buffers->desc.dirty_mask |= 1llu << i;
                                found = true;
 
                                if (i >= SI_SO_BUF_OFFSET && shader == PIPE_SHADER_VERTEX) {
@@ -1070,10 +1070,10 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
        for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
                struct si_buffer_resources *buffers = &sctx->const_buffers[shader];
                bool found = false;
-               uint32_t mask = buffers->desc.enabled_mask;
+               uint64_t mask = buffers->desc.enabled_mask;
 
                while (mask) {
-                       unsigned i = u_bit_scan(&mask);
+                       unsigned i = u_bit_scan64(&mask);
                        if (buffers->buffers[i] == buf) {
                                si_desc_reset_buffer_offset(ctx, buffers->desc_data[i],
                                                            old_va, buf);
@@ -1082,7 +1082,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
                                                      rbuffer, buffers->shader_usage,
                                                      buffers->priority);
 
-                               buffers->desc.dirty_mask |= 1 << i;
+                               buffers->desc.dirty_mask |= 1llu << i;
                                found = true;
                        }
                }
@@ -1101,16 +1101,16 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
        for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
                struct si_sampler_views *views = &sctx->samplers[shader].views;
                bool found = false;
-               uint32_t mask = views->desc.enabled_mask;
+               uint64_t mask = views->desc.enabled_mask;
 
                while (mask) {
-                       unsigned i = u_bit_scan(&mask);
+                       unsigned i = u_bit_scan64(&mask);
                        if (views->views[i]->texture == buf) {
                                r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx,
                                                      rbuffer, RADEON_USAGE_READ,
                                                      RADEON_PRIO_SHADER_BUFFER_RO);
 
-                               views->desc.dirty_mask |= 1 << i;
+                               views->desc.dirty_mask |= 1llu << i;
                                found = true;
                        }
                }
index 3cd252c0e6420443b31453dd75ba0805a6014dc8..d2feb7da47e761335ea1441d3672d283688fe34a 100644 (file)
@@ -155,9 +155,9 @@ struct si_descriptors {
        unsigned buffer_offset;
 
        /* The i-th bit is set if that element is dirty (changed but not emitted). */
-       unsigned dirty_mask;
+       uint64_t dirty_mask;
        /* The i-th bit is set if that element is enabled (non-NULL resource). */
-       unsigned enabled_mask;
+       uint64_t enabled_mask;
 
        /* We can't update descriptors directly because the GPU might be
         * reading them at the same time, so we have to update them