v3d: support rendering to multi-layered framebuffers
[mesa.git] / src / gallium / drivers / v3d / v3d_uniforms.c
index ebf65b6099350ae1647eb3abb6d8ba87ab5102e6..c94f6be4b764f6ae8fe2e43d79ccecc6f434f6ca 100644 (file)
  */
 
 #include "util/u_pack_color.h"
+#include "util/u_upload_mgr.h"
 #include "util/format_srgb.h"
 
 #include "v3d_context.h"
 #include "compiler/v3d_compiler.h"
 #include "broadcom/cle/v3d_packet_v33_pack.h"
 
-#if 0
-
-#define SWIZ(x,y,z,w) {          \
-        PIPE_SWIZZLE_##x, \
-        PIPE_SWIZZLE_##y, \
-        PIPE_SWIZZLE_##z, \
-        PIPE_SWIZZLE_##w  \
-}
-
-static void
-write_texture_border_color(struct v3d_job *job,
-                           struct v3d_cl_out **uniforms,
-                           struct v3d_texture_stateobj *texstate,
-                           uint32_t unit)
-{
-        struct pipe_sampler_state *sampler = texstate->samplers[unit];
-        struct pipe_sampler_view *texture = texstate->textures[unit];
-        struct v3d_resource *rsc = v3d_resource(texture->texture);
-        union util_color uc;
-
-        const struct util_format_description *tex_format_desc =
-                util_format_description(texture->format);
-
-        float border_color[4];
-        for (int i = 0; i < 4; i++)
-                border_color[i] = sampler->border_color.f[i];
-        if (util_format_is_srgb(texture->format)) {
-                for (int i = 0; i < 3; i++)
-                        border_color[i] =
-                                util_format_linear_to_srgb_float(border_color[i]);
-        }
-
-        /* Turn the border color into the layout of channels that it would
-         * have when stored as texture contents.
-         */
-        float storage_color[4];
-        util_format_unswizzle_4f(storage_color,
-                                 border_color,
-                                 tex_format_desc->swizzle);
-
-        /* Now, pack so that when the v3d_format-sampled texture contents are
-         * replaced with our border color, the v3d_get_format_swizzle()
-         * swizzling will get the right channels.
-         */
-        if (util_format_is_depth_or_stencil(texture->format)) {
-                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
-                                       sampler->border_color.f[0]) << 8;
-        } else {
-                switch (rsc->v3d_format) {
-                default:
-                case VC5_TEXTURE_TYPE_RGBA8888:
-                        util_pack_color(storage_color,
-                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
-                        break;
-                case VC5_TEXTURE_TYPE_RGBA4444:
-                        util_pack_color(storage_color,
-                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
-                        break;
-                case VC5_TEXTURE_TYPE_RGB565:
-                        util_pack_color(storage_color,
-                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
-                        break;
-                case VC5_TEXTURE_TYPE_ALPHA:
-                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
-                        break;
-                case VC5_TEXTURE_TYPE_LUMALPHA:
-                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
-                                    (float_to_ubyte(storage_color[0]) << 0));
-                        break;
-                }
-        }
-
-        cl_aligned_u32(uniforms, uc.ui[0]);
-}
-#endif
-
 static uint32_t
 get_texrect_scale(struct v3d_texture_stateobj *texstate,
                   enum quniform_contents contents,
@@ -147,26 +72,28 @@ get_texture_size(struct v3d_texture_stateobj *texstate,
         }
 }
 
-static struct v3d_bo *
-v3d_upload_ubo(struct v3d_context *v3d,
-               struct v3d_compiled_shader *shader,
-               const uint32_t *gallium_uniforms)
+static uint32_t
+get_image_size(struct v3d_shaderimg_stateobj *shaderimg,
+               enum quniform_contents contents,
+               uint32_t data)
 {
-        if (!shader->prog_data.base->ubo_size)
-                return NULL;
-
-        struct v3d_bo *ubo = v3d_bo_alloc(v3d->screen,
-                                          shader->prog_data.base->ubo_size,
-                                          "ubo");
-        void *data = v3d_bo_map(ubo);
-        for (uint32_t i = 0; i < shader->prog_data.base->num_ubo_ranges; i++) {
-                memcpy(data + shader->prog_data.base->ubo_ranges[i].dst_offset,
-                       ((const void *)gallium_uniforms +
-                        shader->prog_data.base->ubo_ranges[i].src_offset),
-                       shader->prog_data.base->ubo_ranges[i].size);
-        }
+        struct v3d_image_view *image = &shaderimg->si[data];
 
-        return ubo;
+        switch (contents) {
+        case QUNIFORM_IMAGE_WIDTH:
+                return u_minify(image->base.resource->width0,
+                                image->base.u.tex.level);
+        case QUNIFORM_IMAGE_HEIGHT:
+                return u_minify(image->base.resource->height0,
+                                image->base.u.tex.level);
+        case QUNIFORM_IMAGE_DEPTH:
+                return u_minify(image->base.resource->depth0,
+                                image->base.u.tex.level);
+        case QUNIFORM_IMAGE_ARRAY_SIZE:
+                return image->base.resource->array_size;
+        default:
+                unreachable("Bad texture size field");
+        }
 }
 
 /**
@@ -224,17 +151,34 @@ write_tmu_p0(struct v3d_job *job,
              struct v3d_texture_stateobj *texstate,
              uint32_t data)
 {
-        /* Extract the texture unit from the top bits, and the compiler's
+        int unit = v3d_unit_data_get_unit(data);
+        struct pipe_sampler_view *psview = texstate->textures[unit];
+        struct v3d_sampler_view *sview = v3d_sampler_view(psview);
+        struct v3d_resource *rsc = v3d_resource(sview->texture);
+
+        cl_aligned_reloc(&job->indirect, uniforms, sview->bo,
+                         v3d_unit_data_get_offset(data));
+        v3d_job_add_bo(job, rsc->bo);
+}
+
+static void
+write_image_tmu_p0(struct v3d_job *job,
+                   struct v3d_cl_out **uniforms,
+                   struct v3d_shaderimg_stateobj *img,
+                   uint32_t data)
+{
+        /* Extract the image unit from the top bits, and the compiler's
          * packed p0 from the bottom.
          */
         uint32_t unit = data >> 24;
         uint32_t p0 = data & 0x00ffffff;
 
-        struct pipe_sampler_view *psview = texstate->textures[unit];
-        struct v3d_sampler_view *sview = v3d_sampler_view(psview);
-        struct v3d_resource *rsc = v3d_resource(psview->texture);
+        struct v3d_image_view *iview = &img->si[unit];
+        struct v3d_resource *rsc = v3d_resource(iview->base.resource);
 
-        cl_aligned_reloc(&job->indirect, uniforms, sview->bo, p0);
+        cl_aligned_reloc(&job->indirect, uniforms,
+                         v3d_resource(iview->tex_state)->bo,
+                         iview->tex_state_offset | p0);
         v3d_job_add_bo(job, rsc->bo);
 }
 
@@ -245,33 +189,41 @@ write_tmu_p1(struct v3d_job *job,
              struct v3d_texture_stateobj *texstate,
              uint32_t data)
 {
-        /* Extract the texture unit from the top bits, and the compiler's
-         * packed p1 from the bottom.
-         */
-        uint32_t unit = data >> 24;
-        uint32_t p0 = data & 0x00ffffff;
-
+        uint32_t unit = v3d_unit_data_get_unit(data);
         struct pipe_sampler_state *psampler = texstate->samplers[unit];
         struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
+        struct pipe_sampler_view *psview = texstate->textures[unit];
+        struct v3d_sampler_view *sview = v3d_sampler_view(psview);
+        int variant = 0;
+
+        if (sampler->border_color_variants)
+                variant = sview->sampler_variant;
 
-        cl_aligned_reloc(&job->indirect, uniforms, sampler->bo, p0);
+        cl_aligned_reloc(&job->indirect, uniforms,
+                         v3d_resource(sampler->sampler_state)->bo,
+                         sampler->sampler_state_offset[variant] |
+                         v3d_unit_data_get_offset(data));
 }
 
 struct v3d_cl_reloc
-v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
+v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
+                   struct v3d_compiled_shader *shader,
                    enum pipe_shader_type stage)
 {
         struct v3d_constbuf_stateobj *cb = &v3d->constbuf[stage];
         struct v3d_texture_stateobj *texstate = &v3d->tex[stage];
         struct v3d_uniform_list *uinfo = &shader->prog_data.base->uniforms;
-        struct v3d_job *job = v3d->job;
         const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
-        struct v3d_bo *ubo = v3d_upload_ubo(v3d, shader, gallium_uniforms);
 
-        /* We always need to return some space for uniforms, because the HW
-         * will be prefetching, even if we don't read any in the program.
+        /* The hardware always pre-fetches the next uniform (also when there
+         * aren't any), so we always allocate space for an extra slot. This
+         * fixes MMU exceptions reported since Linux kernel 5.4 when the
+         * uniforms fill up the tail bytes of a page in the indirect
+         * BO. In that scenario, when the hardware pre-fetches after reading
+         * the last uniform it will read beyond the end of the page and trigger
+         * the MMU exception.
          */
-        v3d_cl_ensure_space(&job->indirect, MAX2(uinfo->count, 1) * 4, 4);
+        v3d_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);
 
         struct v3d_cl_reloc uniform_stream = cl_get_address(&job->indirect);
         v3d_bo_reference(uniform_stream.bo);
@@ -280,14 +232,14 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
                 cl_start(&job->indirect);
 
         for (int i = 0; i < uinfo->count; i++) {
+                uint32_t data = uinfo->data[i];
 
                 switch (uinfo->contents[i]) {
                 case QUNIFORM_CONSTANT:
-                        cl_aligned_u32(&uniforms, uinfo->data[i]);
+                        cl_aligned_u32(&uniforms, data);
                         break;
                 case QUNIFORM_UNIFORM:
-                        cl_aligned_u32(&uniforms,
-                                       gallium_uniforms[uinfo->data[i]]);
+                        cl_aligned_u32(&uniforms, gallium_uniforms[data]);
                         break;
                 case QUNIFORM_VIEWPORT_X_SCALE:
                         cl_aligned_f(&uniforms, v3d->viewport.scale[0] * 256.0f);
@@ -305,37 +257,33 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
 
                 case QUNIFORM_USER_CLIP_PLANE:
                         cl_aligned_f(&uniforms,
-                                     v3d->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
+                                     v3d->clip.ucp[data / 4][data % 4]);
                         break;
 
                 case QUNIFORM_TMU_CONFIG_P0:
-                        write_tmu_p0(job, &uniforms, texstate,
-                                         uinfo->data[i]);
+                        write_tmu_p0(job, &uniforms, texstate, data);
                         break;
 
                 case QUNIFORM_TMU_CONFIG_P1:
-                        write_tmu_p1(job, &uniforms, texstate,
-                                         uinfo->data[i]);
+                        write_tmu_p1(job, &uniforms, texstate, data);
                         break;
 
-                case QUNIFORM_TEXTURE_CONFIG_P1:
-                        write_texture_p1(job, &uniforms, texstate,
-                                         uinfo->data[i]);
+                case QUNIFORM_IMAGE_TMU_CONFIG_P0:
+                        write_image_tmu_p0(job, &uniforms,
+                                           &v3d->shaderimg[stage], data);
                         break;
 
-#if 0
-                case QUNIFORM_TEXTURE_FIRST_LEVEL:
-                        write_texture_first_level(job, &uniforms, texstate,
-                                                  uinfo->data[i]);
+                case QUNIFORM_TEXTURE_CONFIG_P1:
+                        write_texture_p1(job, &uniforms, texstate,
+                                         data);
                         break;
-#endif
 
                 case QUNIFORM_TEXRECT_SCALE_X:
                 case QUNIFORM_TEXRECT_SCALE_Y:
                         cl_aligned_u32(&uniforms,
                                        get_texrect_scale(texstate,
                                                          uinfo->contents[i],
-                                                         uinfo->data[i]));
+                                                         data));
                         break;
 
                 case QUNIFORM_TEXTURE_WIDTH:
@@ -346,7 +294,17 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
                         cl_aligned_u32(&uniforms,
                                        get_texture_size(texstate,
                                                         uinfo->contents[i],
-                                                        uinfo->data[i]));
+                                                        data));
+                        break;
+
+                case QUNIFORM_IMAGE_WIDTH:
+                case QUNIFORM_IMAGE_HEIGHT:
+                case QUNIFORM_IMAGE_DEPTH:
+                case QUNIFORM_IMAGE_ARRAY_SIZE:
+                        cl_aligned_u32(&uniforms,
+                                       get_image_size(&v3d->shaderimg[stage],
+                                                      uinfo->contents[i],
+                                                      data));
                         break;
 
                 case QUNIFORM_ALPHA_REF:
@@ -354,28 +312,45 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
                                      v3d->zsa->base.alpha.ref_value);
                         break;
 
-                case QUNIFORM_UBO_ADDR:
-                        if (uinfo->data[i] == 0) {
-                                cl_aligned_reloc(&job->indirect, &uniforms,
-                                                 ubo, 0);
-                        } else {
-                                int ubo_index = uinfo->data[i];
-                                struct v3d_resource *rsc =
-                                        v3d_resource(cb->cb[ubo_index].buffer);
-
-                                cl_aligned_reloc(&job->indirect, &uniforms,
-                                                 rsc->bo,
-                                                 cb->cb[ubo_index].buffer_offset);
+                case QUNIFORM_UBO_ADDR: {
+                        uint32_t unit = v3d_unit_data_get_unit(data);
+                        /* Constant buffer 0 may be a system memory pointer,
+                         * in which case we want to upload a shadow copy to
+                         * the GPU.
+                        */
+                        if (!cb->cb[unit].buffer) {
+                                u_upload_data(v3d->uploader, 0,
+                                              cb->cb[unit].buffer_size, 16,
+                                              cb->cb[unit].user_buffer,
+                                              &cb->cb[unit].buffer_offset,
+                                              &cb->cb[unit].buffer);
                         }
+
+                        cl_aligned_reloc(&job->indirect, &uniforms,
+                                         v3d_resource(cb->cb[unit].buffer)->bo,
+                                         cb->cb[unit].buffer_offset +
+                                         v3d_unit_data_get_offset(data));
                         break;
+                }
 
-                case QUNIFORM_TEXTURE_FIRST_LEVEL:
-                        cl_aligned_f(&uniforms,
-                                     texstate->textures[uinfo->data[i]]->u.tex.first_level);
+                case QUNIFORM_SSBO_OFFSET: {
+                        struct pipe_shader_buffer *sb =
+                                &v3d->ssbo[stage].sb[data];
+
+                        cl_aligned_reloc(&job->indirect, &uniforms,
+                                         v3d_resource(sb->buffer)->bo,
+                                         sb->buffer_offset);
                         break;
+                }
 
-                case QUNIFORM_TEXTURE_BORDER_COLOR:
-                        /* XXX */
+                case QUNIFORM_GET_BUFFER_SIZE:
+                        cl_aligned_u32(&uniforms,
+                                       v3d->ssbo[stage].sb[data].buffer_size);
+                        break;
+
+                case QUNIFORM_TEXTURE_FIRST_LEVEL:
+                        cl_aligned_f(&uniforms,
+                                     texstate->textures[data]->u.tex.first_level);
                         break;
 
                 case QUNIFORM_SPILL_OFFSET:
@@ -388,28 +363,38 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_compiled_shader *shader,
                                        v3d->prog.spill_size_per_thread);
                         break;
 
+                case QUNIFORM_NUM_WORK_GROUPS:
+                        cl_aligned_u32(&uniforms,
+                                       v3d->compute_num_workgroups[data]);
+                        break;
+
+                case QUNIFORM_SHARED_OFFSET:
+                        cl_aligned_reloc(&job->indirect, &uniforms,
+                                         v3d->compute_shared_memory, 0);
+                        break;
+
                 default:
                         assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
 
                         write_texture_p0(job, &uniforms, texstate,
                                          uinfo->contents[i] -
                                          QUNIFORM_TEXTURE_CONFIG_P0_0,
-                                         uinfo->data[i]);
+                                         data);
                         break;
 
                 }
 #if 0
                 uint32_t written_val = *((uint32_t *)uniforms - 1);
-                fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f)\n",
+                fprintf(stderr, "shader %p[%d]: 0x%08x / 0x%08x (%f) ",
                         shader, i, __gen_address_offset(&uniform_stream) + i * 4,
                         written_val, uif(written_val));
+                vir_dump_uniform(uinfo->contents[i], data);
+                fprintf(stderr, "\n");
 #endif
         }
 
         cl_end(&job->indirect, uniforms);
 
-        v3d_bo_unreference(&ubo);
-
         return uniform_stream;
 }
 
@@ -441,7 +426,6 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader)
                 case QUNIFORM_TMU_CONFIG_P0:
                 case QUNIFORM_TMU_CONFIG_P1:
                 case QUNIFORM_TEXTURE_CONFIG_P1:
-                case QUNIFORM_TEXTURE_BORDER_COLOR:
                 case QUNIFORM_TEXTURE_FIRST_LEVEL:
                 case QUNIFORM_TEXRECT_SCALE_X:
                 case QUNIFORM_TEXRECT_SCALE_Y:
@@ -455,16 +439,36 @@ v3d_set_shader_uniform_dirty_flags(struct v3d_compiled_shader *shader)
                         /* We could flag this on just the stage we're
                          * compiling for, but it's not passed in.
                          */
-                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
+                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX |
+                                 VC5_DIRTY_GEOMTEX | VC5_DIRTY_COMPTEX;
+                        break;
+
+                case QUNIFORM_SSBO_OFFSET:
+                case QUNIFORM_GET_BUFFER_SIZE:
+                        dirty |= VC5_DIRTY_SSBO;
+                        break;
+
+                case QUNIFORM_IMAGE_TMU_CONFIG_P0:
+                case QUNIFORM_IMAGE_WIDTH:
+                case QUNIFORM_IMAGE_HEIGHT:
+                case QUNIFORM_IMAGE_DEPTH:
+                case QUNIFORM_IMAGE_ARRAY_SIZE:
+                        dirty |= VC5_DIRTY_SHADER_IMAGE;
                         break;
 
                 case QUNIFORM_ALPHA_REF:
                         dirty |= VC5_DIRTY_ZSA;
                         break;
 
+                case QUNIFORM_NUM_WORK_GROUPS:
+                case QUNIFORM_SHARED_OFFSET:
+                        /* Compute always recalculates uniforms. */
+                        break;
+
                 default:
                         assert(quniform_contents_is_texture_p0(shader->prog_data.base->uniforms.contents[i]));
-                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX;
+                        dirty |= VC5_DIRTY_FRAGTEX | VC5_DIRTY_VERTTEX |
+                                 VC5_DIRTY_GEOMTEX | VC5_DIRTY_COMPTEX;
                         break;
                 }
         }