broadcom/vc4: Scissor blits performed using the rendering engine.
[mesa.git] / src / gallium / drivers / vc4 / vc4_state.c
index 81dac21f548bcc0b8f9320f28aa2f982df0faf5b..9a3438f8493564b6d340c7df7249b868f6bb4268 100644 (file)
@@ -51,7 +51,9 @@ vc4_set_blend_color(struct pipe_context *pctx,
                     const struct pipe_blend_color *blend_color)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        vc4->blend_color = *blend_color;
+        vc4->blend_color.f = *blend_color;
+        for (int i = 0; i < 4; i++)
+                vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]);
         vc4->dirty |= VC4_DIRTY_BLEND_COLOR;
 }
 
@@ -68,14 +70,16 @@ static void
 vc4_set_clip_state(struct pipe_context *pctx,
                    const struct pipe_clip_state *clip)
 {
-        fprintf(stderr, "clip todo\n");
+        struct vc4_context *vc4 = vc4_context(pctx);
+        vc4->clip = *clip; /* store a by-value copy of the caller's clip state */
+        vc4->dirty |= VC4_DIRTY_CLIP; /* set the clip bit in the context's dirty mask */
 }
 
 static void
 vc4_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        vc4->sample_mask = (uint16_t)sample_mask;
+        vc4->sample_mask = sample_mask & ((1 << VC4_MAX_SAMPLES) - 1); /* keep only the low VC4_MAX_SAMPLES bits */
         vc4->dirty |= VC4_DIRTY_SAMPLE_MASK;
 }
 
@@ -90,6 +94,9 @@ vc4_create_rasterizer_state(struct pipe_context *pctx,
                             const struct pipe_rasterizer_state *cso)
 {
         struct vc4_rasterizer_state *so;
+        struct V3D21_DEPTH_OFFSET depth_offset = { V3D21_DEPTH_OFFSET_header };
+        struct V3D21_POINT_SIZE point_size = { V3D21_POINT_SIZE_header };
+        struct V3D21_LINE_WIDTH line_width = { V3D21_LINE_WIDTH_header };
 
         so = CALLOC_STRUCT(vc4_rasterizer_state);
         if (!so)
@@ -102,8 +109,12 @@ vc4_create_rasterizer_state(struct pipe_context *pctx,
         if (!(cso->cull_face & PIPE_FACE_BACK))
                 so->config_bits[0] |= VC4_CONFIG_BITS_ENABLE_PRIM_BACK;
 
-        /* XXX: per_vertex */
-        so->point_size = cso->point_size;
+        /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
+         * BCM21553).
+         */
+        point_size.point_size = MAX2(cso->point_size, .125f);
+
+        line_width.line_width = cso->line_width;
 
         if (cso->front_ccw)
                 so->config_bits[0] |= VC4_CONFIG_BITS_CW_PRIMITIVES;
@@ -111,10 +122,19 @@ vc4_create_rasterizer_state(struct pipe_context *pctx,
         if (cso->offset_tri) {
                 so->config_bits[0] |= VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET;
 
-                so->offset_units = float_to_187_half(cso->offset_units);
-                so->offset_factor = float_to_187_half(cso->offset_scale);
+                depth_offset.depth_offset_units =
+                        float_to_187_half(cso->offset_units);
+                depth_offset.depth_offset_factor =
+                        float_to_187_half(cso->offset_scale);
         }
 
+        if (cso->multisample)
+                so->config_bits[0] |= VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X;
+
+        V3D21_DEPTH_OFFSET_pack(NULL, so->packed.depth_offset, &depth_offset);
+        V3D21_POINT_SIZE_pack(NULL, so->packed.point_size, &point_size);
+        V3D21_LINE_WIDTH_pack(NULL, so->packed.line_width, &line_width);
+
         return so;
 }
 
@@ -182,12 +202,30 @@ vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx,
 
         so->base = *cso;
 
+        /* We always keep the early Z state correct, since a later state using
+         * early Z may want it.
+         */
+        so->config_bits[2] |= VC4_CONFIG_BITS_EARLY_Z_UPDATE;
+
         if (cso->depth.enabled) {
                 if (cso->depth.writemask) {
                         so->config_bits[1] |= VC4_CONFIG_BITS_Z_UPDATE;
                 }
                 so->config_bits[1] |= (cso->depth.func <<
                                        VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT);
+
+                /* We only handle early Z in the < direction because otherwise
+                 * we'd have to runtime guess which direction to set in the
+                 * render config.
+                 */
+                if ((cso->depth.func == PIPE_FUNC_LESS ||
+                     cso->depth.func == PIPE_FUNC_LEQUAL) &&
+                    (!cso->stencil[0].enabled ||
+                     (cso->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP &&
+                      (!cso->stencil[1].enabled ||
+                       cso->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP)))) {
+                        so->config_bits[2] |= VC4_CONFIG_BITS_EARLY_Z;
+                }
         } else {
                 so->config_bits[1] |= (PIPE_FUNC_ALWAYS <<
                                        VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT);
@@ -199,12 +237,16 @@ vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx,
 
                 uint8_t front_writemask_bits =
                         tlb_stencil_setup_writemask(front->writemask);
-                uint8_t back_writemask_bits =
-                        tlb_stencil_setup_writemask(back->writemask);
+                uint8_t back_writemask = front->writemask;
+                uint8_t back_writemask_bits = front_writemask_bits;
 
                 so->stencil_uniforms[0] =
                         tlb_stencil_setup_bits(front, front_writemask_bits);
                 if (back->enabled) {
+                        back_writemask = back->writemask;
+                        back_writemask_bits =
+                                tlb_stencil_setup_writemask(back->writemask);
+
                         so->stencil_uniforms[0] |= (1 << 30);
                         so->stencil_uniforms[1] =
                                 tlb_stencil_setup_bits(back, back_writemask_bits);
@@ -215,8 +257,8 @@ vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx,
 
                 if (front_writemask_bits == 0xff ||
                     back_writemask_bits == 0xff) {
-                        so->stencil_uniforms[2] = (front_writemask_bits |
-                                                   (back_writemask_bits << 8));
+                        so->stencil_uniforms[2] = (front->writemask |
+                                                   (back_writemask << 8));
                 }
         }
 
@@ -270,24 +312,6 @@ vc4_set_vertex_buffers(struct pipe_context *pctx,
         vc4->dirty |= VC4_DIRTY_VTXBUF;
 }
 
-static void
-vc4_set_index_buffer(struct pipe_context *pctx,
-                     const struct pipe_index_buffer *ib)
-{
-        struct vc4_context *vc4 = vc4_context(pctx);
-
-        if (ib) {
-                pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer);
-                vc4->indexbuf.index_size = ib->index_size;
-                vc4->indexbuf.offset = ib->offset;
-                vc4->indexbuf.user_buffer = ib->user_buffer;
-        } else {
-                pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
-        }
-
-        vc4->dirty |= VC4_DIRTY_INDEXBUF;
-}
-
 static void
 vc4_blend_state_bind(struct pipe_context *pctx, void *hwcso)
 {
@@ -343,8 +367,9 @@ vc4_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
 }
 
 static void
-vc4_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
-                        struct pipe_constant_buffer *cb)
+vc4_set_constant_buffer(struct pipe_context *pctx,
+                        enum pipe_shader_type shader, uint index,
+                        const struct pipe_constant_buffer *cb)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
         struct vc4_constbuf_stateobj *so = &vc4->constbuf[shader];
@@ -378,7 +403,7 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
         struct pipe_framebuffer_state *cso = &vc4->framebuffer;
         unsigned i;
 
-        vc4_flush(pctx);
+        vc4->job = NULL;
 
         for (i = 0; i < framebuffer->nr_cbufs; i++)
                 pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
@@ -387,19 +412,37 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
 
         cso->nr_cbufs = framebuffer->nr_cbufs;
 
+        pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);
+
         cso->width = framebuffer->width;
         cso->height = framebuffer->height;
 
-        pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);
+        /* Nonzero texture mipmap levels are laid out as if they were in
+         * power-of-two-sized spaces.  The renderbuffer config infers its
+         * stride from the width parameter, so we need to configure our
+         * framebuffer.  Note that if the z/color buffers were mismatched
+         * sizes, we wouldn't be able to do this.
+         */
+        if (cso->cbufs[0] && cso->cbufs[0]->u.tex.level) {
+                struct vc4_resource *rsc =
+                        vc4_resource(cso->cbufs[0]->texture);
+                cso->width =
+                        (rsc->slices[cso->cbufs[0]->u.tex.level].stride /
+                         rsc->cpp);
+        } else if (cso->zsbuf && cso->zsbuf->u.tex.level){
+                struct vc4_resource *rsc =
+                        vc4_resource(cso->zsbuf->texture);
+                cso->width =
+                        (rsc->slices[cso->zsbuf->u.tex.level].stride /
+                         rsc->cpp);
+        }
 
         vc4->dirty |= VC4_DIRTY_FRAMEBUFFER;
 }
 
 static struct vc4_texture_stateobj *
-vc4_get_stage_tex(struct vc4_context *vc4, unsigned shader)
+vc4_get_stage_tex(struct vc4_context *vc4, enum pipe_shader_type shader)
 {
-        vc4->dirty |= VC4_DIRTY_TEXSTATE;
-
         switch (shader) {
         case PIPE_SHADER_FRAGMENT:
                 vc4->dirty |= VC4_DIRTY_FRAGTEX;
@@ -415,16 +458,69 @@ vc4_get_stage_tex(struct vc4_context *vc4, unsigned shader)
         }
 }
 
+static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest)
+{
+        switch (p_wrap) { /* PIPE_TEX_WRAP_* -> code later packed into VC4_TEX_P1_WRAP_S/T */
+        case PIPE_TEX_WRAP_REPEAT:
+                return 0;
+        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+                return 1;
+        case PIPE_TEX_WRAP_MIRROR_REPEAT:
+                return 2;
+        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+                return 3;
+        case PIPE_TEX_WRAP_CLAMP: /* no code of its own: chosen by the filter in use */
+                return (using_nearest ? 1 : 3); /* CLAMP_TO_EDGE code if nearest, else CLAMP_TO_BORDER code */
+        default:
+                fprintf(stderr, "Unknown wrap mode %u\n", p_wrap); /* p_wrap is unsigned, so %u */
+                assert(!"not reached");
+                return 0; /* only reached when assert() is compiled out: falls back to the REPEAT code */
+        }
+}
+
 static void *
 vc4_create_sampler_state(struct pipe_context *pctx,
                          const struct pipe_sampler_state *cso)
 {
-        return vc4_generic_cso_state_create(cso, sizeof(*cso));
+        static const uint8_t minfilter_map[6] = {
+                VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR,
+                VC4_TEX_P1_MINFILT_LIN_MIP_NEAR,
+                VC4_TEX_P1_MINFILT_NEAR_MIP_LIN,
+                VC4_TEX_P1_MINFILT_LIN_MIP_LIN,
+                VC4_TEX_P1_MINFILT_NEAREST,
+                VC4_TEX_P1_MINFILT_LINEAR,
+        };
+        static const uint32_t magfilter_map[] = {
+                [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST,
+                [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR,
+        };
+        bool either_nearest =
+                (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
+                 cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
+        struct vc4_sampler_state *so = CALLOC_STRUCT(vc4_sampler_state);
+
+        if (!so)
+                return NULL;
+
+        memcpy(so, cso, sizeof(*cso));
+
+        so->texture_p1 =
+                (VC4_SET_FIELD(magfilter_map[cso->mag_img_filter],
+                               VC4_TEX_P1_MAGFILT) |
+                 VC4_SET_FIELD(minfilter_map[cso->min_mip_filter * 2 +
+                                             cso->min_img_filter],
+                               VC4_TEX_P1_MINFILT) |
+                 VC4_SET_FIELD(translate_wrap(cso->wrap_s, either_nearest),
+                               VC4_TEX_P1_WRAP_S) |
+                 VC4_SET_FIELD(translate_wrap(cso->wrap_t, either_nearest),
+                               VC4_TEX_P1_WRAP_T));
+
+        return so;
 }
 
 static void
 vc4_sampler_states_bind(struct pipe_context *pctx,
-                        unsigned shader, unsigned start,
+                        enum pipe_shader_type shader, unsigned start,
                         unsigned nr, void **hwcso)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
@@ -438,12 +534,10 @@ vc4_sampler_states_bind(struct pipe_context *pctx,
                 if (hwcso[i])
                         new_nr = i + 1;
                 stage_tex->samplers[i] = hwcso[i];
-                stage_tex->dirty_samplers |= (1 << i);
         }
 
         for (; i < stage_tex->num_samplers; i++) {
                 stage_tex->samplers[i] = NULL;
-                stage_tex->dirty_samplers |= (1 << i);
         }
 
         stage_tex->num_samplers = new_nr;
@@ -453,18 +547,72 @@ static struct pipe_sampler_view *
 vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                         const struct pipe_sampler_view *cso)
 {
-        struct pipe_sampler_view *so = malloc(sizeof(*so));
+        struct vc4_sampler_view *so = CALLOC_STRUCT(vc4_sampler_view);
+        struct vc4_resource *rsc = vc4_resource(prsc);
 
         if (!so)
                 return NULL;
 
-        *so = *cso;
+        so->base = *cso;
+
         pipe_reference(NULL, &prsc->reference);
-        so->texture = prsc;
-        so->reference.count = 1;
-        so->context = pctx;
 
-        return so;
+        /* There is no hardware level clamping, and the start address of a
+         * texture may be misaligned, so in that case we have to copy to a
+         * temporary.
+         *
+         * Also, Raspberry Pi doesn't support sampling from raster textures,
+         * so we also have to copy to a temporary then.
+         */
+        if ((cso->u.tex.first_level &&
+             (cso->u.tex.first_level != cso->u.tex.last_level)) ||
+            rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
+                struct vc4_resource *shadow_parent = vc4_resource(prsc);
+                struct pipe_resource tmpl = shadow_parent->base;
+                struct vc4_resource *clone;
+
+                tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+                tmpl.width0 = u_minify(tmpl.width0, cso->u.tex.first_level);
+                tmpl.height0 = u_minify(tmpl.height0, cso->u.tex.first_level);
+                tmpl.last_level = cso->u.tex.last_level - cso->u.tex.first_level;
+
+                prsc = vc4_resource_create(pctx->screen, &tmpl);
+                if (!prsc) {
+                        free(so);
+                        return NULL;
+                }
+                rsc = vc4_resource(prsc);
+                clone = vc4_resource(prsc);
+                clone->shadow_parent = &shadow_parent->base;
+                /* Flag it as needing update of the contents from the parent. */
+                clone->writes = shadow_parent->writes - 1;
+
+                assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
+        } else if (cso->u.tex.first_level) {
+                so->force_first_level = true;
+        }
+        so->base.texture = prsc;
+        so->base.reference.count = 1;
+        so->base.context = pctx;
+
+        so->texture_p0 =
+                (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
+                 VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
+                 VC4_SET_FIELD(so->force_first_level ?
+                               cso->u.tex.last_level :
+                               cso->u.tex.last_level -
+                               cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
+                 VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE,
+                               VC4_TEX_P0_CMMODE));
+        so->texture_p1 =
+                (VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) |
+                 VC4_SET_FIELD(prsc->height0 & 2047, VC4_TEX_P1_HEIGHT) |
+                 VC4_SET_FIELD(prsc->width0 & 2047, VC4_TEX_P1_WIDTH));
+
+        if (prsc->format == PIPE_FORMAT_ETC1_RGB8)
+                so->texture_p1 |= VC4_TEX_P1_ETCFLIP_MASK;
+
+        return &so->base;
 }
 
 static void
@@ -476,7 +624,8 @@ vc4_sampler_view_destroy(struct pipe_context *pctx,
 }
 
 static void
-vc4_set_sampler_views(struct pipe_context *pctx, unsigned shader,
+vc4_set_sampler_views(struct pipe_context *pctx,
+                      enum pipe_shader_type shader,
                       unsigned start, unsigned nr,
                       struct pipe_sampler_view **views)
 {
@@ -487,18 +636,14 @@ vc4_set_sampler_views(struct pipe_context *pctx, unsigned shader,
 
         assert(start == 0);
 
-        vc4->dirty |= VC4_DIRTY_TEXSTATE;
-
         for (i = 0; i < nr; i++) {
                 if (views[i])
                         new_nr = i + 1;
                 pipe_sampler_view_reference(&stage_tex->textures[i], views[i]);
-                stage_tex->dirty_samplers |= (1 << i);
         }
 
         for (; i < stage_tex->num_textures; i++) {
                 pipe_sampler_view_reference(&stage_tex->textures[i], NULL);
-                stage_tex->dirty_samplers |= (1 << i);
         }
 
         stage_tex->num_textures = new_nr;
@@ -518,7 +663,6 @@ vc4_state_init(struct pipe_context *pctx)
         pctx->set_viewport_states = vc4_set_viewport_states;
 
         pctx->set_vertex_buffers = vc4_set_vertex_buffers;
-        pctx->set_index_buffer = vc4_set_index_buffer;
 
         pctx->create_blend_state = vc4_create_blend_state;
         pctx->bind_blend_state = vc4_blend_state_bind;