v3d: Block bin on render when doing vertex texturing.
[mesa.git] / src / gallium / drivers / v3d / v3dx_emit.c
index 8a65478a1614e455461ef00a69a5e1dc5f00ad8c..4402218f40437493efbc93031177bba0a8ef6421 100644 (file)
@@ -276,11 +276,20 @@ static void
 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
               struct pipe_blend_state *blend, int rt)
 {
-        cl_emit(&job->bcl, BLEND_CONFIG, config) {
-                struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
+        struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
 
 #if V3D_VERSION >= 40
-                config.render_target_mask = 1 << rt;
+        /* We don't need to emit blend state for disabled RTs. */
+        if (!rtblend->blend_enable)
+                return;
+#endif
+
+        cl_emit(&job->bcl, BLEND_CONFIG, config) {
+#if V3D_VERSION >= 40
+                if (blend->independent_blend_enable)
+                        config.render_target_mask = 1 << rt;
+                else
+                        config.render_target_mask = (1 << VC5_MAX_DRAW_BUFFERS) - 1;
 #else
                 assert(rt == 0);
 #endif
@@ -303,6 +312,93 @@ emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
         }
 }
 
+static void
+emit_flat_shade_flags(struct v3d_job *job,
+                      int varying_offset,
+                      uint32_t varyings,
+                      enum V3DX(Varying_Flags_Action) lower,
+                      enum V3DX(Varying_Flags_Action) higher)
+{
+        cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
+                flags.varying_offset_v0 = varying_offset;
+                flags.flat_shade_flags_for_varyings_v024 = varyings;
+                flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
+                        lower;
+                flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
+                        higher;
+        }
+}
+
+#if V3D_VERSION >= 40
+static void
+emit_noperspective_flags(struct v3d_job *job,
+                         int varying_offset,
+                         uint32_t varyings,
+                         enum V3DX(Varying_Flags_Action) lower,
+                         enum V3DX(Varying_Flags_Action) higher)
+{
+        cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
+                flags.varying_offset_v0 = varying_offset;
+                flags.non_perspective_flags_for_varyings_v024 = varyings;
+                flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
+                        lower;
+                flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
+                        higher;
+        }
+}
+
+static void
+emit_centroid_flags(struct v3d_job *job,
+                    int varying_offset,
+                    uint32_t varyings,
+                    enum V3DX(Varying_Flags_Action) lower,
+                    enum V3DX(Varying_Flags_Action) higher)
+{
+        cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
+                flags.varying_offset_v0 = varying_offset;
+                flags.centroid_flags_for_varyings_v024 = varyings;
+                flags.action_for_centroid_flags_of_lower_numbered_varyings =
+                        lower;
+                flags.action_for_centroid_flags_of_higher_numbered_varyings =
+                        higher;
+        }
+}
+#endif /* V3D_VERSION >= 40 */
+
+static bool
+emit_varying_flags(struct v3d_job *job, uint32_t *flags,
+                   void (*flag_emit_callback)(struct v3d_job *job,
+                                              int varying_offset,
+                                              uint32_t flags,
+                                              enum V3DX(Varying_Flags_Action) lower,
+                                              enum V3DX(Varying_Flags_Action) higher))
+{
+        struct v3d_context *v3d = job->v3d;
+        bool emitted_any = false;
+
+        for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
+                if (!flags[i])
+                        continue;
+
+                if (emitted_any) {
+                        flag_emit_callback(job, i, flags[i],
+                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED,
+                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED);
+                } else if (i == 0) {
+                        flag_emit_callback(job, i, flags[i],
+                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED,
+                                           V3D_VARYING_FLAGS_ACTION_ZEROED);
+                } else {
+                        flag_emit_callback(job, i, flags[i],
+                                           V3D_VARYING_FLAGS_ACTION_ZEROED,
+                                           V3D_VARYING_FLAGS_ACTION_ZEROED);
+                }
+                emitted_any = true;
+        }
+
+        return emitted_any;
+}
+
 void
 v3dX(emit_state)(struct pipe_context *pctx)
 {
@@ -343,20 +439,18 @@ v3dX(emit_state)(struct pipe_context *pctx)
                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
                         clip.clip_window_left_pixel_coordinate = minx;
                         clip.clip_window_bottom_pixel_coordinate = miny;
-                        clip.clip_window_width_in_pixels = maxx - minx;
-                        clip.clip_window_height_in_pixels = maxy - miny;
-
-#if V3D_VERSION < 41
-                        /* The HW won't entirely clip out when scissor w/h is
-                         * 0.  Just treat it the same as rasterizer discard.
-                         */
-                        if (clip.clip_window_width_in_pixels == 0 ||
-                            clip.clip_window_height_in_pixels == 0) {
+                        if (maxx > minx && maxy > miny) {
+                                clip.clip_window_width_in_pixels = maxx - minx;
+                                clip.clip_window_height_in_pixels = maxy - miny;
+                        } else if (V3D_VERSION < 41) {
+                                /* The HW won't entirely clip out when scissor
+                                 * w/h is 0.  Just treat it the same as
+                                 * rasterizer discard.
+                                 */
                                 rasterizer_discard = true;
                                 clip.clip_window_width_in_pixels = 1;
                                 clip.clip_window_height_in_pixels = 1;
                         }
-#endif
                 }
 
                 job->draw_min_x = MIN2(job->draw_min_x, minx);
@@ -387,13 +481,22 @@ v3dX(emit_state)(struct pipe_context *pctx)
                         config.enable_depth_offset =
                                 v3d->rasterizer->base.offset_tri;
 
+                        /* V3D follows GL behavior where the sample mask only
+                         * applies when MSAA is enabled.  Gallium has sample
+                         * mask apply anyway, and the MSAA blit shaders will
+                         * set sample mask without explicitly setting
+                         * rasterizer oversample.  Just force it on here,
+                         * since the blit shaders are the only way to have
+                         * !multisample && samplemask != 0xf.
+                         */
                         config.rasterizer_oversample_mode =
-                                v3d->rasterizer->base.multisample;
+                                v3d->rasterizer->base.multisample ||
+                                v3d->sample_mask != 0xf;
 
                         config.direct3d_provoking_vertex =
                                 v3d->rasterizer->base.flatshade_first;
 
-                        config.blend_enable = v3d->blend->rt[0].blend_enable;
+                        config.blend_enable = v3d->blend->blend_enables;
 
                         /* Note: EZ state may update based on the compiled FS,
                          * along with ZSA
@@ -422,8 +525,14 @@ v3dX(emit_state)(struct pipe_context *pctx)
                 cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
                         depth.depth_offset_factor =
                                 v3d->rasterizer->offset_factor;
-                        depth.depth_offset_units =
-                                v3d->rasterizer->offset_units;
+                        if (job->zsbuf &&
+                            job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
+                                depth.depth_offset_units =
+                                        v3d->rasterizer->z16_offset_units;
+                        } else {
+                                depth.depth_offset_units =
+                                        v3d->rasterizer->offset_units;
+                        }
                 }
         }
 
@@ -452,10 +561,12 @@ v3dX(emit_state)(struct pipe_context *pctx)
                                 v3d->viewport.scale[2];
                 }
                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
-                        clip.minimum_zw = (v3d->viewport.translate[2] -
-                                           v3d->viewport.scale[2]);
-                        clip.maximum_zw = (v3d->viewport.translate[2] +
-                                           v3d->viewport.scale[2]);
+                        float z1 = (v3d->viewport.translate[2] -
+                                    v3d->viewport.scale[2]);
+                        float z2 = (v3d->viewport.translate[2] +
+                                    v3d->viewport.scale[2]);
+                        clip.minimum_zw = MIN2(z1, z2);
+                        clip.maximum_zw = MAX2(z1, z2);
                 }
 
                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
@@ -466,39 +577,35 @@ v3dX(emit_state)(struct pipe_context *pctx)
                 }
         }
 
-        if (v3d->dirty & VC5_DIRTY_BLEND && v3d->blend->rt[0].blend_enable) {
-                struct pipe_blend_state *blend = v3d->blend;
+        if (v3d->dirty & VC5_DIRTY_BLEND) {
+                struct v3d_blend_state *blend = v3d->blend;
 
-                if (blend->independent_blend_enable) {
-                        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++)
-                                emit_rt_blend(v3d, job, blend, i);
-                } else {
-                        emit_rt_blend(v3d, job, blend, 0);
+                if (blend->blend_enables) {
+#if V3D_VERSION >= 40
+                        cl_emit(&job->bcl, BLEND_ENABLES, enables) {
+                                enables.mask = blend->blend_enables;
+                        }
+#endif
+
+                        if (blend->base.independent_blend_enable) {
+                                for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++)
+                                        emit_rt_blend(v3d, job, &blend->base, i);
+                        } else {
+                                emit_rt_blend(v3d, job, &blend->base, 0);
+                        }
                 }
         }
 
         if (v3d->dirty & VC5_DIRTY_BLEND) {
-                struct pipe_blend_state *blend = v3d->blend;
+                struct pipe_blend_state *blend = &v3d->blend->base;
 
                 cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
-                        if (blend->independent_blend_enable) {
-                                mask.render_target_0_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 0);
-                                mask.render_target_1_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[1].colormask, 1);
-                                mask.render_target_2_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[2].colormask, 2);
-                                mask.render_target_3_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[3].colormask, 3);
-                        } else {
-                                mask.render_target_0_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 0);
-                                mask.render_target_1_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 1);
-                                mask.render_target_2_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 2);
-                                mask.render_target_3_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 3);
+                        for (int i = 0; i < 4; i++) {
+                                int rt = blend->independent_blend_enable ? i : 0;
+                                int rt_mask = blend->rt[rt].colormask;
+
+                                mask.mask |= translate_colormask(v3d, rt_mask,
+                                                                 i) << (4 * i);
                         }
                 }
         }
@@ -553,76 +660,26 @@ v3dX(emit_state)(struct pipe_context *pctx)
 #endif
 
         if (v3d->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
-                bool emitted_any = false;
-
-                for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
-                        if (!v3d->prog.fs->prog_data.fs->flat_shade_flags[i])
-                                continue;
-
-                        cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
-                                flags.varying_offset_v0 = i;
-
-                                if (emitted_any) {
-                                        flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
-                                        flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
-                                } else {
-                                        flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
-                                                ((i == 0) ?
-                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED :
-                                                 V3D_VARYING_FLAGS_ACTION_ZEROED);
-
-                                        flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_ZEROED;
-                                }
-
-                                flags.flat_shade_flags_for_varyings_v024 =
-                                        v3d->prog.fs->prog_data.fs->flat_shade_flags[i];
-                        }
-
-                        emitted_any = true;
-                }
-
-                if (!emitted_any) {
+                if (!emit_varying_flags(job,
+                                        v3d->prog.fs->prog_data.fs->flat_shade_flags,
+                                        emit_flat_shade_flags)) {
                         cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
                 }
         }
 
 #if V3D_VERSION >= 40
-        if (v3d->dirty & VC5_DIRTY_CENTROID_FLAGS) {
-                bool emitted_any = false;
-
-                for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->centroid_flags); i++) {
-                        if (!v3d->prog.fs->prog_data.fs->centroid_flags[i])
-                                continue;
-
-                        cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
-                                flags.varying_offset_v0 = i;
-
-                                if (emitted_any) {
-                                        flags.action_for_centroid_flags_of_lower_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
-                                        flags.action_for_centroid_flags_of_higher_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
-                                } else {
-                                        flags.action_for_centroid_flags_of_lower_numbered_varyings =
-                                                ((i == 0) ?
-                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED :
-                                                 V3D_VARYING_FLAGS_ACTION_ZEROED);
-
-                                        flags.action_for_centroid_flags_of_higher_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_ZEROED;
-                                }
-
-                                flags.centroid_flags_for_varyings_v024 =
-                                        v3d->prog.fs->prog_data.fs->centroid_flags[i];
-                        }
-
-                        emitted_any = true;
+        if (v3d->dirty & VC5_DIRTY_NOPERSPECTIVE_FLAGS) {
+                if (!emit_varying_flags(job,
+                                        v3d->prog.fs->prog_data.fs->noperspective_flags,
+                                        emit_noperspective_flags)) {
+                        cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
                 }
+        }
 
-                if (!emitted_any) {
+        if (v3d->dirty & VC5_DIRTY_CENTROID_FLAGS) {
+                if (!emit_varying_flags(job,
+                                        v3d->prog.fs->prog_data.fs->centroid_flags,
+                                        emit_centroid_flags)) {
                         cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
                 }
         }
@@ -681,6 +738,10 @@ v3dX(emit_state)(struct pipe_context *pctx)
                                 so->targets[i];
                         struct v3d_resource *rsc = target ?
                                 v3d_resource(target->buffer) : NULL;
+                        struct pipe_shader_state *vs = &v3d->prog.bind_vs->base;
+                        struct pipe_stream_output_info *info = &vs->stream_output;
+                        uint32_t offset = (v3d->streamout.offsets[i] *
+                                           info->stride[i] * 4);
 
 #if V3D_VERSION >= 40
                         if (!target)
@@ -689,9 +750,10 @@ v3dX(emit_state)(struct pipe_context *pctx)
                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
                                 output.buffer_address =
                                         cl_address(rsc->bo,
-                                                   target->buffer_offset);
+                                                   target->buffer_offset +
+                                                   offset);
                                 output.buffer_size_in_32_bit_words =
-                                        target->buffer_size >> 2;
+                                        (target->buffer_size - offset) >> 2;
                                 output.buffer_number = i;
                         }
 #else /* V3D_VERSION < 40 */
@@ -699,7 +761,8 @@ v3dX(emit_state)(struct pipe_context *pctx)
                                 if (target) {
                                         output.address =
                                                 cl_address(rsc->bo,
-                                                           target->buffer_offset);
+                                                           target->buffer_offset +
+                                                           offset);
                                 }
                         };
 #endif /* V3D_VERSION < 40 */
@@ -719,4 +782,16 @@ v3dX(emit_state)(struct pipe_context *pctx)
                         }
                 }
         }
+
+#if V3D_VERSION >= 40
+        if (v3d->dirty & VC5_DIRTY_SAMPLE_STATE) {
+                cl_emit(&job->bcl, SAMPLE_STATE, state) {
+                        /* Note: SampleCoverage was handled at the
+                         * state_tracker level by converting to sample_mask.
+                         */
+                        state.coverage = fui(1.0) >> 16;
+                        state.mask = job->msaa ? v3d->sample_mask : 0xf;
+                }
+        }
+#endif
 }