v3d: support transform feedback with geometry shaders
[mesa.git] / src / gallium / drivers / v3d / v3dx_emit.c
index 8a65478a1614e455461ef00a69a5e1dc5f00ad8c..18c2473955bfb0378c6ac5a920411f1538075c15 100644 (file)
@@ -21,7 +21,7 @@
  * IN THE SOFTWARE.
  */
 
-#include "util/u_format.h"
+#include "util/format/u_format.h"
 #include "util/u_half.h"
 #include "v3d_context.h"
 #include "broadcom/common/v3d_macros.h"
@@ -69,7 +69,9 @@ v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-                return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
+                return (dst_alpha_one ?
+                        V3D_BLEND_FACTOR_ZERO :
+                        V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
         default:
                 unreachable("Bad blend factor");
         }
@@ -276,20 +278,29 @@ static void
 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
               struct pipe_blend_state *blend, int rt)
 {
-        cl_emit(&job->bcl, BLEND_CONFIG, config) {
-                struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
+        struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
+
+#if V3D_VERSION >= 40
+        /* We don't need to emit blend state for disabled RTs. */
+        if (!rtblend->blend_enable)
+                return;
+#endif
 
+        cl_emit(&job->bcl, BLEND_CFG, config) {
 #if V3D_VERSION >= 40
-                config.render_target_mask = 1 << rt;
+                if (blend->independent_blend_enable)
+                        config.render_target_mask = 1 << rt;
+                else
+                        config.render_target_mask = (1 << V3D_MAX_DRAW_BUFFERS) - 1;
 #else
                 assert(rt == 0);
 #endif
 
-                config.colour_blend_mode = rtblend->rgb_func;
-                config.colour_blend_dst_factor =
+                config.color_blend_mode = rtblend->rgb_func;
+                config.color_blend_dst_factor =
                         v3d_factor(rtblend->rgb_dst_factor,
                                    v3d->blend_dst_alpha_one);
-                config.colour_blend_src_factor =
+                config.color_blend_src_factor =
                         v3d_factor(rtblend->rgb_src_factor,
                                    v3d->blend_dst_alpha_one);
 
@@ -303,6 +314,102 @@ emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
         }
 }
 
+static void
+emit_flat_shade_flags(struct v3d_job *job,
+                      int varying_offset, /* copied to varying_offset_v0 */
+                      uint32_t varyings, /* flat-shade flag bits to emit */
+                      enum V3DX(Varying_Flags_Action) lower,
+                      enum V3DX(Varying_Flags_Action) higher)
+{ /* Emit one FLAT_SHADE_FLAGS packet (an emit_varying_flags() callback). */
+        cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
+                flags.varying_offset_v0 = varying_offset;
+                flags.flat_shade_flags_for_varyings_v024 = varyings;
+                flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
+                        lower; /* caller-selected, passed through as-is */
+                flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
+                        higher; /* caller-selected, passed through as-is */
+        }
+}
+
+#if V3D_VERSION >= 40
+static void
+emit_noperspective_flags(struct v3d_job *job,
+                         int varying_offset, /* copied to varying_offset_v0 */
+                         uint32_t varyings, /* non-perspective flag bits */
+                         enum V3DX(Varying_Flags_Action) lower,
+                         enum V3DX(Varying_Flags_Action) higher)
+{ /* Emit one NON_PERSPECTIVE_FLAGS packet; built for V3D_VERSION >= 40. */
+        cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
+                flags.varying_offset_v0 = varying_offset;
+                flags.non_perspective_flags_for_varyings_v024 = varyings;
+                flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
+                        lower; /* caller-selected, passed through as-is */
+                flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
+                        higher; /* caller-selected, passed through as-is */
+        }
+}
+
+static void
+emit_centroid_flags(struct v3d_job *job,
+                    int varying_offset, /* copied to varying_offset_v0 */
+                    uint32_t varyings, /* centroid flag bits to emit */
+                    enum V3DX(Varying_Flags_Action) lower,
+                    enum V3DX(Varying_Flags_Action) higher)
+{ /* Emit one CENTROID_FLAGS packet; built only for V3D_VERSION >= 40. */
+        cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
+                flags.varying_offset_v0 = varying_offset;
+                flags.centroid_flags_for_varyings_v024 = varyings;
+                flags.action_for_centroid_flags_of_lower_numbered_varyings =
+                        lower; /* caller-selected, passed through as-is */
+                flags.action_for_centroid_flags_of_higher_numbered_varyings =
+                        higher; /* caller-selected, passed through as-is */
+        }
+}
+#endif /* V3D_VERSION >= 40 */
+
+static bool
+emit_varying_flags(struct v3d_job *job, uint32_t *flags,
+                   void (*flag_emit_callback)(struct v3d_job *job,
+                                              int varying_offset,
+                                              uint32_t flags,
+                                              enum V3DX(Varying_Flags_Action) lower,
+                                              enum V3DX(Varying_Flags_Action) higher))
+{ /* Calls flag_emit_callback for each non-zero word of flags[], in order. */
+        struct v3d_context *v3d = job->v3d; /* only used to size the loop */
+        bool emitted_any = false;
+        /* NOTE(review): bound assumes flags[] is as long as flat_shade_flags. */
+        for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
+                if (!flags[i]) /* nothing set in this word: no packet */
+                        continue;
+
+                if (emitted_any) { /* not the first packet: UNCHANGED both ways */
+                        flag_emit_callback(job, i, flags[i],
+                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED,
+                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED);
+                } else if (i == 0) { /* first packet, word 0: ZEROED higher only */
+                        flag_emit_callback(job, i, flags[i],
+                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED,
+                                           V3D_VARYING_FLAGS_ACTION_ZEROED);
+                } else { /* first packet, later word: ZEROED both ways */
+                        flag_emit_callback(job, i, flags[i],
+                                           V3D_VARYING_FLAGS_ACTION_ZEROED,
+                                           V3D_VARYING_FLAGS_ACTION_ZEROED);
+                }
+                emitted_any = true;
+        }
+        /* Returns false when no word was set; callers then emit ZERO_ALL_*. */
+        return emitted_any;
+}
+
+static inline struct v3d_uncompiled_shader *
+get_tf_shader(struct v3d_context *v3d)
+{ /* Picks the bound shader whose transform feedback specs get emitted. */
+        if (v3d->prog.bind_gs) /* a geometry shader is bound: it wins */
+                return v3d->prog.bind_gs;
+        else
+                return v3d->prog.bind_vs; /* otherwise the vertex shader */
+}
+
 void
 v3dX(emit_state)(struct pipe_context *pctx)
 {
@@ -343,20 +450,18 @@ v3dX(emit_state)(struct pipe_context *pctx)
                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
                         clip.clip_window_left_pixel_coordinate = minx;
                         clip.clip_window_bottom_pixel_coordinate = miny;
-                        clip.clip_window_width_in_pixels = maxx - minx;
-                        clip.clip_window_height_in_pixels = maxy - miny;
-
-#if V3D_VERSION < 41
-                        /* The HW won't entirely clip out when scissor w/h is
-                         * 0.  Just treat it the same as rasterizer discard.
-                         */
-                        if (clip.clip_window_width_in_pixels == 0 ||
-                            clip.clip_window_height_in_pixels == 0) {
+                        if (maxx > minx && maxy > miny) {
+                                clip.clip_window_width_in_pixels = maxx - minx;
+                                clip.clip_window_height_in_pixels = maxy - miny;
+                        } else if (V3D_VERSION < 41) {
+                                /* The HW won't entirely clip out when scissor
+                                 * w/h is 0.  Just treat it the same as
+                                 * rasterizer discard.
+                                 */
                                 rasterizer_discard = true;
                                 clip.clip_window_width_in_pixels = 1;
                                 clip.clip_window_height_in_pixels = 1;
                         }
-#endif
                 }
 
                 job->draw_min_x = MIN2(job->draw_min_x, minx);
@@ -369,7 +474,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
                           VC5_DIRTY_ZSA |
                           VC5_DIRTY_BLEND |
                           VC5_DIRTY_COMPILED_FS)) {
-                cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
+                cl_emit(&job->bcl, CFG_BITS, config) {
                         config.enable_forward_facing_primitive =
                                 !rasterizer_discard &&
                                 !(v3d->rasterizer->base.cull_face &
@@ -387,13 +492,22 @@ v3dX(emit_state)(struct pipe_context *pctx)
                         config.enable_depth_offset =
                                 v3d->rasterizer->base.offset_tri;
 
+                        /* V3D follows GL behavior where the sample mask only
+                         * applies when MSAA is enabled.  Gallium has sample
+                         * mask apply anyway, and the MSAA blit shaders will
+                         * set sample mask without explicitly setting
+                         * rasterizer oversample.  Just force it on here,
+                         * since the blit shaders are the only way to have
+                         * !multisample && samplemask != 0xf.
+                         */
                         config.rasterizer_oversample_mode =
-                                v3d->rasterizer->base.multisample;
+                                v3d->rasterizer->base.multisample ||
+                                v3d->sample_mask != 0xf;
 
                         config.direct3d_provoking_vertex =
                                 v3d->rasterizer->base.flatshade_first;
 
-                        config.blend_enable = v3d->blend->rt[0].blend_enable;
+                        config.blend_enable = v3d->blend->blend_enables;
 
                         /* Note: EZ state may update based on the compiled FS,
                          * along with ZSA
@@ -419,11 +533,15 @@ v3dX(emit_state)(struct pipe_context *pctx)
 
         if (v3d->dirty & VC5_DIRTY_RASTERIZER &&
             v3d->rasterizer->base.offset_tri) {
-                cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
-                        depth.depth_offset_factor =
-                                v3d->rasterizer->offset_factor;
-                        depth.depth_offset_units =
-                                v3d->rasterizer->offset_units;
+                if (job->zsbuf &&
+                    job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
+                        cl_emit_prepacked_sized(&job->bcl,
+                                                v3d->rasterizer->depth_offset_z16,
+                                                cl_packet_length(DEPTH_OFFSET));
+                } else {
+                        cl_emit_prepacked_sized(&job->bcl,
+                                                v3d->rasterizer->depth_offset,
+                                                cl_packet_length(DEPTH_OFFSET));
                 }
         }
 
@@ -452,10 +570,12 @@ v3dX(emit_state)(struct pipe_context *pctx)
                                 v3d->viewport.scale[2];
                 }
                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
-                        clip.minimum_zw = (v3d->viewport.translate[2] -
-                                           v3d->viewport.scale[2]);
-                        clip.maximum_zw = (v3d->viewport.translate[2] +
-                                           v3d->viewport.scale[2]);
+                        float z1 = (v3d->viewport.translate[2] -
+                                    v3d->viewport.scale[2]);
+                        float z2 = (v3d->viewport.translate[2] +
+                                    v3d->viewport.scale[2]);
+                        clip.minimum_zw = MIN2(z1, z2);
+                        clip.maximum_zw = MAX2(z1, z2);
                 }
 
                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
@@ -466,39 +586,35 @@ v3dX(emit_state)(struct pipe_context *pctx)
                 }
         }
 
-        if (v3d->dirty & VC5_DIRTY_BLEND && v3d->blend->rt[0].blend_enable) {
-                struct pipe_blend_state *blend = v3d->blend;
+        if (v3d->dirty & VC5_DIRTY_BLEND) {
+                struct v3d_blend_state *blend = v3d->blend;
+
+                if (blend->blend_enables) {
+#if V3D_VERSION >= 40
+                        cl_emit(&job->bcl, BLEND_ENABLES, enables) {
+                                enables.mask = blend->blend_enables;
+                        }
+#endif
 
-                if (blend->independent_blend_enable) {
-                        for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++)
-                                emit_rt_blend(v3d, job, blend, i);
-                } else {
-                        emit_rt_blend(v3d, job, blend, 0);
+                        if (blend->base.independent_blend_enable) {
+                                for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++)
+                                        emit_rt_blend(v3d, job, &blend->base, i);
+                        } else {
+                                emit_rt_blend(v3d, job, &blend->base, 0);
+                        }
                 }
         }
 
         if (v3d->dirty & VC5_DIRTY_BLEND) {
-                struct pipe_blend_state *blend = v3d->blend;
-
-                cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
-                        if (blend->independent_blend_enable) {
-                                mask.render_target_0_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 0);
-                                mask.render_target_1_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[1].colormask, 1);
-                                mask.render_target_2_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[2].colormask, 2);
-                                mask.render_target_3_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[3].colormask, 3);
-                        } else {
-                                mask.render_target_0_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 0);
-                                mask.render_target_1_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 1);
-                                mask.render_target_2_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 2);
-                                mask.render_target_3_per_colour_component_write_masks =
-                                        translate_colormask(v3d, blend->rt[0].colormask, 3);
+                struct pipe_blend_state *blend = &v3d->blend->base;
+
+                cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
+                        for (int i = 0; i < 4; i++) {
+                                int rt = blend->independent_blend_enable ? i : 0;
+                                int rt_mask = blend->rt[rt].colormask;
+
+                                mask.mask |= translate_colormask(v3d, rt_mask,
+                                                                 i) << (4 * i);
                         }
                 }
         }
@@ -508,15 +624,15 @@ v3dX(emit_state)(struct pipe_context *pctx)
          */
         if (v3d->dirty & VC5_DIRTY_BLEND_COLOR ||
             (V3D_VERSION < 41 && (v3d->dirty & VC5_DIRTY_BLEND))) {
-                cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
-                        colour.red_f16 = (v3d->swap_color_rb ?
+                cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
+                        color.red_f16 = (v3d->swap_color_rb ?
                                           v3d->blend_color.hf[2] :
                                           v3d->blend_color.hf[0]);
-                        colour.green_f16 = v3d->blend_color.hf[1];
-                        colour.blue_f16 = (v3d->swap_color_rb ?
+                        color.green_f16 = v3d->blend_color.hf[1];
+                        color.blue_f16 = (v3d->swap_color_rb ?
                                            v3d->blend_color.hf[0] :
                                            v3d->blend_color.hf[2]);
-                        colour.alpha_f16 = v3d->blend_color.hf[3];
+                        color.alpha_f16 = v3d->blend_color.hf[3];
                 }
         }
 
@@ -525,7 +641,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
                 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
 
                 if (front->enabled) {
-                        cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
+                        cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
                                                v3d->zsa->stencil_front, config) {
                                 config.stencil_ref_value =
                                         v3d->stencil_ref.ref_value[0];
@@ -533,7 +649,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
                 }
 
                 if (back->enabled) {
-                        cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
+                        cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
                                                v3d->zsa->stencil_back, config) {
                                 config.stencil_ref_value =
                                         v3d->stencil_ref.ref_value[1];
@@ -546,83 +662,36 @@ v3dX(emit_state)(struct pipe_context *pctx)
          * the view, so we merge them together at draw time.
          */
         if (v3d->dirty & VC5_DIRTY_FRAGTEX)
-                emit_textures(v3d, &v3d->fragtex);
+                emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]);
+
+        if (v3d->dirty & VC5_DIRTY_GEOMTEX)
+                emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]);
 
         if (v3d->dirty & VC5_DIRTY_VERTTEX)
-                emit_textures(v3d, &v3d->verttex);
+                emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]);
 #endif
 
         if (v3d->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
-                bool emitted_any = false;
-
-                for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
-                        if (!v3d->prog.fs->prog_data.fs->flat_shade_flags[i])
-                                continue;
-
-                        cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
-                                flags.varying_offset_v0 = i;
-
-                                if (emitted_any) {
-                                        flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
-                                        flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
-                                } else {
-                                        flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
-                                                ((i == 0) ?
-                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED :
-                                                 V3D_VARYING_FLAGS_ACTION_ZEROED);
-
-                                        flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_ZEROED;
-                                }
-
-                                flags.flat_shade_flags_for_varyings_v024 =
-                                        v3d->prog.fs->prog_data.fs->flat_shade_flags[i];
-                        }
-
-                        emitted_any = true;
-                }
-
-                if (!emitted_any) {
+                if (!emit_varying_flags(job,
+                                        v3d->prog.fs->prog_data.fs->flat_shade_flags,
+                                        emit_flat_shade_flags)) {
                         cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
                 }
         }
 
 #if V3D_VERSION >= 40
-        if (v3d->dirty & VC5_DIRTY_CENTROID_FLAGS) {
-                bool emitted_any = false;
-
-                for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->centroid_flags); i++) {
-                        if (!v3d->prog.fs->prog_data.fs->centroid_flags[i])
-                                continue;
-
-                        cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
-                                flags.varying_offset_v0 = i;
-
-                                if (emitted_any) {
-                                        flags.action_for_centroid_flags_of_lower_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
-                                        flags.action_for_centroid_flags_of_higher_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_UNCHANGED;
-                                } else {
-                                        flags.action_for_centroid_flags_of_lower_numbered_varyings =
-                                                ((i == 0) ?
-                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED :
-                                                 V3D_VARYING_FLAGS_ACTION_ZEROED);
-
-                                        flags.action_for_centroid_flags_of_higher_numbered_varyings =
-                                                V3D_VARYING_FLAGS_ACTION_ZEROED;
-                                }
-
-                                flags.centroid_flags_for_varyings_v024 =
-                                        v3d->prog.fs->prog_data.fs->centroid_flags[i];
-                        }
-
-                        emitted_any = true;
+        if (v3d->dirty & VC5_DIRTY_NOPERSPECTIVE_FLAGS) {
+                if (!emit_varying_flags(job,
+                                        v3d->prog.fs->prog_data.fs->noperspective_flags,
+                                        emit_noperspective_flags)) {
+                        cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
                 }
+        }
 
-                if (!emitted_any) {
+        if (v3d->dirty & VC5_DIRTY_CENTROID_FLAGS) {
+                if (!emit_varying_flags(job,
+                                        v3d->prog.fs->prog_data.fs->centroid_flags,
+                                        emit_centroid_flags)) {
                         cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
                 }
         }
@@ -635,52 +704,57 @@ v3dX(emit_state)(struct pipe_context *pctx)
                           VC5_DIRTY_RASTERIZER |
                           VC5_DIRTY_PRIM_MODE)) {
                 struct v3d_streamout_stateobj *so = &v3d->streamout;
-
                 if (so->num_targets) {
                         bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS &&
                                                 v3d->rasterizer->base.point_size_per_vertex);
+                        struct v3d_uncompiled_shader *tf_shader =
+                                get_tf_shader(v3d);
                         uint16_t *tf_specs = (psiz_per_vertex ?
-                                              v3d->prog.bind_vs->tf_specs_psiz :
-                                              v3d->prog.bind_vs->tf_specs);
+                                              tf_shader->tf_specs_psiz :
+                                              tf_shader->tf_specs);
 
 #if V3D_VERSION >= 40
-                        job->tf_enabled = (v3d->prog.bind_vs->num_tf_specs != 0 &&
-                                           v3d->active_queries);
+                        bool tf_enabled = v3d_transform_feedback_enabled(v3d);
+                        job->tf_enabled |= tf_enabled;
 
                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
                                 tfe.number_of_16_bit_output_data_specs_following =
-                                        v3d->prog.bind_vs->num_tf_specs;
-                                tfe.enable = job->tf_enabled;
+                                        tf_shader->num_tf_specs;
+                                tfe.enable = tf_enabled;
                         };
 #else /* V3D_VERSION < 40 */
                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
                                 tfe.number_of_32_bit_output_buffer_address_following =
                                         so->num_targets;
                                 tfe.number_of_16_bit_output_data_specs_following =
-                                        v3d->prog.bind_vs->num_tf_specs;
+                                        tf_shader->num_tf_specs;
                         };
 #endif /* V3D_VERSION < 40 */
-                        for (int i = 0; i < v3d->prog.bind_vs->num_tf_specs; i++) {
+                        for (int i = 0; i < tf_shader->num_tf_specs; i++) {
                                 cl_emit_prepacked(&job->bcl, &tf_specs[i]);
                         }
-                } else if (job->tf_enabled) {
+                } else {
 #if V3D_VERSION >= 40
                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
                                 tfe.enable = false;
                         };
-                        job->tf_enabled = false;
 #endif /* V3D_VERSION >= 40 */
                 }
         }
 
         /* Set up the trasnform feedback buffers. */
         if (v3d->dirty & VC5_DIRTY_STREAMOUT) {
+                struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);
                 struct v3d_streamout_stateobj *so = &v3d->streamout;
                 for (int i = 0; i < so->num_targets; i++) {
                         const struct pipe_stream_output_target *target =
                                 so->targets[i];
                         struct v3d_resource *rsc = target ?
                                 v3d_resource(target->buffer) : NULL;
+                        struct pipe_shader_state *ss = &tf_shader->base;
+                        struct pipe_stream_output_info *info = &ss->stream_output;
+                        uint32_t offset = (v3d->streamout.offsets[i] *
+                                           info->stride[i] * 4);
 
 #if V3D_VERSION >= 40
                         if (!target)
@@ -689,9 +763,10 @@ v3dX(emit_state)(struct pipe_context *pctx)
                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
                                 output.buffer_address =
                                         cl_address(rsc->bo,
-                                                   target->buffer_offset);
+                                                   target->buffer_offset +
+                                                   offset);
                                 output.buffer_size_in_32_bit_words =
-                                        target->buffer_size >> 2;
+                                        (target->buffer_size - offset) >> 2;
                                 output.buffer_number = i;
                         }
 #else /* V3D_VERSION < 40 */
@@ -699,13 +774,14 @@ v3dX(emit_state)(struct pipe_context *pctx)
                                 if (target) {
                                         output.address =
                                                 cl_address(rsc->bo,
-                                                           target->buffer_offset);
+                                                           target->buffer_offset +
+                                                           offset);
                                 }
                         };
 #endif /* V3D_VERSION < 40 */
                         if (target) {
-                                v3d_job_add_write_resource(v3d->job,
-                                                           target->buffer);
+                                v3d_job_add_tf_write_resource(v3d->job,
+                                                              target->buffer);
                         }
                         /* XXX: buffer_size? */
                 }
@@ -713,10 +789,21 @@ v3dX(emit_state)(struct pipe_context *pctx)
 
         if (v3d->dirty & VC5_DIRTY_OQ) {
                 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
-                        job->oq_enabled = v3d->active_queries && v3d->current_oq;
-                        if (job->oq_enabled) {
+                        if (v3d->active_queries && v3d->current_oq) {
                                 counter.address = cl_address(v3d->current_oq, 0);
                         }
                 }
         }
+
+#if V3D_VERSION >= 40
+        if (v3d->dirty & VC5_DIRTY_SAMPLE_STATE) {
+                cl_emit(&job->bcl, SAMPLE_STATE, state) {
+                        /* Note: SampleCoverage was handled at the
+                         * state_tracker level by converting to sample_mask.
+                         */
+                        state.coverage = 1.0;
+                        state.mask = job->msaa ? v3d->sample_mask : 0xf;
+                }
+        }
+#endif
 }