src/gallium/drivers/v3d/v3dx_emit.c

   1 /*
   2  * Copyright © 2014-2017 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "util/u_format.h"
  25 #include "util/u_half.h"
  26 #include "v3d_context.h"
  27 #include "broadcom/common/v3d_macros.h"
  28 #include "broadcom/cle/v3dx_pack.h"
  29 #include "broadcom/compiler/v3d_compiler.h"
  30
  31 static uint8_t
  32 v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
  33 {
  34         /* We may get a bad blendfactor when blending is disabled. */
  35         if (factor == 0)
  36                 return V3D_BLEND_FACTOR_ZERO;
  37
  38         switch (factor) {
  39         case PIPE_BLENDFACTOR_ZERO:
  40                 return V3D_BLEND_FACTOR_ZERO;
  41         case PIPE_BLENDFACTOR_ONE:
  42                 return V3D_BLEND_FACTOR_ONE;
  43         case PIPE_BLENDFACTOR_SRC_COLOR:
  44                 return V3D_BLEND_FACTOR_SRC_COLOR;
  45         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  46                 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
  47         case PIPE_BLENDFACTOR_DST_COLOR:
  48                 return V3D_BLEND_FACTOR_DST_COLOR;
  49         case PIPE_BLENDFACTOR_INV_DST_COLOR:
  50                 return V3D_BLEND_FACTOR_INV_DST_COLOR;
  51         case PIPE_BLENDFACTOR_SRC_ALPHA:
  52                 return V3D_BLEND_FACTOR_SRC_ALPHA;
  53         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  54                 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
  55         case PIPE_BLENDFACTOR_DST_ALPHA:
  56                 return (dst_alpha_one ?
  57                         V3D_BLEND_FACTOR_ONE :
  58                         V3D_BLEND_FACTOR_DST_ALPHA);
  59         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  60                 return (dst_alpha_one ?
  61                         V3D_BLEND_FACTOR_ZERO :
  62                         V3D_BLEND_FACTOR_INV_DST_ALPHA);
  63         case PIPE_BLENDFACTOR_CONST_COLOR:
  64                 return V3D_BLEND_FACTOR_CONST_COLOR;
  65         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  66                 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
  67         case PIPE_BLENDFACTOR_CONST_ALPHA:
  68                 return V3D_BLEND_FACTOR_CONST_ALPHA;
  69         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  70                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
  71         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  72                 return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
  73         default:
  74                 unreachable("Bad blend factor");
  75         }
  76 }
  77
  78 static inline uint16_t
  79 swizzled_border_color(const struct v3d_device_info *devinfo,
  80                       struct pipe_sampler_state *sampler,
  81                       struct v3d_sampler_view *sview,
  82                       int chan)
  83 {
  84         const struct util_format_description *desc =
  85                 util_format_description(sview->base.format);
  86         uint8_t swiz = chan;
  87
  88         /* If we're doing swizzling in the sampler, then only rearrange the
  89          * border color for the mismatch between the VC5 texture format and
  90          * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
  91          * the sampler's swizzle.
  92          *
  93          * For swizzling in the shader, we don't do any pre-swizzling of the
  94          * border color.
  95          */
  96         if (v3d_get_tex_return_size(devinfo, sview->base.format,
  97                                     sampler->compare_mode) != 32)
  98                 swiz = desc->swizzle[swiz];
  99
 100         switch (swiz) {
 101         case PIPE_SWIZZLE_0:
 102                 return util_float_to_half(0.0);
 103         case PIPE_SWIZZLE_1:
 104                 return util_float_to_half(1.0);
 105         default:
 106                 return util_float_to_half(sampler->border_color.f[swiz]);
 107         }
 108 }
 109
 110 #if V3D_VERSION < 40
 111 static uint32_t
 112 translate_swizzle(unsigned char pipe_swizzle)
 113 {
 114         switch (pipe_swizzle) {
 115         case PIPE_SWIZZLE_0:
 116                 return 0;
 117         case PIPE_SWIZZLE_1:
 118                 return 1;
 119         case PIPE_SWIZZLE_X:
 120         case PIPE_SWIZZLE_Y:
 121         case PIPE_SWIZZLE_Z:
 122         case PIPE_SWIZZLE_W:
 123                 return 2 + pipe_swizzle;
 124         default:
 125                 unreachable("unknown swizzle");
 126         }
 127 }
 128
 129 static void
 130 emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,
 131                  int i)
 132 {
 133         struct v3d_job *job = v3d->job;
 134         struct pipe_sampler_state *psampler = stage_tex->samplers[i];
 135         struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
 136         struct pipe_sampler_view *psview = stage_tex->textures[i];
 137         struct v3d_sampler_view *sview = v3d_sampler_view(psview);
 138         struct pipe_resource *prsc = psview->texture;
 139         struct v3d_resource *rsc = v3d_resource(prsc);
 140         const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
 141
 142         stage_tex->texture_state[i].offset =
 143                 v3d_cl_ensure_space(&job->indirect,
 144                                     cl_packet_length(TEXTURE_SHADER_STATE),
 145                                     32);
 146         v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
 147                              job->indirect.bo);
 148
 149         uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format,
 150                                                        psampler->compare_mode);
 151
 152         struct V3D33_TEXTURE_SHADER_STATE unpacked = {
 153                 /* XXX */
 154                 .border_color_red = swizzled_border_color(devinfo, psampler,
 155                                                           sview, 0),
 156                 .border_color_green = swizzled_border_color(devinfo, psampler,
 157                                                             sview, 1),
 158                 .border_color_blue = swizzled_border_color(devinfo, psampler,
 159                                                            sview, 2),
 160                 .border_color_alpha = swizzled_border_color(devinfo, psampler,
 161                                                             sview, 3),
 162
 163                 /* In the normal texturing path, the LOD gets clamped between
 164                  * min/max, and the base_level field (set in the sampler view
 165                  * from first_level) only decides where the min/mag switch
 166                  * happens, so we need to use the LOD clamps to keep us
 167                  * between min and max.
 168                  *
 169                  * For txf, the LOD clamp is still used, despite GL not
 170                  * wanting that.  We will need to have a separate
 171                  * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
 172                  * support txf properly.
 173                  */
 174                 .min_level_of_detail = MIN2(psview->u.tex.first_level +
 175                                             MAX2(psampler->min_lod, 0),
 176                                             psview->u.tex.last_level),
 177                 .max_level_of_detail = MIN2(psview->u.tex.first_level +
 178                                             psampler->max_lod,
 179                                             psview->u.tex.last_level),
 180
 181                 .texture_base_pointer = cl_address(rsc->bo,
 182                                                    rsc->slices[0].offset),
 183
 184                 .output_32_bit = return_size == 32,
 185         };
 186
 187         /* Set up the sampler swizzle if we're doing 16-bit sampling.  For
 188          * 32-bit, we leave swizzling up to the shader compiler.
 189          *
 190          * Note: Contrary to the docs, the swizzle still applies even if the
 191          * return size is 32.  It's just that you probably want to swizzle in
 192          * the shader, because you need the Y/Z/W channels to be defined.
 193          */
 194         if (return_size == 32) {
 195                 unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
 196                 unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
 197                 unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
 198                 unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
 199         } else {
 200                 unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]);
 201                 unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]);
 202                 unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]);
 203                 unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]);
 204         }
 205
 206         int min_img_filter = psampler->min_img_filter;
 207         int min_mip_filter = psampler->min_mip_filter;
 208         int mag_img_filter = psampler->mag_img_filter;
 209
 210         if (return_size == 32) {
 211                 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
 212                 min_img_filter = PIPE_TEX_FILTER_NEAREST;
 213                 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
 214         }
 215
 216         bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
 217         switch (min_mip_filter) {
 218         case PIPE_TEX_MIPFILTER_NONE:
 219                 unpacked.filter += min_nearest ? 2 : 0;
 220                 break;
 221         case PIPE_TEX_MIPFILTER_NEAREST:
 222                 unpacked.filter += min_nearest ? 4 : 8;
 223                 break;
 224         case PIPE_TEX_MIPFILTER_LINEAR:
 225                 unpacked.filter += min_nearest ? 4 : 8;
 226                 unpacked.filter += 2;
 227                 break;
 228         }
 229
 230         if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
 231                 unpacked.filter++;
 232
 233         if (psampler->max_anisotropy > 8)
 234                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
 235         else if (psampler->max_anisotropy > 4)
 236                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
 237         else if (psampler->max_anisotropy > 2)
 238                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
 239         else if (psampler->max_anisotropy)
 240                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
 241
 242         uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
 243         cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
 244
 245         for (int i = 0; i < ARRAY_SIZE(packed); i++)
 246                 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
 247
 248         /* TMU indirect structs need to be 32b aligned. */
 249         v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
 250         cl_emit_prepacked(&job->indirect, &packed);
 251 }
 252
 253 static void
 254 emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)
 255 {
 256         for (int i = 0; i < stage_tex->num_textures; i++) {
 257                 if (stage_tex->textures[i])
 258                         emit_one_texture(v3d, stage_tex, i);
 259         }
 260 }
 261 #endif /* V3D_VERSION < 40 */
 262
 263 static uint32_t
 264 translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
 265 {
 266         if (v3d->swap_color_rb & (1 << rt)) {
 267                 colormask = ((colormask & (2 | 8)) |
 268                              ((colormask & 1) << 2) |
 269                              ((colormask & 4) >> 2));
 270         }
 271
 272         return (~colormask) & 0xf;
 273 }
 274
 275 static void
 276 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
 277               struct pipe_blend_state *blend, int rt)
 278 {
 279         cl_emit(&job->bcl, BLEND_CONFIG, config) {
 280                 struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
 281
 282 #if V3D_VERSION >= 40
 283                 config.render_target_mask = 1 << rt;
 284 #else
 285                 assert(rt == 0);
 286 #endif
 287
 288                 config.colour_blend_mode = rtblend->rgb_func;
 289                 config.colour_blend_dst_factor =
 290                         v3d_factor(rtblend->rgb_dst_factor,
 291                                    v3d->blend_dst_alpha_one);
 292                 config.colour_blend_src_factor =
 293                         v3d_factor(rtblend->rgb_src_factor,
 294                                    v3d->blend_dst_alpha_one);
 295
 296                 config.alpha_blend_mode = rtblend->alpha_func;
 297                 config.alpha_blend_dst_factor =
 298                         v3d_factor(rtblend->alpha_dst_factor,
 299                                    v3d->blend_dst_alpha_one);
 300                 config.alpha_blend_src_factor =
 301                         v3d_factor(rtblend->alpha_src_factor,
 302                                    v3d->blend_dst_alpha_one);
 303         }
 304 }
 305
 306 void
 307 v3dX(emit_state)(struct pipe_context *pctx)
 308 {
 309         struct v3d_context *v3d = v3d_context(pctx);
 310         struct v3d_job *job = v3d->job;
 311         bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;
 312
 313         if (v3d->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
 314                           VC5_DIRTY_RASTERIZER)) {
 315                 float *vpscale = v3d->viewport.scale;
 316                 float *vptranslate = v3d->viewport.translate;
 317                 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
 318                 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
 319                 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
 320                 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
 321
 322                 /* Clip to the scissor if it's enabled, but still clip to the
 323                  * drawable regardless since that controls where the binner
 324                  * tries to put things.
 325                  *
 326                  * Additionally, always clip the rendering to the viewport,
 327                  * since the hardware does guardband clipping, meaning
 328                  * primitives would rasterize outside of the view volume.
 329                  */
 330                 uint32_t minx, miny, maxx, maxy;
 331                 if (!v3d->rasterizer->base.scissor) {
 332                         minx = MAX2(vp_minx, 0);
 333                         miny = MAX2(vp_miny, 0);
 334                         maxx = MIN2(vp_maxx, job->draw_width);
 335                         maxy = MIN2(vp_maxy, job->draw_height);
 336                 } else {
 337                         minx = MAX2(vp_minx, v3d->scissor.minx);
 338                         miny = MAX2(vp_miny, v3d->scissor.miny);
 339                         maxx = MIN2(vp_maxx, v3d->scissor.maxx);
 340                         maxy = MIN2(vp_maxy, v3d->scissor.maxy);
 341                 }
 342
 343                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
 344                         clip.clip_window_left_pixel_coordinate = minx;
 345                         clip.clip_window_bottom_pixel_coordinate = miny;
 346                         if (maxx > minx && maxy > miny) {
 347                                 clip.clip_window_width_in_pixels = maxx - minx;
 348                                 clip.clip_window_height_in_pixels = maxy - miny;
 349                         } else if (V3D_VERSION < 41) {
 350                                 /* The HW won't entirely clip out when scissor
 351                                  * w/h is 0.  Just treat it the same as
 352                                  * rasterizer discard.
 353                                  */
 354                                 rasterizer_discard = true;
 355                                 clip.clip_window_width_in_pixels = 1;
 356                                 clip.clip_window_height_in_pixels = 1;
 357                         }
 358                 }
 359
 360                 job->draw_min_x = MIN2(job->draw_min_x, minx);
 361                 job->draw_min_y = MIN2(job->draw_min_y, miny);
 362                 job->draw_max_x = MAX2(job->draw_max_x, maxx);
 363                 job->draw_max_y = MAX2(job->draw_max_y, maxy);
 364         }
 365
 366         if (v3d->dirty & (VC5_DIRTY_RASTERIZER |
 367                           VC5_DIRTY_ZSA |
 368                           VC5_DIRTY_BLEND |
 369                           VC5_DIRTY_COMPILED_FS)) {
 370                 cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
 371                         config.enable_forward_facing_primitive =
 372                                 !rasterizer_discard &&
 373                                 !(v3d->rasterizer->base.cull_face &
 374                                   PIPE_FACE_FRONT);
 375                         config.enable_reverse_facing_primitive =
 376                                 !rasterizer_discard &&
 377                                 !(v3d->rasterizer->base.cull_face &
 378                                   PIPE_FACE_BACK);
 379                         /* This seems backwards, but it's what gets the
 380                          * clipflat test to pass.
 381                          */
 382                         config.clockwise_primitives =
 383                                 v3d->rasterizer->base.front_ccw;
 384
 385                         config.enable_depth_offset =
 386                                 v3d->rasterizer->base.offset_tri;
 387
 388                         /* V3D follows GL behavior where the sample mask only
 389                          * applies when MSAA is enabled.  Gallium has sample
 390                          * mask apply anyway, and the MSAA blit shaders will
 391                          * set sample mask without explicitly setting
 392                          * rasterizer oversample.  Just force it on here,
 393                          * since the blit shaders are the only way to have
 394                          * !multisample && samplemask != 0xf.
 395                          */
 396                         config.rasterizer_oversample_mode =
 397                                 v3d->rasterizer->base.multisample ||
 398                                 v3d->sample_mask != 0xf;
 399
 400                         config.direct3d_provoking_vertex =
 401                                 v3d->rasterizer->base.flatshade_first;
 402
 403                         config.blend_enable = v3d->blend->rt[0].blend_enable;
 404
 405                         /* Note: EZ state may update based on the compiled FS,
 406                          * along with ZSA
 407                          */
 408                         config.early_z_updates_enable =
 409                                 (job->ez_state != VC5_EZ_DISABLED);
 410                         if (v3d->zsa->base.depth.enabled) {
 411                                 config.z_updates_enable =
 412                                         v3d->zsa->base.depth.writemask;
 413                                 config.early_z_enable =
 414                                         config.early_z_updates_enable;
 415                                 config.depth_test_function =
 416                                         v3d->zsa->base.depth.func;
 417                         } else {
 418                                 config.depth_test_function = PIPE_FUNC_ALWAYS;
 419                         }
 420
 421                         config.stencil_enable =
 422                                 v3d->zsa->base.stencil[0].enabled;
 423                 }
 424
 425         }
 426
 427         if (v3d->dirty & VC5_DIRTY_RASTERIZER &&
 428             v3d->rasterizer->base.offset_tri) {
 429                 cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
 430                         depth.depth_offset_factor =
 431                                 v3d->rasterizer->offset_factor;
 432                         if (job->zsbuf &&
 433                             job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
 434                                 depth.depth_offset_units =
 435                                         v3d->rasterizer->z16_offset_units;
 436                         } else {
 437                                 depth.depth_offset_units =
 438                                         v3d->rasterizer->offset_units;
 439                         }
 440                 }
 441         }
 442
 443         if (v3d->dirty & VC5_DIRTY_RASTERIZER) {
 444                 cl_emit(&job->bcl, POINT_SIZE, point_size) {
 445                         point_size.point_size = v3d->rasterizer->point_size;
 446                 }
 447
 448                 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
 449                         line_width.line_width = v3d->rasterizer->base.line_width;
 450                 }
 451         }
 452
 453         if (v3d->dirty & VC5_DIRTY_VIEWPORT) {
 454                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
 455                         clip.viewport_half_width_in_1_256th_of_pixel =
 456                                 v3d->viewport.scale[0] * 256.0f;
 457                         clip.viewport_half_height_in_1_256th_of_pixel =
 458                                 v3d->viewport.scale[1] * 256.0f;
 459                 }
 460
 461                 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
 462                         clip.viewport_z_offset_zc_to_zs =
 463                                 v3d->viewport.translate[2];
 464                         clip.viewport_z_scale_zc_to_zs =
 465                                 v3d->viewport.scale[2];
 466                 }
 467                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
 468                         clip.minimum_zw = (v3d->viewport.translate[2] -
 469                                            v3d->viewport.scale[2]);
 470                         clip.maximum_zw = (v3d->viewport.translate[2] +
 471                                            v3d->viewport.scale[2]);
 472                 }
 473
 474                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
 475                         vp.viewport_centre_x_coordinate =
 476                                 v3d->viewport.translate[0];
 477                         vp.viewport_centre_y_coordinate =
 478                                 v3d->viewport.translate[1];
 479                 }
 480         }
 481
 482         if (v3d->dirty & VC5_DIRTY_BLEND && v3d->blend->rt[0].blend_enable) {
 483                 struct pipe_blend_state *blend = v3d->blend;
 484
 485                 if (blend->independent_blend_enable) {
 486                         for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++)
 487                                 emit_rt_blend(v3d, job, blend, i);
 488                 } else {
 489                         emit_rt_blend(v3d, job, blend, 0);
 490                 }
 491         }
 492
 493         if (v3d->dirty & VC5_DIRTY_BLEND) {
 494                 struct pipe_blend_state *blend = v3d->blend;
 495
 496                 cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
 497                         if (blend->independent_blend_enable) {
 498                                 mask.render_target_0_per_colour_component_write_masks =
 499                                         translate_colormask(v3d, blend->rt[0].colormask, 0);
 500                                 mask.render_target_1_per_colour_component_write_masks =
 501                                         translate_colormask(v3d, blend->rt[1].colormask, 1);
 502                                 mask.render_target_2_per_colour_component_write_masks =
 503                                         translate_colormask(v3d, blend->rt[2].colormask, 2);
 504                                 mask.render_target_3_per_colour_component_write_masks =
 505                                         translate_colormask(v3d, blend->rt[3].colormask, 3);
 506                         } else {
 507                                 mask.render_target_0_per_colour_component_write_masks =
 508                                         translate_colormask(v3d, blend->rt[0].colormask, 0);
 509                                 mask.render_target_1_per_colour_component_write_masks =
 510                                         translate_colormask(v3d, blend->rt[0].colormask, 1);
 511                                 mask.render_target_2_per_colour_component_write_masks =
 512                                         translate_colormask(v3d, blend->rt[0].colormask, 2);
 513                                 mask.render_target_3_per_colour_component_write_masks =
 514                                         translate_colormask(v3d, blend->rt[0].colormask, 3);
 515                         }
 516                 }
 517         }
 518
 519         /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
 520          * color.
 521          */
 522         if (v3d->dirty & VC5_DIRTY_BLEND_COLOR ||
 523             (V3D_VERSION < 41 && (v3d->dirty & VC5_DIRTY_BLEND))) {
 524                 cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
 525                         colour.red_f16 = (v3d->swap_color_rb ?
 526                                           v3d->blend_color.hf[2] :
 527                                           v3d->blend_color.hf[0]);
 528                         colour.green_f16 = v3d->blend_color.hf[1];
 529                         colour.blue_f16 = (v3d->swap_color_rb ?
 530                                            v3d->blend_color.hf[0] :
 531                                            v3d->blend_color.hf[2]);
 532                         colour.alpha_f16 = v3d->blend_color.hf[3];
 533                 }
 534         }
 535
 536         if (v3d->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
 537                 struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
 538                 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
 539
 540                 if (front->enabled) {
 541                         cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
 542                                                v3d->zsa->stencil_front, config) {
 543                                 config.stencil_ref_value =
 544                                         v3d->stencil_ref.ref_value[0];
 545                         }
 546                 }
 547
 548                 if (back->enabled) {
 549                         cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
 550                                                v3d->zsa->stencil_back, config) {
 551                                 config.stencil_ref_value =
 552                                         v3d->stencil_ref.ref_value[1];
 553                         }
 554                 }
 555         }
 556
 557 #if V3D_VERSION < 40
 558         /* Pre-4.x, we have texture state that depends on both the sampler and
 559          * the view, so we merge them together at draw time.
 560          */
 561         if (v3d->dirty & VC5_DIRTY_FRAGTEX)
 562                 emit_textures(v3d, &v3d->fragtex);
 563
 564         if (v3d->dirty & VC5_DIRTY_VERTTEX)
 565                 emit_textures(v3d, &v3d->verttex);
 566 #endif
 567
 568         if (v3d->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
 569                 bool emitted_any = false;
 570
 571                 for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
 572                         if (!v3d->prog.fs->prog_data.fs->flat_shade_flags[i])
 573                                 continue;
 574
 575                         cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
 576                                 flags.varying_offset_v0 = i;
 577
 578                                 if (emitted_any) {
 579                                         flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
 580                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED;
 581                                         flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
 582                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED;
 583                                 } else {
 584                                         flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
 585                                                 ((i == 0) ?
 586                                                  V3D_VARYING_FLAGS_ACTION_UNCHANGED :
 587                                                  V3D_VARYING_FLAGS_ACTION_ZEROED);
 588
 589                                         flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
 590                                                 V3D_VARYING_FLAGS_ACTION_ZEROED;
 591                                 }
 592
 593                                 flags.flat_shade_flags_for_varyings_v024 =
 594                                         v3d->prog.fs->prog_data.fs->flat_shade_flags[i];
 595                         }
 596
 597                         emitted_any = true;
 598                 }
 599
 600                 if (!emitted_any) {
 601                         cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
 602                 }
 603         }
 604
 605 #if V3D_VERSION >= 40
 606         if (v3d->dirty & VC5_DIRTY_CENTROID_FLAGS) {
 607                 bool emitted_any = false;
 608
 609                 for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->centroid_flags); i++) {
 610                         if (!v3d->prog.fs->prog_data.fs->centroid_flags[i])
 611                                 continue;
 612
 613                         cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
 614                                 flags.varying_offset_v0 = i;
 615
 616                                 if (emitted_any) {
 617                                         flags.action_for_centroid_flags_of_lower_numbered_varyings =
 618                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED;
 619                                         flags.action_for_centroid_flags_of_higher_numbered_varyings =
 620                                                 V3D_VARYING_FLAGS_ACTION_UNCHANGED;
 621                                 } else {
 622                                         flags.action_for_centroid_flags_of_lower_numbered_varyings =
 623                                                 ((i == 0) ?
 624                                                  V3D_VARYING_FLAGS_ACTION_UNCHANGED :
 625                                                  V3D_VARYING_FLAGS_ACTION_ZEROED);
 626
 627                                         flags.action_for_centroid_flags_of_higher_numbered_varyings =
 628                                                 V3D_VARYING_FLAGS_ACTION_ZEROED;
 629                                 }
 630
 631                                 flags.centroid_flags_for_varyings_v024 =
 632                                         v3d->prog.fs->prog_data.fs->centroid_flags[i];
 633                         }
 634
 635                         emitted_any = true;
 636                 }
 637
 638                 if (!emitted_any) {
 639                         cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
 640                 }
 641         }
 642 #endif
 643
 644         /* Set up the transform feedback data specs (which VPM entries to
 645          * output to which buffers).
 646          */
 647         if (v3d->dirty & (VC5_DIRTY_STREAMOUT |
 648                           VC5_DIRTY_RASTERIZER |
 649                           VC5_DIRTY_PRIM_MODE)) {
 650                 struct v3d_streamout_stateobj *so = &v3d->streamout;
 651
 652                 if (so->num_targets) {
 653                         bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS &&
 654                                                 v3d->rasterizer->base.point_size_per_vertex);
 655                         uint16_t *tf_specs = (psiz_per_vertex ?
 656                                               v3d->prog.bind_vs->tf_specs_psiz :
 657                                               v3d->prog.bind_vs->tf_specs);
 658
 659 #if V3D_VERSION >= 40
 660                         job->tf_enabled = (v3d->prog.bind_vs->num_tf_specs != 0 &&
 661                                            v3d->active_queries);
 662
 663                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
 664                                 tfe.number_of_16_bit_output_data_specs_following =
 665                                         v3d->prog.bind_vs->num_tf_specs;
 666                                 tfe.enable = job->tf_enabled;
 667                         };
 668 #else /* V3D_VERSION < 40 */
 669                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
 670                                 tfe.number_of_32_bit_output_buffer_address_following =
 671                                         so->num_targets;
 672                                 tfe.number_of_16_bit_output_data_specs_following =
 673                                         v3d->prog.bind_vs->num_tf_specs;
 674                         };
 675 #endif /* V3D_VERSION < 40 */
 676                         for (int i = 0; i < v3d->prog.bind_vs->num_tf_specs; i++) {
 677                                 cl_emit_prepacked(&job->bcl, &tf_specs[i]);
 678                         }
 679                 } else if (job->tf_enabled) {
 680 #if V3D_VERSION >= 40
 681                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
 682                                 tfe.enable = false;
 683                         };
 684                         job->tf_enabled = false;
 685 #endif /* V3D_VERSION >= 40 */
 686                 }
 687         }
 688
 689         /* Set up the trasnform feedback buffers. */
 690         if (v3d->dirty & VC5_DIRTY_STREAMOUT) {
 691                 struct v3d_streamout_stateobj *so = &v3d->streamout;
 692                 for (int i = 0; i < so->num_targets; i++) {
 693                         const struct pipe_stream_output_target *target =
 694                                 so->targets[i];
 695                         struct v3d_resource *rsc = target ?
 696                                 v3d_resource(target->buffer) : NULL;
 697                         struct pipe_shader_state *vs = &v3d->prog.bind_vs->base;
 698                         struct pipe_stream_output_info *info = &vs->stream_output;
 699                         uint32_t offset = (v3d->streamout.offsets[i] *
 700                                            info->stride[i] * 4);
 701
 702 #if V3D_VERSION >= 40
 703                         if (!target)
 704                                 continue;
 705
 706                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
 707                                 output.buffer_address =
 708                                         cl_address(rsc->bo,
 709                                                    target->buffer_offset +
 710                                                    offset);
 711                                 output.buffer_size_in_32_bit_words =
 712                                         (target->buffer_size - offset) >> 2;
 713                                 output.buffer_number = i;
 714                         }
 715 #else /* V3D_VERSION < 40 */
 716                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
 717                                 if (target) {
 718                                         output.address =
 719                                                 cl_address(rsc->bo,
 720                                                            target->buffer_offset +
 721                                                            offset);
 722                                 }
 723                         };
 724 #endif /* V3D_VERSION < 40 */
 725                         if (target) {
 726                                 v3d_job_add_write_resource(v3d->job,
 727                                                            target->buffer);
 728                         }
 729                         /* XXX: buffer_size? */
 730                 }
 731         }
 732
 733         if (v3d->dirty & VC5_DIRTY_OQ) {
 734                 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
 735                         job->oq_enabled = v3d->active_queries && v3d->current_oq;
 736                         if (job->oq_enabled) {
 737                                 counter.address = cl_address(v3d->current_oq, 0);
 738                         }
 739                 }
 740         }
 741
 742 #if V3D_VERSION >= 40
 743         if (v3d->dirty & VC5_DIRTY_SAMPLE_STATE) {
 744                 cl_emit(&job->bcl, SAMPLE_STATE, state) {
 745                         /* Note: SampleCoverage was handled at the
 746                          * state_tracker level by converting to sample_mask.
 747                          */
 748                         state.coverage = fui(1.0) >> 16;
 749                         state.mask = job->msaa ? v3d->sample_mask : 0xf;
 750                 }
 751         }
 752 #endif
 753 }