src/gallium/drivers/v3d/v3dx_emit.c

   1 /*
   2  * Copyright © 2014-2017 Broadcom
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 #include "util/format/u_format.h"
  25 #include "util/u_half.h"
  26 #include "v3d_context.h"
  27 #include "broadcom/common/v3d_macros.h"
  28 #include "broadcom/cle/v3dx_pack.h"
  29 #include "broadcom/compiler/v3d_compiler.h"
  30
  31 static uint8_t
  32 v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
  33 {
  34         /* We may get a bad blendfactor when blending is disabled. */
  35         if (factor == 0)
  36                 return V3D_BLEND_FACTOR_ZERO;
  37
  38         switch (factor) {
  39         case PIPE_BLENDFACTOR_ZERO:
  40                 return V3D_BLEND_FACTOR_ZERO;
  41         case PIPE_BLENDFACTOR_ONE:
  42                 return V3D_BLEND_FACTOR_ONE;
  43         case PIPE_BLENDFACTOR_SRC_COLOR:
  44                 return V3D_BLEND_FACTOR_SRC_COLOR;
  45         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  46                 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
  47         case PIPE_BLENDFACTOR_DST_COLOR:
  48                 return V3D_BLEND_FACTOR_DST_COLOR;
  49         case PIPE_BLENDFACTOR_INV_DST_COLOR:
  50                 return V3D_BLEND_FACTOR_INV_DST_COLOR;
  51         case PIPE_BLENDFACTOR_SRC_ALPHA:
  52                 return V3D_BLEND_FACTOR_SRC_ALPHA;
  53         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  54                 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
  55         case PIPE_BLENDFACTOR_DST_ALPHA:
  56                 return (dst_alpha_one ?
  57                         V3D_BLEND_FACTOR_ONE :
  58                         V3D_BLEND_FACTOR_DST_ALPHA);
  59         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  60                 return (dst_alpha_one ?
  61                         V3D_BLEND_FACTOR_ZERO :
  62                         V3D_BLEND_FACTOR_INV_DST_ALPHA);
  63         case PIPE_BLENDFACTOR_CONST_COLOR:
  64                 return V3D_BLEND_FACTOR_CONST_COLOR;
  65         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  66                 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
  67         case PIPE_BLENDFACTOR_CONST_ALPHA:
  68                 return V3D_BLEND_FACTOR_CONST_ALPHA;
  69         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  70                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
  71         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  72                 return (dst_alpha_one ?
  73                         V3D_BLEND_FACTOR_ZERO :
  74                         V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
  75         default:
  76                 unreachable("Bad blend factor");
  77         }
  78 }
  79
  80 static inline uint16_t
  81 swizzled_border_color(const struct v3d_device_info *devinfo,
  82                       struct pipe_sampler_state *sampler,
  83                       struct v3d_sampler_view *sview,
  84                       int chan)
  85 {
  86         const struct util_format_description *desc =
  87                 util_format_description(sview->base.format);
  88         uint8_t swiz = chan;
  89
  90         /* If we're doing swizzling in the sampler, then only rearrange the
  91          * border color for the mismatch between the VC5 texture format and
  92          * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
  93          * the sampler's swizzle.
  94          *
  95          * For swizzling in the shader, we don't do any pre-swizzling of the
  96          * border color.
  97          */
  98         if (v3d_get_tex_return_size(devinfo, sview->base.format,
  99                                     sampler->compare_mode) != 32)
 100                 swiz = desc->swizzle[swiz];
 101
 102         switch (swiz) {
 103         case PIPE_SWIZZLE_0:
 104                 return util_float_to_half(0.0);
 105         case PIPE_SWIZZLE_1:
 106                 return util_float_to_half(1.0);
 107         default:
 108                 return util_float_to_half(sampler->border_color.f[swiz]);
 109         }
 110 }
 111
 112 #if V3D_VERSION < 40
 113 static uint32_t
 114 translate_swizzle(unsigned char pipe_swizzle)
 115 {
 116         switch (pipe_swizzle) {
 117         case PIPE_SWIZZLE_0:
 118                 return 0;
 119         case PIPE_SWIZZLE_1:
 120                 return 1;
 121         case PIPE_SWIZZLE_X:
 122         case PIPE_SWIZZLE_Y:
 123         case PIPE_SWIZZLE_Z:
 124         case PIPE_SWIZZLE_W:
 125                 return 2 + pipe_swizzle;
 126         default:
 127                 unreachable("unknown swizzle");
 128         }
 129 }
 130
 131 static void
 132 emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,
 133                  int i)
 134 {
 135         struct v3d_job *job = v3d->job;
 136         struct pipe_sampler_state *psampler = stage_tex->samplers[i];
 137         struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
 138         struct pipe_sampler_view *psview = stage_tex->textures[i];
 139         struct v3d_sampler_view *sview = v3d_sampler_view(psview);
 140         struct pipe_resource *prsc = psview->texture;
 141         struct v3d_resource *rsc = v3d_resource(prsc);
 142         const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
 143
 144         stage_tex->texture_state[i].offset =
 145                 v3d_cl_ensure_space(&job->indirect,
 146                                     cl_packet_length(TEXTURE_SHADER_STATE),
 147                                     32);
 148         v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
 149                              job->indirect.bo);
 150
 151         uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format,
 152                                                        psampler->compare_mode);
 153
 154         struct V3D33_TEXTURE_SHADER_STATE unpacked = {
 155                 /* XXX */
 156                 .border_color_red = swizzled_border_color(devinfo, psampler,
 157                                                           sview, 0),
 158                 .border_color_green = swizzled_border_color(devinfo, psampler,
 159                                                             sview, 1),
 160                 .border_color_blue = swizzled_border_color(devinfo, psampler,
 161                                                            sview, 2),
 162                 .border_color_alpha = swizzled_border_color(devinfo, psampler,
 163                                                             sview, 3),
 164
 165                 /* In the normal texturing path, the LOD gets clamped between
 166                  * min/max, and the base_level field (set in the sampler view
 167                  * from first_level) only decides where the min/mag switch
 168                  * happens, so we need to use the LOD clamps to keep us
 169                  * between min and max.
 170                  *
 171                  * For txf, the LOD clamp is still used, despite GL not
 172                  * wanting that.  We will need to have a separate
 173                  * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
 174                  * support txf properly.
 175                  */
 176                 .min_level_of_detail = MIN2(psview->u.tex.first_level +
 177                                             MAX2(psampler->min_lod, 0),
 178                                             psview->u.tex.last_level),
 179                 .max_level_of_detail = MIN2(psview->u.tex.first_level +
 180                                             psampler->max_lod,
 181                                             psview->u.tex.last_level),
 182
 183                 .texture_base_pointer = cl_address(rsc->bo,
 184                                                    rsc->slices[0].offset),
 185
 186                 .output_32_bit = return_size == 32,
 187         };
 188
 189         /* Set up the sampler swizzle if we're doing 16-bit sampling.  For
 190          * 32-bit, we leave swizzling up to the shader compiler.
 191          *
 192          * Note: Contrary to the docs, the swizzle still applies even if the
 193          * return size is 32.  It's just that you probably want to swizzle in
 194          * the shader, because you need the Y/Z/W channels to be defined.
 195          */
 196         if (return_size == 32) {
 197                 unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
 198                 unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
 199                 unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
 200                 unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
 201         } else {
 202                 unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]);
 203                 unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]);
 204                 unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]);
 205                 unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]);
 206         }
 207
 208         int min_img_filter = psampler->min_img_filter;
 209         int min_mip_filter = psampler->min_mip_filter;
 210         int mag_img_filter = psampler->mag_img_filter;
 211
 212         if (return_size == 32) {
 213                 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
 214                 min_img_filter = PIPE_TEX_FILTER_NEAREST;
 215                 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
 216         }
 217
 218         bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
 219         switch (min_mip_filter) {
 220         case PIPE_TEX_MIPFILTER_NONE:
 221                 unpacked.filter += min_nearest ? 2 : 0;
 222                 break;
 223         case PIPE_TEX_MIPFILTER_NEAREST:
 224                 unpacked.filter += min_nearest ? 4 : 8;
 225                 break;
 226         case PIPE_TEX_MIPFILTER_LINEAR:
 227                 unpacked.filter += min_nearest ? 4 : 8;
 228                 unpacked.filter += 2;
 229                 break;
 230         }
 231
 232         if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
 233                 unpacked.filter++;
 234
 235         if (psampler->max_anisotropy > 8)
 236                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
 237         else if (psampler->max_anisotropy > 4)
 238                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
 239         else if (psampler->max_anisotropy > 2)
 240                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
 241         else if (psampler->max_anisotropy)
 242                 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
 243
 244         uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
 245         cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
 246
 247         for (int i = 0; i < ARRAY_SIZE(packed); i++)
 248                 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
 249
 250         /* TMU indirect structs need to be 32b aligned. */
 251         v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
 252         cl_emit_prepacked(&job->indirect, &packed);
 253 }
 254
 255 static void
 256 emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)
 257 {
 258         for (int i = 0; i < stage_tex->num_textures; i++) {
 259                 if (stage_tex->textures[i])
 260                         emit_one_texture(v3d, stage_tex, i);
 261         }
 262 }
 263 #endif /* V3D_VERSION < 40 */
 264
 265 static uint32_t
 266 translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
 267 {
 268         if (v3d->swap_color_rb & (1 << rt)) {
 269                 colormask = ((colormask & (2 | 8)) |
 270                              ((colormask & 1) << 2) |
 271                              ((colormask & 4) >> 2));
 272         }
 273
 274         return (~colormask) & 0xf;
 275 }
 276
 277 static void
 278 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
 279               struct pipe_blend_state *blend, int rt)
 280 {
 281         struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
 282
 283 #if V3D_VERSION >= 40
 284         /* We don't need to emit blend state for disabled RTs. */
 285         if (!rtblend->blend_enable)
 286                 return;
 287 #endif
 288
 289         cl_emit(&job->bcl, BLEND_CFG, config) {
 290 #if V3D_VERSION >= 40
 291                 if (blend->independent_blend_enable)
 292                         config.render_target_mask = 1 << rt;
 293                 else
 294                         config.render_target_mask = (1 << V3D_MAX_DRAW_BUFFERS) - 1;
 295 #else
 296                 assert(rt == 0);
 297 #endif
 298
 299                 config.color_blend_mode = rtblend->rgb_func;
 300                 config.color_blend_dst_factor =
 301                         v3d_factor(rtblend->rgb_dst_factor,
 302                                    v3d->blend_dst_alpha_one);
 303                 config.color_blend_src_factor =
 304                         v3d_factor(rtblend->rgb_src_factor,
 305                                    v3d->blend_dst_alpha_one);
 306
 307                 config.alpha_blend_mode = rtblend->alpha_func;
 308                 config.alpha_blend_dst_factor =
 309                         v3d_factor(rtblend->alpha_dst_factor,
 310                                    v3d->blend_dst_alpha_one);
 311                 config.alpha_blend_src_factor =
 312                         v3d_factor(rtblend->alpha_src_factor,
 313                                    v3d->blend_dst_alpha_one);
 314         }
 315 }
 316
 317 static void
 318 emit_flat_shade_flags(struct v3d_job *job,
 319                       int varying_offset,
 320                       uint32_t varyings,
 321                       enum V3DX(Varying_Flags_Action) lower,
 322                       enum V3DX(Varying_Flags_Action) higher)
 323 {
 324         cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
 325                 flags.varying_offset_v0 = varying_offset;
 326                 flags.flat_shade_flags_for_varyings_v024 = varyings;
 327                 flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
 328                         lower;
 329                 flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
 330                         higher;
 331         }
 332 }
 333
 334 #if V3D_VERSION >= 40
 335 static void
 336 emit_noperspective_flags(struct v3d_job *job,
 337                          int varying_offset,
 338                          uint32_t varyings,
 339                          enum V3DX(Varying_Flags_Action) lower,
 340                          enum V3DX(Varying_Flags_Action) higher)
 341 {
 342         cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
 343                 flags.varying_offset_v0 = varying_offset;
 344                 flags.non_perspective_flags_for_varyings_v024 = varyings;
 345                 flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
 346                         lower;
 347                 flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
 348                         higher;
 349         }
 350 }
 351
 352 static void
 353 emit_centroid_flags(struct v3d_job *job,
 354                     int varying_offset,
 355                     uint32_t varyings,
 356                     enum V3DX(Varying_Flags_Action) lower,
 357                     enum V3DX(Varying_Flags_Action) higher)
 358 {
 359         cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
 360                 flags.varying_offset_v0 = varying_offset;
 361                 flags.centroid_flags_for_varyings_v024 = varyings;
 362                 flags.action_for_centroid_flags_of_lower_numbered_varyings =
 363                         lower;
 364                 flags.action_for_centroid_flags_of_higher_numbered_varyings =
 365                         higher;
 366         }
 367 }
 368 #endif /* V3D_VERSION >= 40 */
 369
 370 static bool
 371 emit_varying_flags(struct v3d_job *job, uint32_t *flags,
 372                    void (*flag_emit_callback)(struct v3d_job *job,
 373                                               int varying_offset,
 374                                               uint32_t flags,
 375                                               enum V3DX(Varying_Flags_Action) lower,
 376                                               enum V3DX(Varying_Flags_Action) higher))
 377 {
 378         struct v3d_context *v3d = job->v3d;
 379         bool emitted_any = false;
 380
 381         for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
 382                 if (!flags[i])
 383                         continue;
 384
 385                 if (emitted_any) {
 386                         flag_emit_callback(job, i, flags[i],
 387                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
 388                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED);
 389                 } else if (i == 0) {
 390                         flag_emit_callback(job, i, flags[i],
 391                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
 392                                            V3D_VARYING_FLAGS_ACTION_ZEROED);
 393                 } else {
 394                         flag_emit_callback(job, i, flags[i],
 395                                            V3D_VARYING_FLAGS_ACTION_ZEROED,
 396                                            V3D_VARYING_FLAGS_ACTION_ZEROED);
 397                 }
 398                 emitted_any = true;
 399         }
 400
 401         return emitted_any;
 402 }
 403
 404 static inline struct v3d_uncompiled_shader *
 405 get_tf_shader(struct v3d_context *v3d)
 406 {
 407         if (v3d->prog.bind_gs)
 408                 return v3d->prog.bind_gs;
 409         else
 410                 return v3d->prog.bind_vs;
 411 }
 412
 413 void
 414 v3dX(emit_state)(struct pipe_context *pctx)
 415 {
 416         struct v3d_context *v3d = v3d_context(pctx);
 417         struct v3d_job *job = v3d->job;
 418         bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;
 419
 420         if (v3d->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
 421                           VC5_DIRTY_RASTERIZER)) {
 422                 float *vpscale = v3d->viewport.scale;
 423                 float *vptranslate = v3d->viewport.translate;
 424                 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
 425                 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
 426                 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
 427                 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
 428
 429                 /* Clip to the scissor if it's enabled, but still clip to the
 430                  * drawable regardless since that controls where the binner
 431                  * tries to put things.
 432                  *
 433                  * Additionally, always clip the rendering to the viewport,
 434                  * since the hardware does guardband clipping, meaning
 435                  * primitives would rasterize outside of the view volume.
 436                  */
 437                 uint32_t minx, miny, maxx, maxy;
 438                 if (!v3d->rasterizer->base.scissor) {
 439                         minx = MAX2(vp_minx, 0);
 440                         miny = MAX2(vp_miny, 0);
 441                         maxx = MIN2(vp_maxx, job->draw_width);
 442                         maxy = MIN2(vp_maxy, job->draw_height);
 443                 } else {
 444                         minx = MAX2(vp_minx, v3d->scissor.minx);
 445                         miny = MAX2(vp_miny, v3d->scissor.miny);
 446                         maxx = MIN2(vp_maxx, v3d->scissor.maxx);
 447                         maxy = MIN2(vp_maxy, v3d->scissor.maxy);
 448                 }
 449
 450                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
 451                         clip.clip_window_left_pixel_coordinate = minx;
 452                         clip.clip_window_bottom_pixel_coordinate = miny;
 453                         if (maxx > minx && maxy > miny) {
 454                                 clip.clip_window_width_in_pixels = maxx - minx;
 455                                 clip.clip_window_height_in_pixels = maxy - miny;
 456                         } else if (V3D_VERSION < 41) {
 457                                 /* The HW won't entirely clip out when scissor
 458                                  * w/h is 0.  Just treat it the same as
 459                                  * rasterizer discard.
 460                                  */
 461                                 rasterizer_discard = true;
 462                                 clip.clip_window_width_in_pixels = 1;
 463                                 clip.clip_window_height_in_pixels = 1;
 464                         }
 465                 }
 466
 467                 job->draw_min_x = MIN2(job->draw_min_x, minx);
 468                 job->draw_min_y = MIN2(job->draw_min_y, miny);
 469                 job->draw_max_x = MAX2(job->draw_max_x, maxx);
 470                 job->draw_max_y = MAX2(job->draw_max_y, maxy);
 471         }
 472
 473         if (v3d->dirty & (VC5_DIRTY_RASTERIZER |
 474                           VC5_DIRTY_ZSA |
 475                           VC5_DIRTY_BLEND |
 476                           VC5_DIRTY_COMPILED_FS)) {
 477                 cl_emit(&job->bcl, CFG_BITS, config) {
 478                         config.enable_forward_facing_primitive =
 479                                 !rasterizer_discard &&
 480                                 !(v3d->rasterizer->base.cull_face &
 481                                   PIPE_FACE_FRONT);
 482                         config.enable_reverse_facing_primitive =
 483                                 !rasterizer_discard &&
 484                                 !(v3d->rasterizer->base.cull_face &
 485                                   PIPE_FACE_BACK);
 486                         /* This seems backwards, but it's what gets the
 487                          * clipflat test to pass.
 488                          */
 489                         config.clockwise_primitives =
 490                                 v3d->rasterizer->base.front_ccw;
 491
 492                         config.enable_depth_offset =
 493                                 v3d->rasterizer->base.offset_tri;
 494
 495                         /* V3D follows GL behavior where the sample mask only
 496                          * applies when MSAA is enabled.  Gallium has sample
 497                          * mask apply anyway, and the MSAA blit shaders will
 498                          * set sample mask without explicitly setting
 499                          * rasterizer oversample.  Just force it on here,
 500                          * since the blit shaders are the only way to have
 501                          * !multisample && samplemask != 0xf.
 502                          */
 503                         config.rasterizer_oversample_mode =
 504                                 v3d->rasterizer->base.multisample ||
 505                                 v3d->sample_mask != 0xf;
 506
 507                         config.direct3d_provoking_vertex =
 508                                 v3d->rasterizer->base.flatshade_first;
 509
 510                         config.blend_enable = v3d->blend->blend_enables;
 511
 512                         /* Note: EZ state may update based on the compiled FS,
 513                          * along with ZSA
 514                          */
 515                         config.early_z_updates_enable =
 516                                 (job->ez_state != VC5_EZ_DISABLED);
 517                         if (v3d->zsa->base.depth.enabled) {
 518                                 config.z_updates_enable =
 519                                         v3d->zsa->base.depth.writemask;
 520                                 config.early_z_enable =
 521                                         config.early_z_updates_enable;
 522                                 config.depth_test_function =
 523                                         v3d->zsa->base.depth.func;
 524                         } else {
 525                                 config.depth_test_function = PIPE_FUNC_ALWAYS;
 526                         }
 527
 528                         config.stencil_enable =
 529                                 v3d->zsa->base.stencil[0].enabled;
 530                 }
 531
 532         }
 533
 534         if (v3d->dirty & VC5_DIRTY_RASTERIZER &&
 535             v3d->rasterizer->base.offset_tri) {
 536                 if (job->zsbuf &&
 537                     job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
 538                         cl_emit_prepacked_sized(&job->bcl,
 539                                                 v3d->rasterizer->depth_offset_z16,
 540                                                 cl_packet_length(DEPTH_OFFSET));
 541                 } else {
 542                         cl_emit_prepacked_sized(&job->bcl,
 543                                                 v3d->rasterizer->depth_offset,
 544                                                 cl_packet_length(DEPTH_OFFSET));
 545                 }
 546         }
 547
 548         if (v3d->dirty & VC5_DIRTY_RASTERIZER) {
 549                 cl_emit(&job->bcl, POINT_SIZE, point_size) {
 550                         point_size.point_size = v3d->rasterizer->point_size;
 551                 }
 552
 553                 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
 554                         line_width.line_width = v3d->rasterizer->base.line_width;
 555                 }
 556         }
 557
 558         if (v3d->dirty & VC5_DIRTY_VIEWPORT) {
 559                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
 560                         clip.viewport_half_width_in_1_256th_of_pixel =
 561                                 v3d->viewport.scale[0] * 256.0f;
 562                         clip.viewport_half_height_in_1_256th_of_pixel =
 563                                 v3d->viewport.scale[1] * 256.0f;
 564                 }
 565
 566                 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
 567                         clip.viewport_z_offset_zc_to_zs =
 568                                 v3d->viewport.translate[2];
 569                         clip.viewport_z_scale_zc_to_zs =
 570                                 v3d->viewport.scale[2];
 571                 }
 572                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
 573                         float z1 = (v3d->viewport.translate[2] -
 574                                     v3d->viewport.scale[2]);
 575                         float z2 = (v3d->viewport.translate[2] +
 576                                     v3d->viewport.scale[2]);
 577                         clip.minimum_zw = MIN2(z1, z2);
 578                         clip.maximum_zw = MAX2(z1, z2);
 579                 }
 580
 581                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
 582                         vp.viewport_centre_x_coordinate =
 583                                 v3d->viewport.translate[0];
 584                         vp.viewport_centre_y_coordinate =
 585                                 v3d->viewport.translate[1];
 586                 }
 587         }
 588
 589         if (v3d->dirty & VC5_DIRTY_BLEND) {
 590                 struct v3d_blend_state *blend = v3d->blend;
 591
 592                 if (blend->blend_enables) {
 593 #if V3D_VERSION >= 40
 594                         cl_emit(&job->bcl, BLEND_ENABLES, enables) {
 595                                 enables.mask = blend->blend_enables;
 596                         }
 597 #endif
 598
 599                         if (blend->base.independent_blend_enable) {
 600                                 for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++)
 601                                         emit_rt_blend(v3d, job, &blend->base, i);
 602                         } else {
 603                                 emit_rt_blend(v3d, job, &blend->base, 0);
 604                         }
 605                 }
 606         }
 607
 608         if (v3d->dirty & VC5_DIRTY_BLEND) {
 609                 struct pipe_blend_state *blend = &v3d->blend->base;
 610
 611                 cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
 612                         for (int i = 0; i < 4; i++) {
 613                                 int rt = blend->independent_blend_enable ? i : 0;
 614                                 int rt_mask = blend->rt[rt].colormask;
 615
 616                                 mask.mask |= translate_colormask(v3d, rt_mask,
 617                                                                  i) << (4 * i);
 618                         }
 619                 }
 620         }
 621
 622         /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
 623          * color.
 624          */
 625         if (v3d->dirty & VC5_DIRTY_BLEND_COLOR ||
 626             (V3D_VERSION < 41 && (v3d->dirty & VC5_DIRTY_BLEND))) {
 627                 cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
 628                         color.red_f16 = (v3d->swap_color_rb ?
 629                                           v3d->blend_color.hf[2] :
 630                                           v3d->blend_color.hf[0]);
 631                         color.green_f16 = v3d->blend_color.hf[1];
 632                         color.blue_f16 = (v3d->swap_color_rb ?
 633                                            v3d->blend_color.hf[0] :
 634                                            v3d->blend_color.hf[2]);
 635                         color.alpha_f16 = v3d->blend_color.hf[3];
 636                 }
 637         }
 638
 639         if (v3d->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
 640                 struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
 641                 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
 642
 643                 if (front->enabled) {
 644                         cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
 645                                                v3d->zsa->stencil_front, config) {
 646                                 config.stencil_ref_value =
 647                                         v3d->stencil_ref.ref_value[0];
 648                         }
 649                 }
 650
 651                 if (back->enabled) {
 652                         cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
 653                                                v3d->zsa->stencil_back, config) {
 654                                 config.stencil_ref_value =
 655                                         v3d->stencil_ref.ref_value[1];
 656                         }
 657                 }
 658         }
 659
 660 #if V3D_VERSION < 40
 661         /* Pre-4.x, we have texture state that depends on both the sampler and
 662          * the view, so we merge them together at draw time.
 663          */
 664         if (v3d->dirty & VC5_DIRTY_FRAGTEX)
 665                 emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]);
 666
 667         if (v3d->dirty & VC5_DIRTY_GEOMTEX)
 668                 emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]);
 669
 670         if (v3d->dirty & VC5_DIRTY_VERTTEX)
 671                 emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]);
 672 #endif
 673
 674         if (v3d->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
 675                 if (!emit_varying_flags(job,
 676                                         v3d->prog.fs->prog_data.fs->flat_shade_flags,
 677                                         emit_flat_shade_flags)) {
 678                         cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
 679                 }
 680         }
 681
 682 #if V3D_VERSION >= 40
 683         if (v3d->dirty & VC5_DIRTY_NOPERSPECTIVE_FLAGS) {
 684                 if (!emit_varying_flags(job,
 685                                         v3d->prog.fs->prog_data.fs->noperspective_flags,
 686                                         emit_noperspective_flags)) {
 687                         cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
 688                 }
 689         }
 690
 691         if (v3d->dirty & VC5_DIRTY_CENTROID_FLAGS) {
 692                 if (!emit_varying_flags(job,
 693                                         v3d->prog.fs->prog_data.fs->centroid_flags,
 694                                         emit_centroid_flags)) {
 695                         cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
 696                 }
 697         }
 698 #endif
 699
 700         /* Set up the transform feedback data specs (which VPM entries to
 701          * output to which buffers).
 702          */
 703         if (v3d->dirty & (VC5_DIRTY_STREAMOUT |
 704                           VC5_DIRTY_RASTERIZER |
 705                           VC5_DIRTY_PRIM_MODE)) {
 706                 struct v3d_streamout_stateobj *so = &v3d->streamout;
 707                 if (so->num_targets) {
 708                         bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS &&
 709                                                 v3d->rasterizer->base.point_size_per_vertex);
 710                         struct v3d_uncompiled_shader *tf_shader =
 711                                 get_tf_shader(v3d);
 712                         uint16_t *tf_specs = (psiz_per_vertex ?
 713                                               tf_shader->tf_specs_psiz :
 714                                               tf_shader->tf_specs);
 715
 716 #if V3D_VERSION >= 40
 717                         bool tf_enabled = v3d_transform_feedback_enabled(v3d);
 718                         job->tf_enabled |= tf_enabled;
 719
 720                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
 721                                 tfe.number_of_16_bit_output_data_specs_following =
 722                                         tf_shader->num_tf_specs;
 723                                 tfe.enable = tf_enabled;
 724                         };
 725 #else /* V3D_VERSION < 40 */
 726                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
 727                                 tfe.number_of_32_bit_output_buffer_address_following =
 728                                         so->num_targets;
 729                                 tfe.number_of_16_bit_output_data_specs_following =
 730                                         tf_shader->num_tf_specs;
 731                         };
 732 #endif /* V3D_VERSION < 40 */
 733                         for (int i = 0; i < tf_shader->num_tf_specs; i++) {
 734                                 cl_emit_prepacked(&job->bcl, &tf_specs[i]);
 735                         }
 736                 } else {
 737 #if V3D_VERSION >= 40
 738                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
 739                                 tfe.enable = false;
 740                         };
 741 #endif /* V3D_VERSION >= 40 */
 742                 }
 743         }
 744
 745         /* Set up the transform feedback buffers. */
 746         if (v3d->dirty & VC5_DIRTY_STREAMOUT) {
 747                 struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);
 748                 struct v3d_streamout_stateobj *so = &v3d->streamout;
 749                 for (int i = 0; i < so->num_targets; i++) {
 750                         const struct pipe_stream_output_target *target =
 751                                 so->targets[i];
 752                         struct v3d_resource *rsc = target ?
 753                                 v3d_resource(target->buffer) : NULL;
 754                         struct pipe_shader_state *ss = &tf_shader->base;
 755                         struct pipe_stream_output_info *info = &ss->stream_output;
 756                         uint32_t offset = (v3d->streamout.offsets[i] *
 757                                            info->stride[i] * 4);
 758
 759 #if V3D_VERSION >= 40
 760                         if (!target)
 761                                 continue;
 762
 763                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
 764                                 output.buffer_address =
 765                                         cl_address(rsc->bo,
 766                                                    target->buffer_offset +
 767                                                    offset);
 768                                 output.buffer_size_in_32_bit_words =
 769                                         (target->buffer_size - offset) >> 2;
 770                                 output.buffer_number = i;
 771                         }
 772 #else /* V3D_VERSION < 40 */
 773                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
 774                                 if (target) {
 775                                         output.address =
 776                                                 cl_address(rsc->bo,
 777                                                            target->buffer_offset +
 778                                                            offset);
 779                                 }
 780                         };
 781 #endif /* V3D_VERSION < 40 */
 782                         if (target) {
 783                                 v3d_job_add_tf_write_resource(v3d->job,
 784                                                               target->buffer);
 785                         }
 786                         /* XXX: buffer_size? */
 787                 }
 788         }
 789
 790         if (v3d->dirty & VC5_DIRTY_OQ) {
 791                 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
 792                         if (v3d->active_queries && v3d->current_oq) {
 793                                 counter.address = cl_address(v3d->current_oq, 0);
 794                         }
 795                 }
 796         }
 797
 798 #if V3D_VERSION >= 40
 799         if (v3d->dirty & VC5_DIRTY_SAMPLE_STATE) {
 800                 cl_emit(&job->bcl, SAMPLE_STATE, state) {
 801                         /* Note: SampleCoverage was handled at the
 802                          * state_tracker level by converting to sample_mask.
 803                          */
 804                         state.coverage = 1.0;
 805                         state.mask = job->msaa ? v3d->sample_mask : 0xf;
 806                 }
 807         }
 808 #endif
 809 }