2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "util/format/u_format.h"
25 #include "util/u_half.h"
26 #include "v3d_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/cle/v3dx_pack.h"
29 #include "broadcom/compiler/v3d_compiler.h"
32 v3d_factor(enum pipe_blendfactor factor
, bool dst_alpha_one
)
34 /* We may get a bad blendfactor when blending is disabled. */
36 return V3D_BLEND_FACTOR_ZERO
;
39 case PIPE_BLENDFACTOR_ZERO
:
40 return V3D_BLEND_FACTOR_ZERO
;
41 case PIPE_BLENDFACTOR_ONE
:
42 return V3D_BLEND_FACTOR_ONE
;
43 case PIPE_BLENDFACTOR_SRC_COLOR
:
44 return V3D_BLEND_FACTOR_SRC_COLOR
;
45 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
46 return V3D_BLEND_FACTOR_INV_SRC_COLOR
;
47 case PIPE_BLENDFACTOR_DST_COLOR
:
48 return V3D_BLEND_FACTOR_DST_COLOR
;
49 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
50 return V3D_BLEND_FACTOR_INV_DST_COLOR
;
51 case PIPE_BLENDFACTOR_SRC_ALPHA
:
52 return V3D_BLEND_FACTOR_SRC_ALPHA
;
53 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
54 return V3D_BLEND_FACTOR_INV_SRC_ALPHA
;
55 case PIPE_BLENDFACTOR_DST_ALPHA
:
56 return (dst_alpha_one
?
57 V3D_BLEND_FACTOR_ONE
:
58 V3D_BLEND_FACTOR_DST_ALPHA
);
59 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
60 return (dst_alpha_one
?
61 V3D_BLEND_FACTOR_ZERO
:
62 V3D_BLEND_FACTOR_INV_DST_ALPHA
);
63 case PIPE_BLENDFACTOR_CONST_COLOR
:
64 return V3D_BLEND_FACTOR_CONST_COLOR
;
65 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
66 return V3D_BLEND_FACTOR_INV_CONST_COLOR
;
67 case PIPE_BLENDFACTOR_CONST_ALPHA
:
68 return V3D_BLEND_FACTOR_CONST_ALPHA
;
69 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
70 return V3D_BLEND_FACTOR_INV_CONST_ALPHA
;
71 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
72 return (dst_alpha_one
?
73 V3D_BLEND_FACTOR_ZERO
:
74 V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE
);
76 unreachable("Bad blend factor");
80 static inline uint16_t
81 swizzled_border_color(const struct v3d_device_info
*devinfo
,
82 struct pipe_sampler_state
*sampler
,
83 struct v3d_sampler_view
*sview
,
86 const struct util_format_description
*desc
=
87 util_format_description(sview
->base
.format
);
90 /* If we're doing swizzling in the sampler, then only rearrange the
91 * border color for the mismatch between the VC5 texture format and
92 * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
93 * the sampler's swizzle.
95 * For swizzling in the shader, we don't do any pre-swizzling of the
98 if (v3d_get_tex_return_size(devinfo
, sview
->base
.format
,
99 sampler
->compare_mode
) != 32)
100 swiz
= desc
->swizzle
[swiz
];
104 return util_float_to_half(0.0);
106 return util_float_to_half(1.0);
108 return util_float_to_half(sampler
->border_color
.f
[swiz
]);
114 translate_swizzle(unsigned char pipe_swizzle
)
116 switch (pipe_swizzle
) {
125 return 2 + pipe_swizzle
;
127 unreachable("unknown swizzle");
132 emit_one_texture(struct v3d_context
*v3d
, struct v3d_texture_stateobj
*stage_tex
,
135 struct v3d_job
*job
= v3d
->job
;
136 struct pipe_sampler_state
*psampler
= stage_tex
->samplers
[i
];
137 struct v3d_sampler_state
*sampler
= v3d_sampler_state(psampler
);
138 struct pipe_sampler_view
*psview
= stage_tex
->textures
[i
];
139 struct v3d_sampler_view
*sview
= v3d_sampler_view(psview
);
140 struct pipe_resource
*prsc
= psview
->texture
;
141 struct v3d_resource
*rsc
= v3d_resource(prsc
);
142 const struct v3d_device_info
*devinfo
= &v3d
->screen
->devinfo
;
144 stage_tex
->texture_state
[i
].offset
=
145 v3d_cl_ensure_space(&job
->indirect
,
146 cl_packet_length(TEXTURE_SHADER_STATE
),
148 v3d_bo_set_reference(&stage_tex
->texture_state
[i
].bo
,
151 uint32_t return_size
= v3d_get_tex_return_size(devinfo
, psview
->format
,
152 psampler
->compare_mode
);
154 struct V3D33_TEXTURE_SHADER_STATE unpacked
= {
156 .border_color_red
= swizzled_border_color(devinfo
, psampler
,
158 .border_color_green
= swizzled_border_color(devinfo
, psampler
,
160 .border_color_blue
= swizzled_border_color(devinfo
, psampler
,
162 .border_color_alpha
= swizzled_border_color(devinfo
, psampler
,
165 /* In the normal texturing path, the LOD gets clamped between
166 * min/max, and the base_level field (set in the sampler view
167 * from first_level) only decides where the min/mag switch
168 * happens, so we need to use the LOD clamps to keep us
169 * between min and max.
171 * For txf, the LOD clamp is still used, despite GL not
172 * wanting that. We will need to have a separate
173 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
174 * support txf properly.
176 .min_level_of_detail
= MIN2(psview
->u
.tex
.first_level
+
177 MAX2(psampler
->min_lod
, 0),
178 psview
->u
.tex
.last_level
),
179 .max_level_of_detail
= MIN2(psview
->u
.tex
.first_level
+
181 psview
->u
.tex
.last_level
),
183 .texture_base_pointer
= cl_address(rsc
->bo
,
184 rsc
->slices
[0].offset
),
186 .output_32_bit
= return_size
== 32,
189 /* Set up the sampler swizzle if we're doing 16-bit sampling. For
190 * 32-bit, we leave swizzling up to the shader compiler.
192 * Note: Contrary to the docs, the swizzle still applies even if the
193 * return size is 32. It's just that you probably want to swizzle in
194 * the shader, because you need the Y/Z/W channels to be defined.
196 if (return_size
== 32) {
197 unpacked
.swizzle_r
= translate_swizzle(PIPE_SWIZZLE_X
);
198 unpacked
.swizzle_g
= translate_swizzle(PIPE_SWIZZLE_Y
);
199 unpacked
.swizzle_b
= translate_swizzle(PIPE_SWIZZLE_Z
);
200 unpacked
.swizzle_a
= translate_swizzle(PIPE_SWIZZLE_W
);
202 unpacked
.swizzle_r
= translate_swizzle(sview
->swizzle
[0]);
203 unpacked
.swizzle_g
= translate_swizzle(sview
->swizzle
[1]);
204 unpacked
.swizzle_b
= translate_swizzle(sview
->swizzle
[2]);
205 unpacked
.swizzle_a
= translate_swizzle(sview
->swizzle
[3]);
208 int min_img_filter
= psampler
->min_img_filter
;
209 int min_mip_filter
= psampler
->min_mip_filter
;
210 int mag_img_filter
= psampler
->mag_img_filter
;
212 if (return_size
== 32) {
213 min_mip_filter
= PIPE_TEX_MIPFILTER_NEAREST
;
214 min_img_filter
= PIPE_TEX_FILTER_NEAREST
;
215 mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
218 bool min_nearest
= min_img_filter
== PIPE_TEX_FILTER_NEAREST
;
219 switch (min_mip_filter
) {
220 case PIPE_TEX_MIPFILTER_NONE
:
221 unpacked
.filter
+= min_nearest
? 2 : 0;
223 case PIPE_TEX_MIPFILTER_NEAREST
:
224 unpacked
.filter
+= min_nearest
? 4 : 8;
226 case PIPE_TEX_MIPFILTER_LINEAR
:
227 unpacked
.filter
+= min_nearest
? 4 : 8;
228 unpacked
.filter
+= 2;
232 if (mag_img_filter
== PIPE_TEX_FILTER_NEAREST
)
235 if (psampler
->max_anisotropy
> 8)
236 unpacked
.filter
= V3D_TMU_FILTER_ANISOTROPIC_16_1
;
237 else if (psampler
->max_anisotropy
> 4)
238 unpacked
.filter
= V3D_TMU_FILTER_ANISOTROPIC_8_1
;
239 else if (psampler
->max_anisotropy
> 2)
240 unpacked
.filter
= V3D_TMU_FILTER_ANISOTROPIC_4_1
;
241 else if (psampler
->max_anisotropy
)
242 unpacked
.filter
= V3D_TMU_FILTER_ANISOTROPIC_2_1
;
244 uint8_t packed
[cl_packet_length(TEXTURE_SHADER_STATE
)];
245 cl_packet_pack(TEXTURE_SHADER_STATE
)(&job
->indirect
, packed
, &unpacked
);
247 for (int i
= 0; i
< ARRAY_SIZE(packed
); i
++)
248 packed
[i
] |= sview
->texture_shader_state
[i
] | sampler
->texture_shader_state
[i
];
250 /* TMU indirect structs need to be 32b aligned. */
251 v3d_cl_ensure_space(&job
->indirect
, ARRAY_SIZE(packed
), 32);
252 cl_emit_prepacked(&job
->indirect
, &packed
);
256 emit_textures(struct v3d_context
*v3d
, struct v3d_texture_stateobj
*stage_tex
)
258 for (int i
= 0; i
< stage_tex
->num_textures
; i
++) {
259 if (stage_tex
->textures
[i
])
260 emit_one_texture(v3d
, stage_tex
, i
);
263 #endif /* V3D_VERSION < 40 */
266 translate_colormask(struct v3d_context
*v3d
, uint32_t colormask
, int rt
)
268 if (v3d
->swap_color_rb
& (1 << rt
)) {
269 colormask
= ((colormask
& (2 | 8)) |
270 ((colormask
& 1) << 2) |
271 ((colormask
& 4) >> 2));
274 return (~colormask
) & 0xf;
278 emit_rt_blend(struct v3d_context
*v3d
, struct v3d_job
*job
,
279 struct pipe_blend_state
*blend
, int rt
)
281 struct pipe_rt_blend_state
*rtblend
= &blend
->rt
[rt
];
283 #if V3D_VERSION >= 40
284 /* We don't need to emit blend state for disabled RTs. */
285 if (!rtblend
->blend_enable
)
289 cl_emit(&job
->bcl
, BLEND_CFG
, config
) {
290 #if V3D_VERSION >= 40
291 if (blend
->independent_blend_enable
)
292 config
.render_target_mask
= 1 << rt
;
294 config
.render_target_mask
= (1 << V3D_MAX_DRAW_BUFFERS
) - 1;
299 config
.color_blend_mode
= rtblend
->rgb_func
;
300 config
.color_blend_dst_factor
=
301 v3d_factor(rtblend
->rgb_dst_factor
,
302 v3d
->blend_dst_alpha_one
);
303 config
.color_blend_src_factor
=
304 v3d_factor(rtblend
->rgb_src_factor
,
305 v3d
->blend_dst_alpha_one
);
307 config
.alpha_blend_mode
= rtblend
->alpha_func
;
308 config
.alpha_blend_dst_factor
=
309 v3d_factor(rtblend
->alpha_dst_factor
,
310 v3d
->blend_dst_alpha_one
);
311 config
.alpha_blend_src_factor
=
312 v3d_factor(rtblend
->alpha_src_factor
,
313 v3d
->blend_dst_alpha_one
);
318 emit_flat_shade_flags(struct v3d_job
*job
,
321 enum V3DX(Varying_Flags_Action
) lower
,
322 enum V3DX(Varying_Flags_Action
) higher
)
324 cl_emit(&job
->bcl
, FLAT_SHADE_FLAGS
, flags
) {
325 flags
.varying_offset_v0
= varying_offset
;
326 flags
.flat_shade_flags_for_varyings_v024
= varyings
;
327 flags
.action_for_flat_shade_flags_of_lower_numbered_varyings
=
329 flags
.action_for_flat_shade_flags_of_higher_numbered_varyings
=
334 #if V3D_VERSION >= 40
336 emit_noperspective_flags(struct v3d_job
*job
,
339 enum V3DX(Varying_Flags_Action
) lower
,
340 enum V3DX(Varying_Flags_Action
) higher
)
342 cl_emit(&job
->bcl
, NON_PERSPECTIVE_FLAGS
, flags
) {
343 flags
.varying_offset_v0
= varying_offset
;
344 flags
.non_perspective_flags_for_varyings_v024
= varyings
;
345 flags
.action_for_non_perspective_flags_of_lower_numbered_varyings
=
347 flags
.action_for_non_perspective_flags_of_higher_numbered_varyings
=
353 emit_centroid_flags(struct v3d_job
*job
,
356 enum V3DX(Varying_Flags_Action
) lower
,
357 enum V3DX(Varying_Flags_Action
) higher
)
359 cl_emit(&job
->bcl
, CENTROID_FLAGS
, flags
) {
360 flags
.varying_offset_v0
= varying_offset
;
361 flags
.centroid_flags_for_varyings_v024
= varyings
;
362 flags
.action_for_centroid_flags_of_lower_numbered_varyings
=
364 flags
.action_for_centroid_flags_of_higher_numbered_varyings
=
368 #endif /* V3D_VERSION >= 40 */
371 emit_varying_flags(struct v3d_job
*job
, uint32_t *flags
,
372 void (*flag_emit_callback
)(struct v3d_job
*job
,
375 enum V3DX(Varying_Flags_Action
) lower
,
376 enum V3DX(Varying_Flags_Action
) higher
))
378 struct v3d_context
*v3d
= job
->v3d
;
379 bool emitted_any
= false;
381 for (int i
= 0; i
< ARRAY_SIZE(v3d
->prog
.fs
->prog_data
.fs
->flat_shade_flags
); i
++) {
386 flag_emit_callback(job
, i
, flags
[i
],
387 V3D_VARYING_FLAGS_ACTION_UNCHANGED
,
388 V3D_VARYING_FLAGS_ACTION_UNCHANGED
);
390 flag_emit_callback(job
, i
, flags
[i
],
391 V3D_VARYING_FLAGS_ACTION_UNCHANGED
,
392 V3D_VARYING_FLAGS_ACTION_ZEROED
);
394 flag_emit_callback(job
, i
, flags
[i
],
395 V3D_VARYING_FLAGS_ACTION_ZEROED
,
396 V3D_VARYING_FLAGS_ACTION_ZEROED
);
404 static inline struct v3d_uncompiled_shader
*
405 get_tf_shader(struct v3d_context
*v3d
)
407 if (v3d
->prog
.bind_gs
)
408 return v3d
->prog
.bind_gs
;
410 return v3d
->prog
.bind_vs
;
414 v3dX(emit_state
)(struct pipe_context
*pctx
)
416 struct v3d_context
*v3d
= v3d_context(pctx
);
417 struct v3d_job
*job
= v3d
->job
;
418 bool rasterizer_discard
= v3d
->rasterizer
->base
.rasterizer_discard
;
420 if (v3d
->dirty
& (VC5_DIRTY_SCISSOR
| VC5_DIRTY_VIEWPORT
|
421 VC5_DIRTY_RASTERIZER
)) {
422 float *vpscale
= v3d
->viewport
.scale
;
423 float *vptranslate
= v3d
->viewport
.translate
;
424 float vp_minx
= -fabsf(vpscale
[0]) + vptranslate
[0];
425 float vp_maxx
= fabsf(vpscale
[0]) + vptranslate
[0];
426 float vp_miny
= -fabsf(vpscale
[1]) + vptranslate
[1];
427 float vp_maxy
= fabsf(vpscale
[1]) + vptranslate
[1];
429 /* Clip to the scissor if it's enabled, but still clip to the
430 * drawable regardless since that controls where the binner
431 * tries to put things.
433 * Additionally, always clip the rendering to the viewport,
434 * since the hardware does guardband clipping, meaning
435 * primitives would rasterize outside of the view volume.
437 uint32_t minx
, miny
, maxx
, maxy
;
438 if (!v3d
->rasterizer
->base
.scissor
) {
439 minx
= MAX2(vp_minx
, 0);
440 miny
= MAX2(vp_miny
, 0);
441 maxx
= MIN2(vp_maxx
, job
->draw_width
);
442 maxy
= MIN2(vp_maxy
, job
->draw_height
);
444 minx
= MAX2(vp_minx
, v3d
->scissor
.minx
);
445 miny
= MAX2(vp_miny
, v3d
->scissor
.miny
);
446 maxx
= MIN2(vp_maxx
, v3d
->scissor
.maxx
);
447 maxy
= MIN2(vp_maxy
, v3d
->scissor
.maxy
);
450 cl_emit(&job
->bcl
, CLIP_WINDOW
, clip
) {
451 clip
.clip_window_left_pixel_coordinate
= minx
;
452 clip
.clip_window_bottom_pixel_coordinate
= miny
;
453 if (maxx
> minx
&& maxy
> miny
) {
454 clip
.clip_window_width_in_pixels
= maxx
- minx
;
455 clip
.clip_window_height_in_pixels
= maxy
- miny
;
456 } else if (V3D_VERSION
< 41) {
457 /* The HW won't entirely clip out when scissor
458 * w/h is 0. Just treat it the same as
459 * rasterizer discard.
461 rasterizer_discard
= true;
462 clip
.clip_window_width_in_pixels
= 1;
463 clip
.clip_window_height_in_pixels
= 1;
467 job
->draw_min_x
= MIN2(job
->draw_min_x
, minx
);
468 job
->draw_min_y
= MIN2(job
->draw_min_y
, miny
);
469 job
->draw_max_x
= MAX2(job
->draw_max_x
, maxx
);
470 job
->draw_max_y
= MAX2(job
->draw_max_y
, maxy
);
473 if (v3d
->dirty
& (VC5_DIRTY_RASTERIZER
|
476 VC5_DIRTY_COMPILED_FS
)) {
477 cl_emit(&job
->bcl
, CFG_BITS
, config
) {
478 config
.enable_forward_facing_primitive
=
479 !rasterizer_discard
&&
480 !(v3d
->rasterizer
->base
.cull_face
&
482 config
.enable_reverse_facing_primitive
=
483 !rasterizer_discard
&&
484 !(v3d
->rasterizer
->base
.cull_face
&
486 /* This seems backwards, but it's what gets the
487 * clipflat test to pass.
489 config
.clockwise_primitives
=
490 v3d
->rasterizer
->base
.front_ccw
;
492 config
.enable_depth_offset
=
493 v3d
->rasterizer
->base
.offset_tri
;
495 /* V3D follows GL behavior where the sample mask only
496 * applies when MSAA is enabled. Gallium has sample
497 * mask apply anyway, and the MSAA blit shaders will
498 * set sample mask without explicitly setting
499 * rasterizer oversample. Just force it on here,
500 * since the blit shaders are the only way to have
501 * !multisample && samplemask != 0xf.
503 config
.rasterizer_oversample_mode
=
504 v3d
->rasterizer
->base
.multisample
||
505 v3d
->sample_mask
!= 0xf;
507 config
.direct3d_provoking_vertex
=
508 v3d
->rasterizer
->base
.flatshade_first
;
510 config
.blend_enable
= v3d
->blend
->blend_enables
;
512 /* Note: EZ state may update based on the compiled FS,
515 config
.early_z_updates_enable
=
516 (job
->ez_state
!= VC5_EZ_DISABLED
);
517 if (v3d
->zsa
->base
.depth
.enabled
) {
518 config
.z_updates_enable
=
519 v3d
->zsa
->base
.depth
.writemask
;
520 config
.early_z_enable
=
521 config
.early_z_updates_enable
;
522 config
.depth_test_function
=
523 v3d
->zsa
->base
.depth
.func
;
525 config
.depth_test_function
= PIPE_FUNC_ALWAYS
;
528 config
.stencil_enable
=
529 v3d
->zsa
->base
.stencil
[0].enabled
;
534 if (v3d
->dirty
& VC5_DIRTY_RASTERIZER
&&
535 v3d
->rasterizer
->base
.offset_tri
) {
537 job
->zsbuf
->format
== PIPE_FORMAT_Z16_UNORM
) {
538 cl_emit_prepacked_sized(&job
->bcl
,
539 v3d
->rasterizer
->depth_offset_z16
,
540 cl_packet_length(DEPTH_OFFSET
));
542 cl_emit_prepacked_sized(&job
->bcl
,
543 v3d
->rasterizer
->depth_offset
,
544 cl_packet_length(DEPTH_OFFSET
));
548 if (v3d
->dirty
& VC5_DIRTY_RASTERIZER
) {
549 cl_emit(&job
->bcl
, POINT_SIZE
, point_size
) {
550 point_size
.point_size
= v3d
->rasterizer
->point_size
;
553 cl_emit(&job
->bcl
, LINE_WIDTH
, line_width
) {
554 line_width
.line_width
= v3d
->rasterizer
->base
.line_width
;
558 if (v3d
->dirty
& VC5_DIRTY_VIEWPORT
) {
559 cl_emit(&job
->bcl
, CLIPPER_XY_SCALING
, clip
) {
560 clip
.viewport_half_width_in_1_256th_of_pixel
=
561 v3d
->viewport
.scale
[0] * 256.0f
;
562 clip
.viewport_half_height_in_1_256th_of_pixel
=
563 v3d
->viewport
.scale
[1] * 256.0f
;
566 cl_emit(&job
->bcl
, CLIPPER_Z_SCALE_AND_OFFSET
, clip
) {
567 clip
.viewport_z_offset_zc_to_zs
=
568 v3d
->viewport
.translate
[2];
569 clip
.viewport_z_scale_zc_to_zs
=
570 v3d
->viewport
.scale
[2];
572 cl_emit(&job
->bcl
, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES
, clip
) {
573 float z1
= (v3d
->viewport
.translate
[2] -
574 v3d
->viewport
.scale
[2]);
575 float z2
= (v3d
->viewport
.translate
[2] +
576 v3d
->viewport
.scale
[2]);
577 clip
.minimum_zw
= MIN2(z1
, z2
);
578 clip
.maximum_zw
= MAX2(z1
, z2
);
581 cl_emit(&job
->bcl
, VIEWPORT_OFFSET
, vp
) {
582 vp
.viewport_centre_x_coordinate
=
583 v3d
->viewport
.translate
[0];
584 vp
.viewport_centre_y_coordinate
=
585 v3d
->viewport
.translate
[1];
589 if (v3d
->dirty
& VC5_DIRTY_BLEND
) {
590 struct v3d_blend_state
*blend
= v3d
->blend
;
592 if (blend
->blend_enables
) {
593 #if V3D_VERSION >= 40
594 cl_emit(&job
->bcl
, BLEND_ENABLES
, enables
) {
595 enables
.mask
= blend
->blend_enables
;
599 if (blend
->base
.independent_blend_enable
) {
600 for (int i
= 0; i
< V3D_MAX_DRAW_BUFFERS
; i
++)
601 emit_rt_blend(v3d
, job
, &blend
->base
, i
);
603 emit_rt_blend(v3d
, job
, &blend
->base
, 0);
608 if (v3d
->dirty
& VC5_DIRTY_BLEND
) {
609 struct pipe_blend_state
*blend
= &v3d
->blend
->base
;
611 cl_emit(&job
->bcl
, COLOR_WRITE_MASKS
, mask
) {
612 for (int i
= 0; i
< 4; i
++) {
613 int rt
= blend
->independent_blend_enable
? i
: 0;
614 int rt_mask
= blend
->rt
[rt
].colormask
;
616 mask
.mask
|= translate_colormask(v3d
, rt_mask
,
622 /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
625 if (v3d
->dirty
& VC5_DIRTY_BLEND_COLOR
||
626 (V3D_VERSION
< 41 && (v3d
->dirty
& VC5_DIRTY_BLEND
))) {
627 cl_emit(&job
->bcl
, BLEND_CONSTANT_COLOR
, color
) {
628 color
.red_f16
= (v3d
->swap_color_rb
?
629 v3d
->blend_color
.hf
[2] :
630 v3d
->blend_color
.hf
[0]);
631 color
.green_f16
= v3d
->blend_color
.hf
[1];
632 color
.blue_f16
= (v3d
->swap_color_rb
?
633 v3d
->blend_color
.hf
[0] :
634 v3d
->blend_color
.hf
[2]);
635 color
.alpha_f16
= v3d
->blend_color
.hf
[3];
639 if (v3d
->dirty
& (VC5_DIRTY_ZSA
| VC5_DIRTY_STENCIL_REF
)) {
640 struct pipe_stencil_state
*front
= &v3d
->zsa
->base
.stencil
[0];
641 struct pipe_stencil_state
*back
= &v3d
->zsa
->base
.stencil
[1];
643 if (front
->enabled
) {
644 cl_emit_with_prepacked(&job
->bcl
, STENCIL_CFG
,
645 v3d
->zsa
->stencil_front
, config
) {
646 config
.stencil_ref_value
=
647 v3d
->stencil_ref
.ref_value
[0];
652 cl_emit_with_prepacked(&job
->bcl
, STENCIL_CFG
,
653 v3d
->zsa
->stencil_back
, config
) {
654 config
.stencil_ref_value
=
655 v3d
->stencil_ref
.ref_value
[1];
661 /* Pre-4.x, we have texture state that depends on both the sampler and
662 * the view, so we merge them together at draw time.
664 if (v3d
->dirty
& VC5_DIRTY_FRAGTEX
)
665 emit_textures(v3d
, &v3d
->tex
[PIPE_SHADER_FRAGMENT
]);
667 if (v3d
->dirty
& VC5_DIRTY_GEOMTEX
)
668 emit_textures(v3d
, &v3d
->tex
[PIPE_SHADER_GEOMETRY
]);
670 if (v3d
->dirty
& VC5_DIRTY_VERTTEX
)
671 emit_textures(v3d
, &v3d
->tex
[PIPE_SHADER_VERTEX
]);
674 if (v3d
->dirty
& VC5_DIRTY_FLAT_SHADE_FLAGS
) {
675 if (!emit_varying_flags(job
,
676 v3d
->prog
.fs
->prog_data
.fs
->flat_shade_flags
,
677 emit_flat_shade_flags
)) {
678 cl_emit(&job
->bcl
, ZERO_ALL_FLAT_SHADE_FLAGS
, flags
);
682 #if V3D_VERSION >= 40
683 if (v3d
->dirty
& VC5_DIRTY_NOPERSPECTIVE_FLAGS
) {
684 if (!emit_varying_flags(job
,
685 v3d
->prog
.fs
->prog_data
.fs
->noperspective_flags
,
686 emit_noperspective_flags
)) {
687 cl_emit(&job
->bcl
, ZERO_ALL_NON_PERSPECTIVE_FLAGS
, flags
);
691 if (v3d
->dirty
& VC5_DIRTY_CENTROID_FLAGS
) {
692 if (!emit_varying_flags(job
,
693 v3d
->prog
.fs
->prog_data
.fs
->centroid_flags
,
694 emit_centroid_flags
)) {
695 cl_emit(&job
->bcl
, ZERO_ALL_CENTROID_FLAGS
, flags
);
700 /* Set up the transform feedback data specs (which VPM entries to
701 * output to which buffers).
703 if (v3d
->dirty
& (VC5_DIRTY_STREAMOUT
|
704 VC5_DIRTY_RASTERIZER
|
705 VC5_DIRTY_PRIM_MODE
)) {
706 struct v3d_streamout_stateobj
*so
= &v3d
->streamout
;
707 if (so
->num_targets
) {
708 bool psiz_per_vertex
= (v3d
->prim_mode
== PIPE_PRIM_POINTS
&&
709 v3d
->rasterizer
->base
.point_size_per_vertex
);
710 struct v3d_uncompiled_shader
*tf_shader
=
712 uint16_t *tf_specs
= (psiz_per_vertex
?
713 tf_shader
->tf_specs_psiz
:
714 tf_shader
->tf_specs
);
716 #if V3D_VERSION >= 40
717 bool tf_enabled
= v3d_transform_feedback_enabled(v3d
);
718 job
->tf_enabled
|= tf_enabled
;
720 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_SPECS
, tfe
) {
721 tfe
.number_of_16_bit_output_data_specs_following
=
722 tf_shader
->num_tf_specs
;
723 tfe
.enable
= tf_enabled
;
725 #else /* V3D_VERSION < 40 */
726 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_ENABLE
, tfe
) {
727 tfe
.number_of_32_bit_output_buffer_address_following
=
729 tfe
.number_of_16_bit_output_data_specs_following
=
730 tf_shader
->num_tf_specs
;
732 #endif /* V3D_VERSION < 40 */
733 for (int i
= 0; i
< tf_shader
->num_tf_specs
; i
++) {
734 cl_emit_prepacked(&job
->bcl
, &tf_specs
[i
]);
737 #if V3D_VERSION >= 40
738 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_SPECS
, tfe
) {
741 #endif /* V3D_VERSION >= 40 */
745 /* Set up the transform feedback buffers. */
746 if (v3d
->dirty
& VC5_DIRTY_STREAMOUT
) {
747 struct v3d_uncompiled_shader
*tf_shader
= get_tf_shader(v3d
);
748 struct v3d_streamout_stateobj
*so
= &v3d
->streamout
;
749 for (int i
= 0; i
< so
->num_targets
; i
++) {
750 const struct pipe_stream_output_target
*target
=
752 struct v3d_resource
*rsc
= target
?
753 v3d_resource(target
->buffer
) : NULL
;
754 struct pipe_shader_state
*ss
= &tf_shader
->base
;
755 struct pipe_stream_output_info
*info
= &ss
->stream_output
;
756 uint32_t offset
= (v3d
->streamout
.offsets
[i
] *
757 info
->stride
[i
] * 4);
759 #if V3D_VERSION >= 40
763 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_BUFFER
, output
) {
764 output
.buffer_address
=
766 target
->buffer_offset
+
768 output
.buffer_size_in_32_bit_words
=
769 (target
->buffer_size
- offset
) >> 2;
770 output
.buffer_number
= i
;
772 #else /* V3D_VERSION < 40 */
773 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS
, output
) {
777 target
->buffer_offset
+
781 #endif /* V3D_VERSION < 40 */
783 v3d_job_add_tf_write_resource(v3d
->job
,
786 /* XXX: buffer_size? */
790 if (v3d
->dirty
& VC5_DIRTY_OQ
) {
791 cl_emit(&job
->bcl
, OCCLUSION_QUERY_COUNTER
, counter
) {
792 if (v3d
->active_queries
&& v3d
->current_oq
) {
793 counter
.address
= cl_address(v3d
->current_oq
, 0);
798 #if V3D_VERSION >= 40
799 if (v3d
->dirty
& VC5_DIRTY_SAMPLE_STATE
) {
800 cl_emit(&job
->bcl
, SAMPLE_STATE
, state
) {
801 /* Note: SampleCoverage was handled at the
802 * state_tracker level by converting to sample_mask.
804 state
.coverage
= 1.0;
805 state
.mask
= job
->msaa
? v3d
->sample_mask
: 0xf;