2 * Copyright © 2014-2017 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "util/u_format.h"
25 #include "util/u_half.h"
26 #include "vc5_context.h"
27 #include "broadcom/cle/v3d_packet_v33_pack.h"
28 #include "broadcom/compiler/v3d_compiler.h"
31 vc5_factor(enum pipe_blendfactor factor
, bool dst_alpha_one
)
33 /* We may get a bad blendfactor when blending is disabled. */
35 return V3D_BLEND_FACTOR_ZERO
;
38 case PIPE_BLENDFACTOR_ZERO
:
39 return V3D_BLEND_FACTOR_ZERO
;
40 case PIPE_BLENDFACTOR_ONE
:
41 return V3D_BLEND_FACTOR_ONE
;
42 case PIPE_BLENDFACTOR_SRC_COLOR
:
43 return V3D_BLEND_FACTOR_SRC_COLOR
;
44 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
45 return V3D_BLEND_FACTOR_INV_SRC_COLOR
;
46 case PIPE_BLENDFACTOR_DST_COLOR
:
47 return V3D_BLEND_FACTOR_DST_COLOR
;
48 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
49 return V3D_BLEND_FACTOR_INV_DST_COLOR
;
50 case PIPE_BLENDFACTOR_SRC_ALPHA
:
51 return V3D_BLEND_FACTOR_SRC_ALPHA
;
52 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
53 return V3D_BLEND_FACTOR_INV_SRC_ALPHA
;
54 case PIPE_BLENDFACTOR_DST_ALPHA
:
55 return (dst_alpha_one
?
56 V3D_BLEND_FACTOR_ONE
:
57 V3D_BLEND_FACTOR_DST_ALPHA
);
58 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
59 return (dst_alpha_one
?
60 V3D_BLEND_FACTOR_ZERO
:
61 V3D_BLEND_FACTOR_INV_DST_ALPHA
);
62 case PIPE_BLENDFACTOR_CONST_COLOR
:
63 return V3D_BLEND_FACTOR_CONST_COLOR
;
64 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
65 return V3D_BLEND_FACTOR_INV_CONST_COLOR
;
66 case PIPE_BLENDFACTOR_CONST_ALPHA
:
67 return V3D_BLEND_FACTOR_CONST_ALPHA
;
68 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
69 return V3D_BLEND_FACTOR_INV_CONST_ALPHA
;
70 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
71 return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE
;
73 unreachable("Bad blend factor");
77 static inline uint16_t
78 swizzled_border_color(struct pipe_sampler_state
*sampler
,
79 struct vc5_sampler_view
*sview
,
82 const struct util_format_description
*desc
=
83 util_format_description(sview
->base
.format
);
86 /* If we're doing swizzling in the sampler, then only rearrange the
87 * border color for the mismatch between the VC5 texture format and
88 * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
89 * the sampler's swizzle.
91 * For swizzling in the shader, we don't do any pre-swizzling of the
94 if (vc5_get_tex_return_size(sview
->base
.format
) != 32)
95 swiz
= desc
->swizzle
[swiz
];
99 return util_float_to_half(0.0);
101 return util_float_to_half(1.0);
103 return util_float_to_half(sampler
->border_color
.f
[swiz
]);
108 emit_one_texture(struct vc5_context
*vc5
, struct vc5_texture_stateobj
*stage_tex
,
111 struct vc5_job
*job
= vc5
->job
;
112 struct pipe_sampler_state
*psampler
= stage_tex
->samplers
[i
];
113 struct vc5_sampler_state
*sampler
= vc5_sampler_state(psampler
);
114 struct pipe_sampler_view
*psview
= stage_tex
->textures
[i
];
115 struct vc5_sampler_view
*sview
= vc5_sampler_view(psview
);
116 struct pipe_resource
*prsc
= psview
->texture
;
117 struct vc5_resource
*rsc
= vc5_resource(prsc
);
119 stage_tex
->texture_state
[i
].offset
=
120 vc5_cl_ensure_space(&job
->indirect
,
121 cl_packet_length(TEXTURE_SHADER_STATE
),
123 vc5_bo_set_reference(&stage_tex
->texture_state
[i
].bo
,
126 struct V3D33_TEXTURE_SHADER_STATE unpacked
= {
128 .border_color_red
= swizzled_border_color(psampler
, sview
, 0),
129 .border_color_green
= swizzled_border_color(psampler
, sview
, 1),
130 .border_color_blue
= swizzled_border_color(psampler
, sview
, 2),
131 .border_color_alpha
= swizzled_border_color(psampler
, sview
, 3),
133 /* In the normal texturing path, the LOD gets clamped between
134 * min/max, and the base_level field (set in the sampler view
135 * from first_level) only decides where the min/mag switch
136 * happens, so we need to use the LOD clamps to keep us
137 * between min and max.
139 * For txf, the LOD clamp is still used, despite GL not
140 * wanting that. We will need to have a separate
141 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
142 * support txf properly.
144 .min_level_of_detail
= MIN2(psview
->u
.tex
.first_level
+
145 MAX2(psampler
->min_lod
, 0),
146 psview
->u
.tex
.last_level
),
147 .max_level_of_detail
= MIN2(psview
->u
.tex
.first_level
+
149 psview
->u
.tex
.last_level
),
151 .texture_base_pointer
= cl_address(rsc
->bo
,
152 rsc
->slices
[0].offset
),
155 int min_img_filter
= psampler
->min_img_filter
;
156 int min_mip_filter
= psampler
->min_mip_filter
;
157 int mag_img_filter
= psampler
->mag_img_filter
;
159 if (vc5_get_tex_return_size(psview
->format
) == 32) {
160 min_mip_filter
= PIPE_TEX_MIPFILTER_NEAREST
;
161 mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
162 mag_img_filter
= PIPE_TEX_FILTER_NEAREST
;
165 bool min_nearest
= min_img_filter
== PIPE_TEX_FILTER_NEAREST
;
166 switch (min_mip_filter
) {
167 case PIPE_TEX_MIPFILTER_NONE
:
168 unpacked
.filter
+= min_nearest
? 2 : 0;
170 case PIPE_TEX_MIPFILTER_NEAREST
:
171 unpacked
.filter
+= min_nearest
? 4 : 8;
173 case PIPE_TEX_MIPFILTER_LINEAR
:
174 unpacked
.filter
+= min_nearest
? 4 : 8;
175 unpacked
.filter
+= 2;
179 if (mag_img_filter
== PIPE_TEX_FILTER_NEAREST
)
182 if (psampler
->max_anisotropy
> 8)
183 unpacked
.filter
= V3D_TMU_FILTER_ANISOTROPIC_16_1
;
184 else if (psampler
->max_anisotropy
> 4)
185 unpacked
.filter
= V3D_TMU_FILTER_ANISOTROPIC_8_1
;
186 else if (psampler
->max_anisotropy
> 2)
187 unpacked
.filter
= V3D_TMU_FILTER_ANISOTROPIC_4_1
;
188 else if (psampler
->max_anisotropy
)
189 unpacked
.filter
= V3D_TMU_FILTER_ANISOTROPIC_2_1
;
191 uint8_t packed
[cl_packet_length(TEXTURE_SHADER_STATE
)];
192 cl_packet_pack(TEXTURE_SHADER_STATE
)(&job
->indirect
, packed
, &unpacked
);
194 for (int i
= 0; i
< ARRAY_SIZE(packed
); i
++)
195 packed
[i
] |= sview
->texture_shader_state
[i
] | sampler
->texture_shader_state
[i
];
197 /* TMU indirect structs need to be 32b aligned. */
198 vc5_cl_ensure_space(&job
->indirect
, ARRAY_SIZE(packed
), 32);
199 cl_emit_prepacked(&job
->indirect
, &packed
);
203 emit_textures(struct vc5_context
*vc5
, struct vc5_texture_stateobj
*stage_tex
)
205 for (int i
= 0; i
< stage_tex
->num_textures
; i
++) {
206 if (stage_tex
->textures
[i
])
207 emit_one_texture(vc5
, stage_tex
, i
);
212 translate_colormask(struct vc5_context
*vc5
, uint32_t colormask
, int rt
)
214 if (vc5
->swap_color_rb
& (1 << rt
)) {
215 colormask
= ((colormask
& (2 | 8)) |
216 ((colormask
& 1) << 2) |
217 ((colormask
& 4) >> 2));
220 return (~colormask
) & 0xf;
224 vc5_emit_state(struct pipe_context
*pctx
)
226 struct vc5_context
*vc5
= vc5_context(pctx
);
227 struct vc5_job
*job
= vc5
->job
;
229 if (vc5
->dirty
& (VC5_DIRTY_SCISSOR
| VC5_DIRTY_VIEWPORT
|
230 VC5_DIRTY_RASTERIZER
)) {
231 float *vpscale
= vc5
->viewport
.scale
;
232 float *vptranslate
= vc5
->viewport
.translate
;
233 float vp_minx
= -fabsf(vpscale
[0]) + vptranslate
[0];
234 float vp_maxx
= fabsf(vpscale
[0]) + vptranslate
[0];
235 float vp_miny
= -fabsf(vpscale
[1]) + vptranslate
[1];
236 float vp_maxy
= fabsf(vpscale
[1]) + vptranslate
[1];
238 /* Clip to the scissor if it's enabled, but still clip to the
239 * drawable regardless since that controls where the binner
240 * tries to put things.
242 * Additionally, always clip the rendering to the viewport,
243 * since the hardware does guardband clipping, meaning
244 * primitives would rasterize outside of the view volume.
246 uint32_t minx
, miny
, maxx
, maxy
;
247 if (!vc5
->rasterizer
->base
.scissor
) {
248 minx
= MAX2(vp_minx
, 0);
249 miny
= MAX2(vp_miny
, 0);
250 maxx
= MIN2(vp_maxx
, job
->draw_width
);
251 maxy
= MIN2(vp_maxy
, job
->draw_height
);
253 minx
= MAX2(vp_minx
, vc5
->scissor
.minx
);
254 miny
= MAX2(vp_miny
, vc5
->scissor
.miny
);
255 maxx
= MIN2(vp_maxx
, vc5
->scissor
.maxx
);
256 maxy
= MIN2(vp_maxy
, vc5
->scissor
.maxy
);
259 cl_emit(&job
->bcl
, CLIP_WINDOW
, clip
) {
260 clip
.clip_window_left_pixel_coordinate
= minx
;
261 clip
.clip_window_bottom_pixel_coordinate
= miny
;
262 clip
.clip_window_width_in_pixels
= maxx
- minx
;
263 clip
.clip_window_height_in_pixels
= maxy
- miny
;
266 job
->draw_min_x
= MIN2(job
->draw_min_x
, minx
);
267 job
->draw_min_y
= MIN2(job
->draw_min_y
, miny
);
268 job
->draw_max_x
= MAX2(job
->draw_max_x
, maxx
);
269 job
->draw_max_y
= MAX2(job
->draw_max_y
, maxy
);
272 if (vc5
->dirty
& (VC5_DIRTY_RASTERIZER
|
275 VC5_DIRTY_COMPILED_FS
)) {
276 cl_emit(&job
->bcl
, CONFIGURATION_BITS
, config
) {
277 config
.enable_forward_facing_primitive
=
278 !vc5
->rasterizer
->base
.rasterizer_discard
&&
279 !(vc5
->rasterizer
->base
.cull_face
&
281 config
.enable_reverse_facing_primitive
=
282 !vc5
->rasterizer
->base
.rasterizer_discard
&&
283 !(vc5
->rasterizer
->base
.cull_face
&
285 /* This seems backwards, but it's what gets the
286 * clipflat test to pass.
288 config
.clockwise_primitives
=
289 vc5
->rasterizer
->base
.front_ccw
;
291 config
.enable_depth_offset
=
292 vc5
->rasterizer
->base
.offset_tri
;
294 config
.rasterizer_oversample_mode
=
295 vc5
->rasterizer
->base
.multisample
;
297 config
.direct3d_provoking_vertex
=
298 vc5
->rasterizer
->base
.flatshade_first
;
300 config
.blend_enable
= vc5
->blend
->rt
[0].blend_enable
;
302 config
.early_z_updates_enable
= true;
303 if (vc5
->zsa
->base
.depth
.enabled
) {
304 config
.z_updates_enable
=
305 vc5
->zsa
->base
.depth
.writemask
;
306 config
.early_z_enable
=
307 (vc5
->zsa
->early_z_enable
&&
308 !vc5
->prog
.fs
->prog_data
.fs
->writes_z
);
309 config
.depth_test_function
=
310 vc5
->zsa
->base
.depth
.func
;
312 config
.depth_test_function
= PIPE_FUNC_ALWAYS
;
315 config
.stencil_enable
=
316 vc5
->zsa
->base
.stencil
[0].enabled
;
321 if (vc5
->dirty
& VC5_DIRTY_RASTERIZER
&&
322 vc5
->rasterizer
->base
.offset_tri
) {
323 cl_emit(&job
->bcl
, DEPTH_OFFSET
, depth
) {
324 depth
.depth_offset_factor
=
325 vc5
->rasterizer
->offset_factor
;
326 depth
.depth_offset_units
=
327 vc5
->rasterizer
->offset_units
;
331 if (vc5
->dirty
& VC5_DIRTY_RASTERIZER
) {
332 cl_emit(&job
->bcl
, POINT_SIZE
, point_size
) {
333 point_size
.point_size
= vc5
->rasterizer
->point_size
;
336 cl_emit(&job
->bcl
, LINE_WIDTH
, line_width
) {
337 line_width
.line_width
= vc5
->rasterizer
->base
.line_width
;
341 if (vc5
->dirty
& VC5_DIRTY_VIEWPORT
) {
342 cl_emit(&job
->bcl
, CLIPPER_XY_SCALING
, clip
) {
343 clip
.viewport_half_width_in_1_256th_of_pixel
=
344 vc5
->viewport
.scale
[0] * 256.0f
;
345 clip
.viewport_half_height_in_1_256th_of_pixel
=
346 vc5
->viewport
.scale
[1] * 256.0f
;
349 cl_emit(&job
->bcl
, CLIPPER_Z_SCALE_AND_OFFSET
, clip
) {
350 clip
.viewport_z_offset_zc_to_zs
=
351 vc5
->viewport
.translate
[2];
352 clip
.viewport_z_scale_zc_to_zs
=
353 vc5
->viewport
.scale
[2];
355 cl_emit(&job
->bcl
, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES
, clip
) {
356 clip
.minimum_zw
= (vc5
->viewport
.translate
[2] -
357 vc5
->viewport
.scale
[2]);
358 clip
.maximum_zw
= (vc5
->viewport
.translate
[2] +
359 vc5
->viewport
.scale
[2]);
362 cl_emit(&job
->bcl
, VIEWPORT_OFFSET
, vp
) {
363 vp
.viewport_centre_x_coordinate
=
364 vc5
->viewport
.translate
[0];
365 vp
.viewport_centre_y_coordinate
=
366 vc5
->viewport
.translate
[1];
370 if (vc5
->dirty
& VC5_DIRTY_BLEND
&& vc5
->blend
->rt
[0].blend_enable
) {
371 struct pipe_blend_state
*blend
= vc5
->blend
;
373 cl_emit(&job
->bcl
, BLEND_CONFIG
, config
) {
374 struct pipe_rt_blend_state
*rtblend
= &blend
->rt
[0];
376 config
.colour_blend_mode
= rtblend
->rgb_func
;
377 config
.colour_blend_dst_factor
=
378 vc5_factor(rtblend
->rgb_dst_factor
,
379 vc5
->blend_dst_alpha_one
);
380 config
.colour_blend_src_factor
=
381 vc5_factor(rtblend
->rgb_src_factor
,
382 vc5
->blend_dst_alpha_one
);
384 config
.alpha_blend_mode
= rtblend
->alpha_func
;
385 config
.alpha_blend_dst_factor
=
386 vc5_factor(rtblend
->alpha_dst_factor
,
387 vc5
->blend_dst_alpha_one
);
388 config
.alpha_blend_src_factor
=
389 vc5_factor(rtblend
->alpha_src_factor
,
390 vc5
->blend_dst_alpha_one
);
394 if (vc5
->dirty
& VC5_DIRTY_BLEND
) {
395 struct pipe_blend_state
*blend
= vc5
->blend
;
397 cl_emit(&job
->bcl
, COLOUR_WRITE_MASKS
, mask
) {
398 if (blend
->independent_blend_enable
) {
399 mask
.render_target_0_per_colour_component_write_masks
=
400 translate_colormask(vc5
, blend
->rt
[0].colormask
, 0);
401 mask
.render_target_1_per_colour_component_write_masks
=
402 translate_colormask(vc5
, blend
->rt
[1].colormask
, 1);
403 mask
.render_target_2_per_colour_component_write_masks
=
404 translate_colormask(vc5
, blend
->rt
[2].colormask
, 2);
405 mask
.render_target_3_per_colour_component_write_masks
=
406 translate_colormask(vc5
, blend
->rt
[3].colormask
, 3);
408 mask
.render_target_0_per_colour_component_write_masks
=
409 translate_colormask(vc5
, blend
->rt
[0].colormask
, 0);
410 mask
.render_target_1_per_colour_component_write_masks
=
411 translate_colormask(vc5
, blend
->rt
[0].colormask
, 1);
412 mask
.render_target_2_per_colour_component_write_masks
=
413 translate_colormask(vc5
, blend
->rt
[0].colormask
, 2);
414 mask
.render_target_3_per_colour_component_write_masks
=
415 translate_colormask(vc5
, blend
->rt
[0].colormask
, 3);
420 if (vc5
->dirty
& VC5_DIRTY_BLEND_COLOR
) {
421 cl_emit(&job
->bcl
, BLEND_CONSTANT_COLOUR
, colour
) {
422 colour
.red_f16
= (vc5
->swap_color_rb
?
423 vc5
->blend_color
.hf
[2] :
424 vc5
->blend_color
.hf
[0]);
425 colour
.green_f16
= vc5
->blend_color
.hf
[1];
426 colour
.blue_f16
= (vc5
->swap_color_rb
?
427 vc5
->blend_color
.hf
[0] :
428 vc5
->blend_color
.hf
[2]);
429 colour
.alpha_f16
= vc5
->blend_color
.hf
[3];
433 if (vc5
->dirty
& (VC5_DIRTY_ZSA
| VC5_DIRTY_STENCIL_REF
)) {
434 struct pipe_stencil_state
*front
= &vc5
->zsa
->base
.stencil
[0];
435 struct pipe_stencil_state
*back
= &vc5
->zsa
->base
.stencil
[1];
437 if (front
->enabled
) {
438 cl_emit_with_prepacked(&job
->bcl
, STENCIL_CONFIG
,
439 vc5
->zsa
->stencil_front
, config
) {
440 config
.stencil_ref_value
=
441 vc5
->stencil_ref
.ref_value
[0];
446 cl_emit_with_prepacked(&job
->bcl
, STENCIL_CONFIG
,
447 vc5
->zsa
->stencil_back
, config
) {
448 config
.stencil_ref_value
=
449 vc5
->stencil_ref
.ref_value
[1];
454 if (vc5
->dirty
& VC5_DIRTY_FRAGTEX
)
455 emit_textures(vc5
, &vc5
->fragtex
);
457 if (vc5
->dirty
& VC5_DIRTY_VERTTEX
)
458 emit_textures(vc5
, &vc5
->verttex
);
460 if (vc5
->dirty
& VC5_DIRTY_FLAT_SHADE_FLAGS
) {
461 /* XXX: Need to handle more than 24 entries. */
462 cl_emit(&job
->bcl
, FLAT_SHADE_FLAGS
, flags
) {
463 flags
.varying_offset_v0
= 0;
465 flags
.flat_shade_flags_for_varyings_v024
=
466 vc5
->prog
.fs
->prog_data
.fs
->flat_shade_flags
[0] & 0xfffff;
468 if (vc5
->rasterizer
->base
.flatshade
) {
469 flags
.flat_shade_flags_for_varyings_v024
|=
470 vc5
->prog
.fs
->prog_data
.fs
->shade_model_flags
[0] & 0xfffff;
475 if (vc5
->dirty
& VC5_DIRTY_STREAMOUT
) {
476 struct vc5_streamout_stateobj
*so
= &vc5
->streamout
;
478 if (so
->num_targets
) {
479 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_ENABLE
, tfe
) {
480 tfe
.number_of_32_bit_output_buffer_address_following
=
482 tfe
.number_of_16_bit_output_data_specs_following
=
483 vc5
->prog
.bind_vs
->num_tf_specs
;
486 for (int i
= 0; i
< vc5
->prog
.bind_vs
->num_tf_specs
; i
++) {
487 cl_emit_prepacked(&job
->bcl
,
488 &vc5
->prog
.bind_vs
->tf_specs
[i
]);
491 for (int i
= 0; i
< so
->num_targets
; i
++) {
492 const struct pipe_stream_output_target
*target
=
494 struct vc5_resource
*rsc
=
495 vc5_resource(target
->buffer
);
497 cl_emit(&job
->bcl
, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS
, output
) {
500 target
->buffer_offset
);
503 vc5_job_add_write_resource(vc5
->job
,
505 /* XXX: buffer_size? */
512 if (vc5
->dirty
& VC5_DIRTY_OQ
) {
513 cl_emit(&job
->bcl
, OCCLUSION_QUERY_COUNTER
, counter
) {
514 job
->oq_enabled
= vc5
->active_queries
&& vc5
->current_oq
;
515 if (job
->oq_enabled
) {
516 counter
.address
= cl_address(vc5
->current_oq
, 0);