broadcom/vc5: Do BGRA vs RGBA swapping for the BLEND_CONSTANT_COLOR.
[mesa.git] / src / gallium / drivers / vc5 / vc5_emit.c
1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/u_format.h"
25 #include "util/u_half.h"
26 #include "vc5_context.h"
27 #include "broadcom/cle/v3d_packet_v33_pack.h"
28 #include "broadcom/compiler/v3d_compiler.h"
29
30 static uint8_t
31 vc5_factor(enum pipe_blendfactor factor)
32 {
33 /* We may get a bad blendfactor when blending is disabled. */
34 if (factor == 0)
35 return V3D_BLEND_FACTOR_ZERO;
36
37 switch (factor) {
38 case PIPE_BLENDFACTOR_ZERO:
39 return V3D_BLEND_FACTOR_ZERO;
40 case PIPE_BLENDFACTOR_ONE:
41 return V3D_BLEND_FACTOR_ONE;
42 case PIPE_BLENDFACTOR_SRC_COLOR:
43 return V3D_BLEND_FACTOR_SRC_COLOR;
44 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
45 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
46 case PIPE_BLENDFACTOR_DST_COLOR:
47 return V3D_BLEND_FACTOR_DST_COLOR;
48 case PIPE_BLENDFACTOR_INV_DST_COLOR:
49 return V3D_BLEND_FACTOR_INV_DST_COLOR;
50 case PIPE_BLENDFACTOR_SRC_ALPHA:
51 return V3D_BLEND_FACTOR_SRC_ALPHA;
52 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
53 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
54 case PIPE_BLENDFACTOR_DST_ALPHA:
55 return V3D_BLEND_FACTOR_DST_ALPHA;
56 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
57 return V3D_BLEND_FACTOR_INV_DST_ALPHA;
58 case PIPE_BLENDFACTOR_CONST_COLOR:
59 return V3D_BLEND_FACTOR_CONST_COLOR;
60 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
61 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
62 case PIPE_BLENDFACTOR_CONST_ALPHA:
63 return V3D_BLEND_FACTOR_CONST_ALPHA;
64 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
65 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
66 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
67 return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
68 default:
69 unreachable("Bad blend factor");
70 }
71 }
72
73 static inline uint16_t
74 swizzled_border_color(struct pipe_sampler_state *sampler,
75 struct vc5_sampler_view *sview,
76 int chan)
77 {
78 const struct util_format_description *desc =
79 util_format_description(sview->base.format);
80 uint8_t swiz = chan;
81
82 /* If we're doing swizzling in the sampler, then only rearrange the
83 * border color for the mismatch between the VC5 texture format and
84 * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
85 * the sampler's swizzle.
86 *
87 * For swizzling in the shader, we don't do any pre-swizzling of the
88 * border color.
89 */
90 if (vc5_get_tex_return_size(sview->base.format) != 32)
91 swiz = desc->swizzle[swiz];
92
93 switch (swiz) {
94 case PIPE_SWIZZLE_0:
95 return util_float_to_half(0.0);
96 case PIPE_SWIZZLE_1:
97 return util_float_to_half(1.0);
98 default:
99 return util_float_to_half(sampler->border_color.f[swiz]);
100 }
101 }
102
103 static void
104 emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex,
105 int i)
106 {
107 struct vc5_job *job = vc5->job;
108 struct pipe_sampler_state *psampler = stage_tex->samplers[i];
109 struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
110 struct pipe_sampler_view *psview = stage_tex->textures[i];
111 struct vc5_sampler_view *sview = vc5_sampler_view(psview);
112 struct pipe_resource *prsc = psview->texture;
113 struct vc5_resource *rsc = vc5_resource(prsc);
114
115 stage_tex->texture_state[i].offset =
116 vc5_cl_ensure_space(&job->indirect,
117 cl_packet_length(TEXTURE_SHADER_STATE),
118 32);
119 vc5_bo_set_reference(&stage_tex->texture_state[i].bo,
120 job->indirect.bo);
121
122 struct V3D33_TEXTURE_SHADER_STATE unpacked = {
123 /* XXX */
124 .border_color_red = swizzled_border_color(psampler, sview, 0),
125 .border_color_green = swizzled_border_color(psampler, sview, 1),
126 .border_color_blue = swizzled_border_color(psampler, sview, 2),
127 .border_color_alpha = swizzled_border_color(psampler, sview, 3),
128
129 /* XXX: Disable min/maxlod for txf */
130 .max_level_of_detail = MIN2(MIN2(psampler->max_lod,
131 VC5_MAX_MIP_LEVELS),
132 psview->u.tex.last_level),
133
134 .texture_base_pointer = cl_address(rsc->bo,
135 rsc->slices[0].offset),
136 };
137
138 int min_img_filter = psampler->min_img_filter;
139 int min_mip_filter = psampler->min_mip_filter;
140 int mag_img_filter = psampler->mag_img_filter;
141
142 if (vc5_get_tex_return_size(psview->format) == 32) {
143 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
144 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
145 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
146 }
147
148 bool min_nearest = (min_img_filter == PIPE_TEX_FILTER_NEAREST);
149 switch (min_mip_filter) {
150 case PIPE_TEX_MIPFILTER_NONE:
151 unpacked.minification_filter = 0 + min_nearest;
152 break;
153 case PIPE_TEX_MIPFILTER_NEAREST:
154 unpacked.minification_filter = 2 + !min_nearest;
155 break;
156 case PIPE_TEX_MIPFILTER_LINEAR:
157 unpacked.minification_filter = 4 + !min_nearest;
158 break;
159 }
160 unpacked.magnification_filter = (mag_img_filter ==
161 PIPE_TEX_FILTER_NEAREST);
162
163 uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
164 cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
165
166 for (int i = 0; i < ARRAY_SIZE(packed); i++)
167 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
168
169 cl_emit_prepacked(&job->indirect, &packed);
170 }
171
172 static void
173 emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex)
174 {
175 for (int i = 0; i < stage_tex->num_textures; i++)
176 emit_one_texture(vc5, stage_tex, i);
177 }
178
179 void
180 vc5_emit_state(struct pipe_context *pctx)
181 {
182 struct vc5_context *vc5 = vc5_context(pctx);
183 struct vc5_job *job = vc5->job;
184
185 if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
186 VC5_DIRTY_RASTERIZER)) {
187 float *vpscale = vc5->viewport.scale;
188 float *vptranslate = vc5->viewport.translate;
189 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
190 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
191 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
192 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
193
194 /* Clip to the scissor if it's enabled, but still clip to the
195 * drawable regardless since that controls where the binner
196 * tries to put things.
197 *
198 * Additionally, always clip the rendering to the viewport,
199 * since the hardware does guardband clipping, meaning
200 * primitives would rasterize outside of the view volume.
201 */
202 uint32_t minx, miny, maxx, maxy;
203 if (!vc5->rasterizer->base.scissor) {
204 minx = MAX2(vp_minx, 0);
205 miny = MAX2(vp_miny, 0);
206 maxx = MIN2(vp_maxx, job->draw_width);
207 maxy = MIN2(vp_maxy, job->draw_height);
208 } else {
209 minx = MAX2(vp_minx, vc5->scissor.minx);
210 miny = MAX2(vp_miny, vc5->scissor.miny);
211 maxx = MIN2(vp_maxx, vc5->scissor.maxx);
212 maxy = MIN2(vp_maxy, vc5->scissor.maxy);
213 }
214
215 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
216 clip.clip_window_left_pixel_coordinate = minx;
217 clip.clip_window_bottom_pixel_coordinate = miny;
218 clip.clip_window_width_in_pixels = maxx - minx;
219 clip.clip_window_height_in_pixels = maxy - miny;
220 }
221
222 job->draw_min_x = MIN2(job->draw_min_x, minx);
223 job->draw_min_y = MIN2(job->draw_min_y, miny);
224 job->draw_max_x = MAX2(job->draw_max_x, maxx);
225 job->draw_max_y = MAX2(job->draw_max_y, maxy);
226 }
227
228 if (vc5->dirty & (VC5_DIRTY_RASTERIZER |
229 VC5_DIRTY_ZSA |
230 VC5_DIRTY_BLEND |
231 VC5_DIRTY_COMPILED_FS)) {
232 cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
233 config.enable_forward_facing_primitive =
234 !(vc5->rasterizer->base.cull_face &
235 PIPE_FACE_FRONT);
236 config.enable_reverse_facing_primitive =
237 !(vc5->rasterizer->base.cull_face &
238 PIPE_FACE_BACK);
239 /* This seems backwards, but it's what gets the
240 * clipflat test to pass.
241 */
242 config.clockwise_primitives =
243 vc5->rasterizer->base.front_ccw;
244
245 config.enable_depth_offset =
246 vc5->rasterizer->base.offset_tri;
247
248 config.rasterizer_oversample_mode =
249 vc5->rasterizer->base.multisample;
250
251 config.direct3d_provoking_vertex =
252 vc5->rasterizer->base.flatshade_first;
253
254 config.blend_enable = vc5->blend->rt[0].blend_enable;
255
256 config.early_z_updates_enable = true;
257 if (vc5->zsa->base.depth.enabled) {
258 config.z_updates_enable =
259 vc5->zsa->base.depth.writemask;
260 config.early_z_enable =
261 vc5->zsa->early_z_enable;
262 config.depth_test_function =
263 vc5->zsa->base.depth.func;
264 } else {
265 config.depth_test_function = PIPE_FUNC_ALWAYS;
266 }
267
268 config.stencil_enable =
269 vc5->zsa->base.stencil[0].enabled;
270 }
271
272 }
273
274 if (vc5->dirty & VC5_DIRTY_RASTERIZER) {
275 cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
276 depth.depth_offset_factor =
277 vc5->rasterizer->offset_factor;
278 depth.depth_offset_units =
279 vc5->rasterizer->offset_units;
280 }
281
282 cl_emit(&job->bcl, POINT_SIZE, point_size) {
283 point_size.point_size = vc5->rasterizer->point_size;
284 }
285
286 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
287 line_width.line_width = vc5->rasterizer->base.line_width;
288 }
289 }
290
291 if (vc5->dirty & VC5_DIRTY_VIEWPORT) {
292 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
293 clip.viewport_half_width_in_1_256th_of_pixel =
294 vc5->viewport.scale[0] * 256.0f;
295 clip.viewport_half_height_in_1_256th_of_pixel =
296 vc5->viewport.scale[1] * 256.0f;
297 }
298
299 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
300 clip.viewport_z_offset_zc_to_zs =
301 vc5->viewport.translate[2];
302 clip.viewport_z_scale_zc_to_zs =
303 vc5->viewport.scale[2];
304 }
305 if (0 /* XXX */) {
306 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
307 clip.minimum_zw = (vc5->viewport.translate[2] -
308 vc5->viewport.scale[2]);
309 clip.maximum_zw = (vc5->viewport.translate[2] +
310 vc5->viewport.scale[2]);
311 }
312 }
313
314 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
315 vp.viewport_centre_x_coordinate =
316 vc5->viewport.translate[0];
317 vp.viewport_centre_y_coordinate =
318 vc5->viewport.translate[1];
319 }
320 }
321
322 if (vc5->dirty & VC5_DIRTY_BLEND) {
323 struct pipe_blend_state *blend = vc5->blend;
324
325 cl_emit(&job->bcl, BLEND_CONFIG, config) {
326 struct pipe_rt_blend_state *rtblend = &blend->rt[0];
327
328 config.colour_blend_mode = rtblend->rgb_func;
329 config.colour_blend_dst_factor =
330 vc5_factor(rtblend->rgb_dst_factor);
331 config.colour_blend_src_factor =
332 vc5_factor(rtblend->rgb_src_factor);
333
334 config.alpha_blend_mode = rtblend->alpha_func;
335 config.alpha_blend_dst_factor =
336 vc5_factor(rtblend->alpha_dst_factor);
337 config.alpha_blend_src_factor =
338 vc5_factor(rtblend->alpha_src_factor);
339 }
340
341 cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
342 if (blend->independent_blend_enable) {
343 mask.render_target_0_per_colour_component_write_masks =
344 (~blend->rt[0].colormask) & 0xf;
345 mask.render_target_1_per_colour_component_write_masks =
346 (~blend->rt[1].colormask) & 0xf;
347 mask.render_target_2_per_colour_component_write_masks =
348 (~blend->rt[2].colormask) & 0xf;
349 mask.render_target_3_per_colour_component_write_masks =
350 (~blend->rt[3].colormask) & 0xf;
351 } else {
352 uint8_t colormask = (~blend->rt[0].colormask) & 0xf;
353 mask.render_target_0_per_colour_component_write_masks = colormask;
354 mask.render_target_1_per_colour_component_write_masks = colormask;
355 mask.render_target_2_per_colour_component_write_masks = colormask;
356 mask.render_target_3_per_colour_component_write_masks = colormask;
357 }
358 }
359 }
360
361 if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) {
362 cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
363 colour.red_f16 = (vc5->swap_color_rb ?
364 vc5->blend_color.hf[2] :
365 vc5->blend_color.hf[0]);
366 colour.green_f16 = vc5->blend_color.hf[1];
367 colour.blue_f16 = (vc5->swap_color_rb ?
368 vc5->blend_color.hf[0] :
369 vc5->blend_color.hf[2]);
370 colour.alpha_f16 = vc5->blend_color.hf[3];
371 }
372 }
373
374 if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
375 struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0];
376 struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1];
377
378 cl_emit(&job->bcl, STENCIL_CONFIG, config) {
379 config.front_config = true;
380 config.back_config = !back->enabled;
381
382 config.stencil_write_mask = front->writemask;
383 config.stencil_test_mask = front->valuemask;
384
385 config.stencil_test_function = front->func;
386 config.stencil_pass_op = front->zpass_op;
387 config.depth_test_fail_op = front->zfail_op;
388 config.stencil_test_fail_op = front->fail_op;
389
390 config.stencil_ref_value = vc5->stencil_ref.ref_value[0];
391 }
392
393 if (back->enabled) {
394 cl_emit(&job->bcl, STENCIL_CONFIG, config) {
395 config.front_config = false;
396 config.back_config = true;
397
398 config.stencil_write_mask = back->writemask;
399 config.stencil_test_mask = back->valuemask;
400
401 config.stencil_test_function = back->func;
402 config.stencil_pass_op = back->zpass_op;
403 config.depth_test_fail_op = back->zfail_op;
404 config.stencil_test_fail_op = back->fail_op;
405
406 config.stencil_ref_value =
407 vc5->stencil_ref.ref_value[1];
408 }
409 }
410 }
411
412 if (vc5->dirty & VC5_DIRTY_FRAGTEX)
413 emit_textures(vc5, &vc5->fragtex);
414
415 if (vc5->dirty & VC5_DIRTY_VERTTEX)
416 emit_textures(vc5, &vc5->verttex);
417
418 if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
419 /* XXX: Need to handle more than 24 entries. */
420 cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
421 flags.varying_offset_v0 = 0;
422
423 flags.flat_shade_flags_for_varyings_v024 =
424 vc5->prog.fs->prog_data.fs->flat_shade_flags[0] & 0xfffff;
425
426 if (vc5->rasterizer->base.flatshade) {
427 flags.flat_shade_flags_for_varyings_v024 |=
428 vc5->prog.fs->prog_data.fs->shade_model_flags[0] & 0xfffff;
429 }
430 }
431 }
432
433 if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
434 struct vc5_streamout_stateobj *so = &vc5->streamout;
435
436 if (so->num_targets) {
437 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
438 tfe.number_of_32_bit_output_buffer_address_following =
439 so->num_targets;
440 tfe.number_of_16_bit_output_data_specs_following =
441 vc5->prog.bind_vs->num_tf_specs;
442 };
443
444 for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
445 cl_emit_prepacked(&job->bcl,
446 &vc5->prog.bind_vs->tf_specs[i]);
447 }
448
449 for (int i = 0; i < so->num_targets; i++) {
450 const struct pipe_stream_output_target *target =
451 so->targets[i];
452 struct vc5_resource *rsc =
453 vc5_resource(target->buffer);
454
455 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
456 output.address =
457 cl_address(rsc->bo,
458 target->buffer_offset);
459 };
460
461 vc5_job_add_write_resource(vc5->job,
462 target->buffer);
463 /* XXX: buffer_size? */
464 }
465 } else {
466 /* XXX? */
467 }
468 }
469 }