ee4849a296f2237ea98e41bec3aa15209eaa464d
[mesa.git] / src / gallium / drivers / v3d / v3dx_emit.c
1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/u_format.h"
25 #include "util/u_half.h"
26 #include "v3d_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/cle/v3dx_pack.h"
29 #include "broadcom/compiler/v3d_compiler.h"
30
31 static uint8_t
32 v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
33 {
34 /* We may get a bad blendfactor when blending is disabled. */
35 if (factor == 0)
36 return V3D_BLEND_FACTOR_ZERO;
37
38 switch (factor) {
39 case PIPE_BLENDFACTOR_ZERO:
40 return V3D_BLEND_FACTOR_ZERO;
41 case PIPE_BLENDFACTOR_ONE:
42 return V3D_BLEND_FACTOR_ONE;
43 case PIPE_BLENDFACTOR_SRC_COLOR:
44 return V3D_BLEND_FACTOR_SRC_COLOR;
45 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
46 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
47 case PIPE_BLENDFACTOR_DST_COLOR:
48 return V3D_BLEND_FACTOR_DST_COLOR;
49 case PIPE_BLENDFACTOR_INV_DST_COLOR:
50 return V3D_BLEND_FACTOR_INV_DST_COLOR;
51 case PIPE_BLENDFACTOR_SRC_ALPHA:
52 return V3D_BLEND_FACTOR_SRC_ALPHA;
53 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
54 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
55 case PIPE_BLENDFACTOR_DST_ALPHA:
56 return (dst_alpha_one ?
57 V3D_BLEND_FACTOR_ONE :
58 V3D_BLEND_FACTOR_DST_ALPHA);
59 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
60 return (dst_alpha_one ?
61 V3D_BLEND_FACTOR_ZERO :
62 V3D_BLEND_FACTOR_INV_DST_ALPHA);
63 case PIPE_BLENDFACTOR_CONST_COLOR:
64 return V3D_BLEND_FACTOR_CONST_COLOR;
65 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
66 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
67 case PIPE_BLENDFACTOR_CONST_ALPHA:
68 return V3D_BLEND_FACTOR_CONST_ALPHA;
69 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
70 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
71 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
72 return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
73 default:
74 unreachable("Bad blend factor");
75 }
76 }
77
78 static inline uint16_t
79 swizzled_border_color(const struct v3d_device_info *devinfo,
80 struct pipe_sampler_state *sampler,
81 struct v3d_sampler_view *sview,
82 int chan)
83 {
84 const struct util_format_description *desc =
85 util_format_description(sview->base.format);
86 uint8_t swiz = chan;
87
88 /* If we're doing swizzling in the sampler, then only rearrange the
89 * border color for the mismatch between the VC5 texture format and
90 * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
91 * the sampler's swizzle.
92 *
93 * For swizzling in the shader, we don't do any pre-swizzling of the
94 * border color.
95 */
96 if (v3d_get_tex_return_size(devinfo, sview->base.format,
97 sampler->compare_mode) != 32)
98 swiz = desc->swizzle[swiz];
99
100 switch (swiz) {
101 case PIPE_SWIZZLE_0:
102 return util_float_to_half(0.0);
103 case PIPE_SWIZZLE_1:
104 return util_float_to_half(1.0);
105 default:
106 return util_float_to_half(sampler->border_color.f[swiz]);
107 }
108 }
109
110 #if V3D_VERSION < 40
111 static uint32_t
112 translate_swizzle(unsigned char pipe_swizzle)
113 {
114 switch (pipe_swizzle) {
115 case PIPE_SWIZZLE_0:
116 return 0;
117 case PIPE_SWIZZLE_1:
118 return 1;
119 case PIPE_SWIZZLE_X:
120 case PIPE_SWIZZLE_Y:
121 case PIPE_SWIZZLE_Z:
122 case PIPE_SWIZZLE_W:
123 return 2 + pipe_swizzle;
124 default:
125 unreachable("unknown swizzle");
126 }
127 }
128
129 static void
130 emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,
131 int i)
132 {
133 struct v3d_job *job = v3d->job;
134 struct pipe_sampler_state *psampler = stage_tex->samplers[i];
135 struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
136 struct pipe_sampler_view *psview = stage_tex->textures[i];
137 struct v3d_sampler_view *sview = v3d_sampler_view(psview);
138 struct pipe_resource *prsc = psview->texture;
139 struct v3d_resource *rsc = v3d_resource(prsc);
140 const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
141
142 stage_tex->texture_state[i].offset =
143 v3d_cl_ensure_space(&job->indirect,
144 cl_packet_length(TEXTURE_SHADER_STATE),
145 32);
146 v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
147 job->indirect.bo);
148
149 uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format,
150 psampler->compare_mode);
151
152 struct V3D33_TEXTURE_SHADER_STATE unpacked = {
153 /* XXX */
154 .border_color_red = swizzled_border_color(devinfo, psampler,
155 sview, 0),
156 .border_color_green = swizzled_border_color(devinfo, psampler,
157 sview, 1),
158 .border_color_blue = swizzled_border_color(devinfo, psampler,
159 sview, 2),
160 .border_color_alpha = swizzled_border_color(devinfo, psampler,
161 sview, 3),
162
163 /* In the normal texturing path, the LOD gets clamped between
164 * min/max, and the base_level field (set in the sampler view
165 * from first_level) only decides where the min/mag switch
166 * happens, so we need to use the LOD clamps to keep us
167 * between min and max.
168 *
169 * For txf, the LOD clamp is still used, despite GL not
170 * wanting that. We will need to have a separate
171 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
172 * support txf properly.
173 */
174 .min_level_of_detail = MIN2(psview->u.tex.first_level +
175 MAX2(psampler->min_lod, 0),
176 psview->u.tex.last_level),
177 .max_level_of_detail = MIN2(psview->u.tex.first_level +
178 psampler->max_lod,
179 psview->u.tex.last_level),
180
181 .texture_base_pointer = cl_address(rsc->bo,
182 rsc->slices[0].offset),
183
184 .output_32_bit = return_size == 32,
185 };
186
187 /* Set up the sampler swizzle if we're doing 16-bit sampling. For
188 * 32-bit, we leave swizzling up to the shader compiler.
189 *
190 * Note: Contrary to the docs, the swizzle still applies even if the
191 * return size is 32. It's just that you probably want to swizzle in
192 * the shader, because you need the Y/Z/W channels to be defined.
193 */
194 if (return_size == 32) {
195 unpacked.swizzle_r = translate_swizzle(PIPE_SWIZZLE_X);
196 unpacked.swizzle_g = translate_swizzle(PIPE_SWIZZLE_Y);
197 unpacked.swizzle_b = translate_swizzle(PIPE_SWIZZLE_Z);
198 unpacked.swizzle_a = translate_swizzle(PIPE_SWIZZLE_W);
199 } else {
200 unpacked.swizzle_r = translate_swizzle(sview->swizzle[0]);
201 unpacked.swizzle_g = translate_swizzle(sview->swizzle[1]);
202 unpacked.swizzle_b = translate_swizzle(sview->swizzle[2]);
203 unpacked.swizzle_a = translate_swizzle(sview->swizzle[3]);
204 }
205
206 int min_img_filter = psampler->min_img_filter;
207 int min_mip_filter = psampler->min_mip_filter;
208 int mag_img_filter = psampler->mag_img_filter;
209
210 if (return_size == 32) {
211 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
212 min_img_filter = PIPE_TEX_FILTER_NEAREST;
213 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
214 }
215
216 bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
217 switch (min_mip_filter) {
218 case PIPE_TEX_MIPFILTER_NONE:
219 unpacked.filter += min_nearest ? 2 : 0;
220 break;
221 case PIPE_TEX_MIPFILTER_NEAREST:
222 unpacked.filter += min_nearest ? 4 : 8;
223 break;
224 case PIPE_TEX_MIPFILTER_LINEAR:
225 unpacked.filter += min_nearest ? 4 : 8;
226 unpacked.filter += 2;
227 break;
228 }
229
230 if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
231 unpacked.filter++;
232
233 if (psampler->max_anisotropy > 8)
234 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
235 else if (psampler->max_anisotropy > 4)
236 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
237 else if (psampler->max_anisotropy > 2)
238 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
239 else if (psampler->max_anisotropy)
240 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
241
242 uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
243 cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
244
245 for (int i = 0; i < ARRAY_SIZE(packed); i++)
246 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
247
248 /* TMU indirect structs need to be 32b aligned. */
249 v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
250 cl_emit_prepacked(&job->indirect, &packed);
251 }
252
253 static void
254 emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)
255 {
256 for (int i = 0; i < stage_tex->num_textures; i++) {
257 if (stage_tex->textures[i])
258 emit_one_texture(v3d, stage_tex, i);
259 }
260 }
261 #endif /* V3D_VERSION < 40 */
262
263 static uint32_t
264 translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
265 {
266 if (v3d->swap_color_rb & (1 << rt)) {
267 colormask = ((colormask & (2 | 8)) |
268 ((colormask & 1) << 2) |
269 ((colormask & 4) >> 2));
270 }
271
272 return (~colormask) & 0xf;
273 }
274
275 static void
276 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
277 struct pipe_blend_state *blend, int rt)
278 {
279 struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
280
281 #if V3D_VERSION >= 40
282 /* We don't need to emit blend state for disabled RTs. */
283 if (!rtblend->blend_enable)
284 return;
285 #endif
286
287 cl_emit(&job->bcl, BLEND_CONFIG, config) {
288 #if V3D_VERSION >= 40
289 if (blend->independent_blend_enable)
290 config.render_target_mask = 1 << rt;
291 else
292 config.render_target_mask = (1 << VC5_MAX_DRAW_BUFFERS) - 1;
293 #else
294 assert(rt == 0);
295 #endif
296
297 config.colour_blend_mode = rtblend->rgb_func;
298 config.colour_blend_dst_factor =
299 v3d_factor(rtblend->rgb_dst_factor,
300 v3d->blend_dst_alpha_one);
301 config.colour_blend_src_factor =
302 v3d_factor(rtblend->rgb_src_factor,
303 v3d->blend_dst_alpha_one);
304
305 config.alpha_blend_mode = rtblend->alpha_func;
306 config.alpha_blend_dst_factor =
307 v3d_factor(rtblend->alpha_dst_factor,
308 v3d->blend_dst_alpha_one);
309 config.alpha_blend_src_factor =
310 v3d_factor(rtblend->alpha_src_factor,
311 v3d->blend_dst_alpha_one);
312 }
313 }
314
315 static void
316 emit_flat_shade_flags(struct v3d_job *job,
317 int varying_offset,
318 uint32_t varyings,
319 enum V3DX(Varying_Flags_Action) lower,
320 enum V3DX(Varying_Flags_Action) higher)
321 {
322 cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
323 flags.varying_offset_v0 = varying_offset;
324 flags.flat_shade_flags_for_varyings_v024 = varyings;
325 flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
326 lower;
327 flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
328 higher;
329 }
330 }
331
332 #if V3D_VERSION >= 40
333 static void
334 emit_noperspective_flags(struct v3d_job *job,
335 int varying_offset,
336 uint32_t varyings,
337 enum V3DX(Varying_Flags_Action) lower,
338 enum V3DX(Varying_Flags_Action) higher)
339 {
340 cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
341 flags.varying_offset_v0 = varying_offset;
342 flags.non_perspective_flags_for_varyings_v024 = varyings;
343 flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
344 lower;
345 flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
346 higher;
347 }
348 }
349
350 static void
351 emit_centroid_flags(struct v3d_job *job,
352 int varying_offset,
353 uint32_t varyings,
354 enum V3DX(Varying_Flags_Action) lower,
355 enum V3DX(Varying_Flags_Action) higher)
356 {
357 cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
358 flags.varying_offset_v0 = varying_offset;
359 flags.centroid_flags_for_varyings_v024 = varyings;
360 flags.action_for_centroid_flags_of_lower_numbered_varyings =
361 lower;
362 flags.action_for_centroid_flags_of_higher_numbered_varyings =
363 higher;
364 }
365 }
366 #endif /* V3D_VERSION >= 40 */
367
368 static bool
369 emit_varying_flags(struct v3d_job *job, uint32_t *flags,
370 void (*flag_emit_callback)(struct v3d_job *job,
371 int varying_offset,
372 uint32_t flags,
373 enum V3DX(Varying_Flags_Action) lower,
374 enum V3DX(Varying_Flags_Action) higher))
375 {
376 struct v3d_context *v3d = job->v3d;
377 bool emitted_any = false;
378
379 for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
380 if (!flags[i])
381 continue;
382
383 if (emitted_any) {
384 flag_emit_callback(job, i, flags[i],
385 V3D_VARYING_FLAGS_ACTION_UNCHANGED,
386 V3D_VARYING_FLAGS_ACTION_UNCHANGED);
387 } else if (i == 0) {
388 flag_emit_callback(job, i, flags[i],
389 V3D_VARYING_FLAGS_ACTION_UNCHANGED,
390 V3D_VARYING_FLAGS_ACTION_ZEROED);
391 } else {
392 flag_emit_callback(job, i, flags[i],
393 V3D_VARYING_FLAGS_ACTION_ZEROED,
394 V3D_VARYING_FLAGS_ACTION_ZEROED);
395 }
396 emitted_any = true;
397 }
398
399 return emitted_any;
400 }
401
402 void
403 v3dX(emit_state)(struct pipe_context *pctx)
404 {
405 struct v3d_context *v3d = v3d_context(pctx);
406 struct v3d_job *job = v3d->job;
407 bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;
408
409 if (v3d->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
410 VC5_DIRTY_RASTERIZER)) {
411 float *vpscale = v3d->viewport.scale;
412 float *vptranslate = v3d->viewport.translate;
413 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
414 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
415 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
416 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
417
418 /* Clip to the scissor if it's enabled, but still clip to the
419 * drawable regardless since that controls where the binner
420 * tries to put things.
421 *
422 * Additionally, always clip the rendering to the viewport,
423 * since the hardware does guardband clipping, meaning
424 * primitives would rasterize outside of the view volume.
425 */
426 uint32_t minx, miny, maxx, maxy;
427 if (!v3d->rasterizer->base.scissor) {
428 minx = MAX2(vp_minx, 0);
429 miny = MAX2(vp_miny, 0);
430 maxx = MIN2(vp_maxx, job->draw_width);
431 maxy = MIN2(vp_maxy, job->draw_height);
432 } else {
433 minx = MAX2(vp_minx, v3d->scissor.minx);
434 miny = MAX2(vp_miny, v3d->scissor.miny);
435 maxx = MIN2(vp_maxx, v3d->scissor.maxx);
436 maxy = MIN2(vp_maxy, v3d->scissor.maxy);
437 }
438
439 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
440 clip.clip_window_left_pixel_coordinate = minx;
441 clip.clip_window_bottom_pixel_coordinate = miny;
442 if (maxx > minx && maxy > miny) {
443 clip.clip_window_width_in_pixels = maxx - minx;
444 clip.clip_window_height_in_pixels = maxy - miny;
445 } else if (V3D_VERSION < 41) {
446 /* The HW won't entirely clip out when scissor
447 * w/h is 0. Just treat it the same as
448 * rasterizer discard.
449 */
450 rasterizer_discard = true;
451 clip.clip_window_width_in_pixels = 1;
452 clip.clip_window_height_in_pixels = 1;
453 }
454 }
455
456 job->draw_min_x = MIN2(job->draw_min_x, minx);
457 job->draw_min_y = MIN2(job->draw_min_y, miny);
458 job->draw_max_x = MAX2(job->draw_max_x, maxx);
459 job->draw_max_y = MAX2(job->draw_max_y, maxy);
460 }
461
462 if (v3d->dirty & (VC5_DIRTY_RASTERIZER |
463 VC5_DIRTY_ZSA |
464 VC5_DIRTY_BLEND |
465 VC5_DIRTY_COMPILED_FS)) {
466 cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
467 config.enable_forward_facing_primitive =
468 !rasterizer_discard &&
469 !(v3d->rasterizer->base.cull_face &
470 PIPE_FACE_FRONT);
471 config.enable_reverse_facing_primitive =
472 !rasterizer_discard &&
473 !(v3d->rasterizer->base.cull_face &
474 PIPE_FACE_BACK);
475 /* This seems backwards, but it's what gets the
476 * clipflat test to pass.
477 */
478 config.clockwise_primitives =
479 v3d->rasterizer->base.front_ccw;
480
481 config.enable_depth_offset =
482 v3d->rasterizer->base.offset_tri;
483
484 /* V3D follows GL behavior where the sample mask only
485 * applies when MSAA is enabled. Gallium has sample
486 * mask apply anyway, and the MSAA blit shaders will
487 * set sample mask without explicitly setting
488 * rasterizer oversample. Just force it on here,
489 * since the blit shaders are the only way to have
490 * !multisample && samplemask != 0xf.
491 */
492 config.rasterizer_oversample_mode =
493 v3d->rasterizer->base.multisample ||
494 v3d->sample_mask != 0xf;
495
496 config.direct3d_provoking_vertex =
497 v3d->rasterizer->base.flatshade_first;
498
499 config.blend_enable = v3d->blend->blend_enables;
500
501 /* Note: EZ state may update based on the compiled FS,
502 * along with ZSA
503 */
504 config.early_z_updates_enable =
505 (job->ez_state != VC5_EZ_DISABLED);
506 if (v3d->zsa->base.depth.enabled) {
507 config.z_updates_enable =
508 v3d->zsa->base.depth.writemask;
509 config.early_z_enable =
510 config.early_z_updates_enable;
511 config.depth_test_function =
512 v3d->zsa->base.depth.func;
513 } else {
514 config.depth_test_function = PIPE_FUNC_ALWAYS;
515 }
516
517 config.stencil_enable =
518 v3d->zsa->base.stencil[0].enabled;
519 }
520
521 }
522
523 if (v3d->dirty & VC5_DIRTY_RASTERIZER &&
524 v3d->rasterizer->base.offset_tri) {
525 if (job->zsbuf &&
526 job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
527 cl_emit_prepacked_sized(&job->bcl,
528 v3d->rasterizer->depth_offset_z16,
529 cl_packet_length(DEPTH_OFFSET));
530 } else {
531 cl_emit_prepacked_sized(&job->bcl,
532 v3d->rasterizer->depth_offset,
533 cl_packet_length(DEPTH_OFFSET));
534 }
535 }
536
537 if (v3d->dirty & VC5_DIRTY_RASTERIZER) {
538 cl_emit(&job->bcl, POINT_SIZE, point_size) {
539 point_size.point_size = v3d->rasterizer->point_size;
540 }
541
542 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
543 line_width.line_width = v3d->rasterizer->base.line_width;
544 }
545 }
546
547 if (v3d->dirty & VC5_DIRTY_VIEWPORT) {
548 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
549 clip.viewport_half_width_in_1_256th_of_pixel =
550 v3d->viewport.scale[0] * 256.0f;
551 clip.viewport_half_height_in_1_256th_of_pixel =
552 v3d->viewport.scale[1] * 256.0f;
553 }
554
555 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
556 clip.viewport_z_offset_zc_to_zs =
557 v3d->viewport.translate[2];
558 clip.viewport_z_scale_zc_to_zs =
559 v3d->viewport.scale[2];
560 }
561 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
562 float z1 = (v3d->viewport.translate[2] -
563 v3d->viewport.scale[2]);
564 float z2 = (v3d->viewport.translate[2] +
565 v3d->viewport.scale[2]);
566 clip.minimum_zw = MIN2(z1, z2);
567 clip.maximum_zw = MAX2(z1, z2);
568 }
569
570 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
571 vp.viewport_centre_x_coordinate =
572 v3d->viewport.translate[0];
573 vp.viewport_centre_y_coordinate =
574 v3d->viewport.translate[1];
575 }
576 }
577
578 if (v3d->dirty & VC5_DIRTY_BLEND) {
579 struct v3d_blend_state *blend = v3d->blend;
580
581 if (blend->blend_enables) {
582 #if V3D_VERSION >= 40
583 cl_emit(&job->bcl, BLEND_ENABLES, enables) {
584 enables.mask = blend->blend_enables;
585 }
586 #endif
587
588 if (blend->base.independent_blend_enable) {
589 for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++)
590 emit_rt_blend(v3d, job, &blend->base, i);
591 } else {
592 emit_rt_blend(v3d, job, &blend->base, 0);
593 }
594 }
595 }
596
597 if (v3d->dirty & VC5_DIRTY_BLEND) {
598 struct pipe_blend_state *blend = &v3d->blend->base;
599
600 cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
601 for (int i = 0; i < 4; i++) {
602 int rt = blend->independent_blend_enable ? i : 0;
603 int rt_mask = blend->rt[rt].colormask;
604
605 mask.mask |= translate_colormask(v3d, rt_mask,
606 i) << (4 * i);
607 }
608 }
609 }
610
611 /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
612 * color.
613 */
614 if (v3d->dirty & VC5_DIRTY_BLEND_COLOR ||
615 (V3D_VERSION < 41 && (v3d->dirty & VC5_DIRTY_BLEND))) {
616 cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
617 colour.red_f16 = (v3d->swap_color_rb ?
618 v3d->blend_color.hf[2] :
619 v3d->blend_color.hf[0]);
620 colour.green_f16 = v3d->blend_color.hf[1];
621 colour.blue_f16 = (v3d->swap_color_rb ?
622 v3d->blend_color.hf[0] :
623 v3d->blend_color.hf[2]);
624 colour.alpha_f16 = v3d->blend_color.hf[3];
625 }
626 }
627
628 if (v3d->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
629 struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
630 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
631
632 if (front->enabled) {
633 cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
634 v3d->zsa->stencil_front, config) {
635 config.stencil_ref_value =
636 v3d->stencil_ref.ref_value[0];
637 }
638 }
639
640 if (back->enabled) {
641 cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
642 v3d->zsa->stencil_back, config) {
643 config.stencil_ref_value =
644 v3d->stencil_ref.ref_value[1];
645 }
646 }
647 }
648
649 #if V3D_VERSION < 40
650 /* Pre-4.x, we have texture state that depends on both the sampler and
651 * the view, so we merge them together at draw time.
652 */
653 if (v3d->dirty & VC5_DIRTY_FRAGTEX)
654 emit_textures(v3d, &v3d->fragtex);
655
656 if (v3d->dirty & VC5_DIRTY_VERTTEX)
657 emit_textures(v3d, &v3d->verttex);
658 #endif
659
660 if (v3d->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
661 if (!emit_varying_flags(job,
662 v3d->prog.fs->prog_data.fs->flat_shade_flags,
663 emit_flat_shade_flags)) {
664 cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
665 }
666 }
667
668 #if V3D_VERSION >= 40
669 if (v3d->dirty & VC5_DIRTY_NOPERSPECTIVE_FLAGS) {
670 if (!emit_varying_flags(job,
671 v3d->prog.fs->prog_data.fs->noperspective_flags,
672 emit_noperspective_flags)) {
673 cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
674 }
675 }
676
677 if (v3d->dirty & VC5_DIRTY_CENTROID_FLAGS) {
678 if (!emit_varying_flags(job,
679 v3d->prog.fs->prog_data.fs->centroid_flags,
680 emit_centroid_flags)) {
681 cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
682 }
683 }
684 #endif
685
686 /* Set up the transform feedback data specs (which VPM entries to
687 * output to which buffers).
688 */
689 if (v3d->dirty & (VC5_DIRTY_STREAMOUT |
690 VC5_DIRTY_RASTERIZER |
691 VC5_DIRTY_PRIM_MODE)) {
692 struct v3d_streamout_stateobj *so = &v3d->streamout;
693
694 if (so->num_targets) {
695 bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS &&
696 v3d->rasterizer->base.point_size_per_vertex);
697 uint16_t *tf_specs = (psiz_per_vertex ?
698 v3d->prog.bind_vs->tf_specs_psiz :
699 v3d->prog.bind_vs->tf_specs);
700
701 #if V3D_VERSION >= 40
702 job->tf_enabled = (v3d->prog.bind_vs->num_tf_specs != 0 &&
703 v3d->active_queries);
704
705 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
706 tfe.number_of_16_bit_output_data_specs_following =
707 v3d->prog.bind_vs->num_tf_specs;
708 tfe.enable = job->tf_enabled;
709 };
710 #else /* V3D_VERSION < 40 */
711 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
712 tfe.number_of_32_bit_output_buffer_address_following =
713 so->num_targets;
714 tfe.number_of_16_bit_output_data_specs_following =
715 v3d->prog.bind_vs->num_tf_specs;
716 };
717 #endif /* V3D_VERSION < 40 */
718 for (int i = 0; i < v3d->prog.bind_vs->num_tf_specs; i++) {
719 cl_emit_prepacked(&job->bcl, &tf_specs[i]);
720 }
721 } else if (job->tf_enabled) {
722 #if V3D_VERSION >= 40
723 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
724 tfe.enable = false;
725 };
726 job->tf_enabled = false;
727 #endif /* V3D_VERSION >= 40 */
728 }
729 }
730
731 /* Set up the trasnform feedback buffers. */
732 if (v3d->dirty & VC5_DIRTY_STREAMOUT) {
733 struct v3d_streamout_stateobj *so = &v3d->streamout;
734 for (int i = 0; i < so->num_targets; i++) {
735 const struct pipe_stream_output_target *target =
736 so->targets[i];
737 struct v3d_resource *rsc = target ?
738 v3d_resource(target->buffer) : NULL;
739 struct pipe_shader_state *vs = &v3d->prog.bind_vs->base;
740 struct pipe_stream_output_info *info = &vs->stream_output;
741 uint32_t offset = (v3d->streamout.offsets[i] *
742 info->stride[i] * 4);
743
744 #if V3D_VERSION >= 40
745 if (!target)
746 continue;
747
748 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
749 output.buffer_address =
750 cl_address(rsc->bo,
751 target->buffer_offset +
752 offset);
753 output.buffer_size_in_32_bit_words =
754 (target->buffer_size - offset) >> 2;
755 output.buffer_number = i;
756 }
757 #else /* V3D_VERSION < 40 */
758 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
759 if (target) {
760 output.address =
761 cl_address(rsc->bo,
762 target->buffer_offset +
763 offset);
764 }
765 };
766 #endif /* V3D_VERSION < 40 */
767 if (target) {
768 v3d_job_add_write_resource(v3d->job,
769 target->buffer);
770 }
771 /* XXX: buffer_size? */
772 }
773 }
774
775 if (v3d->dirty & VC5_DIRTY_OQ) {
776 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
777 job->oq_enabled = v3d->active_queries && v3d->current_oq;
778 if (job->oq_enabled) {
779 counter.address = cl_address(v3d->current_oq, 0);
780 }
781 }
782 }
783
784 #if V3D_VERSION >= 40
785 if (v3d->dirty & VC5_DIRTY_SAMPLE_STATE) {
786 cl_emit(&job->bcl, SAMPLE_STATE, state) {
787 /* Note: SampleCoverage was handled at the
788 * state_tracker level by converting to sample_mask.
789 */
790 state.coverage = fui(1.0) >> 16;
791 state.mask = job->msaa ? v3d->sample_mask : 0xf;
792 }
793 }
794 #endif
795 }