nv50/ir: when merging immediates/consts, load directly
[mesa.git] / src / gallium / drivers / vc5 / vc5_emit.c
1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/u_format.h"
25 #include "util/u_half.h"
26 #include "vc5_context.h"
27 #include "broadcom/cle/v3d_packet_v33_pack.h"
28 #include "broadcom/compiler/v3d_compiler.h"
29
30 static uint8_t
31 vc5_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
32 {
33 /* We may get a bad blendfactor when blending is disabled. */
34 if (factor == 0)
35 return V3D_BLEND_FACTOR_ZERO;
36
37 switch (factor) {
38 case PIPE_BLENDFACTOR_ZERO:
39 return V3D_BLEND_FACTOR_ZERO;
40 case PIPE_BLENDFACTOR_ONE:
41 return V3D_BLEND_FACTOR_ONE;
42 case PIPE_BLENDFACTOR_SRC_COLOR:
43 return V3D_BLEND_FACTOR_SRC_COLOR;
44 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
45 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
46 case PIPE_BLENDFACTOR_DST_COLOR:
47 return V3D_BLEND_FACTOR_DST_COLOR;
48 case PIPE_BLENDFACTOR_INV_DST_COLOR:
49 return V3D_BLEND_FACTOR_INV_DST_COLOR;
50 case PIPE_BLENDFACTOR_SRC_ALPHA:
51 return V3D_BLEND_FACTOR_SRC_ALPHA;
52 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
53 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
54 case PIPE_BLENDFACTOR_DST_ALPHA:
55 return (dst_alpha_one ?
56 V3D_BLEND_FACTOR_ONE :
57 V3D_BLEND_FACTOR_DST_ALPHA);
58 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
59 return (dst_alpha_one ?
60 V3D_BLEND_FACTOR_ZERO :
61 V3D_BLEND_FACTOR_INV_DST_ALPHA);
62 case PIPE_BLENDFACTOR_CONST_COLOR:
63 return V3D_BLEND_FACTOR_CONST_COLOR;
64 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
65 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
66 case PIPE_BLENDFACTOR_CONST_ALPHA:
67 return V3D_BLEND_FACTOR_CONST_ALPHA;
68 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
69 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
70 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
71 return V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE;
72 default:
73 unreachable("Bad blend factor");
74 }
75 }
76
77 static inline uint16_t
78 swizzled_border_color(struct pipe_sampler_state *sampler,
79 struct vc5_sampler_view *sview,
80 int chan)
81 {
82 const struct util_format_description *desc =
83 util_format_description(sview->base.format);
84 uint8_t swiz = chan;
85
86 /* If we're doing swizzling in the sampler, then only rearrange the
87 * border color for the mismatch between the VC5 texture format and
88 * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
89 * the sampler's swizzle.
90 *
91 * For swizzling in the shader, we don't do any pre-swizzling of the
92 * border color.
93 */
94 if (vc5_get_tex_return_size(sview->base.format) != 32)
95 swiz = desc->swizzle[swiz];
96
97 switch (swiz) {
98 case PIPE_SWIZZLE_0:
99 return util_float_to_half(0.0);
100 case PIPE_SWIZZLE_1:
101 return util_float_to_half(1.0);
102 default:
103 return util_float_to_half(sampler->border_color.f[swiz]);
104 }
105 }
106
107 static void
108 emit_one_texture(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex,
109 int i)
110 {
111 struct vc5_job *job = vc5->job;
112 struct pipe_sampler_state *psampler = stage_tex->samplers[i];
113 struct vc5_sampler_state *sampler = vc5_sampler_state(psampler);
114 struct pipe_sampler_view *psview = stage_tex->textures[i];
115 struct vc5_sampler_view *sview = vc5_sampler_view(psview);
116 struct pipe_resource *prsc = psview->texture;
117 struct vc5_resource *rsc = vc5_resource(prsc);
118
119 stage_tex->texture_state[i].offset =
120 vc5_cl_ensure_space(&job->indirect,
121 cl_packet_length(TEXTURE_SHADER_STATE),
122 32);
123 vc5_bo_set_reference(&stage_tex->texture_state[i].bo,
124 job->indirect.bo);
125
126 struct V3D33_TEXTURE_SHADER_STATE unpacked = {
127 /* XXX */
128 .border_color_red = swizzled_border_color(psampler, sview, 0),
129 .border_color_green = swizzled_border_color(psampler, sview, 1),
130 .border_color_blue = swizzled_border_color(psampler, sview, 2),
131 .border_color_alpha = swizzled_border_color(psampler, sview, 3),
132
133 /* In the normal texturing path, the LOD gets clamped between
134 * min/max, and the base_level field (set in the sampler view
135 * from first_level) only decides where the min/mag switch
136 * happens, so we need to use the LOD clamps to keep us
137 * between min and max.
138 *
139 * For txf, the LOD clamp is still used, despite GL not
140 * wanting that. We will need to have a separate
141 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
142 * support txf properly.
143 */
144 .min_level_of_detail = MIN2(psview->u.tex.first_level +
145 MAX2(psampler->min_lod, 0),
146 psview->u.tex.last_level),
147 .max_level_of_detail = MIN2(psview->u.tex.first_level +
148 psampler->max_lod,
149 psview->u.tex.last_level),
150
151 .texture_base_pointer = cl_address(rsc->bo,
152 rsc->slices[0].offset),
153 };
154
155 int min_img_filter = psampler->min_img_filter;
156 int min_mip_filter = psampler->min_mip_filter;
157 int mag_img_filter = psampler->mag_img_filter;
158
159 if (vc5_get_tex_return_size(psview->format) == 32) {
160 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
161 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
162 mag_img_filter = PIPE_TEX_FILTER_NEAREST;
163 }
164
165 bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
166 switch (min_mip_filter) {
167 case PIPE_TEX_MIPFILTER_NONE:
168 unpacked.filter += min_nearest ? 2 : 0;
169 break;
170 case PIPE_TEX_MIPFILTER_NEAREST:
171 unpacked.filter += min_nearest ? 4 : 8;
172 break;
173 case PIPE_TEX_MIPFILTER_LINEAR:
174 unpacked.filter += min_nearest ? 4 : 8;
175 unpacked.filter += 2;
176 break;
177 }
178
179 if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
180 unpacked.filter++;
181
182 if (psampler->max_anisotropy > 8)
183 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
184 else if (psampler->max_anisotropy > 4)
185 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
186 else if (psampler->max_anisotropy > 2)
187 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
188 else if (psampler->max_anisotropy)
189 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
190
191 uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
192 cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
193
194 for (int i = 0; i < ARRAY_SIZE(packed); i++)
195 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
196
197 /* TMU indirect structs need to be 32b aligned. */
198 vc5_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
199 cl_emit_prepacked(&job->indirect, &packed);
200 }
201
202 static void
203 emit_textures(struct vc5_context *vc5, struct vc5_texture_stateobj *stage_tex)
204 {
205 for (int i = 0; i < stage_tex->num_textures; i++) {
206 if (stage_tex->textures[i])
207 emit_one_texture(vc5, stage_tex, i);
208 }
209 }
210
211 static uint32_t
212 translate_colormask(struct vc5_context *vc5, uint32_t colormask, int rt)
213 {
214 if (vc5->swap_color_rb & (1 << rt)) {
215 colormask = ((colormask & (2 | 8)) |
216 ((colormask & 1) << 2) |
217 ((colormask & 4) >> 2));
218 }
219
220 return (~colormask) & 0xf;
221 }
222
223 void
224 vc5_emit_state(struct pipe_context *pctx)
225 {
226 struct vc5_context *vc5 = vc5_context(pctx);
227 struct vc5_job *job = vc5->job;
228
229 if (vc5->dirty & (VC5_DIRTY_SCISSOR | VC5_DIRTY_VIEWPORT |
230 VC5_DIRTY_RASTERIZER)) {
231 float *vpscale = vc5->viewport.scale;
232 float *vptranslate = vc5->viewport.translate;
233 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
234 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
235 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
236 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
237
238 /* Clip to the scissor if it's enabled, but still clip to the
239 * drawable regardless since that controls where the binner
240 * tries to put things.
241 *
242 * Additionally, always clip the rendering to the viewport,
243 * since the hardware does guardband clipping, meaning
244 * primitives would rasterize outside of the view volume.
245 */
246 uint32_t minx, miny, maxx, maxy;
247 if (!vc5->rasterizer->base.scissor) {
248 minx = MAX2(vp_minx, 0);
249 miny = MAX2(vp_miny, 0);
250 maxx = MIN2(vp_maxx, job->draw_width);
251 maxy = MIN2(vp_maxy, job->draw_height);
252 } else {
253 minx = MAX2(vp_minx, vc5->scissor.minx);
254 miny = MAX2(vp_miny, vc5->scissor.miny);
255 maxx = MIN2(vp_maxx, vc5->scissor.maxx);
256 maxy = MIN2(vp_maxy, vc5->scissor.maxy);
257 }
258
259 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
260 clip.clip_window_left_pixel_coordinate = minx;
261 clip.clip_window_bottom_pixel_coordinate = miny;
262 clip.clip_window_width_in_pixels = maxx - minx;
263 clip.clip_window_height_in_pixels = maxy - miny;
264 }
265
266 job->draw_min_x = MIN2(job->draw_min_x, minx);
267 job->draw_min_y = MIN2(job->draw_min_y, miny);
268 job->draw_max_x = MAX2(job->draw_max_x, maxx);
269 job->draw_max_y = MAX2(job->draw_max_y, maxy);
270 }
271
272 if (vc5->dirty & (VC5_DIRTY_RASTERIZER |
273 VC5_DIRTY_ZSA |
274 VC5_DIRTY_BLEND |
275 VC5_DIRTY_COMPILED_FS)) {
276 cl_emit(&job->bcl, CONFIGURATION_BITS, config) {
277 config.enable_forward_facing_primitive =
278 !vc5->rasterizer->base.rasterizer_discard &&
279 !(vc5->rasterizer->base.cull_face &
280 PIPE_FACE_FRONT);
281 config.enable_reverse_facing_primitive =
282 !vc5->rasterizer->base.rasterizer_discard &&
283 !(vc5->rasterizer->base.cull_face &
284 PIPE_FACE_BACK);
285 /* This seems backwards, but it's what gets the
286 * clipflat test to pass.
287 */
288 config.clockwise_primitives =
289 vc5->rasterizer->base.front_ccw;
290
291 config.enable_depth_offset =
292 vc5->rasterizer->base.offset_tri;
293
294 config.rasterizer_oversample_mode =
295 vc5->rasterizer->base.multisample;
296
297 config.direct3d_provoking_vertex =
298 vc5->rasterizer->base.flatshade_first;
299
300 config.blend_enable = vc5->blend->rt[0].blend_enable;
301
302 config.early_z_updates_enable = true;
303 if (vc5->zsa->base.depth.enabled) {
304 config.z_updates_enable =
305 vc5->zsa->base.depth.writemask;
306 config.early_z_enable =
307 (vc5->zsa->early_z_enable &&
308 !vc5->prog.fs->prog_data.fs->writes_z);
309 config.depth_test_function =
310 vc5->zsa->base.depth.func;
311 } else {
312 config.depth_test_function = PIPE_FUNC_ALWAYS;
313 }
314
315 config.stencil_enable =
316 vc5->zsa->base.stencil[0].enabled;
317 }
318
319 }
320
321 if (vc5->dirty & VC5_DIRTY_RASTERIZER &&
322 vc5->rasterizer->base.offset_tri) {
323 cl_emit(&job->bcl, DEPTH_OFFSET, depth) {
324 depth.depth_offset_factor =
325 vc5->rasterizer->offset_factor;
326 depth.depth_offset_units =
327 vc5->rasterizer->offset_units;
328 }
329 }
330
331 if (vc5->dirty & VC5_DIRTY_RASTERIZER) {
332 cl_emit(&job->bcl, POINT_SIZE, point_size) {
333 point_size.point_size = vc5->rasterizer->point_size;
334 }
335
336 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
337 line_width.line_width = vc5->rasterizer->base.line_width;
338 }
339 }
340
341 if (vc5->dirty & VC5_DIRTY_VIEWPORT) {
342 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
343 clip.viewport_half_width_in_1_256th_of_pixel =
344 vc5->viewport.scale[0] * 256.0f;
345 clip.viewport_half_height_in_1_256th_of_pixel =
346 vc5->viewport.scale[1] * 256.0f;
347 }
348
349 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
350 clip.viewport_z_offset_zc_to_zs =
351 vc5->viewport.translate[2];
352 clip.viewport_z_scale_zc_to_zs =
353 vc5->viewport.scale[2];
354 }
355 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
356 clip.minimum_zw = (vc5->viewport.translate[2] -
357 vc5->viewport.scale[2]);
358 clip.maximum_zw = (vc5->viewport.translate[2] +
359 vc5->viewport.scale[2]);
360 }
361
362 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
363 vp.viewport_centre_x_coordinate =
364 vc5->viewport.translate[0];
365 vp.viewport_centre_y_coordinate =
366 vc5->viewport.translate[1];
367 }
368 }
369
370 if (vc5->dirty & VC5_DIRTY_BLEND && vc5->blend->rt[0].blend_enable) {
371 struct pipe_blend_state *blend = vc5->blend;
372
373 cl_emit(&job->bcl, BLEND_CONFIG, config) {
374 struct pipe_rt_blend_state *rtblend = &blend->rt[0];
375
376 config.colour_blend_mode = rtblend->rgb_func;
377 config.colour_blend_dst_factor =
378 vc5_factor(rtblend->rgb_dst_factor,
379 vc5->blend_dst_alpha_one);
380 config.colour_blend_src_factor =
381 vc5_factor(rtblend->rgb_src_factor,
382 vc5->blend_dst_alpha_one);
383
384 config.alpha_blend_mode = rtblend->alpha_func;
385 config.alpha_blend_dst_factor =
386 vc5_factor(rtblend->alpha_dst_factor,
387 vc5->blend_dst_alpha_one);
388 config.alpha_blend_src_factor =
389 vc5_factor(rtblend->alpha_src_factor,
390 vc5->blend_dst_alpha_one);
391 }
392 }
393
394 if (vc5->dirty & VC5_DIRTY_BLEND) {
395 struct pipe_blend_state *blend = vc5->blend;
396
397 cl_emit(&job->bcl, COLOUR_WRITE_MASKS, mask) {
398 if (blend->independent_blend_enable) {
399 mask.render_target_0_per_colour_component_write_masks =
400 translate_colormask(vc5, blend->rt[0].colormask, 0);
401 mask.render_target_1_per_colour_component_write_masks =
402 translate_colormask(vc5, blend->rt[1].colormask, 1);
403 mask.render_target_2_per_colour_component_write_masks =
404 translate_colormask(vc5, blend->rt[2].colormask, 2);
405 mask.render_target_3_per_colour_component_write_masks =
406 translate_colormask(vc5, blend->rt[3].colormask, 3);
407 } else {
408 mask.render_target_0_per_colour_component_write_masks =
409 translate_colormask(vc5, blend->rt[0].colormask, 0);
410 mask.render_target_1_per_colour_component_write_masks =
411 translate_colormask(vc5, blend->rt[0].colormask, 1);
412 mask.render_target_2_per_colour_component_write_masks =
413 translate_colormask(vc5, blend->rt[0].colormask, 2);
414 mask.render_target_3_per_colour_component_write_masks =
415 translate_colormask(vc5, blend->rt[0].colormask, 3);
416 }
417 }
418 }
419
420 if (vc5->dirty & VC5_DIRTY_BLEND_COLOR) {
421 cl_emit(&job->bcl, BLEND_CONSTANT_COLOUR, colour) {
422 colour.red_f16 = (vc5->swap_color_rb ?
423 vc5->blend_color.hf[2] :
424 vc5->blend_color.hf[0]);
425 colour.green_f16 = vc5->blend_color.hf[1];
426 colour.blue_f16 = (vc5->swap_color_rb ?
427 vc5->blend_color.hf[0] :
428 vc5->blend_color.hf[2]);
429 colour.alpha_f16 = vc5->blend_color.hf[3];
430 }
431 }
432
433 if (vc5->dirty & (VC5_DIRTY_ZSA | VC5_DIRTY_STENCIL_REF)) {
434 struct pipe_stencil_state *front = &vc5->zsa->base.stencil[0];
435 struct pipe_stencil_state *back = &vc5->zsa->base.stencil[1];
436
437 if (front->enabled) {
438 cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
439 vc5->zsa->stencil_front, config) {
440 config.stencil_ref_value =
441 vc5->stencil_ref.ref_value[0];
442 }
443 }
444
445 if (back->enabled) {
446 cl_emit_with_prepacked(&job->bcl, STENCIL_CONFIG,
447 vc5->zsa->stencil_back, config) {
448 config.stencil_ref_value =
449 vc5->stencil_ref.ref_value[1];
450 }
451 }
452 }
453
454 if (vc5->dirty & VC5_DIRTY_FRAGTEX)
455 emit_textures(vc5, &vc5->fragtex);
456
457 if (vc5->dirty & VC5_DIRTY_VERTTEX)
458 emit_textures(vc5, &vc5->verttex);
459
460 if (vc5->dirty & VC5_DIRTY_FLAT_SHADE_FLAGS) {
461 /* XXX: Need to handle more than 24 entries. */
462 cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
463 flags.varying_offset_v0 = 0;
464
465 flags.flat_shade_flags_for_varyings_v024 =
466 vc5->prog.fs->prog_data.fs->flat_shade_flags[0] & 0xfffff;
467
468 if (vc5->rasterizer->base.flatshade) {
469 flags.flat_shade_flags_for_varyings_v024 |=
470 vc5->prog.fs->prog_data.fs->shade_model_flags[0] & 0xfffff;
471 }
472 }
473 }
474
475 if (vc5->dirty & VC5_DIRTY_STREAMOUT) {
476 struct vc5_streamout_stateobj *so = &vc5->streamout;
477
478 if (so->num_targets) {
479 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
480 tfe.number_of_32_bit_output_buffer_address_following =
481 so->num_targets;
482 tfe.number_of_16_bit_output_data_specs_following =
483 vc5->prog.bind_vs->num_tf_specs;
484 };
485
486 for (int i = 0; i < vc5->prog.bind_vs->num_tf_specs; i++) {
487 cl_emit_prepacked(&job->bcl,
488 &vc5->prog.bind_vs->tf_specs[i]);
489 }
490
491 for (int i = 0; i < so->num_targets; i++) {
492 const struct pipe_stream_output_target *target =
493 so->targets[i];
494 struct vc5_resource *rsc =
495 vc5_resource(target->buffer);
496
497 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
498 output.address =
499 cl_address(rsc->bo,
500 target->buffer_offset);
501 };
502
503 vc5_job_add_write_resource(vc5->job,
504 target->buffer);
505 /* XXX: buffer_size? */
506 }
507 } else {
508 /* XXX? */
509 }
510 }
511
512 if (vc5->dirty & VC5_DIRTY_OQ) {
513 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
514 job->oq_enabled = vc5->active_queries && vc5->current_oq;
515 if (job->oq_enabled) {
516 counter.address = cl_address(vc5->current_oq, 0);
517 }
518 }
519 }
520 }