v3d: Add SSBO/atomic counters support.
[mesa.git] / src / gallium / drivers / v3d / v3dx_state.c
1 /*
2 * Copyright © 2014-2017 Broadcom
3 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #include "pipe/p_state.h"
26 #include "util/u_format.h"
27 #include "util/u_framebuffer.h"
28 #include "util/u_inlines.h"
29 #include "util/u_math.h"
30 #include "util/u_memory.h"
31 #include "util/u_half.h"
32 #include "util/u_helpers.h"
33 #include "util/u_upload_mgr.h"
34
35 #include "v3d_context.h"
36 #include "v3d_tiling.h"
37 #include "broadcom/common/v3d_macros.h"
38 #include "broadcom/cle/v3dx_pack.h"
39
/**
 * Destructor shared by the CSO types in this file whose state object is
 * released with a plain free().
 */
static void
v3d_generic_cso_state_delete(struct pipe_context *pctx, void *hwcso)
{
        (void)pctx;

        free(hwcso);
}
45
46 static void
47 v3d_set_blend_color(struct pipe_context *pctx,
48 const struct pipe_blend_color *blend_color)
49 {
50 struct v3d_context *v3d = v3d_context(pctx);
51 v3d->blend_color.f = *blend_color;
52 for (int i = 0; i < 4; i++) {
53 v3d->blend_color.hf[i] =
54 util_float_to_half(blend_color->color[i]);
55 }
56 v3d->dirty |= VC5_DIRTY_BLEND_COLOR;
57 }
58
59 static void
60 v3d_set_stencil_ref(struct pipe_context *pctx,
61 const struct pipe_stencil_ref *stencil_ref)
62 {
63 struct v3d_context *v3d = v3d_context(pctx);
64 v3d->stencil_ref = *stencil_ref;
65 v3d->dirty |= VC5_DIRTY_STENCIL_REF;
66 }
67
68 static void
69 v3d_set_clip_state(struct pipe_context *pctx,
70 const struct pipe_clip_state *clip)
71 {
72 struct v3d_context *v3d = v3d_context(pctx);
73 v3d->clip = *clip;
74 v3d->dirty |= VC5_DIRTY_CLIP;
75 }
76
77 static void
78 v3d_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
79 {
80 struct v3d_context *v3d = v3d_context(pctx);
81 v3d->sample_mask = sample_mask & ((1 << VC5_MAX_SAMPLES) - 1);
82 v3d->dirty |= VC5_DIRTY_SAMPLE_STATE;
83 }
84
85 static void *
86 v3d_create_rasterizer_state(struct pipe_context *pctx,
87 const struct pipe_rasterizer_state *cso)
88 {
89 struct v3d_rasterizer_state *so;
90
91 so = CALLOC_STRUCT(v3d_rasterizer_state);
92 if (!so)
93 return NULL;
94
95 so->base = *cso;
96
97 /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835,
98 * BCM21553).
99 */
100 so->point_size = MAX2(cso->point_size, .125f);
101
102 STATIC_ASSERT(sizeof(so->depth_offset) >=
103 cl_packet_length(DEPTH_OFFSET));
104 v3dx_pack(&so->depth_offset, DEPTH_OFFSET, depth) {
105 depth.depth_offset_factor = cso->offset_scale;
106 depth.depth_offset_units = cso->offset_units;
107 }
108
109 /* The HW treats polygon offset units based on a Z24 buffer, so we
110 * need to scale up offset_units if we're only Z16.
111 */
112 v3dx_pack(&so->depth_offset_z16, DEPTH_OFFSET, depth) {
113 depth.depth_offset_factor = cso->offset_scale;
114 depth.depth_offset_units = cso->offset_units * 256.0;
115 }
116
117 return so;
118 }
119
120 /* Blend state is baked into shaders. */
121 static void *
122 v3d_create_blend_state(struct pipe_context *pctx,
123 const struct pipe_blend_state *cso)
124 {
125 struct v3d_blend_state *so;
126
127 so = CALLOC_STRUCT(v3d_blend_state);
128 if (!so)
129 return NULL;
130
131 so->base = *cso;
132
133 if (cso->independent_blend_enable) {
134 for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
135 so->blend_enables |= cso->rt[i].blend_enable << i;
136
137 /* V3D 4.x is when we got independent blend enables. */
138 assert(V3D_VERSION >= 40 ||
139 cso->rt[i].blend_enable == cso->rt[0].blend_enable);
140 }
141 } else {
142 if (cso->rt[0].blend_enable)
143 so->blend_enables = (1 << VC5_MAX_DRAW_BUFFERS) - 1;
144 }
145
146 return so;
147 }
148
149 static uint32_t
150 translate_stencil_op(enum pipe_stencil_op op)
151 {
152 switch (op) {
153 case PIPE_STENCIL_OP_KEEP: return V3D_STENCIL_OP_KEEP;
154 case PIPE_STENCIL_OP_ZERO: return V3D_STENCIL_OP_ZERO;
155 case PIPE_STENCIL_OP_REPLACE: return V3D_STENCIL_OP_REPLACE;
156 case PIPE_STENCIL_OP_INCR: return V3D_STENCIL_OP_INCR;
157 case PIPE_STENCIL_OP_DECR: return V3D_STENCIL_OP_DECR;
158 case PIPE_STENCIL_OP_INCR_WRAP: return V3D_STENCIL_OP_INCWRAP;
159 case PIPE_STENCIL_OP_DECR_WRAP: return V3D_STENCIL_OP_DECWRAP;
160 case PIPE_STENCIL_OP_INVERT: return V3D_STENCIL_OP_INVERT;
161 }
162 unreachable("bad stencil op");
163 }
164
165 static void *
166 v3d_create_depth_stencil_alpha_state(struct pipe_context *pctx,
167 const struct pipe_depth_stencil_alpha_state *cso)
168 {
169 struct v3d_depth_stencil_alpha_state *so;
170
171 so = CALLOC_STRUCT(v3d_depth_stencil_alpha_state);
172 if (!so)
173 return NULL;
174
175 so->base = *cso;
176
177 if (cso->depth.enabled) {
178 switch (cso->depth.func) {
179 case PIPE_FUNC_LESS:
180 case PIPE_FUNC_LEQUAL:
181 so->ez_state = VC5_EZ_LT_LE;
182 break;
183 case PIPE_FUNC_GREATER:
184 case PIPE_FUNC_GEQUAL:
185 so->ez_state = VC5_EZ_GT_GE;
186 break;
187 case PIPE_FUNC_NEVER:
188 case PIPE_FUNC_EQUAL:
189 so->ez_state = VC5_EZ_UNDECIDED;
190 break;
191 default:
192 so->ez_state = VC5_EZ_DISABLED;
193 break;
194 }
195
196 /* If stencil is enabled and it's not a no-op, then it would
197 * break EZ updates.
198 */
199 if (cso->stencil[0].enabled &&
200 (cso->stencil[0].zfail_op != PIPE_STENCIL_OP_KEEP ||
201 cso->stencil[0].func != PIPE_FUNC_ALWAYS ||
202 (cso->stencil[1].enabled &&
203 (cso->stencil[1].zfail_op != PIPE_STENCIL_OP_KEEP &&
204 cso->stencil[1].func != PIPE_FUNC_ALWAYS)))) {
205 so->ez_state = VC5_EZ_DISABLED;
206 }
207 }
208
209 const struct pipe_stencil_state *front = &cso->stencil[0];
210 const struct pipe_stencil_state *back = &cso->stencil[1];
211
212 if (front->enabled) {
213 STATIC_ASSERT(sizeof(so->stencil_front) >=
214 cl_packet_length(STENCIL_CFG));
215 v3dx_pack(&so->stencil_front, STENCIL_CFG, config) {
216 config.front_config = true;
217 /* If !back->enabled, then the front values should be
218 * used for both front and back-facing primitives.
219 */
220 config.back_config = !back->enabled;
221
222 config.stencil_write_mask = front->writemask;
223 config.stencil_test_mask = front->valuemask;
224
225 config.stencil_test_function = front->func;
226 config.stencil_pass_op =
227 translate_stencil_op(front->zpass_op);
228 config.depth_test_fail_op =
229 translate_stencil_op(front->zfail_op);
230 config.stencil_test_fail_op =
231 translate_stencil_op(front->fail_op);
232 }
233 }
234 if (back->enabled) {
235 STATIC_ASSERT(sizeof(so->stencil_back) >=
236 cl_packet_length(STENCIL_CFG));
237 v3dx_pack(&so->stencil_back, STENCIL_CFG, config) {
238 config.front_config = false;
239 config.back_config = true;
240
241 config.stencil_write_mask = back->writemask;
242 config.stencil_test_mask = back->valuemask;
243
244 config.stencil_test_function = back->func;
245 config.stencil_pass_op =
246 translate_stencil_op(back->zpass_op);
247 config.depth_test_fail_op =
248 translate_stencil_op(back->zfail_op);
249 config.stencil_test_fail_op =
250 translate_stencil_op(back->fail_op);
251 }
252 }
253
254 return so;
255 }
256
257 static void
258 v3d_set_polygon_stipple(struct pipe_context *pctx,
259 const struct pipe_poly_stipple *stipple)
260 {
261 struct v3d_context *v3d = v3d_context(pctx);
262 v3d->stipple = *stipple;
263 v3d->dirty |= VC5_DIRTY_STIPPLE;
264 }
265
266 static void
267 v3d_set_scissor_states(struct pipe_context *pctx,
268 unsigned start_slot,
269 unsigned num_scissors,
270 const struct pipe_scissor_state *scissor)
271 {
272 struct v3d_context *v3d = v3d_context(pctx);
273
274 v3d->scissor = *scissor;
275 v3d->dirty |= VC5_DIRTY_SCISSOR;
276 }
277
278 static void
279 v3d_set_viewport_states(struct pipe_context *pctx,
280 unsigned start_slot,
281 unsigned num_viewports,
282 const struct pipe_viewport_state *viewport)
283 {
284 struct v3d_context *v3d = v3d_context(pctx);
285 v3d->viewport = *viewport;
286 v3d->dirty |= VC5_DIRTY_VIEWPORT;
287 }
288
289 static void
290 v3d_set_vertex_buffers(struct pipe_context *pctx,
291 unsigned start_slot, unsigned count,
292 const struct pipe_vertex_buffer *vb)
293 {
294 struct v3d_context *v3d = v3d_context(pctx);
295 struct v3d_vertexbuf_stateobj *so = &v3d->vertexbuf;
296
297 util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb,
298 start_slot, count);
299 so->count = util_last_bit(so->enabled_mask);
300
301 v3d->dirty |= VC5_DIRTY_VTXBUF;
302 }
303
304 static void
305 v3d_blend_state_bind(struct pipe_context *pctx, void *hwcso)
306 {
307 struct v3d_context *v3d = v3d_context(pctx);
308 v3d->blend = hwcso;
309 v3d->dirty |= VC5_DIRTY_BLEND;
310 }
311
312 static void
313 v3d_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
314 {
315 struct v3d_context *v3d = v3d_context(pctx);
316 v3d->rasterizer = hwcso;
317 v3d->dirty |= VC5_DIRTY_RASTERIZER;
318 }
319
320 static void
321 v3d_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
322 {
323 struct v3d_context *v3d = v3d_context(pctx);
324 v3d->zsa = hwcso;
325 v3d->dirty |= VC5_DIRTY_ZSA;
326 }
327
328 static void *
329 v3d_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
330 const struct pipe_vertex_element *elements)
331 {
332 struct v3d_context *v3d = v3d_context(pctx);
333 struct v3d_vertex_stateobj *so = CALLOC_STRUCT(v3d_vertex_stateobj);
334
335 if (!so)
336 return NULL;
337
338 memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
339 so->num_elements = num_elements;
340
341 for (int i = 0; i < so->num_elements; i++) {
342 const struct pipe_vertex_element *elem = &elements[i];
343 const struct util_format_description *desc =
344 util_format_description(elem->src_format);
345 uint32_t r_size = desc->channel[0].size;
346
347 const uint32_t size =
348 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
349
350 v3dx_pack(&so->attrs[i * size],
351 GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
352 /* vec_size == 0 means 4 */
353 attr.vec_size = desc->nr_channels & 3;
354 attr.signed_int_type = (desc->channel[0].type ==
355 UTIL_FORMAT_TYPE_SIGNED);
356
357 attr.normalized_int_type = desc->channel[0].normalized;
358 attr.read_as_int_uint = desc->channel[0].pure_integer;
359 attr.instance_divisor = MIN2(elem->instance_divisor,
360 0xffff);
361
362 switch (desc->channel[0].type) {
363 case UTIL_FORMAT_TYPE_FLOAT:
364 if (r_size == 32) {
365 attr.type = ATTRIBUTE_FLOAT;
366 } else {
367 assert(r_size == 16);
368 attr.type = ATTRIBUTE_HALF_FLOAT;
369 }
370 break;
371
372 case UTIL_FORMAT_TYPE_SIGNED:
373 case UTIL_FORMAT_TYPE_UNSIGNED:
374 switch (r_size) {
375 case 32:
376 attr.type = ATTRIBUTE_INT;
377 break;
378 case 16:
379 attr.type = ATTRIBUTE_SHORT;
380 break;
381 case 10:
382 attr.type = ATTRIBUTE_INT2_10_10_10;
383 break;
384 case 8:
385 attr.type = ATTRIBUTE_BYTE;
386 break;
387 default:
388 fprintf(stderr,
389 "format %s unsupported\n",
390 desc->name);
391 attr.type = ATTRIBUTE_BYTE;
392 abort();
393 }
394 break;
395
396 default:
397 fprintf(stderr,
398 "format %s unsupported\n",
399 desc->name);
400 abort();
401 }
402 }
403 }
404
405 /* Set up the default attribute values in case any of the vertex
406 * elements use them.
407 */
408 uint32_t *attrs;
409 u_upload_alloc(v3d->state_uploader, 0,
410 VC5_MAX_ATTRIBUTES * 4 * sizeof(float), 16,
411 &so->defaults_offset, &so->defaults, (void **)&attrs);
412
413 for (int i = 0; i < VC5_MAX_ATTRIBUTES; i++) {
414 attrs[i * 4 + 0] = 0;
415 attrs[i * 4 + 1] = 0;
416 attrs[i * 4 + 2] = 0;
417 if (i < so->num_elements &&
418 util_format_is_pure_integer(so->pipe[i].src_format)) {
419 attrs[i * 4 + 3] = 1;
420 } else {
421 attrs[i * 4 + 3] = fui(1.0);
422 }
423 }
424
425 u_upload_unmap(v3d->state_uploader);
426 return so;
427 }
428
429 static void
430 v3d_vertex_state_delete(struct pipe_context *pctx, void *hwcso)
431 {
432 struct v3d_vertex_stateobj *so = hwcso;
433
434 pipe_resource_reference(&so->defaults, NULL);
435 free(so);
436 }
437
438 static void
439 v3d_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
440 {
441 struct v3d_context *v3d = v3d_context(pctx);
442 v3d->vtx = hwcso;
443 v3d->dirty |= VC5_DIRTY_VTXSTATE;
444 }
445
446 static void
447 v3d_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
448 const struct pipe_constant_buffer *cb)
449 {
450 struct v3d_context *v3d = v3d_context(pctx);
451 struct v3d_constbuf_stateobj *so = &v3d->constbuf[shader];
452
453 util_copy_constant_buffer(&so->cb[index], cb);
454
455 /* Note that the state tracker can unbind constant buffers by
456 * passing NULL here.
457 */
458 if (unlikely(!cb)) {
459 so->enabled_mask &= ~(1 << index);
460 so->dirty_mask &= ~(1 << index);
461 return;
462 }
463
464 so->enabled_mask |= 1 << index;
465 so->dirty_mask |= 1 << index;
466 v3d->dirty |= VC5_DIRTY_CONSTBUF;
467 }
468
469 static void
470 v3d_set_framebuffer_state(struct pipe_context *pctx,
471 const struct pipe_framebuffer_state *framebuffer)
472 {
473 struct v3d_context *v3d = v3d_context(pctx);
474 struct pipe_framebuffer_state *cso = &v3d->framebuffer;
475
476 v3d->job = NULL;
477
478 util_copy_framebuffer_state(cso, framebuffer);
479
480 v3d->swap_color_rb = 0;
481 v3d->blend_dst_alpha_one = 0;
482 for (int i = 0; i < v3d->framebuffer.nr_cbufs; i++) {
483 struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i];
484 if (!cbuf)
485 continue;
486 struct v3d_surface *v3d_cbuf = v3d_surface(cbuf);
487
488 const struct util_format_description *desc =
489 util_format_description(cbuf->format);
490
491 /* For BGRA8 formats (DRI window system default format), we
492 * need to swap R and B, since the HW's format is RGBA8. On
493 * V3D 4.1+, the RCL can swap R and B on load/store.
494 */
495 if (v3d->screen->devinfo.ver < 41 && v3d_cbuf->swap_rb)
496 v3d->swap_color_rb |= 1 << i;
497
498 if (desc->swizzle[3] == PIPE_SWIZZLE_1)
499 v3d->blend_dst_alpha_one |= 1 << i;
500 }
501
502 v3d->dirty |= VC5_DIRTY_FRAMEBUFFER;
503 }
504
505 static uint32_t translate_wrap(uint32_t pipe_wrap, bool using_nearest)
506 {
507 switch (pipe_wrap) {
508 case PIPE_TEX_WRAP_REPEAT:
509 return 0;
510 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
511 return 1;
512 case PIPE_TEX_WRAP_MIRROR_REPEAT:
513 return 2;
514 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
515 return 3;
516 case PIPE_TEX_WRAP_CLAMP:
517 return (using_nearest ? 1 : 3);
518 default:
519 unreachable("Unknown wrap mode");
520 }
521 }
522
523
524 static void *
525 v3d_create_sampler_state(struct pipe_context *pctx,
526 const struct pipe_sampler_state *cso)
527 {
528 MAYBE_UNUSED struct v3d_context *v3d = v3d_context(pctx);
529 struct v3d_sampler_state *so = CALLOC_STRUCT(v3d_sampler_state);
530
531 if (!so)
532 return NULL;
533
534 memcpy(so, cso, sizeof(*cso));
535
536 bool either_nearest =
537 (cso->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST ||
538 cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST);
539
540 #if V3D_VERSION >= 40
541 so->bo = v3d_bo_alloc(v3d->screen, cl_packet_length(SAMPLER_STATE),
542 "sampler");
543 void *map = v3d_bo_map(so->bo);
544
545 v3dx_pack(map, SAMPLER_STATE, sampler) {
546 sampler.wrap_i_border = false;
547
548 sampler.wrap_s = translate_wrap(cso->wrap_s, either_nearest);
549 sampler.wrap_t = translate_wrap(cso->wrap_t, either_nearest);
550 sampler.wrap_r = translate_wrap(cso->wrap_r, either_nearest);
551
552 sampler.fixed_bias = cso->lod_bias;
553 sampler.depth_compare_function = cso->compare_func;
554
555 sampler.min_filter_nearest =
556 cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
557 sampler.mag_filter_nearest =
558 cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
559 sampler.mip_filter_nearest =
560 cso->min_mip_filter != PIPE_TEX_MIPFILTER_LINEAR;
561
562 sampler.min_level_of_detail = MIN2(MAX2(0, cso->min_lod),
563 15);
564 sampler.max_level_of_detail = MIN2(cso->max_lod, 15);
565
566 /* If we're not doing inter-miplevel filtering, we need to
567 * clamp the LOD so that we only sample from baselevel.
568 * However, we need to still allow the calculated LOD to be
569 * fractionally over the baselevel, so that the HW can decide
570 * between the min and mag filters.
571 */
572 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
573 sampler.min_level_of_detail =
574 MIN2(sampler.min_level_of_detail, 1.0 / 256.0);
575 sampler.max_level_of_detail =
576 MIN2(sampler.max_level_of_detail, 1.0 / 256.0);
577 }
578
579 if (cso->max_anisotropy) {
580 sampler.anisotropy_enable = true;
581
582 if (cso->max_anisotropy > 8)
583 sampler.maximum_anisotropy = 3;
584 else if (cso->max_anisotropy > 4)
585 sampler.maximum_anisotropy = 2;
586 else if (cso->max_anisotropy > 2)
587 sampler.maximum_anisotropy = 1;
588 }
589
590 sampler.border_color_mode = V3D_BORDER_COLOR_FOLLOWS;
591 /* XXX: The border color field is in the TMU blending format
592 * (32, f16, or i16), and we need to customize it based on
593 * that.
594 *
595 * XXX: for compat alpha formats, we need the alpha field to
596 * be in the red channel.
597 */
598 sampler.border_color_red =
599 util_float_to_half(cso->border_color.f[0]);
600 sampler.border_color_green =
601 util_float_to_half(cso->border_color.f[1]);
602 sampler.border_color_blue =
603 util_float_to_half(cso->border_color.f[2]);
604 sampler.border_color_alpha =
605 util_float_to_half(cso->border_color.f[3]);
606 }
607
608 #else /* V3D_VERSION < 40 */
609 v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) {
610 p0.s_wrap_mode = translate_wrap(cso->wrap_s, either_nearest);
611 p0.t_wrap_mode = translate_wrap(cso->wrap_t, either_nearest);
612 p0.r_wrap_mode = translate_wrap(cso->wrap_r, either_nearest);
613 }
614
615 v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
616 tex.depth_compare_function = cso->compare_func;
617 tex.fixed_bias = cso->lod_bias;
618 }
619 #endif /* V3D_VERSION < 40 */
620 return so;
621 }
622
623 static void
624 v3d_sampler_states_bind(struct pipe_context *pctx,
625 enum pipe_shader_type shader, unsigned start,
626 unsigned nr, void **hwcso)
627 {
628 struct v3d_context *v3d = v3d_context(pctx);
629 struct v3d_texture_stateobj *stage_tex = &v3d->tex[shader];
630
631 assert(start == 0);
632 unsigned i;
633 unsigned new_nr = 0;
634
635 for (i = 0; i < nr; i++) {
636 if (hwcso[i])
637 new_nr = i + 1;
638 stage_tex->samplers[i] = hwcso[i];
639 }
640
641 for (; i < stage_tex->num_samplers; i++) {
642 stage_tex->samplers[i] = NULL;
643 }
644
645 stage_tex->num_samplers = new_nr;
646 }
647
648 static void
649 v3d_sampler_state_delete(struct pipe_context *pctx,
650 void *hwcso)
651 {
652 struct pipe_sampler_state *psampler = hwcso;
653 struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
654
655 v3d_bo_unreference(&sampler->bo);
656 free(psampler);
657 }
658
#if V3D_VERSION >= 40
/**
 * Maps a gallium swizzle selector onto the value stored in the texture
 * shader state: 0 and 1 for the constant selectors, and 2 plus the
 * gallium value for the X/Y/Z/W channel selectors.
 */
static uint32_t
translate_swizzle(unsigned char pipe_swizzle)
{
        switch (pipe_swizzle) {
        case PIPE_SWIZZLE_X:
        case PIPE_SWIZZLE_Y:
        case PIPE_SWIZZLE_Z:
        case PIPE_SWIZZLE_W:
                return 2 + pipe_swizzle;
        case PIPE_SWIZZLE_0:
                return 0;
        case PIPE_SWIZZLE_1:
                return 1;
        default:
                unreachable("unknown swizzle");
        }
}
#endif
678
679 static void
680 v3d_setup_texture_shader_state(struct V3DX(TEXTURE_SHADER_STATE) *tex,
681 struct pipe_resource *prsc,
682 int base_level, int last_level,
683 int first_layer, int last_layer)
684 {
685 struct v3d_resource *rsc = v3d_resource(prsc);
686 int msaa_scale = prsc->nr_samples > 1 ? 2 : 1;
687
688 tex->image_width = prsc->width0 * msaa_scale;
689 tex->image_height = prsc->height0 * msaa_scale;
690
691 #if V3D_VERSION >= 40
692 /* On 4.x, the height of a 1D texture is redefined to be the
693 * upper 14 bits of the width (which is only usable with txf).
694 */
695 if (prsc->target == PIPE_TEXTURE_1D ||
696 prsc->target == PIPE_TEXTURE_1D_ARRAY) {
697 tex->image_height = tex->image_width >> 14;
698 }
699 #endif
700
701 if (prsc->target == PIPE_TEXTURE_3D) {
702 tex->image_depth = prsc->depth0;
703 } else {
704 tex->image_depth = (last_layer - first_layer) + 1;
705 }
706
707 tex->base_level = base_level;
708 #if V3D_VERSION >= 40
709 tex->max_level = last_level;
710 /* Note that we don't have a job to reference the texture's sBO
711 * at state create time, so any time this sampler view is used
712 * we need to add the texture to the job.
713 */
714 tex->texture_base_pointer =
715 cl_address(NULL,
716 rsc->bo->offset +
717 v3d_layer_offset(prsc, 0, first_layer));
718 #endif
719 tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64;
720
721 /* Since other platform devices may produce UIF images even
722 * when they're not big enough for V3D to assume they're UIF,
723 * we force images with level 0 as UIF to be always treated
724 * that way.
725 */
726 tex->level_0_is_strictly_uif =
727 (rsc->slices[0].tiling == VC5_TILING_UIF_XOR ||
728 rsc->slices[0].tiling == VC5_TILING_UIF_NO_XOR);
729 tex->level_0_xor_enable = (rsc->slices[0].tiling == VC5_TILING_UIF_XOR);
730
731 if (tex->level_0_is_strictly_uif)
732 tex->level_0_ub_pad = rsc->slices[0].ub_pad;
733
734 #if V3D_VERSION >= 40
735 if (tex->uif_xor_disable ||
736 tex->level_0_is_strictly_uif) {
737 tex->extended = true;
738 }
739 #endif /* V3D_VERSION >= 40 */
740 }
741
742 static struct pipe_sampler_view *
743 v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
744 const struct pipe_sampler_view *cso)
745 {
746 struct v3d_context *v3d = v3d_context(pctx);
747 struct v3d_screen *screen = v3d->screen;
748 struct v3d_sampler_view *so = CALLOC_STRUCT(v3d_sampler_view);
749 struct v3d_resource *rsc = v3d_resource(prsc);
750
751 if (!so)
752 return NULL;
753
754 so->base = *cso;
755
756 pipe_reference(NULL, &prsc->reference);
757
758 /* Compute the sampler view's swizzle up front. This will be plugged
759 * into either the sampler (for 16-bit returns) or the shader's
760 * texture key (for 32)
761 */
762 uint8_t view_swizzle[4] = {
763 cso->swizzle_r,
764 cso->swizzle_g,
765 cso->swizzle_b,
766 cso->swizzle_a
767 };
768 const uint8_t *fmt_swizzle =
769 v3d_get_format_swizzle(&screen->devinfo, so->base.format);
770 util_format_compose_swizzles(fmt_swizzle, view_swizzle, so->swizzle);
771
772 so->base.texture = prsc;
773 so->base.reference.count = 1;
774 so->base.context = pctx;
775
776 /* V3D still doesn't support sampling from raster textures, so we will
777 * have to copy to a temporary tiled texture.
778 */
779 if (!rsc->tiled && !(prsc->target == PIPE_TEXTURE_1D ||
780 prsc->target == PIPE_TEXTURE_1D_ARRAY)) {
781 struct v3d_resource *shadow_parent = rsc;
782 struct pipe_resource tmpl = {
783 .target = prsc->target,
784 .format = prsc->format,
785 .width0 = u_minify(prsc->width0,
786 cso->u.tex.first_level),
787 .height0 = u_minify(prsc->height0,
788 cso->u.tex.first_level),
789 .depth0 = 1,
790 .array_size = 1,
791 .bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET,
792 .last_level = cso->u.tex.last_level - cso->u.tex.first_level,
793 .nr_samples = prsc->nr_samples,
794 };
795
796 /* Create the shadow texture. The rest of the sampler view
797 * setup will use the shadow.
798 */
799 prsc = v3d_resource_create(pctx->screen, &tmpl);
800 if (!prsc) {
801 free(so);
802 return NULL;
803 }
804 rsc = v3d_resource(prsc);
805
806 /* Flag it as needing update of the contents from the parent. */
807 rsc->writes = shadow_parent->writes - 1;
808 assert(rsc->tiled);
809
810 so->texture = prsc;
811 } else {
812 pipe_resource_reference(&so->texture, prsc);
813 }
814
815 void *map;
816 #if V3D_VERSION >= 40
817 so->bo = v3d_bo_alloc(v3d->screen,
818 cl_packet_length(TEXTURE_SHADER_STATE), "sampler");
819 map = v3d_bo_map(so->bo);
820 #else /* V3D_VERSION < 40 */
821 STATIC_ASSERT(sizeof(so->texture_shader_state) >=
822 cl_packet_length(TEXTURE_SHADER_STATE));
823 map = &so->texture_shader_state;
824 #endif
825
826 v3dx_pack(map, TEXTURE_SHADER_STATE, tex) {
827 v3d_setup_texture_shader_state(&tex, prsc,
828 cso->u.tex.first_level,
829 cso->u.tex.last_level,
830 cso->u.tex.first_layer,
831 cso->u.tex.last_layer);
832
833 tex.srgb = util_format_is_srgb(cso->format);
834
835 #if V3D_VERSION >= 40
836 tex.swizzle_r = translate_swizzle(so->swizzle[0]);
837 tex.swizzle_g = translate_swizzle(so->swizzle[1]);
838 tex.swizzle_b = translate_swizzle(so->swizzle[2]);
839 tex.swizzle_a = translate_swizzle(so->swizzle[3]);
840 #endif
841
842 if (prsc->nr_samples > 1 && V3D_VERSION < 40) {
843 /* Using texture views to reinterpret formats on our
844 * MSAA textures won't work, because we don't lay out
845 * the bits in memory as it's expected -- for example,
846 * RGBA8 and RGB10_A2 are compatible in the
847 * ARB_texture_view spec, but in HW we lay them out as
848 * 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now
849 * to catch failures.
850 *
851 * We explicitly allow remapping S8Z24 to RGBA8888 for
852 * v3d_blit.c's stencil blits.
853 */
854 assert((util_format_linear(cso->format) ==
855 util_format_linear(prsc->format)) ||
856 (prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
857 cso->format == PIPE_FORMAT_R8G8B8A8_UNORM));
858 uint32_t output_image_format =
859 v3d_get_rt_format(&screen->devinfo, cso->format);
860 uint32_t internal_type;
861 uint32_t internal_bpp;
862 v3d_get_internal_type_bpp_for_output_format(&screen->devinfo,
863 output_image_format,
864 &internal_type,
865 &internal_bpp);
866
867 switch (internal_type) {
868 case V3D_INTERNAL_TYPE_8:
869 tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8;
870 break;
871 case V3D_INTERNAL_TYPE_16F:
872 tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F;
873 break;
874 default:
875 unreachable("Bad MSAA texture type");
876 }
877
878 /* sRGB was stored in the tile buffer as linear and
879 * would have been encoded to sRGB on resolved tile
880 * buffer store. Note that this means we would need
881 * shader code if we wanted to read an MSAA sRGB
882 * texture without sRGB decode.
883 */
884 tex.srgb = false;
885 } else {
886 tex.texture_type = v3d_get_tex_format(&screen->devinfo,
887 cso->format);
888 }
889 };
890
891 return &so->base;
892 }
893
894 static void
895 v3d_sampler_view_destroy(struct pipe_context *pctx,
896 struct pipe_sampler_view *psview)
897 {
898 struct v3d_sampler_view *sview = v3d_sampler_view(psview);
899
900 v3d_bo_unreference(&sview->bo);
901 pipe_resource_reference(&psview->texture, NULL);
902 pipe_resource_reference(&sview->texture, NULL);
903 free(psview);
904 }
905
906 static void
907 v3d_set_sampler_views(struct pipe_context *pctx,
908 enum pipe_shader_type shader,
909 unsigned start, unsigned nr,
910 struct pipe_sampler_view **views)
911 {
912 struct v3d_context *v3d = v3d_context(pctx);
913 struct v3d_texture_stateobj *stage_tex = &v3d->tex[shader];
914 unsigned i;
915 unsigned new_nr = 0;
916
917 assert(start == 0);
918
919 for (i = 0; i < nr; i++) {
920 if (views[i])
921 new_nr = i + 1;
922 pipe_sampler_view_reference(&stage_tex->textures[i], views[i]);
923 }
924
925 for (; i < stage_tex->num_textures; i++) {
926 pipe_sampler_view_reference(&stage_tex->textures[i], NULL);
927 }
928
929 stage_tex->num_textures = new_nr;
930 }
931
932 static struct pipe_stream_output_target *
933 v3d_create_stream_output_target(struct pipe_context *pctx,
934 struct pipe_resource *prsc,
935 unsigned buffer_offset,
936 unsigned buffer_size)
937 {
938 struct pipe_stream_output_target *target;
939
940 target = CALLOC_STRUCT(pipe_stream_output_target);
941 if (!target)
942 return NULL;
943
944 pipe_reference_init(&target->reference, 1);
945 pipe_resource_reference(&target->buffer, prsc);
946
947 target->context = pctx;
948 target->buffer_offset = buffer_offset;
949 target->buffer_size = buffer_size;
950
951 return target;
952 }
953
954 static void
955 v3d_stream_output_target_destroy(struct pipe_context *pctx,
956 struct pipe_stream_output_target *target)
957 {
958 pipe_resource_reference(&target->buffer, NULL);
959 free(target);
960 }
961
962 static void
963 v3d_set_stream_output_targets(struct pipe_context *pctx,
964 unsigned num_targets,
965 struct pipe_stream_output_target **targets,
966 const unsigned *offsets)
967 {
968 struct v3d_context *ctx = v3d_context(pctx);
969 struct v3d_streamout_stateobj *so = &ctx->streamout;
970 unsigned i;
971
972 assert(num_targets <= ARRAY_SIZE(so->targets));
973
974 for (i = 0; i < num_targets; i++) {
975 if (offsets[i] != -1)
976 so->offsets[i] = offsets[i];
977
978 pipe_so_target_reference(&so->targets[i], targets[i]);
979 }
980
981 for (; i < so->num_targets; i++)
982 pipe_so_target_reference(&so->targets[i], NULL);
983
984 so->num_targets = num_targets;
985
986 ctx->dirty |= VC5_DIRTY_STREAMOUT;
987 }
988
989 static void
990 v3d_set_shader_buffers(struct pipe_context *pctx,
991 enum pipe_shader_type shader,
992 unsigned start, unsigned count,
993 const struct pipe_shader_buffer *buffers)
994 {
995 struct v3d_context *v3d = v3d_context(pctx);
996 struct v3d_ssbo_stateobj *so = &v3d->ssbo[shader];
997 unsigned mask = 0;
998
999 if (buffers) {
1000 for (unsigned i = 0; i < count; i++) {
1001 unsigned n = i + start;
1002 struct pipe_shader_buffer *buf = &so->sb[n];
1003
1004 if ((buf->buffer == buffers[i].buffer) &&
1005 (buf->buffer_offset == buffers[i].buffer_offset) &&
1006 (buf->buffer_size == buffers[i].buffer_size))
1007 continue;
1008
1009 mask |= 1 << n;
1010
1011 buf->buffer_offset = buffers[i].buffer_offset;
1012 buf->buffer_size = buffers[i].buffer_size;
1013 pipe_resource_reference(&buf->buffer, buffers[i].buffer);
1014
1015 if (buf->buffer)
1016 so->enabled_mask |= 1 << n;
1017 else
1018 so->enabled_mask &= ~(1 << n);
1019 }
1020 } else {
1021 mask = ((1 << count) - 1) << start;
1022
1023 for (unsigned i = 0; i < count; i++) {
1024 unsigned n = i + start;
1025 struct pipe_shader_buffer *buf = &so->sb[n];
1026
1027 pipe_resource_reference(&buf->buffer, NULL);
1028 }
1029
1030 so->enabled_mask &= ~mask;
1031 }
1032
1033 v3d->dirty |= VC5_DIRTY_SSBO;
1034 }
1035
1036 void
1037 v3dX(state_init)(struct pipe_context *pctx)
1038 {
1039 pctx->set_blend_color = v3d_set_blend_color;
1040 pctx->set_stencil_ref = v3d_set_stencil_ref;
1041 pctx->set_clip_state = v3d_set_clip_state;
1042 pctx->set_sample_mask = v3d_set_sample_mask;
1043 pctx->set_constant_buffer = v3d_set_constant_buffer;
1044 pctx->set_framebuffer_state = v3d_set_framebuffer_state;
1045 pctx->set_polygon_stipple = v3d_set_polygon_stipple;
1046 pctx->set_scissor_states = v3d_set_scissor_states;
1047 pctx->set_viewport_states = v3d_set_viewport_states;
1048
1049 pctx->set_vertex_buffers = v3d_set_vertex_buffers;
1050
1051 pctx->create_blend_state = v3d_create_blend_state;
1052 pctx->bind_blend_state = v3d_blend_state_bind;
1053 pctx->delete_blend_state = v3d_generic_cso_state_delete;
1054
1055 pctx->create_rasterizer_state = v3d_create_rasterizer_state;
1056 pctx->bind_rasterizer_state = v3d_rasterizer_state_bind;
1057 pctx->delete_rasterizer_state = v3d_generic_cso_state_delete;
1058
1059 pctx->create_depth_stencil_alpha_state = v3d_create_depth_stencil_alpha_state;
1060 pctx->bind_depth_stencil_alpha_state = v3d_zsa_state_bind;
1061 pctx->delete_depth_stencil_alpha_state = v3d_generic_cso_state_delete;
1062
1063 pctx->create_vertex_elements_state = v3d_vertex_state_create;
1064 pctx->delete_vertex_elements_state = v3d_vertex_state_delete;
1065 pctx->bind_vertex_elements_state = v3d_vertex_state_bind;
1066
1067 pctx->create_sampler_state = v3d_create_sampler_state;
1068 pctx->delete_sampler_state = v3d_sampler_state_delete;
1069 pctx->bind_sampler_states = v3d_sampler_states_bind;
1070
1071 pctx->create_sampler_view = v3d_create_sampler_view;
1072 pctx->sampler_view_destroy = v3d_sampler_view_destroy;
1073 pctx->set_sampler_views = v3d_set_sampler_views;
1074
1075 pctx->set_shader_buffers = v3d_set_shader_buffers;
1076
1077 pctx->create_stream_output_target = v3d_create_stream_output_target;
1078 pctx->stream_output_target_destroy = v3d_stream_output_target_destroy;
1079 pctx->set_stream_output_targets = v3d_set_stream_output_targets;
1080 }