panfrost: Prepare shader_meta descriptors at emission time
[mesa.git] / src / gallium / drivers / panfrost / pan_context.c
1 /*
2 * © Copyright 2018 Alyssa Rosenzweig
3 * Copyright © 2014-2017 Broadcom
4 * Copyright (C) 2017 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 */
26
27 #include <sys/poll.h>
28 #include <errno.h>
29
30 #include "pan_bo.h"
31 #include "pan_context.h"
32 #include "pan_minmax_cache.h"
33 #include "panfrost-quirks.h"
34
35 #include "util/macros.h"
36 #include "util/format/u_format.h"
37 #include "util/u_inlines.h"
38 #include "util/u_upload_mgr.h"
39 #include "util/u_memory.h"
40 #include "util/u_vbuf.h"
41 #include "util/half_float.h"
42 #include "util/u_helpers.h"
43 #include "util/format/u_format.h"
44 #include "util/u_prim.h"
45 #include "util/u_prim_restart.h"
46 #include "indices/u_primconvert.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_from_mesa.h"
49 #include "util/u_math.h"
50
51 #include "pan_screen.h"
52 #include "pan_blending.h"
53 #include "pan_blend_shaders.h"
54 #include "pan_cmdstream.h"
55 #include "pan_util.h"
56 #include "pandecode/decode.h"
57
58 struct midgard_tiler_descriptor
59 panfrost_emit_midg_tiler(struct panfrost_batch *batch, unsigned vertex_count)
60 {
61 struct panfrost_screen *screen = pan_screen(batch->ctx->base.screen);
62 bool hierarchy = !(screen->quirks & MIDGARD_NO_HIER_TILING);
63 struct midgard_tiler_descriptor t = {0};
64 unsigned height = batch->key.height;
65 unsigned width = batch->key.width;
66
67 t.hierarchy_mask =
68 panfrost_choose_hierarchy_mask(width, height, vertex_count, hierarchy);
69
70 /* Compute the polygon header size and use that to offset the body */
71
72 unsigned header_size = panfrost_tiler_header_size(
73 width, height, t.hierarchy_mask, hierarchy);
74
75 t.polygon_list_size = panfrost_tiler_full_size(
76 width, height, t.hierarchy_mask, hierarchy);
77
78 /* Sanity check */
79
80 if (vertex_count) {
81 struct panfrost_bo *tiler_heap;
82
83 tiler_heap = panfrost_batch_get_tiler_heap(batch);
84 t.polygon_list = panfrost_batch_get_polygon_list(batch,
85 header_size +
86 t.polygon_list_size);
87
88
89 /* Allow the entire tiler heap */
90 t.heap_start = tiler_heap->gpu;
91 t.heap_end = tiler_heap->gpu + tiler_heap->size;
92 } else {
93 struct panfrost_bo *tiler_dummy;
94
95 tiler_dummy = panfrost_batch_get_tiler_dummy(batch);
96 header_size = MALI_TILER_MINIMUM_HEADER_SIZE;
97
98 /* The tiler is disabled, so don't allow the tiler heap */
99 t.heap_start = tiler_dummy->gpu;
100 t.heap_end = t.heap_start;
101
102 /* Use a dummy polygon list */
103 t.polygon_list = tiler_dummy->gpu;
104
105 /* Disable the tiler */
106 if (hierarchy)
107 t.hierarchy_mask |= MALI_TILER_DISABLED;
108 else {
109 t.hierarchy_mask = MALI_TILER_USER;
110 t.polygon_list_size = MALI_TILER_MINIMUM_HEADER_SIZE + 4;
111
112 /* We don't have a WRITE_VALUE job, so write the polygon list manually */
113 uint32_t *polygon_list_body = (uint32_t *) (tiler_dummy->cpu + header_size);
114 polygon_list_body[0] = 0xa0000000; /* TODO: Just that? */
115 }
116 }
117
118 t.polygon_list_body =
119 t.polygon_list + header_size;
120
121 return t;
122 }
123
124 static void
125 panfrost_clear(
126 struct pipe_context *pipe,
127 unsigned buffers,
128 const union pipe_color_union *color,
129 double depth, unsigned stencil)
130 {
131 struct panfrost_context *ctx = pan_context(pipe);
132
133 /* TODO: panfrost_get_fresh_batch_for_fbo() instantiates a new batch if
134 * the existing batch targeting this FBO has draws. We could probably
135 * avoid that by replacing plain clears by quad-draws with a specific
136 * color/depth/stencil value, thus avoiding the generation of extra
137 * fragment jobs.
138 */
139 struct panfrost_batch *batch = panfrost_get_fresh_batch_for_fbo(ctx);
140
141 panfrost_batch_add_fbo_bos(batch);
142 panfrost_batch_clear(batch, buffers, color, depth, stencil);
143 }
144
145 /* Reset per-frame context, called on context initialisation as well as after
146 * flushing a frame */
147
148 void
149 panfrost_invalidate_frame(struct panfrost_context *ctx)
150 {
151 for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
152 ctx->payloads[i].postfix.shared_memory = 0;
153
154 /* TODO: When does this need to be handled? */
155 ctx->active_queries = true;
156 }
157
158 /* In practice, every field of these payloads should be configurable
159 * arbitrarily, which means these functions are basically catch-alls for
160 * as-of-yet unwavering unknowns */
161
162 static void
163 panfrost_emit_vertex_payload(struct panfrost_context *ctx)
164 {
165 /* 0x2 bit clear on 32-bit T6XX */
166
167 struct midgard_payload_vertex_tiler payload = {
168 .gl_enables = 0x4 | 0x2,
169 };
170
171 /* Vertex and compute are closely coupled, so share a payload */
172
173 memcpy(&ctx->payloads[PIPE_SHADER_VERTEX], &payload, sizeof(payload));
174 memcpy(&ctx->payloads[PIPE_SHADER_COMPUTE], &payload, sizeof(payload));
175 }
176
177 static unsigned
178 translate_tex_wrap(enum pipe_tex_wrap w)
179 {
180 switch (w) {
181 case PIPE_TEX_WRAP_REPEAT:
182 return MALI_WRAP_REPEAT;
183
184 case PIPE_TEX_WRAP_CLAMP:
185 return MALI_WRAP_CLAMP;
186
187 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
188 return MALI_WRAP_CLAMP_TO_EDGE;
189
190 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
191 return MALI_WRAP_CLAMP_TO_BORDER;
192
193 case PIPE_TEX_WRAP_MIRROR_REPEAT:
194 return MALI_WRAP_MIRRORED_REPEAT;
195
196 case PIPE_TEX_WRAP_MIRROR_CLAMP:
197 return MALI_WRAP_MIRRORED_CLAMP;
198
199 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
200 return MALI_WRAP_MIRRORED_CLAMP_TO_EDGE;
201
202 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
203 return MALI_WRAP_MIRRORED_CLAMP_TO_BORDER;
204
205 default:
206 unreachable("Invalid wrap");
207 }
208 }
209
210 bool
211 panfrost_writes_point_size(struct panfrost_context *ctx)
212 {
213 assert(ctx->shader[PIPE_SHADER_VERTEX]);
214 struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
215
216 return vs->writes_point_size && ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode == MALI_POINTS;
217 }
218
219 /* Stage the attribute descriptors so we can adjust src_offset
220 * to let BOs align nicely */
221
222 static void
223 panfrost_stage_attributes(struct panfrost_context *ctx)
224 {
225 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
226 struct panfrost_vertex_state *so = ctx->vertex;
227
228 size_t sz = sizeof(struct mali_attr_meta) * PAN_MAX_ATTRIBUTE;
229 struct panfrost_transfer transfer = panfrost_allocate_transient(batch, sz);
230 struct mali_attr_meta *target = (struct mali_attr_meta *) transfer.cpu;
231
232 /* Copy as-is for the first pass */
233 memcpy(target, so->hw, sz);
234
235 /* Fixup offsets for the second pass. Recall that the hardware
236 * calculates attribute addresses as:
237 *
238 * addr = base + (stride * vtx) + src_offset;
239 *
240 * However, on Mali, base must be aligned to 64-bytes, so we
241 * instead let:
242 *
243 * base' = base & ~63 = base - (base & 63)
244 *
245 * To compensate when using base' (see emit_vertex_data), we have
246 * to adjust src_offset by the masked off piece:
247 *
248 * addr' = base' + (stride * vtx) + (src_offset + (base & 63))
249 * = base - (base & 63) + (stride * vtx) + src_offset + (base & 63)
250 * = base + (stride * vtx) + src_offset
251 * = addr;
252 *
253 * QED.
254 */
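/* Worked example (added for clarity, not in the original): if base = 0x10007
 * and src_offset = 4, then base' = 0x10000 and the staged offset becomes
 * 4 + (0x10007 & 63) = 11, so base' + 11 addresses the same byte as base + 4. */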
255
256 unsigned start = ctx->payloads[PIPE_SHADER_VERTEX].offset_start;
257
258 for (unsigned i = 0; i < so->num_elements; ++i) {
259 unsigned vbi = so->pipe[i].vertex_buffer_index;
260 struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
261 struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);
262 mali_ptr addr = rsrc->bo->gpu + buf->buffer_offset;
263
264 /* Adjust by the masked off bits of the offset. Make sure we
265 * read src_offset from so->hw (which is not GPU visible)
266 * rather than target (which is) due to caching effects */
267
268 unsigned src_offset = so->hw[i].src_offset;
269 src_offset += (addr & 63);
270
271 /* Also, somewhat obscurely, per-instance data needs to be
272 * offset in response to a delayed start in an indexed draw */
273
274 if (so->pipe[i].instance_divisor && ctx->instance_count > 1 && start)
275 src_offset -= buf->stride * start;
276
277 target[i].src_offset = src_offset;
278 }
279
280 /* Let's also include vertex builtins */
281
282 struct mali_attr_meta builtin = {
283 .format = MALI_R32UI,
284 .swizzle = panfrost_get_default_swizzle(1)
285 };
286
287 /* See mali_attr_meta specification for the magic number */
288
289 builtin.index = so->vertexid_index;
290 memcpy(&target[PAN_VERTEX_ID], &builtin, 4);
291
292 builtin.index = so->vertexid_index + 1;
293 memcpy(&target[PAN_INSTANCE_ID], &builtin, 4);
294
295 ctx->payloads[PIPE_SHADER_VERTEX].postfix.attribute_meta = transfer.gpu;
296 }
297
298 static void
299 panfrost_upload_sampler_descriptors(struct panfrost_context *ctx)
300 {
301 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
302 size_t desc_size = sizeof(struct mali_sampler_descriptor);
303
304 for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
305 mali_ptr upload = 0;
306
307 if (ctx->sampler_count[t]) {
308 size_t transfer_size = desc_size * ctx->sampler_count[t];
309
310 struct panfrost_transfer transfer =
311 panfrost_allocate_transient(batch, transfer_size);
312
313 struct mali_sampler_descriptor *desc =
314 (struct mali_sampler_descriptor *) transfer.cpu;
315
316 for (int i = 0; i < ctx->sampler_count[t]; ++i)
317 desc[i] = ctx->samplers[t][i]->hw;
318
319 upload = transfer.gpu;
320 }
321
322 ctx->payloads[t].postfix.sampler_descriptor = upload;
323 }
324 }
325
326 static mali_ptr
327 panfrost_upload_tex(
328 struct panfrost_context *ctx,
329 enum pipe_shader_type st,
330 struct panfrost_sampler_view *view)
331 {
332 if (!view)
333 return (mali_ptr) 0;
334
335 struct pipe_sampler_view *pview = &view->base;
336 struct panfrost_resource *rsrc = pan_resource(pview->texture);
337
338 /* Add the BO to the job so it's retained until the job is done. */
339 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
340
341 panfrost_batch_add_bo(batch, rsrc->bo,
342 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
343 panfrost_bo_access_for_stage(st));
344
345 panfrost_batch_add_bo(batch, view->bo,
346 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
347 panfrost_bo_access_for_stage(st));
348
349 return view->bo->gpu;
350 }
351
352 static void
353 panfrost_upload_texture_descriptors(struct panfrost_context *ctx)
354 {
355 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
356
357 for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) {
358 mali_ptr trampoline = 0;
359
360 if (ctx->sampler_view_count[t]) {
361 uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];
362
363 for (int i = 0; i < ctx->sampler_view_count[t]; ++i)
364 trampolines[i] =
365 panfrost_upload_tex(ctx, t, ctx->sampler_views[t][i]);
366
367 trampoline = panfrost_upload_transient(batch, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]);
368 }
369
370 ctx->payloads[t].postfix.texture_trampoline = trampoline;
371 }
372 }
373
374 /* Compute number of UBOs active (more specifically, compute the highest UBO
375 * number addressable -- if there are gaps, include them in the count anyway).
376 * We always include UBO #0 in the count, since we *need* uniforms enabled for
377 * sysvals. */
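/* Illustrative example (added): an enabled_mask of 0b100010 yields mask =
 * 0b100011, so we return 6 -- UBO #5 is the highest addressable and the gap
 * at #2..#4 is counted anyway. */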
378
379 unsigned
380 panfrost_ubo_count(struct panfrost_context *ctx, enum pipe_shader_type stage)
381 {
382 unsigned mask = ctx->constant_buffer[stage].enabled_mask | 1;
383 return 32 - __builtin_clz(mask);
384 }
385
386 /* Go through dirty flags and actualise them in the cmdstream. */
387
388 static void
389 panfrost_emit_for_draw(struct panfrost_context *ctx)
390 {
391 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
392
393 panfrost_batch_add_fbo_bos(batch);
394
395 for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i)
396 panfrost_vt_attach_framebuffer(ctx, &ctx->payloads[i]);
397
398 panfrost_emit_vertex_data(batch);
399
400 /* Varyings emitted for -all- geometry */
401 unsigned total_count = ctx->padded_count * ctx->instance_count;
402 panfrost_emit_varying_descriptor(ctx, total_count);
403
404 panfrost_batch_set_requirements(batch);
405
406 panfrost_vt_update_rasterizer(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
407 panfrost_vt_update_occlusion_query(ctx, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
408
409 panfrost_emit_shader_meta(batch, PIPE_SHADER_VERTEX,
410 &ctx->payloads[PIPE_SHADER_VERTEX]);
411 panfrost_emit_shader_meta(batch, PIPE_SHADER_FRAGMENT,
412 &ctx->payloads[PIPE_SHADER_FRAGMENT]);
413
414 /* We stage to transient, so always dirty.. */
415 if (ctx->vertex)
416 panfrost_stage_attributes(ctx);
417
418 panfrost_upload_sampler_descriptors(ctx);
419 panfrost_upload_texture_descriptors(ctx);
420
421 for (int i = 0; i <= PIPE_SHADER_FRAGMENT; ++i)
422 panfrost_emit_const_buf(batch, i, &ctx->payloads[i]);
423
424 /* TODO: Upload the viewport somewhere more appropriate */
425
426 panfrost_emit_viewport(batch, &ctx->payloads[PIPE_SHADER_FRAGMENT]);
427 }
428
429 /* Corresponds to exactly one draw, but does not submit anything */
430
431 static void
432 panfrost_queue_draw(struct panfrost_context *ctx)
433 {
434 /* Handle dirty flags now */
435 panfrost_emit_for_draw(ctx);
436
437 /* If rasterizer discard is enabled, only submit the vertex job */
438
439 bool rasterizer_discard = ctx->rasterizer
440 && ctx->rasterizer->base.rasterizer_discard;
441
442
443 struct midgard_payload_vertex_tiler *vertex_payload = &ctx->payloads[PIPE_SHADER_VERTEX];
444 struct midgard_payload_vertex_tiler *tiler_payload = &ctx->payloads[PIPE_SHADER_FRAGMENT];
445
446 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
447 bool wallpapering = ctx->wallpaper_batch && batch->tiler_dep;
448
449 if (wallpapering) {
450 /* Inject in reverse order, with "predicted" job indices. THIS IS A HACK XXX */
451 panfrost_new_job(batch, JOB_TYPE_TILER, false, batch->job_index + 2, tiler_payload, sizeof(*tiler_payload), true);
452 panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), true);
453 } else {
454 unsigned vertex = panfrost_new_job(batch, JOB_TYPE_VERTEX, false, 0, vertex_payload, sizeof(*vertex_payload), false);
455
456 if (!rasterizer_discard)
457 panfrost_new_job(batch, JOB_TYPE_TILER, false, vertex, tiler_payload, sizeof(*tiler_payload), false);
458 }
459
460 panfrost_batch_adjust_stack_size(batch);
461 }
462
463 /* The entire frame is in memory -- send it off to the kernel! */
464
465 void
466 panfrost_flush(
467 struct pipe_context *pipe,
468 struct pipe_fence_handle **fence,
469 unsigned flags)
470 {
471 struct panfrost_context *ctx = pan_context(pipe);
472 struct util_dynarray fences;
473
474 /* We must collect the fences before the flush is done, otherwise we'll
475 * lose track of them.
476 */
477 if (fence) {
478 util_dynarray_init(&fences, NULL);
479 hash_table_foreach(ctx->batches, hentry) {
480 struct panfrost_batch *batch = hentry->data;
481
482 panfrost_batch_fence_reference(batch->out_sync);
483 util_dynarray_append(&fences,
484 struct panfrost_batch_fence *,
485 batch->out_sync);
486 }
487 }
488
489 /* Submit all pending jobs */
490 panfrost_flush_all_batches(ctx, false);
491
492 if (fence) {
493 struct panfrost_fence *f = panfrost_fence_create(ctx, &fences);
494 pipe->screen->fence_reference(pipe->screen, fence, NULL);
495 *fence = (struct pipe_fence_handle *)f;
496
497 util_dynarray_foreach(&fences, struct panfrost_batch_fence *, fence)
498 panfrost_batch_fence_unreference(*fence);
499
500 util_dynarray_fini(&fences);
501 }
502
503 if (pan_debug & PAN_DBG_TRACE)
504 pandecode_next_frame();
505 }
506
507 #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c;
508
509 static int
510 g2m_draw_mode(enum pipe_prim_type mode)
511 {
512 switch (mode) {
513 DEFINE_CASE(POINTS);
514 DEFINE_CASE(LINES);
515 DEFINE_CASE(LINE_LOOP);
516 DEFINE_CASE(LINE_STRIP);
517 DEFINE_CASE(TRIANGLES);
518 DEFINE_CASE(TRIANGLE_STRIP);
519 DEFINE_CASE(TRIANGLE_FAN);
520 DEFINE_CASE(QUADS);
521 DEFINE_CASE(QUAD_STRIP);
522 DEFINE_CASE(POLYGON);
523
524 default:
525 unreachable("Invalid draw mode");
526 }
527 }
528
529 #undef DEFINE_CASE
530
531 static unsigned
532 panfrost_translate_index_size(unsigned size)
533 {
534 switch (size) {
535 case 1:
536 return MALI_DRAW_INDEXED_UINT8;
537
538 case 2:
539 return MALI_DRAW_INDEXED_UINT16;
540
541 case 4:
542 return MALI_DRAW_INDEXED_UINT32;
543
544 default:
545 unreachable("Invalid index size");
546 }
547 }
548
549 /* Gets a GPU address for the associated index buffer. Only guaranteed to be
550 * good for the duration of the draw (transient), could last longer. Also get
551 * the bounds on the index buffer for the range accessed by the draw. We do
552 * these operations together because there are natural optimizations which
553 * require them to be together. */
554
555 static mali_ptr
556 panfrost_get_index_buffer_bounded(struct panfrost_context *ctx, const struct pipe_draw_info *info, unsigned *min_index, unsigned *max_index)
557 {
558 struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource);
559
560 off_t offset = info->start * info->index_size;
561 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
562 mali_ptr out = 0;
563
564 bool needs_indices = true;
565
566 if (info->max_index != ~0u) {
567 *min_index = info->min_index;
568 *max_index = info->max_index;
569 needs_indices = false;
570 }
571
572 if (!info->has_user_indices) {
573 /* Only resources can be directly mapped */
574 panfrost_batch_add_bo(batch, rsrc->bo,
575 PAN_BO_ACCESS_SHARED |
576 PAN_BO_ACCESS_READ |
577 PAN_BO_ACCESS_VERTEX_TILER);
578 out = rsrc->bo->gpu + offset;
579
580 /* Check the cache */
581 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache, info->start, info->count,
582 min_index, max_index);
583 } else {
584 /* Otherwise, we need to upload to transient memory */
585 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
586 out = panfrost_upload_transient(batch, ibuf8 + offset, info->count * info->index_size);
587 }
588
589 if (needs_indices) {
590 /* Fallback */
591 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
592
593 if (!info->has_user_indices) {
594 panfrost_minmax_cache_add(rsrc->index_cache, info->start, info->count,
595 *min_index, *max_index);
596 }
597 }
598
599
600 return out;
601 }
602
603 static bool
604 panfrost_scissor_culls_everything(struct panfrost_context *ctx)
605 {
606 const struct pipe_scissor_state *ss = &ctx->scissor;
607
608 /* Check if we're scissoring at all */
609
610 if (!(ctx->rasterizer && ctx->rasterizer->base.scissor))
611 return false;
612
613 return (ss->minx == ss->maxx) || (ss->miny == ss->maxy);
614 }
615
616 /* Count generated primitives (when there are no geom/tess shaders) for
617 * transform feedback */
618
619 static void
620 panfrost_statistics_record(
621 struct panfrost_context *ctx,
622 const struct pipe_draw_info *info)
623 {
624 if (!ctx->active_queries)
625 return;
626
627 uint32_t prims = u_prims_for_vertices(info->mode, info->count);
628 ctx->prims_generated += prims;
629
630 if (!ctx->streamout.num_targets)
631 return;
632
633 ctx->tf_prims_generated += prims;
634 }
635
636 static void
637 panfrost_draw_vbo(
638 struct pipe_context *pipe,
639 const struct pipe_draw_info *info)
640 {
641 struct panfrost_context *ctx = pan_context(pipe);
642
643 /* First of all, check the scissor to see if anything is drawn at all.
644 * If it's not, we drop the draw (mostly a conformance issue;
645 * well-behaved apps shouldn't hit this) */
646
647 if (panfrost_scissor_culls_everything(ctx))
648 return;
649
650 int mode = info->mode;
651
652 /* Fallback for an unsupported restart index */
653 unsigned primitive_index = (1 << (info->index_size * 8)) - 1;
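/* For a 16-bit index buffer this works out to 0xFFFF; anything other than this
 * all-ones "fixed" restart index apparently isn't handled natively and takes
 * the util fallback below (clarifying note added). */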
654
655 if (info->primitive_restart && info->index_size
656 && info->restart_index != primitive_index) {
657 util_draw_vbo_without_prim_restart(pipe, info);
658 return;
659 }
660
661 /* Fallback for unsupported modes */
662
663 assert(ctx->rasterizer != NULL);
664
665 if (!(ctx->draw_modes & (1 << mode))) {
666 if (mode == PIPE_PRIM_QUADS && info->count == 4 && !ctx->rasterizer->base.flatshade) {
667 mode = PIPE_PRIM_TRIANGLE_FAN;
668 } else {
669 if (info->count < 4) {
670 /* Degenerate case? */
671 return;
672 }
673
674 util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base);
675 util_primconvert_draw_vbo(ctx->primconvert, info);
676 return;
677 }
678 }
679
680 ctx->payloads[PIPE_SHADER_VERTEX].offset_start = info->start;
681 ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = info->start;
682
683 /* Now that we have a guaranteed terminating path, find the job.
684 * Assignment commented out to prevent unused warning */
685
686 /* struct panfrost_batch *batch = */ panfrost_get_batch_for_fbo(ctx);
687
688 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.draw_mode = g2m_draw_mode(mode);
689
690 /* Take into account a negative bias */
691 ctx->vertex_count = info->count + abs(info->index_bias);
692 ctx->instance_count = info->instance_count;
693 ctx->active_prim = info->mode;
694
695 /* For non-indexed draws, they're the same */
696 unsigned vertex_count = ctx->vertex_count;
697
698 unsigned draw_flags = 0;
699
700 /* The draw flags control how the primitive size is interpreted */
701
702 if (panfrost_writes_point_size(ctx))
703 draw_flags |= MALI_DRAW_VARYING_SIZE;
704
705 if (info->primitive_restart)
706 draw_flags |= MALI_DRAW_PRIMITIVE_RESTART_FIXED_INDEX;
707
708 /* These don't make much sense */
709
710 draw_flags |= 0x3000;
711
712 if (ctx->rasterizer && ctx->rasterizer->base.flatshade_first)
713 draw_flags |= MALI_DRAW_FLATSHADE_FIRST;
714
715 panfrost_statistics_record(ctx, info);
716
717 if (info->index_size) {
718 unsigned min_index = 0, max_index = 0;
719 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices =
720 panfrost_get_index_buffer_bounded(ctx, info, &min_index, &max_index);
721
722 /* Use the corresponding values */
723 vertex_count = max_index - min_index + 1;
724 ctx->payloads[PIPE_SHADER_VERTEX].offset_start = min_index + info->index_bias;
725 ctx->payloads[PIPE_SHADER_FRAGMENT].offset_start = min_index + info->index_bias;
726
727 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = -min_index;
728 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(info->count);
729
730 draw_flags |= panfrost_translate_index_size(info->index_size);
731 } else {
732 /* Index count == vertex count, if no indexing is applied, as
733 * if it is internally indexed in the expected order */
734
735 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.offset_bias_correction = 0;
736 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.index_count = MALI_POSITIVE(ctx->vertex_count);
737
738 /* Reset index state */
739 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.indices = (mali_ptr) 0;
740 }
741
742 /* Dispatch "compute jobs" for the vertex/tiler pair as (1,
743 * vertex_count, 1) */
744
745 panfrost_pack_work_groups_fused(
746 &ctx->payloads[PIPE_SHADER_VERTEX].prefix,
747 &ctx->payloads[PIPE_SHADER_FRAGMENT].prefix,
748 1, vertex_count, info->instance_count,
749 1, 1, 1);
750
751 ctx->payloads[PIPE_SHADER_FRAGMENT].prefix.unknown_draw = draw_flags;
752
753 /* Encode the padded vertex count */
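/* The factoring below is padded_count = (2 * k + 1) << shift, where shift is
 * the number of trailing zero bits; e.g. a padded_count of 12 gives shift = 2
 * and k = 1, since 12 = 3 << 2 (worked example added, not in the original). */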
754
755 if (info->instance_count > 1) {
756 ctx->padded_count = panfrost_padded_vertex_count(vertex_count);
757
758 unsigned shift = __builtin_ctz(ctx->padded_count);
759 unsigned k = ctx->padded_count >> (shift + 1);
760
761 ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = shift;
762 ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = shift;
763
764 ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = k;
765 ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = k;
766 } else {
767 ctx->padded_count = vertex_count;
768
769 /* Reset instancing state */
770 ctx->payloads[PIPE_SHADER_VERTEX].instance_shift = 0;
771 ctx->payloads[PIPE_SHADER_VERTEX].instance_odd = 0;
772 ctx->payloads[PIPE_SHADER_FRAGMENT].instance_shift = 0;
773 ctx->payloads[PIPE_SHADER_FRAGMENT].instance_odd = 0;
774 }
775
776 /* Fire off the draw itself */
777 panfrost_queue_draw(ctx);
778
779 /* Increment transform feedback offsets */
780
781 for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
782 unsigned output_count = u_stream_outputs_for_vertices(
783 ctx->active_prim, ctx->vertex_count);
784
785 ctx->streamout.offsets[i] += output_count;
786 }
787 }
788
789 /* CSO state */
790
791 static void
792 panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso)
793 {
794 free(hwcso);
795 }
796
797 static void *
798 panfrost_create_rasterizer_state(
799 struct pipe_context *pctx,
800 const struct pipe_rasterizer_state *cso)
801 {
802 struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer);
803
804 so->base = *cso;
805
806 return so;
807 }
808
809 static void
810 panfrost_bind_rasterizer_state(
811 struct pipe_context *pctx,
812 void *hwcso)
813 {
814 struct panfrost_context *ctx = pan_context(pctx);
815
816 ctx->rasterizer = hwcso;
817
818 if (!hwcso)
819 return;
820
821 /* Guaranteed with the core GL call, so don't expose ARB_polygon_offset */
822 assert(ctx->rasterizer->base.offset_clamp == 0.0);
823
824 /* Point sprites are emulated */
825
826 struct panfrost_shader_state *variant = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
827
828 if (ctx->rasterizer->base.sprite_coord_enable || (variant && variant->point_sprite_mask))
829 ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]);
830 }
831
832 static void *
833 panfrost_create_vertex_elements_state(
834 struct pipe_context *pctx,
835 unsigned num_elements,
836 const struct pipe_vertex_element *elements)
837 {
838 struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);
839
840 so->num_elements = num_elements;
841 memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
842
843 for (int i = 0; i < num_elements; ++i) {
844 so->hw[i].index = i;
845
846 enum pipe_format fmt = elements[i].src_format;
847 const struct util_format_description *desc = util_format_description(fmt);
848 so->hw[i].unknown1 = 0x2;
849 so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);
850
851 so->hw[i].format = panfrost_find_format(desc);
852
853 /* The field itself should probably be shifted over */
854 so->hw[i].src_offset = elements[i].src_offset;
855 }
856
857 return so;
858 }
859
860 static void
861 panfrost_bind_vertex_elements_state(
862 struct pipe_context *pctx,
863 void *hwcso)
864 {
865 struct panfrost_context *ctx = pan_context(pctx);
866 ctx->vertex = hwcso;
867 }
868
869 static void *
870 panfrost_create_shader_state(
871 struct pipe_context *pctx,
872 const struct pipe_shader_state *cso,
873 enum pipe_shader_type stage)
874 {
875 struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants);
876 so->base = *cso;
877
878 /* Token deep copy to prevent memory corruption */
879
880 if (cso->type == PIPE_SHADER_IR_TGSI)
881 so->base.tokens = tgsi_dup_tokens(so->base.tokens);
882
883 /* Precompile for shader-db if we need to */
884 if (unlikely((pan_debug & PAN_DBG_PRECOMPILE) && cso->type == PIPE_SHADER_IR_NIR)) {
885 struct panfrost_context *ctx = pan_context(pctx);
886
887 struct panfrost_shader_state state;
888 uint64_t outputs_written;
889
890 panfrost_shader_compile(ctx, PIPE_SHADER_IR_NIR,
891 so->base.ir.nir,
892 tgsi_processor_to_shader_stage(stage),
893 &state, &outputs_written);
894 }
895
896 return so;
897 }
898
899 static void
900 panfrost_delete_shader_state(
901 struct pipe_context *pctx,
902 void *so)
903 {
904 struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so;
905
906 if (cso->base.type == PIPE_SHADER_IR_TGSI) {
907 DBG("Deleting TGSI shader leaks duplicated tokens\n");
908 }
909
910 for (unsigned i = 0; i < cso->variant_count; ++i) {
911 struct panfrost_shader_state *shader_state = &cso->variants[i];
912 panfrost_bo_unreference(shader_state->bo);
913 shader_state->bo = NULL;
914 }
915 free(cso->variants);
916
917 free(so);
918 }
919
920 static void *
921 panfrost_create_sampler_state(
922 struct pipe_context *pctx,
923 const struct pipe_sampler_state *cso)
924 {
925 struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
926 so->base = *cso;
927
928 /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */
929
930 bool min_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
931 bool mag_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
932 bool mip_linear = cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR;
933
934 unsigned min_filter = min_nearest ? MALI_SAMP_MIN_NEAREST : 0;
935 unsigned mag_filter = mag_nearest ? MALI_SAMP_MAG_NEAREST : 0;
936 unsigned mip_filter = mip_linear ?
937 (MALI_SAMP_MIP_LINEAR_1 | MALI_SAMP_MIP_LINEAR_2) : 0;
938 unsigned normalized = cso->normalized_coords ? MALI_SAMP_NORM_COORDS : 0;
939
940 struct mali_sampler_descriptor sampler_descriptor = {
941 .filter_mode = min_filter | mag_filter | mip_filter | normalized,
942 .wrap_s = translate_tex_wrap(cso->wrap_s),
943 .wrap_t = translate_tex_wrap(cso->wrap_t),
944 .wrap_r = translate_tex_wrap(cso->wrap_r),
945 .compare_func = panfrost_flip_compare_func(
946 panfrost_translate_compare_func(
947 cso->compare_func)),
948 .border_color = {
949 cso->border_color.f[0],
950 cso->border_color.f[1],
951 cso->border_color.f[2],
952 cso->border_color.f[3]
953 },
954 .min_lod = FIXED_16(cso->min_lod, false), /* clamp at 0 */
955 .max_lod = FIXED_16(cso->max_lod, false),
956 .lod_bias = FIXED_16(cso->lod_bias, true), /* can be negative */
957 .seamless_cube_map = cso->seamless_cube_map,
958 };
959
960 /* If necessary, we disable mipmapping in the sampler descriptor by
961 * clamping the LOD as tight as possible (from 0 to epsilon,
962 * essentially -- remember these are fixed point numbers, so
963 * epsilon=1/256) */
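/* Concretely (illustrative): with cso->min_lod = 0 the clamp collapses to
 * [0, 1/256] of a level, pinning sampling to the base level. */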
964
965 if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
966 sampler_descriptor.max_lod = sampler_descriptor.min_lod;
967
968 /* Enforce that there is something in the middle by adding epsilon */
969
970 if (sampler_descriptor.min_lod == sampler_descriptor.max_lod)
971 sampler_descriptor.max_lod++;
972
973 /* Sanity check */
974 assert(sampler_descriptor.max_lod > sampler_descriptor.min_lod);
975 }
976
977 so->hw = sampler_descriptor;
978
979 return so;
980 }
981
982 static void
983 panfrost_bind_sampler_states(
984 struct pipe_context *pctx,
985 enum pipe_shader_type shader,
986 unsigned start_slot, unsigned num_sampler,
987 void **sampler)
988 {
989 assert(start_slot == 0);
990
991 struct panfrost_context *ctx = pan_context(pctx);
992
993 /* XXX: Should upload, not just copy? */
994 ctx->sampler_count[shader] = num_sampler;
995 memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *));
996 }
997
998 static bool
999 panfrost_variant_matches(
1000 struct panfrost_context *ctx,
1001 struct panfrost_shader_state *variant,
1002 enum pipe_shader_type type)
1003 {
1004 struct pipe_rasterizer_state *rasterizer = &ctx->rasterizer->base;
1005 struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha;
1006
1007 bool is_fragment = (type == PIPE_SHADER_FRAGMENT);
1008
1009 if (is_fragment && (alpha->enabled || variant->alpha_state.enabled)) {
1010 /* Make sure enable state is at least the same */
1011 if (alpha->enabled != variant->alpha_state.enabled) {
1012 return false;
1013 }
1014
1015 /* Check that the contents of the test are the same */
1016 bool same_func = alpha->func == variant->alpha_state.func;
1017 bool same_ref = alpha->ref_value == variant->alpha_state.ref_value;
1018
1019 if (!(same_func && same_ref)) {
1020 return false;
1021 }
1022 }
1023
1024 if (is_fragment && rasterizer && (rasterizer->sprite_coord_enable |
1025 variant->point_sprite_mask)) {
1026 /* Ensure the same varyings are turned to point sprites */
1027 if (rasterizer->sprite_coord_enable != variant->point_sprite_mask)
1028 return false;
1029
1030 /* Ensure the orientation is correct */
1031 bool upper_left =
1032 rasterizer->sprite_coord_mode ==
1033 PIPE_SPRITE_COORD_UPPER_LEFT;
1034
1035 if (variant->point_sprite_upper_left != upper_left)
1036 return false;
1037 }
1038
1039 /* Otherwise, we're good to go */
1040 return true;
1041 }
1042
1043 /**
1044 * Fix an uncompiled shader's stream output info, and produce a bitmask
1045 * of which VARYING_SLOT_* are captured for stream output.
1046 *
1047 * Core Gallium stores output->register_index as a "slot" number, where
1048 * slots are assigned consecutively to all outputs in info->outputs_written.
1049 * This naive packing of outputs doesn't work for us - we too have slots,
1050 * but the layout is defined by the VUE map, which we won't have until we
1051 * compile a specific shader variant. So, we remap these and simply store
1052 * VARYING_SLOT_* in our copy's output->register_index fields.
1053 *
1054 * We then produce a bitmask of outputs which are used for SO.
1055 *
1056 * Implementation from iris.
1057 */
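/* Illustrative example (added): a shader writing only the position and one
 * generic varying occupies slots 0 and 1, which get remapped back to
 * VARYING_SLOT_POS and VARYING_SLOT_VAR0 respectively. */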
1058
1059 static uint64_t
1060 update_so_info(struct pipe_stream_output_info *so_info,
1061 uint64_t outputs_written)
1062 {
1063 uint64_t so_outputs = 0;
1064 uint8_t reverse_map[64] = {0};
1065 unsigned slot = 0;
1066
1067 while (outputs_written)
1068 reverse_map[slot++] = u_bit_scan64(&outputs_written);
1069
1070 for (unsigned i = 0; i < so_info->num_outputs; i++) {
1071 struct pipe_stream_output *output = &so_info->output[i];
1072
1073 /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
1074 output->register_index = reverse_map[output->register_index];
1075
1076 so_outputs |= 1ull << output->register_index;
1077 }
1078
1079 return so_outputs;
1080 }
1081
1082 static void
1083 panfrost_bind_shader_state(
1084 struct pipe_context *pctx,
1085 void *hwcso,
1086 enum pipe_shader_type type)
1087 {
1088 struct panfrost_context *ctx = pan_context(pctx);
1089 ctx->shader[type] = hwcso;
1090
1091 if (!hwcso) return;
1092
1093 /* Match the appropriate variant */
1094
1095 signed variant = -1;
1096 struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso;
1097
1098 for (unsigned i = 0; i < variants->variant_count; ++i) {
1099 if (panfrost_variant_matches(ctx, &variants->variants[i], type)) {
1100 variant = i;
1101 break;
1102 }
1103 }
1104
1105 if (variant == -1) {
1106 /* No variant matched, so create a new one */
1107 variant = variants->variant_count++;
1108
1109 if (variants->variant_count > variants->variant_space) {
1110 unsigned old_space = variants->variant_space;
1111
1112 variants->variant_space *= 2;
1113 if (variants->variant_space == 0)
1114 variants->variant_space = 1;
1115
1116 /* Arbitrary limit to stop runaway programs from
1117 * creating an unbounded number of shader variants. */
1118 assert(variants->variant_space < 1024);
1119
1120 unsigned msize = sizeof(struct panfrost_shader_state);
1121 variants->variants = realloc(variants->variants,
1122 variants->variant_space * msize);
1123
1124 memset(&variants->variants[old_space], 0,
1125 (variants->variant_space - old_space) * msize);
1126 }
1127
1128 struct panfrost_shader_state *v =
1129 &variants->variants[variant];
1130
1131 if (type == PIPE_SHADER_FRAGMENT) {
1132 v->alpha_state = ctx->depth_stencil->alpha;
1133
1134 if (ctx->rasterizer) {
1135 v->point_sprite_mask = ctx->rasterizer->base.sprite_coord_enable;
1136 v->point_sprite_upper_left =
1137 ctx->rasterizer->base.sprite_coord_mode ==
1138 PIPE_SPRITE_COORD_UPPER_LEFT;
1139 }
1140 }
1141 }
1142
1143 /* Select this variant */
1144 variants->active_variant = variant;
1145
1146 struct panfrost_shader_state *shader_state = &variants->variants[variant];
1147 assert(panfrost_variant_matches(ctx, shader_state, type));
1148
1149 /* We finally have a variant, so compile it */
1150
1151 if (!shader_state->compiled) {
1152 uint64_t outputs_written = 0;
1153
1154 panfrost_shader_compile(ctx, variants->base.type,
1155 variants->base.type == PIPE_SHADER_IR_NIR ?
1156 variants->base.ir.nir :
1157 variants->base.tokens,
1158 tgsi_processor_to_shader_stage(type),
1159 shader_state,
1160 &outputs_written);
1161
1162 shader_state->compiled = true;
1163
1164 /* Fixup the stream out information, since what Gallium returns
1165 * normally is mildly insane */
1166
1167 shader_state->stream_output = variants->base.stream_output;
1168 shader_state->so_mask =
1169 update_so_info(&shader_state->stream_output, outputs_written);
1170 }
1171 }
1172
1173 static void *
1174 panfrost_create_vs_state(struct pipe_context *pctx, const struct pipe_shader_state *hwcso)
1175 {
1176 return panfrost_create_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
1177 }
1178
1179 static void *
1180 panfrost_create_fs_state(struct pipe_context *pctx, const struct pipe_shader_state *hwcso)
1181 {
1182 return panfrost_create_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
1183 }
1184
1185 static void
1186 panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso)
1187 {
1188 panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
1189 }
1190
1191 static void
1192 panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso)
1193 {
1194 panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
1195 }
1196
1197 static void
1198 panfrost_set_vertex_buffers(
1199 struct pipe_context *pctx,
1200 unsigned start_slot,
1201 unsigned num_buffers,
1202 const struct pipe_vertex_buffer *buffers)
1203 {
1204 struct panfrost_context *ctx = pan_context(pctx);
1205
1206 util_set_vertex_buffers_mask(ctx->vertex_buffers, &ctx->vb_mask, buffers, start_slot, num_buffers);
1207 }
1208
1209 static void
1210 panfrost_set_constant_buffer(
1211 struct pipe_context *pctx,
1212 enum pipe_shader_type shader, uint index,
1213 const struct pipe_constant_buffer *buf)
1214 {
1215 struct panfrost_context *ctx = pan_context(pctx);
1216 struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader];
1217
1218 util_copy_constant_buffer(&pbuf->cb[index], buf);
1219
1220 unsigned mask = (1 << index);
1221
1222 if (unlikely(!buf)) {
1223 pbuf->enabled_mask &= ~mask;
1224 pbuf->dirty_mask &= ~mask;
1225 return;
1226 }
1227
1228 pbuf->enabled_mask |= mask;
1229 pbuf->dirty_mask |= mask;
1230 }
1231
1232 static void
1233 panfrost_set_stencil_ref(
1234 struct pipe_context *pctx,
1235 const struct pipe_stencil_ref *ref)
1236 {
1237 struct panfrost_context *ctx = pan_context(pctx);
1238 ctx->stencil_ref = *ref;
1239 }
1240
1241 static enum mali_texture_type
1242 panfrost_translate_texture_type(enum pipe_texture_target t)
1243 {
1244 switch (t) {
1245 case PIPE_BUFFER:
1246 case PIPE_TEXTURE_1D:
1247 case PIPE_TEXTURE_1D_ARRAY:
1248 return MALI_TEX_1D;
1249
1250 case PIPE_TEXTURE_2D:
1251 case PIPE_TEXTURE_2D_ARRAY:
1252 case PIPE_TEXTURE_RECT:
1253 return MALI_TEX_2D;
1254
1255 case PIPE_TEXTURE_3D:
1256 return MALI_TEX_3D;
1257
1258 case PIPE_TEXTURE_CUBE:
1259 case PIPE_TEXTURE_CUBE_ARRAY:
1260 return MALI_TEX_CUBE;
1261
1262 default:
1263 unreachable("Unknown target");
1264 }
1265 }
1266
1267 static struct pipe_sampler_view *
1268 panfrost_create_sampler_view(
1269 struct pipe_context *pctx,
1270 struct pipe_resource *texture,
1271 const struct pipe_sampler_view *template)
1272 {
1273 struct panfrost_screen *screen = pan_screen(pctx->screen);
1274 struct panfrost_sampler_view *so = rzalloc(pctx, struct panfrost_sampler_view);
1275
1276 pipe_reference(NULL, &texture->reference);
1277
1278 struct panfrost_resource *prsrc = (struct panfrost_resource *) texture;
1279 assert(prsrc->bo);
1280
1281 so->base = *template;
1282 so->base.texture = texture;
1283 so->base.reference.count = 1;
1284 so->base.context = pctx;
1285
1286 unsigned char user_swizzle[4] = {
1287 template->swizzle_r,
1288 template->swizzle_g,
1289 template->swizzle_b,
1290 template->swizzle_a
1291 };
1292
1293 /* In the hardware, array_size refers specifically to array textures,
1294 * whereas in Gallium, it also covers cubemaps */
1295
1296 unsigned array_size = texture->array_size;
1297
1298 if (template->target == PIPE_TEXTURE_CUBE) {
1299 /* TODO: Cubemap arrays */
1300 assert(array_size == 6);
1301 array_size /= 6;
1302 }
1303
1304 enum mali_texture_type type =
1305 panfrost_translate_texture_type(template->target);
1306
1307 unsigned size = panfrost_estimate_texture_size(
1308 template->u.tex.first_level,
1309 template->u.tex.last_level,
1310 template->u.tex.first_layer,
1311 template->u.tex.last_layer,
1312 type, prsrc->layout);
1313
1314 so->bo = panfrost_bo_create(screen, size, 0);
1315
1316 panfrost_new_texture(
1317 so->bo->cpu,
1318 texture->width0, texture->height0,
1319 texture->depth0, array_size,
1320 template->format,
1321 type, prsrc->layout,
1322 template->u.tex.first_level,
1323 template->u.tex.last_level,
1324 template->u.tex.first_layer,
1325 template->u.tex.last_layer,
1326 prsrc->cubemap_stride,
1327 panfrost_translate_swizzle_4(user_swizzle),
1328 prsrc->bo->gpu,
1329 prsrc->slices);
1330
1331 return (struct pipe_sampler_view *) so;
1332 }
1333
1334 static void
1335 panfrost_set_sampler_views(
1336 struct pipe_context *pctx,
1337 enum pipe_shader_type shader,
1338 unsigned start_slot, unsigned num_views,
1339 struct pipe_sampler_view **views)
1340 {
1341 struct panfrost_context *ctx = pan_context(pctx);
1342 unsigned new_nr = 0;
1343 unsigned i;
1344
1345 assert(start_slot == 0);
1346
1347 for (i = 0; i < num_views; ++i) {
1348 if (views[i])
1349 new_nr = i + 1;
1350 pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i],
1351 views[i]);
1352 }
1353
1354 for (; i < ctx->sampler_view_count[shader]; i++) {
1355 pipe_sampler_view_reference((struct pipe_sampler_view **)&ctx->sampler_views[shader][i],
1356 NULL);
1357 }
1358 ctx->sampler_view_count[shader] = new_nr;
1359 }
1360
1361 static void
1362 panfrost_sampler_view_destroy(
1363 struct pipe_context *pctx,
1364 struct pipe_sampler_view *pview)
1365 {
1366 struct panfrost_sampler_view *view = (struct panfrost_sampler_view *) pview;
1367
1368 pipe_resource_reference(&pview->texture, NULL);
1369 panfrost_bo_unreference(view->bo);
1370 ralloc_free(view);
1371 }
1372
1373 static void
1374 panfrost_set_shader_buffers(
1375 struct pipe_context *pctx,
1376 enum pipe_shader_type shader,
1377 unsigned start, unsigned count,
1378 const struct pipe_shader_buffer *buffers,
1379 unsigned writable_bitmask)
1380 {
1381 struct panfrost_context *ctx = pan_context(pctx);
1382
1383 util_set_shader_buffers_mask(ctx->ssbo[shader], &ctx->ssbo_mask[shader],
1384 buffers, start, count);
1385 }
1386
1387 /* Hints that a framebuffer should use AFBC where possible */
1388
1389 static void
1390 panfrost_hint_afbc(
1391 struct panfrost_screen *screen,
1392 const struct pipe_framebuffer_state *fb)
1393 {
1394 /* AFBC implementation incomplete; hide it */
1395 if (!(pan_debug & PAN_DBG_AFBC)) return;
1396
1397 /* Hint AFBC to the resources bound to each color buffer */
1398
1399 for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
1400 struct pipe_surface *surf = fb->cbufs[i];
1401 struct panfrost_resource *rsrc = pan_resource(surf->texture);
1402 panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1);
1403 }
1404
1405 /* Also hint it to the depth buffer */
1406
1407 if (fb->zsbuf) {
1408 struct panfrost_resource *rsrc = pan_resource(fb->zsbuf->texture);
1409 panfrost_resource_hint_layout(screen, rsrc, MALI_TEXTURE_AFBC, 1);
1410 }
1411 }
1412
1413 static void
1414 panfrost_set_framebuffer_state(struct pipe_context *pctx,
1415 const struct pipe_framebuffer_state *fb)
1416 {
1417 struct panfrost_context *ctx = pan_context(pctx);
1418
1419 panfrost_hint_afbc(pan_screen(pctx->screen), fb);
1420 util_copy_framebuffer_state(&ctx->pipe_framebuffer, fb);
1421 ctx->batch = NULL;
1422 panfrost_invalidate_frame(ctx);
1423 }
1424
1425 static void *
1426 panfrost_create_depth_stencil_state(struct pipe_context *pipe,
1427 const struct pipe_depth_stencil_alpha_state *depth_stencil)
1428 {
1429 return mem_dup(depth_stencil, sizeof(*depth_stencil));
1430 }
1431
1432 static void
1433 panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
1434 void *cso)
1435 {
1436 struct panfrost_context *ctx = pan_context(pipe);
1437 struct pipe_depth_stencil_alpha_state *depth_stencil = cso;
1438 ctx->depth_stencil = depth_stencil;
1439
1440 if (!depth_stencil)
1441 return;
1442
1443 /* Alpha does not exist in the hardware (it's not in ES3), so it's
1444 * emulated in the fragment shader */
1445
1446 if (depth_stencil->alpha.enabled) {
1447 /* We need to trigger a new shader (maybe) */
1448 ctx->base.bind_fs_state(&ctx->base, ctx->shader[PIPE_SHADER_FRAGMENT]);
1449 }
1450
1451 /* Bounds test not implemented */
1452 assert(!depth_stencil->depth.bounds_test);
1453 }
1454
1455 static void
1456 panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
1457 {
1458 free( depth );
1459 }
1460
1461 static void
1462 panfrost_set_sample_mask(struct pipe_context *pipe,
1463 unsigned sample_mask)
1464 {
1465 }
1466
1467 static void
1468 panfrost_set_clip_state(struct pipe_context *pipe,
1469 const struct pipe_clip_state *clip)
1470 {
1471 //struct panfrost_context *panfrost = pan_context(pipe);
1472 }
1473
1474 static void
1475 panfrost_set_viewport_states(struct pipe_context *pipe,
1476 unsigned start_slot,
1477 unsigned num_viewports,
1478 const struct pipe_viewport_state *viewports)
1479 {
1480 struct panfrost_context *ctx = pan_context(pipe);
1481
1482 assert(start_slot == 0);
1483 assert(num_viewports == 1);
1484
1485 ctx->pipe_viewport = *viewports;
1486 }
1487
1488 static void
1489 panfrost_set_scissor_states(struct pipe_context *pipe,
1490 unsigned start_slot,
1491 unsigned num_scissors,
1492 const struct pipe_scissor_state *scissors)
1493 {
1494 struct panfrost_context *ctx = pan_context(pipe);
1495
1496 assert(start_slot == 0);
1497 assert(num_scissors == 1);
1498
1499 ctx->scissor = *scissors;
1500 }
1501
1502 static void
1503 panfrost_set_polygon_stipple(struct pipe_context *pipe,
1504 const struct pipe_poly_stipple *stipple)
1505 {
1506 //struct panfrost_context *panfrost = pan_context(pipe);
1507 }
1508
1509 static void
1510 panfrost_set_active_query_state(struct pipe_context *pipe,
1511 bool enable)
1512 {
1513 struct panfrost_context *ctx = pan_context(pipe);
1514 ctx->active_queries = enable;
1515 }
1516
1517 static void
1518 panfrost_destroy(struct pipe_context *pipe)
1519 {
1520 struct panfrost_context *panfrost = pan_context(pipe);
1521
1522 if (panfrost->blitter)
1523 util_blitter_destroy(panfrost->blitter);
1524
1525 if (panfrost->blitter_wallpaper)
1526 util_blitter_destroy(panfrost->blitter_wallpaper);
1527
1528 util_unreference_framebuffer_state(&panfrost->pipe_framebuffer);
1529 u_upload_destroy(pipe->stream_uploader);
1530
1531 ralloc_free(pipe);
1532 }
1533
1534 static struct pipe_query *
1535 panfrost_create_query(struct pipe_context *pipe,
1536 unsigned type,
1537 unsigned index)
1538 {
1539 struct panfrost_query *q = rzalloc(pipe, struct panfrost_query);
1540
1541 q->type = type;
1542 q->index = index;
1543
1544 return (struct pipe_query *) q;
1545 }
1546
1547 static void
1548 panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
1549 {
1550 struct panfrost_query *query = (struct panfrost_query *) q;
1551
1552 if (query->bo) {
1553 panfrost_bo_unreference(query->bo);
1554 query->bo = NULL;
1555 }
1556
1557 ralloc_free(q);
1558 }
1559
1560 static bool
1561 panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q)
1562 {
1563 struct panfrost_context *ctx = pan_context(pipe);
1564 struct panfrost_query *query = (struct panfrost_query *) q;
1565
1566 switch (query->type) {
1567 case PIPE_QUERY_OCCLUSION_COUNTER:
1568 case PIPE_QUERY_OCCLUSION_PREDICATE:
1569 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
1570 /* Allocate a bo for the query results to be stored */
1571 if (!query->bo) {
1572 query->bo = panfrost_bo_create(
1573 pan_screen(ctx->base.screen),
1574 sizeof(unsigned), 0);
1575 }
1576
1577 unsigned *result = (unsigned *)query->bo->cpu;
1578 *result = 0; /* Default to 0 if nothing at all drawn. */
1579 ctx->occlusion_query = query;
1580 break;
1581
1582 /* Geometry statistics are computed in the driver. XXX: geom/tess
1583 * shaders.. */
1584
1585 case PIPE_QUERY_PRIMITIVES_GENERATED:
1586 query->start = ctx->prims_generated;
1587 break;
1588 case PIPE_QUERY_PRIMITIVES_EMITTED:
1589 query->start = ctx->tf_prims_generated;
1590 break;
1591
1592 default:
1593 DBG("Skipping query %u\n", query->type);
1594 break;
1595 }
1596
1597 return true;
1598 }
1599
1600 static bool
1601 panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q)
1602 {
1603 struct panfrost_context *ctx = pan_context(pipe);
1604 struct panfrost_query *query = (struct panfrost_query *) q;
1605
1606 switch (query->type) {
1607 case PIPE_QUERY_OCCLUSION_COUNTER:
1608 case PIPE_QUERY_OCCLUSION_PREDICATE:
1609 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
1610 ctx->occlusion_query = NULL;
1611 break;
1612 case PIPE_QUERY_PRIMITIVES_GENERATED:
1613 query->end = ctx->prims_generated;
1614 break;
1615 case PIPE_QUERY_PRIMITIVES_EMITTED:
1616 query->end = ctx->tf_prims_generated;
1617 break;
1618 }
1619
1620 return true;
1621 }
1622
1623 static bool
1624 panfrost_get_query_result(struct pipe_context *pipe,
1625 struct pipe_query *q,
1626 bool wait,
1627 union pipe_query_result *vresult)
1628 {
1629 struct panfrost_query *query = (struct panfrost_query *) q;
1630 struct panfrost_context *ctx = pan_context(pipe);
1631
1632
1633 switch (query->type) {
1634 case PIPE_QUERY_OCCLUSION_COUNTER:
1635 case PIPE_QUERY_OCCLUSION_PREDICATE:
1636 case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
1637 /* Flush first */
1638 panfrost_flush_all_batches(ctx, true);
1639
1640 /* Read back the query results */
1641 unsigned *result = (unsigned *) query->bo->cpu;
1642 unsigned passed = *result;
1643
1644 if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
1645 vresult->u64 = passed;
1646 } else {
1647 vresult->b = !!passed;
1648 }
1649
1650 break;
1651
1652 case PIPE_QUERY_PRIMITIVES_GENERATED:
1653 case PIPE_QUERY_PRIMITIVES_EMITTED:
1654 panfrost_flush_all_batches(ctx, true);
1655 vresult->u64 = query->end - query->start;
1656 break;
1657
1658 default:
1659 DBG("Skipped query get %u\n", query->type);
1660 break;
1661 }
1662
1663 return true;
1664 }
1665
1666 static struct pipe_stream_output_target *
1667 panfrost_create_stream_output_target(struct pipe_context *pctx,
1668 struct pipe_resource *prsc,
1669 unsigned buffer_offset,
1670 unsigned buffer_size)
1671 {
1672 struct pipe_stream_output_target *target;
1673
1674 target = rzalloc(pctx, struct pipe_stream_output_target);
1675
1676 if (!target)
1677 return NULL;
1678
1679 pipe_reference_init(&target->reference, 1);
1680 pipe_resource_reference(&target->buffer, prsc);
1681
1682 target->context = pctx;
1683 target->buffer_offset = buffer_offset;
1684 target->buffer_size = buffer_size;
1685
1686 return target;
1687 }
1688
1689 static void
1690 panfrost_stream_output_target_destroy(struct pipe_context *pctx,
1691 struct pipe_stream_output_target *target)
1692 {
1693 pipe_resource_reference(&target->buffer, NULL);
1694 ralloc_free(target);
1695 }
1696
1697 static void
1698 panfrost_set_stream_output_targets(struct pipe_context *pctx,
1699 unsigned num_targets,
1700 struct pipe_stream_output_target **targets,
1701 const unsigned *offsets)
1702 {
1703 struct panfrost_context *ctx = pan_context(pctx);
1704 struct panfrost_streamout *so = &ctx->streamout;
1705
1706 assert(num_targets <= ARRAY_SIZE(so->targets));
1707
1708 for (unsigned i = 0; i < num_targets; i++) {
1709 if (offsets[i] != -1)
1710 so->offsets[i] = offsets[i];
1711
1712 pipe_so_target_reference(&so->targets[i], targets[i]);
1713 }
1714
1715 for (unsigned i = 0; i < so->num_targets; i++)
1716 pipe_so_target_reference(&so->targets[i], NULL);
1717
1718 so->num_targets = num_targets;
1719 }
1720
1721 struct pipe_context *
1722 panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
1723 {
1724 struct panfrost_context *ctx = rzalloc(screen, struct panfrost_context);
1725 struct pipe_context *gallium = (struct pipe_context *) ctx;
1726
1727 gallium->screen = screen;
1728
1729 gallium->destroy = panfrost_destroy;
1730
1731 gallium->set_framebuffer_state = panfrost_set_framebuffer_state;
1732
1733 gallium->flush = panfrost_flush;
1734 gallium->clear = panfrost_clear;
1735 gallium->draw_vbo = panfrost_draw_vbo;
1736
1737 gallium->set_vertex_buffers = panfrost_set_vertex_buffers;
1738 gallium->set_constant_buffer = panfrost_set_constant_buffer;
1739 gallium->set_shader_buffers = panfrost_set_shader_buffers;
1740
1741 gallium->set_stencil_ref = panfrost_set_stencil_ref;
1742
1743 gallium->create_sampler_view = panfrost_create_sampler_view;
1744 gallium->set_sampler_views = panfrost_set_sampler_views;
1745 gallium->sampler_view_destroy = panfrost_sampler_view_destroy;
1746
1747 gallium->create_rasterizer_state = panfrost_create_rasterizer_state;
1748 gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state;
1749 gallium->delete_rasterizer_state = panfrost_generic_cso_delete;
1750
1751 gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state;
1752 gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state;
1753 gallium->delete_vertex_elements_state = panfrost_generic_cso_delete;
1754
1755 gallium->create_fs_state = panfrost_create_fs_state;
1756 gallium->delete_fs_state = panfrost_delete_shader_state;
1757 gallium->bind_fs_state = panfrost_bind_fs_state;
1758
1759 gallium->create_vs_state = panfrost_create_vs_state;
1760 gallium->delete_vs_state = panfrost_delete_shader_state;
1761 gallium->bind_vs_state = panfrost_bind_vs_state;
1762
1763 gallium->create_sampler_state = panfrost_create_sampler_state;
1764 gallium->delete_sampler_state = panfrost_generic_cso_delete;
1765 gallium->bind_sampler_states = panfrost_bind_sampler_states;
1766
1767 gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state;
1768 gallium->bind_depth_stencil_alpha_state = panfrost_bind_depth_stencil_state;
1769 gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state;
1770
1771 gallium->set_sample_mask = panfrost_set_sample_mask;
1772
1773 gallium->set_clip_state = panfrost_set_clip_state;
1774 gallium->set_viewport_states = panfrost_set_viewport_states;
1775 gallium->set_scissor_states = panfrost_set_scissor_states;
1776 gallium->set_polygon_stipple = panfrost_set_polygon_stipple;
1777 gallium->set_active_query_state = panfrost_set_active_query_state;
1778
1779 gallium->create_query = panfrost_create_query;
1780 gallium->destroy_query = panfrost_destroy_query;
1781 gallium->begin_query = panfrost_begin_query;
1782 gallium->end_query = panfrost_end_query;
1783 gallium->get_query_result = panfrost_get_query_result;
1784
1785 gallium->create_stream_output_target = panfrost_create_stream_output_target;
1786 gallium->stream_output_target_destroy = panfrost_stream_output_target_destroy;
1787 gallium->set_stream_output_targets = panfrost_set_stream_output_targets;
1788
1789 panfrost_resource_context_init(gallium);
1790 panfrost_blend_context_init(gallium);
1791 panfrost_compute_context_init(gallium);
1792
1793 /* XXX: leaks */
1794 gallium->stream_uploader = u_upload_create_default(gallium);
1795 gallium->const_uploader = gallium->stream_uploader;
1796 assert(gallium->stream_uploader);
1797
1798 /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */
1799 ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1;
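/* i.e. one bit set for every primitive type from PIPE_PRIM_POINTS up to and
 * including PIPE_PRIM_POLYGON (clarifying note added). */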
1800
1801 ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes);
1802
1803 ctx->blitter = util_blitter_create(gallium);
1804 ctx->blitter_wallpaper = util_blitter_create(gallium);
1805
1806 assert(ctx->blitter);
1807 assert(ctx->blitter_wallpaper);
1808
1809 /* Prepare for render! */
1810
1811 panfrost_batch_init(ctx);
1812 panfrost_emit_vertex_payload(ctx);
1813 panfrost_invalidate_frame(ctx);
1814
1815 return gallium;
1816 }