gallium/u_threaded: implement asynchronous flushes
[mesa.git] / src / gallium / auxiliary / util / u_threaded_context.c
1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33
34 /* 0 = disabled, 1 = assertions, 2 = printfs */
35 #define TC_DEBUG 0
36
37 #if TC_DEBUG >= 1
38 #define tc_assert assert
39 #else
40 #define tc_assert(x)
41 #endif
42
43 #if TC_DEBUG >= 2
44 #define tc_printf printf
45 #define tc_asprintf asprintf
46 #define tc_strcmp strcmp
47 #else
48 #define tc_printf(...)
49 #define tc_asprintf(...) 0
50 #define tc_strcmp(...) 0
51 #endif
52
53 #define TC_SENTINEL 0x5ca1ab1e
54
55 enum tc_call_id {
56 #define CALL(name) TC_CALL_##name,
57 #include "u_threaded_context_calls.h"
58 #undef CALL
59 TC_NUM_CALLS,
60 };
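/* Editorial note: the CALL() X-macro expands each entry of
 * u_threaded_context_calls.h into a TC_CALL_* enumerator, and the same header
 * is included again (further down, past this excerpt) to fill the
 * execute_func[] table with the matching tc_call_* functions.  For example,
 * an entry such as CALL(flush) yields TC_CALL_flush here and is expected to
 * map to tc_call_flush in execute_func[].
 */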
61
62 typedef void (*tc_execute)(struct pipe_context *pipe, union tc_payload *payload);
63
64 static const tc_execute execute_func[TC_NUM_CALLS];
65
66 static void
67 tc_batch_check(struct tc_batch *batch)
68 {
69 tc_assert(batch->sentinel == TC_SENTINEL);
70 tc_assert(batch->num_total_call_slots <= TC_CALLS_PER_BATCH);
71 }
72
73 static void
74 tc_debug_check(struct threaded_context *tc)
75 {
76 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
77 tc_batch_check(&tc->batch_slots[i]);
78 tc_assert(tc->batch_slots[i].pipe == tc->pipe);
79 }
80 }
81
82 static void
83 tc_batch_execute(void *job, int thread_index)
84 {
85 struct tc_batch *batch = job;
86 struct pipe_context *pipe = batch->pipe;
87 struct tc_call *last = &batch->call[batch->num_total_call_slots];
88
89 tc_batch_check(batch);
90
91 assert(!batch->token);
92
93 for (struct tc_call *iter = batch->call; iter != last;
94 iter += iter->num_call_slots) {
95 tc_assert(iter->sentinel == TC_SENTINEL);
96 execute_func[iter->call_id](pipe, &iter->payload);
97 }
98
99 tc_batch_check(batch);
100 batch->num_total_call_slots = 0;
101 }
102
103 static void
104 tc_batch_flush(struct threaded_context *tc)
105 {
106 struct tc_batch *next = &tc->batch_slots[tc->next];
107
108 tc_assert(next->num_total_call_slots != 0);
109 tc_batch_check(next);
110 tc_debug_check(tc);
111 p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots);
112
113 if (next->token) {
114 next->token->tc = NULL;
115 tc_unflushed_batch_token_reference(&next->token, NULL);
116 }
117
118 util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
119 NULL);
120 tc->last = tc->next;
121 tc->next = (tc->next + 1) % TC_MAX_BATCHES;
122 }
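/* Editorial note: batch slots are used round-robin.  tc_batch_flush hands the
 * current batch ("next") to the worker queue, remembers it as "last", and
 * advances "next" modulo TC_MAX_BATCHES; _tc_sync below then only has to wait
 * on "last" and execute the still-unflushed "next" directly.
 */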
123
124 /* This is the function that adds variable-sized calls into the current
125 * batch. It also flushes the batch if there is not enough space there.
126 * All other higher-level "add" functions use it.
127 */
128 static union tc_payload *
129 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
130 unsigned payload_size)
131 {
132 struct tc_batch *next = &tc->batch_slots[tc->next];
133 unsigned total_size = offsetof(struct tc_call, payload) + payload_size;
134 unsigned num_call_slots = DIV_ROUND_UP(total_size, sizeof(struct tc_call));
135
136 tc_debug_check(tc);
137
138 if (unlikely(next->num_total_call_slots + num_call_slots > TC_CALLS_PER_BATCH)) {
139 tc_batch_flush(tc);
140 next = &tc->batch_slots[tc->next];
141 tc_assert(next->num_total_call_slots == 0);
142 }
143
144 tc_assert(util_queue_fence_is_signalled(&next->fence));
145
146 struct tc_call *call = &next->call[next->num_total_call_slots];
147 next->num_total_call_slots += num_call_slots;
148
149 call->sentinel = TC_SENTINEL;
150 call->call_id = id;
151 call->num_call_slots = num_call_slots;
152
153 tc_debug_check(tc);
154 return &call->payload;
155 }
156
157 #define tc_add_struct_typed_call(tc, execute, type) \
158 ((struct type*)tc_add_sized_call(tc, execute, sizeof(struct type)))
159
160 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
161 ((struct type*)tc_add_sized_call(tc, execute, \
162 sizeof(struct type) + \
163 sizeof(((struct type*)NULL)->slot[0]) * \
164 (num_slots)))
165
166 static union tc_payload *
167 tc_add_small_call(struct threaded_context *tc, enum tc_call_id id)
168 {
169 return tc_add_sized_call(tc, id, 0);
170 }
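/* Illustrative sketch of the enqueue pattern used throughout this file:
 *
 *    union tc_payload *payload = tc_add_small_call(tc, TC_CALL_destroy_query);
 *    payload->query = query;
 *
 * The application thread fills in the payload; the matching tc_call_*
 * function consumes it later in the driver thread (see tc_batch_execute).
 */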
171
172 static void
173 _tc_sync(struct threaded_context *tc, const char *info, const char *func)
174 {
175 struct tc_batch *last = &tc->batch_slots[tc->last];
176 struct tc_batch *next = &tc->batch_slots[tc->next];
177 bool synced = false;
178
179 tc_debug_check(tc);
180
181 /* Only wait for queued calls... */
182 if (!util_queue_fence_is_signalled(&last->fence)) {
183 util_queue_fence_wait(&last->fence);
184 synced = true;
185 }
186
187 tc_debug_check(tc);
188
189 if (next->token) {
190 next->token->tc = NULL;
191 tc_unflushed_batch_token_reference(&next->token, NULL);
192 }
193
194 /* .. and execute unflushed calls directly. */
195 if (next->num_total_call_slots) {
196 p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots);
197 tc_batch_execute(next, 0);
198 synced = true;
199 }
200
201 if (synced) {
202 p_atomic_inc(&tc->num_syncs);
203
204 if (tc_strcmp(func, "tc_destroy") != 0)
205 tc_printf("sync %s %s\n", func, info);
206 }
207
208 tc_debug_check(tc);
209 }
210
211 #define tc_sync(tc) _tc_sync(tc, "", __func__)
212 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
213
214 /**
215 * Call this from fence_finish for same-context fence waits of deferred fences
216 * that haven't been flushed yet.
217 *
218 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
219 * i.e., the wrapped one.
220 */
221 void
222 threaded_context_flush(struct pipe_context *_pipe,
223 struct tc_unflushed_batch_token *token)
224 {
225 struct threaded_context *tc = threaded_context(_pipe);
226
227 /* This is called from the state-tracker / application thread. */
228 if (token->tc && token->tc == tc)
229 tc_sync(token->tc);
230 }
231
232 static void
233 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
234 {
235 *dst = NULL;
236 pipe_resource_reference(dst, src);
237 }
238
239 void
240 threaded_resource_init(struct pipe_resource *res)
241 {
242 struct threaded_resource *tres = threaded_resource(res);
243
244 tres->latest = &tres->b;
245 util_range_init(&tres->valid_buffer_range);
246 tres->base_valid_buffer_range = &tres->valid_buffer_range;
247 tres->is_shared = false;
248 tres->is_user_ptr = false;
249 }
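/* Editorial note: "latest" points at the buffer's most recent storage (it is
 * redirected by tc_invalidate_buffer below), while "valid_buffer_range"
 * tracks which bytes have ever been written; tc_improve_map_buffer_flags uses
 * it to map never-written ranges unsynchronized.
 */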
250
251 void
252 threaded_resource_deinit(struct pipe_resource *res)
253 {
254 struct threaded_resource *tres = threaded_resource(res);
255
256 if (tres->latest != &tres->b)
257 pipe_resource_reference(&tres->latest, NULL);
258 util_range_destroy(&tres->valid_buffer_range);
259 }
260
261 struct pipe_context *
262 threaded_context_unwrap_sync(struct pipe_context *pipe)
263 {
264 if (!pipe || !pipe->priv)
265 return pipe;
266
267 tc_sync(threaded_context(pipe));
268 return (struct pipe_context*)pipe->priv;
269 }
270
271
272 /********************************************************************
273 * simple functions
274 */
275
276 #define TC_FUNC1(func, m_payload, qualifier, type, deref, deref2) \
277 static void \
278 tc_call_##func(struct pipe_context *pipe, union tc_payload *payload) \
279 { \
280 pipe->func(pipe, deref2((type*)payload)); \
281 } \
282 \
283 static void \
284 tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
285 { \
286 struct threaded_context *tc = threaded_context(_pipe); \
287 type *p = (type*)tc_add_sized_call(tc, TC_CALL_##func, sizeof(type)); \
288 *p = deref(param); \
289 }
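/* Editorial note: for example,
 * TC_FUNC1(set_sample_mask, sample_mask, , unsigned, , *) expands to roughly:
 *
 *    static void
 *    tc_call_set_sample_mask(struct pipe_context *pipe, union tc_payload *payload)
 *    {
 *       pipe->set_sample_mask(pipe, *(unsigned*)payload);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       unsigned *p = (unsigned*)tc_add_sized_call(tc, TC_CALL_set_sample_mask,
 *                                                  sizeof(unsigned));
 *       *p = param;
 *    }
 */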
290
291 TC_FUNC1(set_active_query_state, flags, , boolean, , *)
292
293 TC_FUNC1(set_blend_color, blend_color, const, struct pipe_blend_color, *, )
294 TC_FUNC1(set_stencil_ref, stencil_ref, const, struct pipe_stencil_ref, *, )
295 TC_FUNC1(set_clip_state, clip_state, const, struct pipe_clip_state, *, )
296 TC_FUNC1(set_sample_mask, sample_mask, , unsigned, , *)
297 TC_FUNC1(set_min_samples, min_samples, , unsigned, , *)
298 TC_FUNC1(set_polygon_stipple, polygon_stipple, const, struct pipe_poly_stipple, *, )
299
300 TC_FUNC1(texture_barrier, flags, , unsigned, , *)
301 TC_FUNC1(memory_barrier, flags, , unsigned, , *)
302
303
304 /********************************************************************
305 * queries
306 */
307
308 static struct pipe_query *
309 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
310 unsigned index)
311 {
312 struct threaded_context *tc = threaded_context(_pipe);
313 struct pipe_context *pipe = tc->pipe;
314
315 return pipe->create_query(pipe, query_type, index);
316 }
317
318 static struct pipe_query *
319 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
320 unsigned *query_types)
321 {
322 struct threaded_context *tc = threaded_context(_pipe);
323 struct pipe_context *pipe = tc->pipe;
324
325 return pipe->create_batch_query(pipe, num_queries, query_types);
326 }
327
328 static void
329 tc_call_destroy_query(struct pipe_context *pipe, union tc_payload *payload)
330 {
331 pipe->destroy_query(pipe, payload->query);
332 }
333
334 static void
335 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
336 {
337 struct threaded_context *tc = threaded_context(_pipe);
338 struct threaded_query *tq = threaded_query(query);
339
340 if (tq->head_unflushed.next)
341 LIST_DEL(&tq->head_unflushed);
342
343 tc_add_small_call(tc, TC_CALL_destroy_query)->query = query;
344 }
345
346 static void
347 tc_call_begin_query(struct pipe_context *pipe, union tc_payload *payload)
348 {
349 pipe->begin_query(pipe, payload->query);
350 }
351
352 static boolean
353 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
354 {
355 struct threaded_context *tc = threaded_context(_pipe);
356 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_begin_query);
357
358 payload->query = query;
359 return true; /* we don't care about the return value for this call */
360 }
361
362 static void
363 tc_call_end_query(struct pipe_context *pipe, union tc_payload *payload)
364 {
365 pipe->end_query(pipe, payload->query);
366 }
367
368 static bool
369 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
370 {
371 struct threaded_context *tc = threaded_context(_pipe);
372 struct threaded_query *tq = threaded_query(query);
373 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_end_query);
374
375 payload->query = query;
376
377 tq->flushed = false;
378 if (!tq->head_unflushed.next)
379 LIST_ADD(&tq->head_unflushed, &tc->unflushed_queries);
380
381 return true; /* we don't care about the return value for this call */
382 }
383
384 static boolean
385 tc_get_query_result(struct pipe_context *_pipe,
386 struct pipe_query *query, boolean wait,
387 union pipe_query_result *result)
388 {
389 struct threaded_context *tc = threaded_context(_pipe);
390 struct threaded_query *tq = threaded_query(query);
391 struct pipe_context *pipe = tc->pipe;
392
393 if (!tq->flushed)
394 tc_sync_msg(tc, wait ? "wait" : "nowait");
395
396 bool success = pipe->get_query_result(pipe, query, wait, result);
397
398 if (success) {
399 tq->flushed = true;
400 if (tq->head_unflushed.next)
401 LIST_DEL(&tq->head_unflushed);
402 }
403 return success;
404 }
405
406 struct tc_query_result_resource {
407 struct pipe_query *query;
408 boolean wait;
409 enum pipe_query_value_type result_type;
410 int index;
411 struct pipe_resource *resource;
412 unsigned offset;
413 };
414
415 static void
416 tc_call_get_query_result_resource(struct pipe_context *pipe,
417 union tc_payload *payload)
418 {
419 struct tc_query_result_resource *p = (struct tc_query_result_resource *)payload;
420
421 pipe->get_query_result_resource(pipe, p->query, p->wait, p->result_type,
422 p->index, p->resource, p->offset);
423 pipe_resource_reference(&p->resource, NULL);
424 }
425
426 static void
427 tc_get_query_result_resource(struct pipe_context *_pipe,
428 struct pipe_query *query, boolean wait,
429 enum pipe_query_value_type result_type, int index,
430 struct pipe_resource *resource, unsigned offset)
431 {
432 struct threaded_context *tc = threaded_context(_pipe);
433 struct tc_query_result_resource *p =
434 tc_add_struct_typed_call(tc, TC_CALL_get_query_result_resource,
435 tc_query_result_resource);
436
437 p->query = query;
438 p->wait = wait;
439 p->result_type = result_type;
440 p->index = index;
441 tc_set_resource_reference(&p->resource, resource);
442 p->offset = offset;
443 }
444
445 struct tc_render_condition {
446 struct pipe_query *query;
447 bool condition;
448 unsigned mode;
449 };
450
451 static void
452 tc_call_render_condition(struct pipe_context *pipe, union tc_payload *payload)
453 {
454 struct tc_render_condition *p = (struct tc_render_condition *)payload;
455 pipe->render_condition(pipe, p->query, p->condition, p->mode);
456 }
457
458 static void
459 tc_render_condition(struct pipe_context *_pipe,
460 struct pipe_query *query, boolean condition,
461 enum pipe_render_cond_flag mode)
462 {
463 struct threaded_context *tc = threaded_context(_pipe);
464 struct tc_render_condition *p =
465 tc_add_struct_typed_call(tc, TC_CALL_render_condition, tc_render_condition);
466
467 p->query = query;
468 p->condition = condition;
469 p->mode = mode;
470 }
471
472
473 /********************************************************************
474 * constant (immutable) states
475 */
476
477 #define TC_CSO_CREATE(name, sname) \
478 static void * \
479 tc_create_##name##_state(struct pipe_context *_pipe, \
480 const struct pipe_##sname##_state *state) \
481 { \
482 struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
483 return pipe->create_##name##_state(pipe, state); \
484 }
485
486 #define TC_CSO_BIND(name) TC_FUNC1(bind_##name##_state, cso, , void *, , *)
487 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, cso, , void *, , *)
488
489 #define TC_CSO_WHOLE2(name, sname) \
490 TC_CSO_CREATE(name, sname) \
491 TC_CSO_BIND(name) \
492 TC_CSO_DELETE(name)
493
494 #define TC_CSO_WHOLE(name) TC_CSO_WHOLE2(name, name)
495
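/* Editorial note: the invocations below generate the CSO hooks.  The create_*
 * functions run synchronously in the application thread because they must
 * return a pointer, while bind_* and delete_* only carry a void* and are
 * queued through TC_FUNC1.
 */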
496 TC_CSO_WHOLE(blend)
497 TC_CSO_WHOLE(rasterizer)
498 TC_CSO_WHOLE(depth_stencil_alpha)
499 TC_CSO_WHOLE(compute)
500 TC_CSO_WHOLE2(fs, shader)
501 TC_CSO_WHOLE2(vs, shader)
502 TC_CSO_WHOLE2(gs, shader)
503 TC_CSO_WHOLE2(tcs, shader)
504 TC_CSO_WHOLE2(tes, shader)
505 TC_CSO_CREATE(sampler, sampler)
506 TC_CSO_DELETE(sampler)
507 TC_CSO_BIND(vertex_elements)
508 TC_CSO_DELETE(vertex_elements)
509
510 static void *
511 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
512 const struct pipe_vertex_element *elems)
513 {
514 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
515
516 return pipe->create_vertex_elements_state(pipe, count, elems);
517 }
518
519 struct tc_sampler_states {
520 ubyte shader, start, count;
521 void *slot[0]; /* more will be allocated if needed */
522 };
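/* Editorial note: structs with a trailing slot[0] array, like the one above,
 * are sized at enqueue time by tc_add_slot_based_call, e.g. (sketch):
 *
 *    struct tc_sampler_states *p =
 *       tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states,
 *                              tc_sampler_states, count);
 *
 * which reserves sizeof(struct tc_sampler_states) +
 * count * sizeof(p->slot[0]) bytes of call slots in the current batch.
 */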
523
524 static void
525 tc_call_bind_sampler_states(struct pipe_context *pipe, union tc_payload *payload)
526 {
527 struct tc_sampler_states *p = (struct tc_sampler_states *)payload;
528 pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
529 }
530
531 static void
532 tc_bind_sampler_states(struct pipe_context *_pipe,
533 enum pipe_shader_type shader,
534 unsigned start, unsigned count, void **states)
535 {
536 if (!count)
537 return;
538
539 struct threaded_context *tc = threaded_context(_pipe);
540 struct tc_sampler_states *p =
541 tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
542
543 p->shader = shader;
544 p->start = start;
545 p->count = count;
546 memcpy(p->slot, states, count * sizeof(states[0]));
547 }
548
549
550 /********************************************************************
551 * immediate states
552 */
553
554 static void
555 tc_call_set_framebuffer_state(struct pipe_context *pipe, union tc_payload *payload)
556 {
557 struct pipe_framebuffer_state *p = (struct pipe_framebuffer_state *)payload;
558
559 pipe->set_framebuffer_state(pipe, p);
560
561 unsigned nr_cbufs = p->nr_cbufs;
562 for (unsigned i = 0; i < nr_cbufs; i++)
563 pipe_surface_reference(&p->cbufs[i], NULL);
564 pipe_surface_reference(&p->zsbuf, NULL);
565 }
566
567 static void
568 tc_set_framebuffer_state(struct pipe_context *_pipe,
569 const struct pipe_framebuffer_state *fb)
570 {
571 struct threaded_context *tc = threaded_context(_pipe);
572 struct pipe_framebuffer_state *p =
573 tc_add_struct_typed_call(tc, TC_CALL_set_framebuffer_state,
574 pipe_framebuffer_state);
575 unsigned nr_cbufs = fb->nr_cbufs;
576
577 p->width = fb->width;
578 p->height = fb->height;
579 p->samples = fb->samples;
580 p->layers = fb->layers;
581 p->nr_cbufs = nr_cbufs;
582
583 for (unsigned i = 0; i < nr_cbufs; i++) {
584 p->cbufs[i] = NULL;
585 pipe_surface_reference(&p->cbufs[i], fb->cbufs[i]);
586 }
587 p->zsbuf = NULL;
588 pipe_surface_reference(&p->zsbuf, fb->zsbuf);
589 }
590
591 static void
592 tc_call_set_tess_state(struct pipe_context *pipe, union tc_payload *payload)
593 {
594 float *p = (float*)payload;
595 pipe->set_tess_state(pipe, p, p + 4);
596 }
597
598 static void
599 tc_set_tess_state(struct pipe_context *_pipe,
600 const float default_outer_level[4],
601 const float default_inner_level[2])
602 {
603 struct threaded_context *tc = threaded_context(_pipe);
604 float *p = (float*)tc_add_sized_call(tc, TC_CALL_set_tess_state,
605 sizeof(float) * 6);
606
607 memcpy(p, default_outer_level, 4 * sizeof(float));
608 memcpy(p + 4, default_inner_level, 2 * sizeof(float));
609 }
610
611 struct tc_constant_buffer {
612 ubyte shader, index;
613 struct pipe_constant_buffer cb;
614 };
615
616 static void
617 tc_call_set_constant_buffer(struct pipe_context *pipe, union tc_payload *payload)
618 {
619 struct tc_constant_buffer *p = (struct tc_constant_buffer *)payload;
620
621 pipe->set_constant_buffer(pipe,
622 p->shader,
623 p->index,
624 &p->cb);
625 pipe_resource_reference(&p->cb.buffer, NULL);
626 }
627
628 static void
629 tc_set_constant_buffer(struct pipe_context *_pipe,
630 enum pipe_shader_type shader, uint index,
631 const struct pipe_constant_buffer *cb)
632 {
633 struct threaded_context *tc = threaded_context(_pipe);
634 struct pipe_resource *buffer = NULL;
635 unsigned offset;
636
637 /* This must be done before adding set_constant_buffer, because the upload
638 * could generate e.g. transfer_unmap and flush a partially-initialized
639 * set_constant_buffer call to the driver if it were done afterwards.
640 */
641 if (cb && cb->user_buffer) {
642 u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, 64,
643 cb->user_buffer, &offset, &buffer);
644 }
645
646 struct tc_constant_buffer *p =
647 tc_add_struct_typed_call(tc, TC_CALL_set_constant_buffer,
648 tc_constant_buffer);
649 p->shader = shader;
650 p->index = index;
651
652 if (cb) {
653 if (cb->user_buffer) {
654 p->cb.buffer_size = cb->buffer_size;
655 p->cb.user_buffer = NULL;
656 p->cb.buffer_offset = offset;
657 p->cb.buffer = buffer;
658 } else {
659 tc_set_resource_reference(&p->cb.buffer,
660 cb->buffer);
661 memcpy(&p->cb, cb, sizeof(*cb));
662 }
663 } else {
664 memset(&p->cb, 0, sizeof(*cb));
665 }
666 }
667
668 struct tc_scissors {
669 ubyte start, count;
670 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
671 };
672
673 static void
674 tc_call_set_scissor_states(struct pipe_context *pipe, union tc_payload *payload)
675 {
676 struct tc_scissors *p = (struct tc_scissors *)payload;
677 pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
678 }
679
680 static void
681 tc_set_scissor_states(struct pipe_context *_pipe,
682 unsigned start, unsigned count,
683 const struct pipe_scissor_state *states)
684 {
685 struct threaded_context *tc = threaded_context(_pipe);
686 struct tc_scissors *p =
687 tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
688
689 p->start = start;
690 p->count = count;
691 memcpy(&p->slot, states, count * sizeof(states[0]));
692 }
693
694 struct tc_viewports {
695 ubyte start, count;
696 struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
697 };
698
699 static void
700 tc_call_set_viewport_states(struct pipe_context *pipe, union tc_payload *payload)
701 {
702 struct tc_viewports *p = (struct tc_viewports *)payload;
703 pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
704 }
705
706 static void
707 tc_set_viewport_states(struct pipe_context *_pipe,
708 unsigned start, unsigned count,
709 const struct pipe_viewport_state *states)
710 {
711 if (!count)
712 return;
713
714 struct threaded_context *tc = threaded_context(_pipe);
715 struct tc_viewports *p =
716 tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
717
718 p->start = start;
719 p->count = count;
720 memcpy(&p->slot, states, count * sizeof(states[0]));
721 }
722
723 struct tc_window_rects {
724 bool include;
725 ubyte count;
726 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
727 };
728
729 static void
730 tc_call_set_window_rectangles(struct pipe_context *pipe,
731 union tc_payload *payload)
732 {
733 struct tc_window_rects *p = (struct tc_window_rects *)payload;
734 pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
735 }
736
737 static void
738 tc_set_window_rectangles(struct pipe_context *_pipe, boolean include,
739 unsigned count,
740 const struct pipe_scissor_state *rects)
741 {
742 struct threaded_context *tc = threaded_context(_pipe);
743 struct tc_window_rects *p =
744 tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
745
746 p->include = include;
747 p->count = count;
748 memcpy(p->slot, rects, count * sizeof(rects[0]));
749 }
750
751 struct tc_sampler_views {
752 ubyte shader, start, count;
753 struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
754 };
755
756 static void
757 tc_call_set_sampler_views(struct pipe_context *pipe, union tc_payload *payload)
758 {
759 struct tc_sampler_views *p = (struct tc_sampler_views *)payload;
760 unsigned count = p->count;
761
762 pipe->set_sampler_views(pipe, p->shader, p->start, p->count, p->slot);
763 for (unsigned i = 0; i < count; i++)
764 pipe_sampler_view_reference(&p->slot[i], NULL);
765 }
766
767 static void
768 tc_set_sampler_views(struct pipe_context *_pipe,
769 enum pipe_shader_type shader,
770 unsigned start, unsigned count,
771 struct pipe_sampler_view **views)
772 {
773 if (!count)
774 return;
775
776 struct threaded_context *tc = threaded_context(_pipe);
777 struct tc_sampler_views *p =
778 tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views, count);
779
780 p->shader = shader;
781 p->start = start;
782 p->count = count;
783
784 if (views) {
785 for (unsigned i = 0; i < count; i++) {
786 p->slot[i] = NULL;
787 pipe_sampler_view_reference(&p->slot[i], views[i]);
788 }
789 } else {
790 memset(p->slot, 0, count * sizeof(views[0]));
791 }
792 }
793
794 struct tc_shader_images {
795 ubyte shader, start, count;
796 bool unbind;
797 struct pipe_image_view slot[0]; /* more will be allocated if needed */
798 };
799
800 static void
801 tc_call_set_shader_images(struct pipe_context *pipe, union tc_payload *payload)
802 {
803 struct tc_shader_images *p = (struct tc_shader_images *)payload;
804 unsigned count = p->count;
805
806 if (p->unbind) {
807 pipe->set_shader_images(pipe, p->shader, p->start, p->count, NULL);
808 return;
809 }
810
811 pipe->set_shader_images(pipe, p->shader, p->start, p->count, p->slot);
812
813 for (unsigned i = 0; i < count; i++)
814 pipe_resource_reference(&p->slot[i].resource, NULL);
815 }
816
817 static void
818 tc_set_shader_images(struct pipe_context *_pipe,
819 enum pipe_shader_type shader,
820 unsigned start, unsigned count,
821 const struct pipe_image_view *images)
822 {
823 if (!count)
824 return;
825
826 struct threaded_context *tc = threaded_context(_pipe);
827 struct tc_shader_images *p =
828 tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
829 images ? count : 0);
830
831 p->shader = shader;
832 p->start = start;
833 p->count = count;
834 p->unbind = images == NULL;
835
836 if (images) {
837 for (unsigned i = 0; i < count; i++) {
838 tc_set_resource_reference(&p->slot[i].resource, images[i].resource);
839
840 if (images[i].access & PIPE_IMAGE_ACCESS_WRITE &&
841 images[i].resource &&
842 images[i].resource->target == PIPE_BUFFER) {
843 struct threaded_resource *tres =
844 threaded_resource(images[i].resource);
845
846 util_range_add(&tres->valid_buffer_range, images[i].u.buf.offset,
847 images[i].u.buf.offset + images[i].u.buf.size);
848 }
849 }
850 memcpy(p->slot, images, count * sizeof(images[0]));
851 }
852 }
853
854 struct tc_shader_buffers {
855 ubyte shader, start, count;
856 bool unbind;
857 struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
858 };
859
860 static void
861 tc_call_set_shader_buffers(struct pipe_context *pipe, union tc_payload *payload)
862 {
863 struct tc_shader_buffers *p = (struct tc_shader_buffers *)payload;
864 unsigned count = p->count;
865
866 if (p->unbind) {
867 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL);
868 return;
869 }
870
871 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot);
872
873 for (unsigned i = 0; i < count; i++)
874 pipe_resource_reference(&p->slot[i].buffer, NULL);
875 }
876
877 static void
878 tc_set_shader_buffers(struct pipe_context *_pipe,
879 enum pipe_shader_type shader,
880 unsigned start, unsigned count,
881 const struct pipe_shader_buffer *buffers)
882 {
883 if (!count)
884 return;
885
886 struct threaded_context *tc = threaded_context(_pipe);
887 struct tc_shader_buffers *p =
888 tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
889 buffers ? count : 0);
890
891 p->shader = shader;
892 p->start = start;
893 p->count = count;
894 p->unbind = buffers == NULL;
895
896 if (buffers) {
897 for (unsigned i = 0; i < count; i++) {
898 struct pipe_shader_buffer *dst = &p->slot[i];
899 const struct pipe_shader_buffer *src = buffers + i;
900
901 tc_set_resource_reference(&dst->buffer, src->buffer);
902 dst->buffer_offset = src->buffer_offset;
903 dst->buffer_size = src->buffer_size;
904
905 if (src->buffer) {
906 struct threaded_resource *tres = threaded_resource(src->buffer);
907
908 util_range_add(&tres->valid_buffer_range, src->buffer_offset,
909 src->buffer_offset + src->buffer_size);
910 }
911 }
912 }
913 }
914
915 struct tc_vertex_buffers {
916 ubyte start, count;
917 bool unbind;
918 struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
919 };
920
921 static void
922 tc_call_set_vertex_buffers(struct pipe_context *pipe, union tc_payload *payload)
923 {
924 struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)payload;
925 unsigned count = p->count;
926
927 if (p->unbind) {
928 pipe->set_vertex_buffers(pipe, p->start, count, NULL);
929 return;
930 }
931
932 for (unsigned i = 0; i < count; i++)
933 tc_assert(!p->slot[i].is_user_buffer);
934
935 pipe->set_vertex_buffers(pipe, p->start, count, p->slot);
936 for (unsigned i = 0; i < count; i++)
937 pipe_resource_reference(&p->slot[i].buffer.resource, NULL);
938 }
939
940 static void
941 tc_set_vertex_buffers(struct pipe_context *_pipe,
942 unsigned start, unsigned count,
943 const struct pipe_vertex_buffer *buffers)
944 {
945 struct threaded_context *tc = threaded_context(_pipe);
946
947 if (!count)
948 return;
949
950 if (buffers) {
951 struct tc_vertex_buffers *p =
952 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
953 p->start = start;
954 p->count = count;
955 p->unbind = false;
956
957 for (unsigned i = 0; i < count; i++) {
958 struct pipe_vertex_buffer *dst = &p->slot[i];
959 const struct pipe_vertex_buffer *src = buffers + i;
960
961 tc_assert(!src->is_user_buffer);
962 dst->stride = src->stride;
963 dst->is_user_buffer = false;
964 tc_set_resource_reference(&dst->buffer.resource,
965 src->buffer.resource);
966 dst->buffer_offset = src->buffer_offset;
967 }
968 } else {
969 struct tc_vertex_buffers *p =
970 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
971 p->start = start;
972 p->count = count;
973 p->unbind = true;
974 }
975 }
976
977 struct tc_stream_outputs {
978 unsigned count;
979 struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
980 unsigned offsets[PIPE_MAX_SO_BUFFERS];
981 };
982
983 static void
984 tc_call_set_stream_output_targets(struct pipe_context *pipe, union tc_payload *payload)
985 {
986 struct tc_stream_outputs *p = (struct tc_stream_outputs *)payload;
987 unsigned count = p->count;
988
989 pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
990 for (unsigned i = 0; i < count; i++)
991 pipe_so_target_reference(&p->targets[i], NULL);
992 }
993
994 static void
995 tc_set_stream_output_targets(struct pipe_context *_pipe,
996 unsigned count,
997 struct pipe_stream_output_target **tgs,
998 const unsigned *offsets)
999 {
1000 struct threaded_context *tc = threaded_context(_pipe);
1001 struct tc_stream_outputs *p =
1002 tc_add_struct_typed_call(tc, TC_CALL_set_stream_output_targets,
1003 tc_stream_outputs);
1004
1005 for (unsigned i = 0; i < count; i++) {
1006 p->targets[i] = NULL;
1007 pipe_so_target_reference(&p->targets[i], tgs[i]);
1008 }
1009 p->count = count;
1010 memcpy(p->offsets, offsets, count * sizeof(unsigned));
1011 }
1012
1013 static void
1014 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1015 unsigned count, struct pipe_surface **resources)
1016 {
1017 struct threaded_context *tc = threaded_context(_pipe);
1018 struct pipe_context *pipe = tc->pipe;
1019
1020 tc_sync(tc);
1021 pipe->set_compute_resources(pipe, start, count, resources);
1022 }
1023
1024 static void
1025 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1026 unsigned count, struct pipe_resource **resources,
1027 uint32_t **handles)
1028 {
1029 struct threaded_context *tc = threaded_context(_pipe);
1030 struct pipe_context *pipe = tc->pipe;
1031
1032 tc_sync(tc);
1033 pipe->set_global_binding(pipe, first, count, resources, handles);
1034 }
1035
1036
1037 /********************************************************************
1038 * views
1039 */
1040
1041 static struct pipe_surface *
1042 tc_create_surface(struct pipe_context *_pipe,
1043 struct pipe_resource *resource,
1044 const struct pipe_surface *surf_tmpl)
1045 {
1046 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1047 struct pipe_surface *view =
1048 pipe->create_surface(pipe, resource, surf_tmpl);
1049
1050 if (view)
1051 view->context = _pipe;
1052 return view;
1053 }
1054
1055 static void
1056 tc_surface_destroy(struct pipe_context *_pipe,
1057 struct pipe_surface *surf)
1058 {
1059 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1060
1061 pipe->surface_destroy(pipe, surf);
1062 }
1063
1064 static struct pipe_sampler_view *
1065 tc_create_sampler_view(struct pipe_context *_pipe,
1066 struct pipe_resource *resource,
1067 const struct pipe_sampler_view *templ)
1068 {
1069 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1070 struct pipe_sampler_view *view =
1071 pipe->create_sampler_view(pipe, resource, templ);
1072
1073 if (view)
1074 view->context = _pipe;
1075 return view;
1076 }
1077
1078 static void
1079 tc_sampler_view_destroy(struct pipe_context *_pipe,
1080 struct pipe_sampler_view *view)
1081 {
1082 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1083
1084 pipe->sampler_view_destroy(pipe, view);
1085 }
1086
1087 static struct pipe_stream_output_target *
1088 tc_create_stream_output_target(struct pipe_context *_pipe,
1089 struct pipe_resource *res,
1090 unsigned buffer_offset,
1091 unsigned buffer_size)
1092 {
1093 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1094 struct threaded_resource *tres = threaded_resource(res);
1095 struct pipe_stream_output_target *view;
1096
1097 tc_sync(threaded_context(_pipe));
1098 util_range_add(&tres->valid_buffer_range, buffer_offset,
1099 buffer_offset + buffer_size);
1100
1101 view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1102 buffer_size);
1103 if (view)
1104 view->context = _pipe;
1105 return view;
1106 }
1107
1108 static void
1109 tc_stream_output_target_destroy(struct pipe_context *_pipe,
1110 struct pipe_stream_output_target *target)
1111 {
1112 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1113
1114 pipe->stream_output_target_destroy(pipe, target);
1115 }
1116
1117
1118 /********************************************************************
1119 * bindless
1120 */
1121
1122 static uint64_t
1123 tc_create_texture_handle(struct pipe_context *_pipe,
1124 struct pipe_sampler_view *view,
1125 const struct pipe_sampler_state *state)
1126 {
1127 struct threaded_context *tc = threaded_context(_pipe);
1128 struct pipe_context *pipe = tc->pipe;
1129
1130 tc_sync(tc);
1131 return pipe->create_texture_handle(pipe, view, state);
1132 }
1133
1134 static void
1135 tc_call_delete_texture_handle(struct pipe_context *pipe,
1136 union tc_payload *payload)
1137 {
1138 pipe->delete_texture_handle(pipe, payload->handle);
1139 }
1140
1141 static void
1142 tc_delete_texture_handle(struct pipe_context *_pipe, uint64_t handle)
1143 {
1144 struct threaded_context *tc = threaded_context(_pipe);
1145 union tc_payload *payload =
1146 tc_add_small_call(tc, TC_CALL_delete_texture_handle);
1147
1148 payload->handle = handle;
1149 }
1150
1151 struct tc_make_texture_handle_resident
1152 {
1153 uint64_t handle;
1154 bool resident;
1155 };
1156
1157 static void
1158 tc_call_make_texture_handle_resident(struct pipe_context *pipe,
1159 union tc_payload *payload)
1160 {
1161 struct tc_make_texture_handle_resident *p =
1162 (struct tc_make_texture_handle_resident *)payload;
1163
1164 pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1165 }
1166
1167 static void
1168 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1169 bool resident)
1170 {
1171 struct threaded_context *tc = threaded_context(_pipe);
1172 struct tc_make_texture_handle_resident *p =
1173 tc_add_struct_typed_call(tc, TC_CALL_make_texture_handle_resident,
1174 tc_make_texture_handle_resident);
1175
1176 p->handle = handle;
1177 p->resident = resident;
1178 }
1179
1180 static uint64_t
1181 tc_create_image_handle(struct pipe_context *_pipe,
1182 const struct pipe_image_view *image)
1183 {
1184 struct threaded_context *tc = threaded_context(_pipe);
1185 struct pipe_context *pipe = tc->pipe;
1186
1187 tc_sync(tc);
1188 return pipe->create_image_handle(pipe, image);
1189 }
1190
1191 static void
1192 tc_call_delete_image_handle(struct pipe_context *pipe,
1193 union tc_payload *payload)
1194 {
1195 pipe->delete_image_handle(pipe, payload->handle);
1196 }
1197
1198 static void
1199 tc_delete_image_handle(struct pipe_context *_pipe, uint64_t handle)
1200 {
1201 struct threaded_context *tc = threaded_context(_pipe);
1202 union tc_payload *payload =
1203 tc_add_small_call(tc, TC_CALL_delete_image_handle);
1204
1205 payload->handle = handle;
1206 }
1207
1208 struct tc_make_image_handle_resident
1209 {
1210 uint64_t handle;
1211 unsigned access;
1212 bool resident;
1213 };
1214
1215 static void
1216 tc_call_make_image_handle_resident(struct pipe_context *pipe,
1217 union tc_payload *payload)
1218 {
1219 struct tc_make_image_handle_resident *p =
1220 (struct tc_make_image_handle_resident *)payload;
1221
1222 pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1223 }
1224
1225 static void
1226 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1227 unsigned access, bool resident)
1228 {
1229 struct threaded_context *tc = threaded_context(_pipe);
1230 struct tc_make_image_handle_resident *p =
1231 tc_add_struct_typed_call(tc, TC_CALL_make_image_handle_resident,
1232 tc_make_image_handle_resident);
1233
1234 p->handle = handle;
1235 p->access = access;
1236 p->resident = resident;
1237 }
1238
1239
1240 /********************************************************************
1241 * transfer
1242 */
1243
1244 struct tc_replace_buffer_storage {
1245 struct pipe_resource *dst;
1246 struct pipe_resource *src;
1247 tc_replace_buffer_storage_func func;
1248 };
1249
1250 static void
1251 tc_call_replace_buffer_storage(struct pipe_context *pipe,
1252 union tc_payload *payload)
1253 {
1254 struct tc_replace_buffer_storage *p =
1255 (struct tc_replace_buffer_storage *)payload;
1256
1257 p->func(pipe, p->dst, p->src);
1258 pipe_resource_reference(&p->dst, NULL);
1259 pipe_resource_reference(&p->src, NULL);
1260 }
1261
1262 static bool
1263 tc_invalidate_buffer(struct threaded_context *tc,
1264 struct threaded_resource *tbuf)
1265 {
1266 /* We can't check if the buffer is idle, so we invalidate it
1267 * unconditionally. */
1268 struct pipe_screen *screen = tc->base.screen;
1269 struct pipe_resource *new_buf;
1270
1271 /* Shared, pinned, and sparse buffers can't be reallocated. */
1272 if (tbuf->is_shared ||
1273 tbuf->is_user_ptr ||
1274 tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE)
1275 return false;
1276
1277 /* Allocate a new one. */
1278 new_buf = screen->resource_create(screen, &tbuf->b);
1279 if (!new_buf)
1280 return false;
1281
1282 /* Replace the "latest" pointer. */
1283 if (tbuf->latest != &tbuf->b)
1284 pipe_resource_reference(&tbuf->latest, NULL);
1285
1286 tbuf->latest = new_buf;
1287 util_range_set_empty(&tbuf->valid_buffer_range);
1288
1289 /* The valid range should point to the original buffer. */
1290 threaded_resource(new_buf)->base_valid_buffer_range =
1291 &tbuf->valid_buffer_range;
1292
1293 /* Enqueue storage replacement of the original buffer. */
1294 struct tc_replace_buffer_storage *p =
1295 tc_add_struct_typed_call(tc, TC_CALL_replace_buffer_storage,
1296 tc_replace_buffer_storage);
1297
1298 p->func = tc->replace_buffer_storage;
1299 tc_set_resource_reference(&p->dst, &tbuf->b);
1300 tc_set_resource_reference(&p->src, new_buf);
1301 return true;
1302 }
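/* Editorial note: after a successful tc_invalidate_buffer, the application
 * thread can write to tbuf->latest (the freshly allocated storage) without
 * synchronization, while the queued TC_CALL_replace_buffer_storage makes the
 * driver thread retarget the original resource to that new storage.
 */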
1303
1304 static unsigned
1305 tc_improve_map_buffer_flags(struct threaded_context *tc,
1306 struct threaded_resource *tres, unsigned usage,
1307 unsigned offset, unsigned size)
1308 {
1309 /* Never invalidate inside the driver and never infer "unsynchronized". */
1310 unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
1311 TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
1312
1313 /* Prevent a reentry. */
1314 if (usage & tc_flags)
1315 return usage;
1316
1317 /* Use the staging upload if it's preferred. */
1318 if (usage & (PIPE_TRANSFER_DISCARD_RANGE |
1319 PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1320 !(usage & PIPE_TRANSFER_PERSISTENT) &&
1321 /* Try not to decrement the counter if it's not positive. Still racy,
1322 * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
1323 tres->max_forced_staging_uploads > 0 &&
1324 p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) {
1325 usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
1326 PIPE_TRANSFER_UNSYNCHRONIZED);
1327
1328 return usage | tc_flags | PIPE_TRANSFER_DISCARD_RANGE;
1329 }
1330
1331 /* Sparse buffers can't be mapped directly and can't be reallocated
1332 * (fully invalidated). That may just be a radeonsi limitation, but
1333 * the threaded context must obey it with radeonsi.
1334 */
1335 if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) {
1336 /* We can use DISCARD_RANGE instead of full discard. This is the only
1337 * fast path for sparse buffers that doesn't need thread synchronization.
1338 */
1339 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
1340 usage |= PIPE_TRANSFER_DISCARD_RANGE;
1341
1342 * Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
1343 * The threaded context doesn't do unsynchronized mappings and invalida-
1344 * tions of sparse buffers, therefore correct driver behavior won't
1345 * result in incorrect behavior with the threaded context.
1346 */
1347 return usage;
1348 }
1349
1350 usage |= tc_flags;
1351
1352 /* Handle CPU reads trivially. */
1353 if (usage & PIPE_TRANSFER_READ) {
1354 /* Drivers aren't allowed to do buffer invalidations. */
1355 return usage & ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
1356 }
1357
1358 /* See if the buffer range being mapped has never been initialized,
1359 * in which case it can be mapped unsynchronized. */
1360 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
1361 !tres->is_shared &&
1362 !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size))
1363 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1364
1365 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1366 /* If discarding the entire range, discard the whole resource instead. */
1367 if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
1368 offset == 0 && size == tres->b.width0)
1369 usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
1370
1371 /* Discard the whole resource if needed. */
1372 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
1373 if (tc_invalidate_buffer(tc, tres))
1374 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1375 else
1376 usage |= PIPE_TRANSFER_DISCARD_RANGE; /* fallback */
1377 }
1378 }
1379
1380 /* We won't need this flag anymore. */
1381 /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
1382 usage &= ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
1383
1384 /* GL_AMD_pinned_memory and persistent mappings can't use staging
1385 * buffers. */
1386 if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
1387 PIPE_TRANSFER_PERSISTENT) ||
1388 tres->is_user_ptr)
1389 usage &= ~PIPE_TRANSFER_DISCARD_RANGE;
1390
1391 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
1392 if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
1393 usage &= ~PIPE_TRANSFER_DISCARD_RANGE;
1394 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
1395 }
1396
1397 return usage;
1398 }
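/* Editorial summary of the flag rewriting above:
 *  - READ mappings only have DISCARD_WHOLE_RESOURCE stripped.
 *  - Never-written ranges of non-shared buffers are promoted to
 *    UNSYNCHRONIZED.
 *  - DISCARD_WHOLE_RESOURCE becomes an unsynchronized map of a freshly
 *    invalidated buffer, or falls back to DISCARD_RANGE.
 *  - UNSYNCHRONIZED, PERSISTENT, and user-pointer buffers never take the
 *    staging (DISCARD_RANGE) path.
 */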
1399
1400 static void *
1401 tc_transfer_map(struct pipe_context *_pipe,
1402 struct pipe_resource *resource, unsigned level,
1403 unsigned usage, const struct pipe_box *box,
1404 struct pipe_transfer **transfer)
1405 {
1406 struct threaded_context *tc = threaded_context(_pipe);
1407 struct threaded_resource *tres = threaded_resource(resource);
1408 struct pipe_context *pipe = tc->pipe;
1409
1410 if (resource->target == PIPE_BUFFER) {
1411 usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
1412
1413 /* Do a staging transfer within the threaded context. The driver should
1414 * only get resource_copy_region.
1415 */
1416 if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
1417 struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers);
1418 uint8_t *map;
1419
1420 ttrans->staging = NULL;
1421
1422 u_upload_alloc(tc->base.stream_uploader, 0,
1423 box->width + (box->x % tc->map_buffer_alignment),
1424 64, &ttrans->offset, &ttrans->staging, (void**)&map);
1425 if (!map) {
1426 slab_free(&tc->pool_transfers, ttrans);
1427 return NULL;
1428 }
1429
1430 tc_set_resource_reference(&ttrans->b.resource, resource);
1431 ttrans->b.level = 0;
1432 ttrans->b.usage = usage;
1433 ttrans->b.box = *box;
1434 ttrans->b.stride = 0;
1435 ttrans->b.layer_stride = 0;
1436 *transfer = &ttrans->b;
1437 return map + (box->x % tc->map_buffer_alignment);
1438 }
1439 }
1440
1441 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
1442 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
1443 tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" :
1444 usage & PIPE_TRANSFER_DISCARD_RANGE ? " discard_range" :
1445 usage & PIPE_TRANSFER_READ ? " read" : " ??");
1446
1447 return pipe->transfer_map(pipe, tres->latest ? tres->latest : resource,
1448 level, usage, box, transfer);
1449 }
1450
1451 struct tc_transfer_flush_region {
1452 struct pipe_transfer *transfer;
1453 struct pipe_box box;
1454 };
1455
1456 static void
1457 tc_call_transfer_flush_region(struct pipe_context *pipe,
1458 union tc_payload *payload)
1459 {
1460 struct tc_transfer_flush_region *p =
1461 (struct tc_transfer_flush_region *)payload;
1462
1463 pipe->transfer_flush_region(pipe, p->transfer, &p->box);
1464 }
1465
1466 struct tc_resource_copy_region {
1467 struct pipe_resource *dst;
1468 unsigned dst_level;
1469 unsigned dstx, dsty, dstz;
1470 struct pipe_resource *src;
1471 unsigned src_level;
1472 struct pipe_box src_box;
1473 };
1474
1475 static void
1476 tc_resource_copy_region(struct pipe_context *_pipe,
1477 struct pipe_resource *dst, unsigned dst_level,
1478 unsigned dstx, unsigned dsty, unsigned dstz,
1479 struct pipe_resource *src, unsigned src_level,
1480 const struct pipe_box *src_box);
1481
1482 static void
1483 tc_buffer_do_flush_region(struct threaded_context *tc,
1484 struct threaded_transfer *ttrans,
1485 const struct pipe_box *box)
1486 {
1487 struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
1488
1489 if (ttrans->staging) {
1490 struct pipe_box src_box;
1491
1492 u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
1493 box->width, &src_box);
1494
1495 /* Copy the staging buffer into the original one. */
1496 tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
1497 ttrans->staging, 0, &src_box);
1498 }
1499
1500 util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);
1501 }
1502
1503 static void
1504 tc_transfer_flush_region(struct pipe_context *_pipe,
1505 struct pipe_transfer *transfer,
1506 const struct pipe_box *rel_box)
1507 {
1508 struct threaded_context *tc = threaded_context(_pipe);
1509 struct threaded_transfer *ttrans = threaded_transfer(transfer);
1510 struct threaded_resource *tres = threaded_resource(transfer->resource);
1511 unsigned required_usage = PIPE_TRANSFER_WRITE |
1512 PIPE_TRANSFER_FLUSH_EXPLICIT;
1513
1514 if (tres->b.target == PIPE_BUFFER) {
1515 if ((transfer->usage & required_usage) == required_usage) {
1516 struct pipe_box box;
1517
1518 u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
1519 tc_buffer_do_flush_region(tc, ttrans, &box);
1520 }
1521
1522 /* Staging transfers don't send the call to the driver. */
1523 if (ttrans->staging)
1524 return;
1525 }
1526
1527 struct tc_transfer_flush_region *p =
1528 tc_add_struct_typed_call(tc, TC_CALL_transfer_flush_region,
1529 tc_transfer_flush_region);
1530 p->transfer = transfer;
1531 p->box = *rel_box;
1532 }
1533
1534 static void
1535 tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload)
1536 {
1537 pipe->transfer_unmap(pipe, payload->transfer);
1538 }
1539
1540 static void
1541 tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
1542 {
1543 struct threaded_context *tc = threaded_context(_pipe);
1544 struct threaded_transfer *ttrans = threaded_transfer(transfer);
1545 struct threaded_resource *tres = threaded_resource(transfer->resource);
1546
1547 if (tres->b.target == PIPE_BUFFER) {
1548 if (transfer->usage & PIPE_TRANSFER_WRITE &&
1549 !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1550 tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
1551
1552 /* Staging transfers don't send the call to the driver. */
1553 if (ttrans->staging) {
1554 pipe_resource_reference(&ttrans->staging, NULL);
1555 pipe_resource_reference(&ttrans->b.resource, NULL);
1556 slab_free(&tc->pool_transfers, ttrans);
1557 return;
1558 }
1559 }
1560
1561 tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer;
1562 }
1563
1564 struct tc_buffer_subdata {
1565 struct pipe_resource *resource;
1566 unsigned usage, offset, size;
1567 char slot[0]; /* more will be allocated if needed */
1568 };
1569
1570 static void
1571 tc_call_buffer_subdata(struct pipe_context *pipe, union tc_payload *payload)
1572 {
1573 struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)payload;
1574
1575 pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
1576 p->slot);
1577 pipe_resource_reference(&p->resource, NULL);
1578 }
1579
1580 static void
1581 tc_buffer_subdata(struct pipe_context *_pipe,
1582 struct pipe_resource *resource,
1583 unsigned usage, unsigned offset,
1584 unsigned size, const void *data)
1585 {
1586 struct threaded_context *tc = threaded_context(_pipe);
1587 struct threaded_resource *tres = threaded_resource(resource);
1588
1589 if (!size)
1590 return;
1591
1592 usage |= PIPE_TRANSFER_WRITE |
1593 PIPE_TRANSFER_DISCARD_RANGE;
1594
1595 usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
1596
1597 /* Unsynchronized and big transfers should use transfer_map. Also handle
1598 * full invalidations, because drivers aren't allowed to do them.
1599 */
1600 if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
1601 PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) ||
1602 size > TC_MAX_SUBDATA_BYTES) {
1603 struct pipe_transfer *transfer;
1604 struct pipe_box box;
1605 uint8_t *map = NULL;
1606
1607 u_box_1d(offset, size, &box);
1608
1609 map = tc_transfer_map(_pipe, resource, 0, usage, &box, &transfer);
1610 if (map) {
1611 memcpy(map, data, size);
1612 tc_transfer_unmap(_pipe, transfer);
1613 }
1614 return;
1615 }
1616
1617 util_range_add(&tres->valid_buffer_range, offset, offset + size);
1618
1619 /* The upload is small. Enqueue it. */
1620 struct tc_buffer_subdata *p =
1621 tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
1622
1623 tc_set_resource_reference(&p->resource, resource);
1624 p->usage = usage;
1625 p->offset = offset;
1626 p->size = size;
1627 memcpy(p->slot, data, size);
1628 }
1629
1630 struct tc_texture_subdata {
1631 struct pipe_resource *resource;
1632 unsigned level, usage, stride, layer_stride;
1633 struct pipe_box box;
1634 char slot[0]; /* more will be allocated if needed */
1635 };
1636
1637 static void
1638 tc_call_texture_subdata(struct pipe_context *pipe, union tc_payload *payload)
1639 {
1640 struct tc_texture_subdata *p = (struct tc_texture_subdata *)payload;
1641
1642 pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
1643 p->slot, p->stride, p->layer_stride);
1644 pipe_resource_reference(&p->resource, NULL);
1645 }
1646
1647 static void
1648 tc_texture_subdata(struct pipe_context *_pipe,
1649 struct pipe_resource *resource,
1650 unsigned level, unsigned usage,
1651 const struct pipe_box *box,
1652 const void *data, unsigned stride,
1653 unsigned layer_stride)
1654 {
1655 struct threaded_context *tc = threaded_context(_pipe);
1656 unsigned size;
1657
1658 assert(box->height >= 1);
1659 assert(box->depth >= 1);
1660
1661 size = (box->depth - 1) * layer_stride +
1662 (box->height - 1) * stride +
1663 box->width * util_format_get_blocksize(resource->format);
1664 if (!size)
1665 return;
1666
1667 /* Small uploads can be enqueued, big uploads must sync. */
1668 if (size <= TC_MAX_SUBDATA_BYTES) {
1669 struct tc_texture_subdata *p =
1670 tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
1671
1672 tc_set_resource_reference(&p->resource, resource);
1673 p->level = level;
1674 p->usage = usage;
1675 p->box = *box;
1676 p->stride = stride;
1677 p->layer_stride = layer_stride;
1678 memcpy(p->slot, data, size);
1679 } else {
1680 struct pipe_context *pipe = tc->pipe;
1681
1682 tc_sync(tc);
1683 pipe->texture_subdata(pipe, resource, level, usage, box, data,
1684 stride, layer_stride);
1685 }
1686 }
1687
1688
1689 /********************************************************************
1690 * miscellaneous
1691 */
1692
1693 #define TC_FUNC_SYNC_RET0(ret_type, func) \
1694 static ret_type \
1695 tc_##func(struct pipe_context *_pipe) \
1696 { \
1697 struct threaded_context *tc = threaded_context(_pipe); \
1698 struct pipe_context *pipe = tc->pipe; \
1699 tc_sync(tc); \
1700 return pipe->func(pipe); \
1701 }
1702
1703 TC_FUNC_SYNC_RET0(enum pipe_reset_status, get_device_reset_status)
1704 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
1705
1706 static void
1707 tc_get_sample_position(struct pipe_context *_pipe,
1708 unsigned sample_count, unsigned sample_index,
1709 float *out_value)
1710 {
1711 struct threaded_context *tc = threaded_context(_pipe);
1712 struct pipe_context *pipe = tc->pipe;
1713
1714 tc_sync(tc);
1715 pipe->get_sample_position(pipe, sample_count, sample_index,
1716 out_value);
1717 }
1718
1719 static void
1720 tc_set_device_reset_callback(struct pipe_context *_pipe,
1721 const struct pipe_device_reset_callback *cb)
1722 {
1723 struct threaded_context *tc = threaded_context(_pipe);
1724 struct pipe_context *pipe = tc->pipe;
1725
1726 tc_sync(tc);
1727 pipe->set_device_reset_callback(pipe, cb);
1728 }
1729
1730 struct tc_string_marker {
1731 int len;
1732 char slot[0]; /* more will be allocated if needed */
1733 };
1734
1735 static void
1736 tc_call_emit_string_marker(struct pipe_context *pipe, union tc_payload *payload)
1737 {
1738 struct tc_string_marker *p = (struct tc_string_marker *)payload;
1739 pipe->emit_string_marker(pipe, p->slot, p->len);
1740 }
1741
1742 static void
1743 tc_emit_string_marker(struct pipe_context *_pipe,
1744 const char *string, int len)
1745 {
1746 struct threaded_context *tc = threaded_context(_pipe);
1747
1748 if (len <= TC_MAX_STRING_MARKER_BYTES) {
1749 struct tc_string_marker *p =
1750 tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
1751
1752 memcpy(p->slot, string, len);
1753 p->len = len;
1754 } else {
1755 struct pipe_context *pipe = tc->pipe;
1756
1757 tc_sync(tc);
1758 pipe->emit_string_marker(pipe, string, len);
1759 }
1760 }
1761
1762 static void
1763 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
1764 unsigned flags)
1765 {
1766 struct threaded_context *tc = threaded_context(_pipe);
1767 struct pipe_context *pipe = tc->pipe;
1768
1769 tc_sync(tc);
1770 pipe->dump_debug_state(pipe, stream, flags);
1771 }
1772
1773 static void
1774 tc_set_debug_callback(struct pipe_context *_pipe,
1775 const struct pipe_debug_callback *cb)
1776 {
1777 struct threaded_context *tc = threaded_context(_pipe);
1778 struct pipe_context *pipe = tc->pipe;
1779
1780 /* Drop all synchronous debug callbacks. Drivers are expected to be OK
1781 * with this. shader-db will use an environment variable to disable
1782 * the threaded context.
1783 */
1784 if (cb && cb->debug_message && !cb->async)
1785 return;
1786
1787 tc_sync(tc);
1788 pipe->set_debug_callback(pipe, cb);
1789 }
1790
1791 static void
1792 tc_create_fence_fd(struct pipe_context *_pipe,
1793 struct pipe_fence_handle **fence, int fd)
1794 {
1795 struct threaded_context *tc = threaded_context(_pipe);
1796 struct pipe_context *pipe = tc->pipe;
1797
1798 tc_sync(tc);
1799 pipe->create_fence_fd(pipe, fence, fd);
1800 }
1801
1802 static void
1803 tc_fence_server_sync(struct pipe_context *_pipe,
1804 struct pipe_fence_handle *fence)
1805 {
1806 struct threaded_context *tc = threaded_context(_pipe);
1807 struct pipe_context *pipe = tc->pipe;
1808
1809 tc_sync(tc);
1810 pipe->fence_server_sync(pipe, fence);
1811 }
1812
1813 static struct pipe_video_codec *
1814 tc_create_video_codec(struct pipe_context *_pipe,
1815 const struct pipe_video_codec *templ)
1816 {
1817 unreachable("Threaded context should not be enabled for video APIs");
1818 return NULL;
1819 }
1820
1821 static struct pipe_video_buffer *
1822 tc_create_video_buffer(struct pipe_context *_pipe,
1823 const struct pipe_video_buffer *templ)
1824 {
1825 unreachable("Threaded context should not be enabled for video APIs");
1826 return NULL;
1827 }
1828
1829
1830 /********************************************************************
1831 * draw, launch, clear, blit, copy, flush
1832 */
1833
1834 struct tc_flush_payload {
1835 struct pipe_fence_handle *fence;
1836 unsigned flags;
1837 };
1838
1839 static void
1840 tc_call_flush(struct pipe_context *pipe, union tc_payload *payload)
1841 {
1842 struct tc_flush_payload *p = (struct tc_flush_payload *)payload;
1843 struct pipe_screen *screen = pipe->screen;
1844
1845 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
1846 screen->fence_reference(screen, &p->fence, NULL);
1847 }
1848
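/* Flush path summary: whenever possible, the flush is recorded as a
 * TC_CALL_flush so that pipe->flush runs in the driver thread. This requires
 * the driver to provide create_fence, because the caller may need a fence
 * before the batch has actually been flushed; the tc_unflushed_batch_token
 * attached to the batch is what lets a later wait on such a fence trigger
 * the flush of the still-unflushed batch. Otherwise tc_flush falls back to
 * the synchronous path at the end of the function.
 */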
1849 static void
1850 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
1851 unsigned flags)
1852 {
1853 struct threaded_context *tc = threaded_context(_pipe);
1854 struct pipe_context *pipe = tc->pipe;
1855 struct pipe_screen *screen = pipe->screen;
1856 struct threaded_query *tq, *tmp;
1857 bool async = flags & PIPE_FLUSH_DEFERRED;
1858
1859 if (flags & PIPE_FLUSH_ASYNC) {
1860 struct tc_batch *last = &tc->batch_slots[tc->last];
1861
1862 /* Prefer to do the flush in the driver thread, but avoid the inter-thread
1863 * communication overhead if the driver thread is currently idle and the
1864 * caller is going to wait for the fence immediately anyway.
1865 */
1866 if (!(util_queue_fence_is_signalled(&last->fence) &&
1867 (flags & PIPE_FLUSH_HINT_FINISH)))
1868 async = true;
1869 }
1870
1871 if (async && tc->create_fence) {
1872 if (fence) {
1874 struct tc_batch *next = &tc->batch_slots[tc->next];
1875
1876 if (!next->token) {
1877 next->token = malloc(sizeof(*next->token));
1878 if (!next->token)
1879 goto out_of_memory;
1880
1881 pipe_reference_init(&next->token->ref, 1);
1882 next->token->tc = tc;
1883 }
1884
1885 screen->fence_reference(screen, fence, tc->create_fence(pipe, next->token));
1886 if (!*fence)
1887 goto out_of_memory;
1888 }
1889
1890 struct tc_flush_payload *p =
1891 tc_add_struct_typed_call(tc, TC_CALL_flush, tc_flush_payload);
1892 p->fence = fence ? *fence : NULL;
1893 p->flags = flags | TC_FLUSH_ASYNC;
1894
1895 if (!(flags & PIPE_FLUSH_DEFERRED))
1896 tc_batch_flush(tc);
1897 return;
1898 }
1899
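/* Synchronous fallback: for non-deferred flushes, mark all unflushed queries
 * as flushed, then sync with the driver thread and flush directly here.
 */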
1900 out_of_memory:
1901 if (!(flags & PIPE_FLUSH_DEFERRED)) {
1902 LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
1903 tq->flushed = true;
1904 LIST_DEL(&tq->head_unflushed);
1905 }
1906 }
1907
1908 tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
1909 flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
1910 pipe->flush(pipe, fence, flags);
1911 }
1912
1913 /* This is actually variable-sized, because indirect isn't allocated if it's
1914 * not needed. */
1915 struct tc_full_draw_info {
1916 struct pipe_draw_info draw;
1917 struct pipe_draw_indirect_info indirect;
1918 };
1919
1920 static void
1921 tc_call_draw_vbo(struct pipe_context *pipe, union tc_payload *payload)
1922 {
1923 struct tc_full_draw_info *info = (struct tc_full_draw_info*)payload;
1924
1925 pipe->draw_vbo(pipe, &info->draw);
1926 pipe_so_target_reference(&info->draw.count_from_stream_output, NULL);
1927 if (info->draw.index_size)
1928 pipe_resource_reference(&info->draw.index.resource, NULL);
1929 if (info->draw.indirect) {
1930 pipe_resource_reference(&info->indirect.buffer, NULL);
1931 pipe_resource_reference(&info->indirect.indirect_draw_count, NULL);
1932 }
1933 }
1934
1935 static struct tc_full_draw_info *
1936 tc_add_draw_vbo(struct pipe_context *_pipe, bool indirect)
1937 {
1938 return (struct tc_full_draw_info*)
1939 tc_add_sized_call(threaded_context(_pipe), TC_CALL_draw_vbo,
1940 indirect ? sizeof(struct tc_full_draw_info) :
1941 sizeof(struct pipe_draw_info));
1942 }
1943
1944 static void
1945 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info)
1946 {
1947 struct threaded_context *tc = threaded_context(_pipe);
1948 struct pipe_draw_indirect_info *indirect = info->indirect;
1949 unsigned index_size = info->index_size;
1950 bool has_user_indices = info->has_user_indices;
1951
1952 if (index_size && has_user_indices) {
1953 unsigned size = info->count * index_size;
1954 struct pipe_resource *buffer = NULL;
1955 unsigned offset;
1956
1957 tc_assert(!indirect);
1958
1959 /* This must be done before adding draw_vbo, because it could generate
1960 * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
1961 * to the driver if it was done afterwards.
1962 */
1963 u_upload_data(tc->base.stream_uploader, 0, size, 4, info->index.user,
1964 &offset, &buffer);
1965 if (unlikely(!buffer))
1966 return;
1967
1968 struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, false);
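/* count_from_stream_output must be NULLed before taking the reference, so
 * that pipe_so_target_reference doesn't unreference stale batch memory. The
 * memcpy below stores the same pointer value again, so the reference taken
 * here is the one tc_call_draw_vbo later releases.
 */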
1969 p->draw.count_from_stream_output = NULL;
1970 pipe_so_target_reference(&p->draw.count_from_stream_output,
1971 info->count_from_stream_output);
1972 memcpy(&p->draw, info, sizeof(*info));
1973 p->draw.has_user_indices = false;
1974 p->draw.index.resource = buffer;
1975 p->draw.start = offset / index_size;
1976 } else {
1977 /* Non-indexed call or indexed with a real index buffer. */
1978 struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, indirect != NULL);
1979 p->draw.count_from_stream_output = NULL;
1980 pipe_so_target_reference(&p->draw.count_from_stream_output,
1981 info->count_from_stream_output);
1982 if (index_size) {
1983 tc_set_resource_reference(&p->draw.index.resource,
1984 info->index.resource);
1985 }
1986 memcpy(&p->draw, info, sizeof(*info));
1987
1988 if (indirect) {
1989 tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
1990 tc_set_resource_reference(&p->indirect.indirect_draw_count,
1991 indirect->indirect_draw_count);
1992 memcpy(&p->indirect, indirect, sizeof(*indirect));
1993 p->draw.indirect = &p->indirect;
1994 }
1995 }
1996 }
1997
1998 static void
1999 tc_call_launch_grid(struct pipe_context *pipe, union tc_payload *payload)
2000 {
2001 struct pipe_grid_info *p = (struct pipe_grid_info *)payload;
2002
2003 pipe->launch_grid(pipe, p);
2004 pipe_resource_reference(&p->indirect, NULL);
2005 }
2006
2007 static void
2008 tc_launch_grid(struct pipe_context *_pipe,
2009 const struct pipe_grid_info *info)
2010 {
2011 struct threaded_context *tc = threaded_context(_pipe);
2012 struct pipe_grid_info *p = tc_add_struct_typed_call(tc, TC_CALL_launch_grid,
2013 pipe_grid_info);
2014 assert(info->input == NULL);
2015
2016 tc_set_resource_reference(&p->indirect, info->indirect);
2017 memcpy(p, info, sizeof(*info));
2018 }
2019
2020 static void
2021 tc_call_resource_copy_region(struct pipe_context *pipe, union tc_payload *payload)
2022 {
2023 struct tc_resource_copy_region *p = (struct tc_resource_copy_region *)payload;
2024
2025 pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
2026 p->dstz, p->src, p->src_level, &p->src_box);
2027 pipe_resource_reference(&p->dst, NULL);
2028 pipe_resource_reference(&p->src, NULL);
2029 }
2030
2031 static void
2032 tc_resource_copy_region(struct pipe_context *_pipe,
2033 struct pipe_resource *dst, unsigned dst_level,
2034 unsigned dstx, unsigned dsty, unsigned dstz,
2035 struct pipe_resource *src, unsigned src_level,
2036 const struct pipe_box *src_box)
2037 {
2038 struct threaded_context *tc = threaded_context(_pipe);
2039 struct threaded_resource *tdst = threaded_resource(dst);
2040 struct tc_resource_copy_region *p =
2041 tc_add_struct_typed_call(tc, TC_CALL_resource_copy_region,
2042 tc_resource_copy_region);
2043
2044 tc_set_resource_reference(&p->dst, dst);
2045 p->dst_level = dst_level;
2046 p->dstx = dstx;
2047 p->dsty = dsty;
2048 p->dstz = dstz;
2049 tc_set_resource_reference(&p->src, src);
2050 p->src_level = src_level;
2051 p->src_box = *src_box;
2052
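/* Record the newly written range in this thread, so that a later
 * transfer_map of the destination buffer makes its synchronization
 * decisions against up-to-date valid-range information.
 */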
2053 if (dst->target == PIPE_BUFFER)
2054 util_range_add(&tdst->valid_buffer_range, dstx, dstx + src_box->width);
2055 }
2056
2057 static void
2058 tc_call_blit(struct pipe_context *pipe, union tc_payload *payload)
2059 {
2060 struct pipe_blit_info *blit = (struct pipe_blit_info*)payload;
2061
2062 pipe->blit(pipe, blit);
2063 pipe_resource_reference(&blit->dst.resource, NULL);
2064 pipe_resource_reference(&blit->src.resource, NULL);
2065 }
2066
2067 static void
2068 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
2069 {
2070 struct threaded_context *tc = threaded_context(_pipe);
2071 struct pipe_blit_info *blit =
2072 tc_add_struct_typed_call(tc, TC_CALL_blit, pipe_blit_info);
2073
2074 tc_set_resource_reference(&blit->dst.resource, info->dst.resource);
2075 tc_set_resource_reference(&blit->src.resource, info->src.resource);
2076 memcpy(blit, info, sizeof(*info));
2077 }
2078
2079 struct tc_generate_mipmap {
2080 struct pipe_resource *res;
2081 enum pipe_format format;
2082 unsigned base_level;
2083 unsigned last_level;
2084 unsigned first_layer;
2085 unsigned last_layer;
2086 };
2087
2088 static void
2089 tc_call_generate_mipmap(struct pipe_context *pipe, union tc_payload *payload)
2090 {
2091 struct tc_generate_mipmap *p = (struct tc_generate_mipmap *)payload;
2092 bool MAYBE_UNUSED result = pipe->generate_mipmap(pipe, p->res, p->format,
2093 p->base_level,
2094 p->last_level,
2095 p->first_layer,
2096 p->last_layer);
2097 assert(result);
2098 pipe_resource_reference(&p->res, NULL);
2099 }
2100
2101 static boolean
2102 tc_generate_mipmap(struct pipe_context *_pipe,
2103 struct pipe_resource *res,
2104 enum pipe_format format,
2105 unsigned base_level,
2106 unsigned last_level,
2107 unsigned first_layer,
2108 unsigned last_layer)
2109 {
2110 struct threaded_context *tc = threaded_context(_pipe);
2111 struct pipe_context *pipe = tc->pipe;
2112 struct pipe_screen *screen = pipe->screen;
2113 unsigned bind = PIPE_BIND_SAMPLER_VIEW;
2114
2115 if (util_format_is_depth_or_stencil(format))
2116 bind = PIPE_BIND_DEPTH_STENCIL;
2117 else
2118 bind = PIPE_BIND_RENDER_TARGET;
2119
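/* pipe_screen calls are expected to be thread-safe, so the format check can
 * run here in the application thread; if the format can't be rendered,
 * fail without queuing anything.
 */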
2120 if (!screen->is_format_supported(screen, format, res->target,
2121 res->nr_samples, bind))
2122 return false;
2123
2124 struct tc_generate_mipmap *p =
2125 tc_add_struct_typed_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
2126
2127 tc_set_resource_reference(&p->res, res);
2128 p->format = format;
2129 p->base_level = base_level;
2130 p->last_level = last_level;
2131 p->first_layer = first_layer;
2132 p->last_layer = last_layer;
2133 return true;
2134 }
2135
2136 static void
2137 tc_call_flush_resource(struct pipe_context *pipe, union tc_payload *payload)
2138 {
2139 pipe->flush_resource(pipe, payload->resource);
2140 pipe_resource_reference(&payload->resource, NULL);
2141 }
2142
2143 static void
2144 tc_flush_resource(struct pipe_context *_pipe,
2145 struct pipe_resource *resource)
2146 {
2147 struct threaded_context *tc = threaded_context(_pipe);
2148 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_flush_resource);
2149
2150 tc_set_resource_reference(&payload->resource, resource);
2151 }
2152
2153 static void
2154 tc_call_invalidate_resource(struct pipe_context *pipe, union tc_payload *payload)
2155 {
2156 pipe->invalidate_resource(pipe, payload->resource);
2157 pipe_resource_reference(&payload->resource, NULL);
2158 }
2159
2160 static void
2161 tc_invalidate_resource(struct pipe_context *_pipe,
2162 struct pipe_resource *resource)
2163 {
2164 struct threaded_context *tc = threaded_context(_pipe);
2165
2166 if (resource->target == PIPE_BUFFER) {
2167 tc_invalidate_buffer(tc, threaded_resource(resource));
2168 return;
2169 }
2170
2171 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_invalidate_resource);
2172 tc_set_resource_reference(&payload->resource, resource);
2173 }
2174
2175 struct tc_clear {
2176 unsigned buffers;
2177 union pipe_color_union color;
2178 double depth;
2179 unsigned stencil;
2180 };
2181
2182 static void
2183 tc_call_clear(struct pipe_context *pipe, union tc_payload *payload)
2184 {
2185 struct tc_clear *p = (struct tc_clear *)payload;
2186 pipe->clear(pipe, p->buffers, &p->color, p->depth, p->stencil);
2187 }
2188
2189 static void
2190 tc_clear(struct pipe_context *_pipe, unsigned buffers,
2191 const union pipe_color_union *color, double depth,
2192 unsigned stencil)
2193 {
2194 struct threaded_context *tc = threaded_context(_pipe);
2195 struct tc_clear *p = tc_add_struct_typed_call(tc, TC_CALL_clear, tc_clear);
2196
2197 p->buffers = buffers;
2198 p->color = *color;
2199 p->depth = depth;
2200 p->stencil = stencil;
2201 }
2202
2203 static void
2204 tc_clear_render_target(struct pipe_context *_pipe,
2205 struct pipe_surface *dst,
2206 const union pipe_color_union *color,
2207 unsigned dstx, unsigned dsty,
2208 unsigned width, unsigned height,
2209 bool render_condition_enabled)
2210 {
2211 struct threaded_context *tc = threaded_context(_pipe);
2212 struct pipe_context *pipe = tc->pipe;
2213
2214 tc_sync(tc);
2215 pipe->clear_render_target(pipe, dst, color, dstx, dsty, width, height,
2216 render_condition_enabled);
2217 }
2218
2219 static void
2220 tc_clear_depth_stencil(struct pipe_context *_pipe,
2221 struct pipe_surface *dst, unsigned clear_flags,
2222 double depth, unsigned stencil, unsigned dstx,
2223 unsigned dsty, unsigned width, unsigned height,
2224 bool render_condition_enabled)
2225 {
2226 struct threaded_context *tc = threaded_context(_pipe);
2227 struct pipe_context *pipe = tc->pipe;
2228
2229 tc_sync(tc);
2230 pipe->clear_depth_stencil(pipe, dst, clear_flags, depth, stencil,
2231 dstx, dsty, width, height,
2232 render_condition_enabled);
2233 }
2234
2235 struct tc_clear_buffer {
2236 struct pipe_resource *res;
2237 unsigned offset;
2238 unsigned size;
2239 char clear_value[16];
2240 int clear_value_size;
2241 };
2242
2243 static void
2244 tc_call_clear_buffer(struct pipe_context *pipe, union tc_payload *payload)
2245 {
2246 struct tc_clear_buffer *p = (struct tc_clear_buffer *)payload;
2247
2248 pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
2249 p->clear_value_size);
2250 pipe_resource_reference(&p->res, NULL);
2251 }
2252
2253 static void
2254 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
2255 unsigned offset, unsigned size,
2256 const void *clear_value, int clear_value_size)
2257 {
2258 struct threaded_context *tc = threaded_context(_pipe);
2259 struct threaded_resource *tres = threaded_resource(res);
2260 struct tc_clear_buffer *p =
2261 tc_add_struct_typed_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
2262
2263 tc_set_resource_reference(&p->res, res);
2264 p->offset = offset;
2265 p->size = size;
2266 memcpy(p->clear_value, clear_value, clear_value_size);
2267 p->clear_value_size = clear_value_size;
2268
2269 util_range_add(&tres->valid_buffer_range, offset, offset + size);
2270 }
2271
2272 struct tc_clear_texture {
2273 struct pipe_resource *res;
2274 unsigned level;
2275 struct pipe_box box;
2276 char data[16];
2277 };
2278
2279 static void
2280 tc_call_clear_texture(struct pipe_context *pipe, union tc_payload *payload)
2281 {
2282 struct tc_clear_texture *p = (struct tc_clear_texture *)payload;
2283
2284 pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
2285 pipe_resource_reference(&p->res, NULL);
2286 }
2287
2288 static void
2289 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
2290 unsigned level, const struct pipe_box *box, const void *data)
2291 {
2292 struct threaded_context *tc = threaded_context(_pipe);
2293 struct tc_clear_texture *p =
2294 tc_add_struct_typed_call(tc, TC_CALL_clear_texture, tc_clear_texture);
2295
2296 tc_set_resource_reference(&p->res, res);
2297 p->level = level;
2298 p->box = *box;
2299 memcpy(p->data, data,
2300 util_format_get_blocksize(res->format));
2301 }
2302
2303 struct tc_resource_commit {
2304 struct pipe_resource *res;
2305 unsigned level;
2306 struct pipe_box box;
2307 bool commit;
2308 };
2309
2310 static void
2311 tc_call_resource_commit(struct pipe_context *pipe, union tc_payload *payload)
2312 {
2313 struct tc_resource_commit *p = (struct tc_resource_commit *)payload;
2314
2315 pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
2316 pipe_resource_reference(&p->res, NULL);
2317 }
2318
2319 static bool
2320 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
2321 unsigned level, struct pipe_box *box, bool commit)
2322 {
2323 struct threaded_context *tc = threaded_context(_pipe);
2324 struct tc_resource_commit *p =
2325 tc_add_struct_typed_call(tc, TC_CALL_resource_commit, tc_resource_commit);
2326
2327 tc_set_resource_reference(&p->res, res);
2328 p->level = level;
2329 p->box = *box;
2330 p->commit = commit;
2331 return true; /* we don't care about the return value for this call */
2332 }
2333
2334
2335 /********************************************************************
2336 * create & destroy
2337 */
2338
2339 static void
2340 tc_destroy(struct pipe_context *_pipe)
2341 {
2342 struct threaded_context *tc = threaded_context(_pipe);
2343 struct pipe_context *pipe = tc->pipe;
2344
2345 if (tc->base.const_uploader &&
2346 tc->base.stream_uploader != tc->base.const_uploader)
2347 u_upload_destroy(tc->base.const_uploader);
2348
2349 if (tc->base.stream_uploader)
2350 u_upload_destroy(tc->base.stream_uploader);
2351
2352 tc_sync(tc);
2353
2354 if (util_queue_is_initialized(&tc->queue)) {
2355 util_queue_destroy(&tc->queue);
2356
2357 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
2358 util_queue_fence_destroy(&tc->batch_slots[i].fence);
2359 assert(!tc->batch_slots[i].token);
2360 }
2361 }
2362
2363 slab_destroy_child(&tc->pool_transfers);
2364 assert(tc->batch_slots[tc->next].num_total_call_slots == 0);
2365 pipe->destroy(pipe);
2366 os_free_aligned(tc);
2367 }
2368
2369 static const tc_execute execute_func[TC_NUM_CALLS] = {
2370 #define CALL(name) tc_call_##name,
2371 #include "u_threaded_context_calls.h"
2372 #undef CALL
2373 };
2374
2375 /**
2376 * Wrap an existing pipe_context into a threaded_context.
2377 *
2378 * \param pipe pipe_context to wrap
2379 * \param parent_transfer_pool parent slab pool set up for creating pipe_-
2380 * transfer objects; the driver should have one
2381 * in pipe_screen.
2382 * \param replace_buffer callback for replacing a pipe_resource's storage
2383 * with another pipe_resource's storage.
2384 * \param out if successful, the threaded_context will be returned here in
2385 * addition to the return value if "out" != NULL
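*
* A minimal usage sketch (driver names here are hypothetical; the callback
* typedefs are the ones declared in u_threaded_context.h). "create_fence"
* may be NULL, in which case tc_flush always falls back to a synchronous
* flush:
*
*    struct pipe_context *ctx = my_driver_create_context(screen, priv, flags);
*    struct threaded_context *tc = NULL;
*
*    ctx = threaded_context_create(ctx, &my_screen->transfer_pool,
*                                  my_replace_buffer_storage,
*                                  my_create_fence, &tc);
*
* If threading is disabled, the unwrapped context is returned and "tc" stays
* NULL; on failure, the given context is destroyed and NULL is returned.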
2386 */
2387 struct pipe_context *
2388 threaded_context_create(struct pipe_context *pipe,
2389 struct slab_parent_pool *parent_transfer_pool,
2390 tc_replace_buffer_storage_func replace_buffer,
2391 tc_create_fence_func create_fence,
2392 struct threaded_context **out)
2393 {
2394 struct threaded_context *tc;
2395
2396 STATIC_ASSERT(sizeof(union tc_payload) <= 8);
2397 STATIC_ASSERT(sizeof(struct tc_call) <= 16);
2398
2399 if (!pipe)
2400 return NULL;
2401
2402 util_cpu_detect();
2403
2404 if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1))
2405 return pipe;
2406
2407 tc = os_malloc_aligned(sizeof(struct threaded_context), 16);
2408 if (!tc) {
2409 pipe->destroy(pipe);
2410 return NULL;
2411 }
2412 memset(tc, 0, sizeof(*tc));
2413
2414 assert((uintptr_t)tc % 16 == 0);
2415 /* These should be static asserts, but they don't work with MSVC */
2416 assert(offsetof(struct threaded_context, batch_slots) % 16 == 0);
2417 assert(offsetof(struct threaded_context, batch_slots[0].call) % 16 == 0);
2418 assert(offsetof(struct threaded_context, batch_slots[0].call[1]) % 16 == 0);
2419 assert(offsetof(struct threaded_context, batch_slots[1].call) % 16 == 0);
2420
2421 /* The driver context isn't wrapped, so set its "priv" to NULL. */
2422 pipe->priv = NULL;
2423
2424 tc->pipe = pipe;
2425 tc->replace_buffer_storage = replace_buffer;
2426 tc->create_fence = create_fence;
2427 tc->map_buffer_alignment =
2428 pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
2429 tc->base.priv = pipe; /* priv points to the wrapped driver context */
2430 tc->base.screen = pipe->screen;
2431 tc->base.destroy = tc_destroy;
2432
2433 tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
2434 if (pipe->stream_uploader == pipe->const_uploader)
2435 tc->base.const_uploader = tc->base.stream_uploader;
2436 else
2437 tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
2438
2439 if (!tc->base.stream_uploader || !tc->base.const_uploader)
2440 goto fail;
2441
2442 /* The queue size is the number of batches "waiting". Batches are removed
2443 * from the queue before being executed, so keep one tc_batch slot for that
2444 * execution. Also, keep one unused slot for an unflushed batch.
2445 */
2446 if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1, 0))
2447 goto fail;
2448
2449 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
2450 tc->batch_slots[i].sentinel = TC_SENTINEL;
2451 tc->batch_slots[i].pipe = pipe;
2452 util_queue_fence_init(&tc->batch_slots[i].fence);
2453 }
2454
2455 LIST_INITHEAD(&tc->unflushed_queries);
2456
2457 slab_create_child(&tc->pool_transfers, parent_transfer_pool);
2458
2459 #define CTX_INIT(_member) \
2460 tc->base._member = tc->pipe->_member ? tc_##_member : NULL
2461
2462 CTX_INIT(flush);
2463 CTX_INIT(draw_vbo);
2464 CTX_INIT(launch_grid);
2465 CTX_INIT(resource_copy_region);
2466 CTX_INIT(blit);
2467 CTX_INIT(clear);
2468 CTX_INIT(clear_render_target);
2469 CTX_INIT(clear_depth_stencil);
2470 CTX_INIT(clear_buffer);
2471 CTX_INIT(clear_texture);
2472 CTX_INIT(flush_resource);
2473 CTX_INIT(generate_mipmap);
2474 CTX_INIT(render_condition);
2475 CTX_INIT(create_query);
2476 CTX_INIT(create_batch_query);
2477 CTX_INIT(destroy_query);
2478 CTX_INIT(begin_query);
2479 CTX_INIT(end_query);
2480 CTX_INIT(get_query_result);
2481 CTX_INIT(get_query_result_resource);
2482 CTX_INIT(set_active_query_state);
2483 CTX_INIT(create_blend_state);
2484 CTX_INIT(bind_blend_state);
2485 CTX_INIT(delete_blend_state);
2486 CTX_INIT(create_sampler_state);
2487 CTX_INIT(bind_sampler_states);
2488 CTX_INIT(delete_sampler_state);
2489 CTX_INIT(create_rasterizer_state);
2490 CTX_INIT(bind_rasterizer_state);
2491 CTX_INIT(delete_rasterizer_state);
2492 CTX_INIT(create_depth_stencil_alpha_state);
2493 CTX_INIT(bind_depth_stencil_alpha_state);
2494 CTX_INIT(delete_depth_stencil_alpha_state);
2495 CTX_INIT(create_fs_state);
2496 CTX_INIT(bind_fs_state);
2497 CTX_INIT(delete_fs_state);
2498 CTX_INIT(create_vs_state);
2499 CTX_INIT(bind_vs_state);
2500 CTX_INIT(delete_vs_state);
2501 CTX_INIT(create_gs_state);
2502 CTX_INIT(bind_gs_state);
2503 CTX_INIT(delete_gs_state);
2504 CTX_INIT(create_tcs_state);
2505 CTX_INIT(bind_tcs_state);
2506 CTX_INIT(delete_tcs_state);
2507 CTX_INIT(create_tes_state);
2508 CTX_INIT(bind_tes_state);
2509 CTX_INIT(delete_tes_state);
2510 CTX_INIT(create_compute_state);
2511 CTX_INIT(bind_compute_state);
2512 CTX_INIT(delete_compute_state);
2513 CTX_INIT(create_vertex_elements_state);
2514 CTX_INIT(bind_vertex_elements_state);
2515 CTX_INIT(delete_vertex_elements_state);
2516 CTX_INIT(set_blend_color);
2517 CTX_INIT(set_stencil_ref);
2518 CTX_INIT(set_sample_mask);
2519 CTX_INIT(set_min_samples);
2520 CTX_INIT(set_clip_state);
2521 CTX_INIT(set_constant_buffer);
2522 CTX_INIT(set_framebuffer_state);
2523 CTX_INIT(set_polygon_stipple);
2524 CTX_INIT(set_scissor_states);
2525 CTX_INIT(set_viewport_states);
2526 CTX_INIT(set_window_rectangles);
2527 CTX_INIT(set_sampler_views);
2528 CTX_INIT(set_tess_state);
2529 CTX_INIT(set_shader_buffers);
2530 CTX_INIT(set_shader_images);
2531 CTX_INIT(set_vertex_buffers);
2532 CTX_INIT(create_stream_output_target);
2533 CTX_INIT(stream_output_target_destroy);
2534 CTX_INIT(set_stream_output_targets);
2535 CTX_INIT(create_sampler_view);
2536 CTX_INIT(sampler_view_destroy);
2537 CTX_INIT(create_surface);
2538 CTX_INIT(surface_destroy);
2539 CTX_INIT(transfer_map);
2540 CTX_INIT(transfer_flush_region);
2541 CTX_INIT(transfer_unmap);
2542 CTX_INIT(buffer_subdata);
2543 CTX_INIT(texture_subdata);
2544 CTX_INIT(texture_barrier);
2545 CTX_INIT(memory_barrier);
2546 CTX_INIT(resource_commit);
2547 CTX_INIT(create_video_codec);
2548 CTX_INIT(create_video_buffer);
2549 CTX_INIT(set_compute_resources);
2550 CTX_INIT(set_global_binding);
2551 CTX_INIT(get_sample_position);
2552 CTX_INIT(invalidate_resource);
2553 CTX_INIT(get_device_reset_status);
2554 CTX_INIT(set_device_reset_callback);
2555 CTX_INIT(dump_debug_state);
2556 CTX_INIT(emit_string_marker);
2557 CTX_INIT(set_debug_callback);
2558 CTX_INIT(create_fence_fd);
2559 CTX_INIT(fence_server_sync);
2560 CTX_INIT(get_timestamp);
2561 CTX_INIT(create_texture_handle);
2562 CTX_INIT(delete_texture_handle);
2563 CTX_INIT(make_texture_handle_resident);
2564 CTX_INIT(create_image_handle);
2565 CTX_INIT(delete_image_handle);
2566 CTX_INIT(make_image_handle_resident);
2567 #undef CTX_INIT
2568
2569 if (out)
2570 *out = tc;
2571
2572 return &tc->base;
2573
2574 fail:
2575 tc_destroy(&tc->base);
2576 return NULL;
2577 }