gallium/u_threaded: avoid syncs for get_query_result
mesa.git: src/gallium/auxiliary/util/u_threaded_context.c
1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33
34 /* 0 = disabled, 1 = assertions, 2 = printfs */
35 #define TC_DEBUG 0
36
37 #if TC_DEBUG >= 1
38 #define tc_assert assert
39 #else
40 #define tc_assert(x)
41 #endif
42
43 #if TC_DEBUG >= 2
44 #define tc_printf printf
45 #define tc_asprintf asprintf
46 #define tc_strcmp strcmp
47 #else
48 #define tc_printf(...)
49 #define tc_asprintf(...) 0
50 #define tc_strcmp(...) 0
51 #endif
52
53 #define TC_SENTINEL 0x5ca1ab1e
54
55 enum tc_call_id {
56 #define CALL(name) TC_CALL_##name,
57 #include "u_threaded_context_calls.h"
58 #undef CALL
59 TC_NUM_CALLS,
60 };
61
62 typedef void (*tc_execute)(struct pipe_context *pipe, union tc_payload *payload);
63
64 static const tc_execute execute_func[TC_NUM_CALLS];
65
66 static void
67 tc_batch_check(struct tc_batch *batch)
68 {
69 tc_assert(batch->sentinel == TC_SENTINEL);
70 tc_assert(batch->num_total_call_slots <= TC_CALLS_PER_BATCH);
71 }
72
73 static void
74 tc_debug_check(struct threaded_context *tc)
75 {
76 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
77 tc_batch_check(&tc->batch_slots[i]);
78 tc_assert(tc->batch_slots[i].pipe == tc->pipe);
79 }
80 }
81
82 static void
83 tc_batch_execute(void *job, int thread_index)
84 {
85 struct tc_batch *batch = job;
86 struct pipe_context *pipe = batch->pipe;
87 struct tc_call *last = &batch->call[batch->num_total_call_slots];
88
89 tc_batch_check(batch);
90
91 assert(!batch->token);
92
93 for (struct tc_call *iter = batch->call; iter != last;
94 iter += iter->num_call_slots) {
95 tc_assert(iter->sentinel == TC_SENTINEL);
96 execute_func[iter->call_id](pipe, &iter->payload);
97 }
98
99 tc_batch_check(batch);
100 batch->num_total_call_slots = 0;
101 }
102
103 static void
104 tc_batch_flush(struct threaded_context *tc)
105 {
106 struct tc_batch *next = &tc->batch_slots[tc->next];
107
108 tc_assert(next->num_total_call_slots != 0);
109 tc_batch_check(next);
110 tc_debug_check(tc);
111 p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots);
112
113 if (next->token) {
114 next->token->tc = NULL;
115 tc_unflushed_batch_token_reference(&next->token, NULL);
116 }
117
118 util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
119 NULL);
120 tc->last = tc->next;
121 tc->next = (tc->next + 1) % TC_MAX_BATCHES;
122 }
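
/* Editor's note, summarizing the batching model implemented above: the
 * application thread records calls into batch_slots[tc->next];
 * tc_batch_flush() submits that batch to the driver thread via util_queue
 * and advances tc->next. The batch slots form a small ring (TC_MAX_BATCHES),
 * so the application thread can keep recording while earlier batches execute.
 * _tc_sync() below waits for the last submitted batch and executes the
 * still-unsubmitted one directly in the application thread.
 */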
123
124 /* This is the function that adds variable-sized calls into the current
125 * batch. It also flushes the batch if there is not enough space there.
126 * All other higher-level "add" functions use it.
127 */
128 static union tc_payload *
129 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
130 unsigned payload_size)
131 {
132 struct tc_batch *next = &tc->batch_slots[tc->next];
133 unsigned total_size = offsetof(struct tc_call, payload) + payload_size;
134 unsigned num_call_slots = DIV_ROUND_UP(total_size, sizeof(struct tc_call));
135
136 tc_debug_check(tc);
137
138 if (unlikely(next->num_total_call_slots + num_call_slots > TC_CALLS_PER_BATCH)) {
139 tc_batch_flush(tc);
140 next = &tc->batch_slots[tc->next];
141 tc_assert(next->num_total_call_slots == 0);
142 }
143
144 tc_assert(util_queue_fence_is_signalled(&next->fence));
145
146 struct tc_call *call = &next->call[next->num_total_call_slots];
147 next->num_total_call_slots += num_call_slots;
148
149 call->sentinel = TC_SENTINEL;
150 call->call_id = id;
151 call->num_call_slots = num_call_slots;
152
153 tc_debug_check(tc);
154 return &call->payload;
155 }
156
157 #define tc_add_struct_typed_call(tc, execute, type) \
158 ((struct type*)tc_add_sized_call(tc, execute, sizeof(struct type)))
159
160 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
161 ((struct type*)tc_add_sized_call(tc, execute, \
162 sizeof(struct type) + \
163 sizeof(((struct type*)NULL)->slot[0]) * \
164 (num_slots)))
165
166 static union tc_payload *
167 tc_add_small_call(struct threaded_context *tc, enum tc_call_id id)
168 {
169 return tc_add_sized_call(tc, id, 0);
170 }
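
/* Editor's note: each call occupies a whole number of tc_call slots, computed
 * from the payload size as
 *
 *    num_call_slots = DIV_ROUND_UP(offsetof(struct tc_call, payload) +
 *                                  payload_size, sizeof(struct tc_call));
 *
 * For illustration only (the real sizes are defined in u_threaded_context.h):
 * if sizeof(struct tc_call) were 16 bytes with the payload at offset 8, a
 * 24-byte payload would occupy DIV_ROUND_UP(8 + 24, 16) = 2 call slots.
 */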
171
172 static void
173 _tc_sync(struct threaded_context *tc, const char *info, const char *func)
174 {
175 struct tc_batch *last = &tc->batch_slots[tc->last];
176 struct tc_batch *next = &tc->batch_slots[tc->next];
177 bool synced = false;
178
179 tc_debug_check(tc);
180
181 /* Only wait for queued calls... */
182 if (!util_queue_fence_is_signalled(&last->fence)) {
183 util_queue_fence_wait(&last->fence);
184 synced = true;
185 }
186
187 tc_debug_check(tc);
188
189 if (next->token) {
190 next->token->tc = NULL;
191 tc_unflushed_batch_token_reference(&next->token, NULL);
192 }
193
194 /* .. and execute unflushed calls directly. */
195 if (next->num_total_call_slots) {
196 p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots);
197 tc_batch_execute(next, 0);
198 synced = true;
199 }
200
201 if (synced) {
202 p_atomic_inc(&tc->num_syncs);
203
204 if (tc_strcmp(func, "tc_destroy") != 0)
205 tc_printf("sync %s %s\n", func, info);
206 }
207
208 tc_debug_check(tc);
209 }
210
211 #define tc_sync(tc) _tc_sync(tc, "", __func__)
212 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
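
/* Editor's note: entry points that the threaded context cannot (or does not
 * yet) offload fall back to synchronizing and calling the wrapped driver
 * context directly. An illustrative sketch (tc_foo / pipe->foo are
 * hypothetical names):
 *
 *    static void
 *    tc_foo(struct pipe_context *_pipe, unsigned arg)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct pipe_context *pipe = tc->pipe;
 *
 *       tc_sync(tc);
 *       pipe->foo(pipe, arg);
 *    }
 *
 * tc_set_compute_resources() and tc_create_texture_handle() below follow this
 * pattern.
 */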
213
214 /**
215 * Call this from fence_finish for same-context fence waits of deferred fences
216 * that haven't been flushed yet.
217 *
218 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
219 * i.e., the wrapped one.
220 */
221 void
222 threaded_context_flush(struct pipe_context *_pipe,
223 struct tc_unflushed_batch_token *token)
224 {
225 struct threaded_context *tc = threaded_context(_pipe);
226
227 /* This is called from the state-tracker / application thread. */
228 if (token->tc && token->tc == tc)
229 tc_sync(token->tc);
230 }
231
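/* Set *dst to src and take a reference, without releasing whatever *dst
 * happened to contain. Payload memory is not zero-initialized, so a plain
 * pipe_resource_reference() could try to unreference a garbage pointer.
 */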
232 static void
233 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
234 {
235 *dst = NULL;
236 pipe_resource_reference(dst, src);
237 }
238
239 void
240 threaded_resource_init(struct pipe_resource *res)
241 {
242 struct threaded_resource *tres = threaded_resource(res);
243
244 tres->latest = &tres->b;
245 util_range_init(&tres->valid_buffer_range);
246 tres->base_valid_buffer_range = &tres->valid_buffer_range;
247 tres->is_shared = false;
248 tres->is_user_ptr = false;
249 }
250
251 void
252 threaded_resource_deinit(struct pipe_resource *res)
253 {
254 struct threaded_resource *tres = threaded_resource(res);
255
256 if (tres->latest != &tres->b)
257 pipe_resource_reference(&tres->latest, NULL);
258 util_range_destroy(&tres->valid_buffer_range);
259 }
260
261 struct pipe_context *
262 threaded_context_unwrap_sync(struct pipe_context *pipe)
263 {
264 if (!pipe || !pipe->priv)
265 return pipe;
266
267 tc_sync(threaded_context(pipe));
268 return (struct pipe_context*)pipe->priv;
269 }
270
271
272 /********************************************************************
273 * simple functions
274 */
275
276 #define TC_FUNC1(func, m_payload, qualifier, type, deref, deref2) \
277 static void \
278 tc_call_##func(struct pipe_context *pipe, union tc_payload *payload) \
279 { \
280 pipe->func(pipe, deref2((type*)payload)); \
281 } \
282 \
283 static void \
284 tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
285 { \
286 struct threaded_context *tc = threaded_context(_pipe); \
287 type *p = (type*)tc_add_sized_call(tc, TC_CALL_##func, sizeof(type)); \
288 *p = deref(param); \
289 }
290
291 TC_FUNC1(set_active_query_state, flags, , boolean, , *)
292
293 TC_FUNC1(set_blend_color, blend_color, const, struct pipe_blend_color, *, )
294 TC_FUNC1(set_stencil_ref, stencil_ref, const, struct pipe_stencil_ref, *, )
295 TC_FUNC1(set_clip_state, clip_state, const, struct pipe_clip_state, *, )
296 TC_FUNC1(set_sample_mask, sample_mask, , unsigned, , *)
297 TC_FUNC1(set_min_samples, min_samples, , unsigned, , *)
298 TC_FUNC1(set_polygon_stipple, polygon_stipple, const, struct pipe_poly_stipple, *, )
299
300 TC_FUNC1(texture_barrier, flags, , unsigned, , *)
301 TC_FUNC1(memory_barrier, flags, , unsigned, , *)
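
/* Editor's note: for reference, TC_FUNC1(set_sample_mask, sample_mask, ,
 * unsigned, , *) above expands to roughly:
 *
 *    static void
 *    tc_call_set_sample_mask(struct pipe_context *pipe, union tc_payload *payload)
 *    {
 *       pipe->set_sample_mask(pipe, *((unsigned*)payload));
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       unsigned *p = (unsigned*)
 *          tc_add_sized_call(tc, TC_CALL_set_sample_mask, sizeof(unsigned));
 *       *p = param;
 *    }
 */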
302
303
304 /********************************************************************
305 * queries
306 */
307
308 static struct pipe_query *
309 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
310 unsigned index)
311 {
312 struct threaded_context *tc = threaded_context(_pipe);
313 struct pipe_context *pipe = tc->pipe;
314
315 return pipe->create_query(pipe, query_type, index);
316 }
317
318 static struct pipe_query *
319 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
320 unsigned *query_types)
321 {
322 struct threaded_context *tc = threaded_context(_pipe);
323 struct pipe_context *pipe = tc->pipe;
324
325 return pipe->create_batch_query(pipe, num_queries, query_types);
326 }
327
328 static void
329 tc_call_destroy_query(struct pipe_context *pipe, union tc_payload *payload)
330 {
331 struct threaded_query *tq = threaded_query(payload->query);
332
333 if (tq->head_unflushed.next)
334 LIST_DEL(&tq->head_unflushed);
335
336 pipe->destroy_query(pipe, payload->query);
337 }
338
339 static void
340 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
341 {
342 struct threaded_context *tc = threaded_context(_pipe);
343
344 tc_add_small_call(tc, TC_CALL_destroy_query)->query = query;
345 }
346
347 static void
348 tc_call_begin_query(struct pipe_context *pipe, union tc_payload *payload)
349 {
350 pipe->begin_query(pipe, payload->query);
351 }
352
353 static boolean
354 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
355 {
356 struct threaded_context *tc = threaded_context(_pipe);
357 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_begin_query);
358
359 payload->query = query;
360 return true; /* we don't care about the return value for this call */
361 }
362
363 struct tc_end_query_payload {
364 struct threaded_context *tc;
365 struct pipe_query *query;
366 };
367
368 static void
369 tc_call_end_query(struct pipe_context *pipe, union tc_payload *payload)
370 {
371 struct tc_end_query_payload *p = (struct tc_end_query_payload *)payload;
372 struct threaded_query *tq = threaded_query(p->query);
373
374 if (!tq->head_unflushed.next)
375 LIST_ADD(&tq->head_unflushed, &p->tc->unflushed_queries);
376
377 pipe->end_query(pipe, p->query);
378 }
379
380 static bool
381 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
382 {
383 struct threaded_context *tc = threaded_context(_pipe);
384 struct threaded_query *tq = threaded_query(query);
385 struct tc_end_query_payload *payload =
386 tc_add_struct_typed_call(tc, TC_CALL_end_query, tc_end_query_payload);
387
390 payload->tc = tc;
391 payload->query = query;
392
393 tq->flushed = false;
394
395 return true; /* we don't care about the return value for this call */
396 }
397
398 static boolean
399 tc_get_query_result(struct pipe_context *_pipe,
400 struct pipe_query *query, boolean wait,
401 union pipe_query_result *result)
402 {
403 struct threaded_context *tc = threaded_context(_pipe);
404 struct threaded_query *tq = threaded_query(query);
405 struct pipe_context *pipe = tc->pipe;
406
407 if (!tq->flushed)
408 tc_sync_msg(tc, wait ? "wait" : "nowait");
409
410 bool success = pipe->get_query_result(pipe, query, wait, result);
411
412 if (success) {
413 tq->flushed = true;
414 if (tq->head_unflushed.next) {
415 /* This is safe because it can only happen after we sync'd. */
416 LIST_DEL(&tq->head_unflushed);
417 }
418 }
419 return success;
420 }
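
/* Editor's note: this is the "avoid syncs for get_query_result" fast path.
 * tc_end_query() clears tq->flushed; it is set again either here, once a
 * result has been obtained, or by tc_flush_queries() in the driver thread
 * when a non-deferred flush executes (with release semantics, see
 * tc_flush_queries). While tq->flushed is true, polling the query result
 * goes straight to the driver without synchronizing the threaded context.
 */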
421
422 struct tc_query_result_resource {
423 struct pipe_query *query;
424 boolean wait;
425 enum pipe_query_value_type result_type;
426 int index;
427 struct pipe_resource *resource;
428 unsigned offset;
429 };
430
431 static void
432 tc_call_get_query_result_resource(struct pipe_context *pipe,
433 union tc_payload *payload)
434 {
435 struct tc_query_result_resource *p = (struct tc_query_result_resource *)payload;
436
437 pipe->get_query_result_resource(pipe, p->query, p->wait, p->result_type,
438 p->index, p->resource, p->offset);
439 pipe_resource_reference(&p->resource, NULL);
440 }
441
442 static void
443 tc_get_query_result_resource(struct pipe_context *_pipe,
444 struct pipe_query *query, boolean wait,
445 enum pipe_query_value_type result_type, int index,
446 struct pipe_resource *resource, unsigned offset)
447 {
448 struct threaded_context *tc = threaded_context(_pipe);
449 struct tc_query_result_resource *p =
450 tc_add_struct_typed_call(tc, TC_CALL_get_query_result_resource,
451 tc_query_result_resource);
452
453 p->query = query;
454 p->wait = wait;
455 p->result_type = result_type;
456 p->index = index;
457 tc_set_resource_reference(&p->resource, resource);
458 p->offset = offset;
459 }
460
461 struct tc_render_condition {
462 struct pipe_query *query;
463 bool condition;
464 unsigned mode;
465 };
466
467 static void
468 tc_call_render_condition(struct pipe_context *pipe, union tc_payload *payload)
469 {
470 struct tc_render_condition *p = (struct tc_render_condition *)payload;
471 pipe->render_condition(pipe, p->query, p->condition, p->mode);
472 }
473
474 static void
475 tc_render_condition(struct pipe_context *_pipe,
476 struct pipe_query *query, boolean condition,
477 enum pipe_render_cond_flag mode)
478 {
479 struct threaded_context *tc = threaded_context(_pipe);
480 struct tc_render_condition *p =
481 tc_add_struct_typed_call(tc, TC_CALL_render_condition, tc_render_condition);
482
483 p->query = query;
484 p->condition = condition;
485 p->mode = mode;
486 }
487
488
489 /********************************************************************
490 * constant (immutable) states
491 */
492
493 #define TC_CSO_CREATE(name, sname) \
494 static void * \
495 tc_create_##name##_state(struct pipe_context *_pipe, \
496 const struct pipe_##sname##_state *state) \
497 { \
498 struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
499 return pipe->create_##name##_state(pipe, state); \
500 }
501
502 #define TC_CSO_BIND(name) TC_FUNC1(bind_##name##_state, cso, , void *, , *)
503 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, cso, , void *, , *)
504
505 #define TC_CSO_WHOLE2(name, sname) \
506 TC_CSO_CREATE(name, sname) \
507 TC_CSO_BIND(name) \
508 TC_CSO_DELETE(name)
509
510 #define TC_CSO_WHOLE(name) TC_CSO_WHOLE2(name, name)
511
512 TC_CSO_WHOLE(blend)
513 TC_CSO_WHOLE(rasterizer)
514 TC_CSO_WHOLE(depth_stencil_alpha)
515 TC_CSO_WHOLE(compute)
516 TC_CSO_WHOLE2(fs, shader)
517 TC_CSO_WHOLE2(vs, shader)
518 TC_CSO_WHOLE2(gs, shader)
519 TC_CSO_WHOLE2(tcs, shader)
520 TC_CSO_WHOLE2(tes, shader)
521 TC_CSO_CREATE(sampler, sampler)
522 TC_CSO_DELETE(sampler)
523 TC_CSO_BIND(vertex_elements)
524 TC_CSO_DELETE(vertex_elements)
525
526 static void *
527 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
528 const struct pipe_vertex_element *elems)
529 {
530 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
531
532 return pipe->create_vertex_elements_state(pipe, count, elems);
533 }
534
535 struct tc_sampler_states {
536 ubyte shader, start, count;
537 void *slot[0]; /* more will be allocated if needed */
538 };
539
540 static void
541 tc_call_bind_sampler_states(struct pipe_context *pipe, union tc_payload *payload)
542 {
543 struct tc_sampler_states *p = (struct tc_sampler_states *)payload;
544 pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
545 }
546
547 static void
548 tc_bind_sampler_states(struct pipe_context *_pipe,
549 enum pipe_shader_type shader,
550 unsigned start, unsigned count, void **states)
551 {
552 if (!count)
553 return;
554
555 struct threaded_context *tc = threaded_context(_pipe);
556 struct tc_sampler_states *p =
557 tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
558
559 p->shader = shader;
560 p->start = start;
561 p->count = count;
562 memcpy(p->slot, states, count * sizeof(states[0]));
563 }
564
565
566 /********************************************************************
567 * immediate states
568 */
569
570 static void
571 tc_call_set_framebuffer_state(struct pipe_context *pipe, union tc_payload *payload)
572 {
573 struct pipe_framebuffer_state *p = (struct pipe_framebuffer_state *)payload;
574
575 pipe->set_framebuffer_state(pipe, p);
576
577 unsigned nr_cbufs = p->nr_cbufs;
578 for (unsigned i = 0; i < nr_cbufs; i++)
579 pipe_surface_reference(&p->cbufs[i], NULL);
580 pipe_surface_reference(&p->zsbuf, NULL);
581 }
582
583 static void
584 tc_set_framebuffer_state(struct pipe_context *_pipe,
585 const struct pipe_framebuffer_state *fb)
586 {
587 struct threaded_context *tc = threaded_context(_pipe);
588 struct pipe_framebuffer_state *p =
589 tc_add_struct_typed_call(tc, TC_CALL_set_framebuffer_state,
590 pipe_framebuffer_state);
591 unsigned nr_cbufs = fb->nr_cbufs;
592
593 p->width = fb->width;
594 p->height = fb->height;
595 p->samples = fb->samples;
596 p->layers = fb->layers;
597 p->nr_cbufs = nr_cbufs;
598
599 for (unsigned i = 0; i < nr_cbufs; i++) {
600 p->cbufs[i] = NULL;
601 pipe_surface_reference(&p->cbufs[i], fb->cbufs[i]);
602 }
603 p->zsbuf = NULL;
604 pipe_surface_reference(&p->zsbuf, fb->zsbuf);
605 }
606
607 static void
608 tc_call_set_tess_state(struct pipe_context *pipe, union tc_payload *payload)
609 {
610 float *p = (float*)payload;
611 pipe->set_tess_state(pipe, p, p + 4);
612 }
613
614 static void
615 tc_set_tess_state(struct pipe_context *_pipe,
616 const float default_outer_level[4],
617 const float default_inner_level[2])
618 {
619 struct threaded_context *tc = threaded_context(_pipe);
620 float *p = (float*)tc_add_sized_call(tc, TC_CALL_set_tess_state,
621 sizeof(float) * 6);
622
623 memcpy(p, default_outer_level, 4 * sizeof(float));
624 memcpy(p + 4, default_inner_level, 2 * sizeof(float));
625 }
626
627 struct tc_constant_buffer {
628 ubyte shader, index;
629 struct pipe_constant_buffer cb;
630 };
631
632 static void
633 tc_call_set_constant_buffer(struct pipe_context *pipe, union tc_payload *payload)
634 {
635 struct tc_constant_buffer *p = (struct tc_constant_buffer *)payload;
636
637 pipe->set_constant_buffer(pipe,
638 p->shader,
639 p->index,
640 &p->cb);
641 pipe_resource_reference(&p->cb.buffer, NULL);
642 }
643
644 static void
645 tc_set_constant_buffer(struct pipe_context *_pipe,
646 enum pipe_shader_type shader, uint index,
647 const struct pipe_constant_buffer *cb)
648 {
649 struct threaded_context *tc = threaded_context(_pipe);
650 struct pipe_resource *buffer = NULL;
651 unsigned offset;
652
653 /* This must be done before adding set_constant_buffer, because it could
654 * generate e.g. transfer_unmap and flush partially-uninitialized
655 * set_constant_buffer to the driver if it was done afterwards.
656 */
657 if (cb && cb->user_buffer) {
658 u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, 64,
659 cb->user_buffer, &offset, &buffer);
660 }
661
662 struct tc_constant_buffer *p =
663 tc_add_struct_typed_call(tc, TC_CALL_set_constant_buffer,
664 tc_constant_buffer);
665 p->shader = shader;
666 p->index = index;
667
668 if (cb) {
669 if (cb->user_buffer) {
670 p->cb.buffer_size = cb->buffer_size;
671 p->cb.user_buffer = NULL;
672 p->cb.buffer_offset = offset;
673 p->cb.buffer = buffer;
674 } else {
675 tc_set_resource_reference(&p->cb.buffer,
676 cb->buffer);
677 memcpy(&p->cb, cb, sizeof(*cb));
678 }
679 } else {
680 memset(&p->cb, 0, sizeof(*cb));
681 }
682 }
683
684 struct tc_scissors {
685 ubyte start, count;
686 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
687 };
688
689 static void
690 tc_call_set_scissor_states(struct pipe_context *pipe, union tc_payload *payload)
691 {
692 struct tc_scissors *p = (struct tc_scissors *)payload;
693 pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
694 }
695
696 static void
697 tc_set_scissor_states(struct pipe_context *_pipe,
698 unsigned start, unsigned count,
699 const struct pipe_scissor_state *states)
700 {
701 struct threaded_context *tc = threaded_context(_pipe);
702 struct tc_scissors *p =
703 tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
704
705 p->start = start;
706 p->count = count;
707 memcpy(&p->slot, states, count * sizeof(states[0]));
708 }
709
710 struct tc_viewports {
711 ubyte start, count;
712 struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
713 };
714
715 static void
716 tc_call_set_viewport_states(struct pipe_context *pipe, union tc_payload *payload)
717 {
718 struct tc_viewports *p = (struct tc_viewports *)payload;
719 pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
720 }
721
722 static void
723 tc_set_viewport_states(struct pipe_context *_pipe,
724 unsigned start, unsigned count,
725 const struct pipe_viewport_state *states)
726 {
727 if (!count)
728 return;
729
730 struct threaded_context *tc = threaded_context(_pipe);
731 struct tc_viewports *p =
732 tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
733
734 p->start = start;
735 p->count = count;
736 memcpy(&p->slot, states, count * sizeof(states[0]));
737 }
738
739 struct tc_window_rects {
740 bool include;
741 ubyte count;
742 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
743 };
744
745 static void
746 tc_call_set_window_rectangles(struct pipe_context *pipe,
747 union tc_payload *payload)
748 {
749 struct tc_window_rects *p = (struct tc_window_rects *)payload;
750 pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
751 }
752
753 static void
754 tc_set_window_rectangles(struct pipe_context *_pipe, boolean include,
755 unsigned count,
756 const struct pipe_scissor_state *rects)
757 {
758 struct threaded_context *tc = threaded_context(_pipe);
759 struct tc_window_rects *p =
760 tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
761
762 p->include = include;
763 p->count = count;
764 memcpy(p->slot, rects, count * sizeof(rects[0]));
765 }
766
767 struct tc_sampler_views {
768 ubyte shader, start, count;
769 struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
770 };
771
772 static void
773 tc_call_set_sampler_views(struct pipe_context *pipe, union tc_payload *payload)
774 {
775 struct tc_sampler_views *p = (struct tc_sampler_views *)payload;
776 unsigned count = p->count;
777
778 pipe->set_sampler_views(pipe, p->shader, p->start, p->count, p->slot);
779 for (unsigned i = 0; i < count; i++)
780 pipe_sampler_view_reference(&p->slot[i], NULL);
781 }
782
783 static void
784 tc_set_sampler_views(struct pipe_context *_pipe,
785 enum pipe_shader_type shader,
786 unsigned start, unsigned count,
787 struct pipe_sampler_view **views)
788 {
789 if (!count)
790 return;
791
792 struct threaded_context *tc = threaded_context(_pipe);
793 struct tc_sampler_views *p =
794 tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views, count);
795
796 p->shader = shader;
797 p->start = start;
798 p->count = count;
799
800 if (views) {
801 for (unsigned i = 0; i < count; i++) {
802 p->slot[i] = NULL;
803 pipe_sampler_view_reference(&p->slot[i], views[i]);
804 }
805 } else {
806 memset(p->slot, 0, count * sizeof(views[0]));
807 }
808 }
809
810 struct tc_shader_images {
811 ubyte shader, start, count;
812 bool unbind;
813 struct pipe_image_view slot[0]; /* more will be allocated if needed */
814 };
815
816 static void
817 tc_call_set_shader_images(struct pipe_context *pipe, union tc_payload *payload)
818 {
819 struct tc_shader_images *p = (struct tc_shader_images *)payload;
820 unsigned count = p->count;
821
822 if (p->unbind) {
823 pipe->set_shader_images(pipe, p->shader, p->start, p->count, NULL);
824 return;
825 }
826
827 pipe->set_shader_images(pipe, p->shader, p->start, p->count, p->slot);
828
829 for (unsigned i = 0; i < count; i++)
830 pipe_resource_reference(&p->slot[i].resource, NULL);
831 }
832
833 static void
834 tc_set_shader_images(struct pipe_context *_pipe,
835 enum pipe_shader_type shader,
836 unsigned start, unsigned count,
837 const struct pipe_image_view *images)
838 {
839 if (!count)
840 return;
841
842 struct threaded_context *tc = threaded_context(_pipe);
843 struct tc_shader_images *p =
844 tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
845 images ? count : 0);
846
847 p->shader = shader;
848 p->start = start;
849 p->count = count;
850 p->unbind = images == NULL;
851
852 if (images) {
853 for (unsigned i = 0; i < count; i++) {
854 tc_set_resource_reference(&p->slot[i].resource, images[i].resource);
855
856 if (images[i].access & PIPE_IMAGE_ACCESS_WRITE &&
857 images[i].resource &&
858 images[i].resource->target == PIPE_BUFFER) {
859 struct threaded_resource *tres =
860 threaded_resource(images[i].resource);
861
862 util_range_add(&tres->valid_buffer_range, images[i].u.buf.offset,
863 images[i].u.buf.offset + images[i].u.buf.size);
864 }
865 }
866 memcpy(p->slot, images, count * sizeof(images[0]));
867 }
868 }
869
870 struct tc_shader_buffers {
871 ubyte shader, start, count;
872 bool unbind;
873 struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
874 };
875
876 static void
877 tc_call_set_shader_buffers(struct pipe_context *pipe, union tc_payload *payload)
878 {
879 struct tc_shader_buffers *p = (struct tc_shader_buffers *)payload;
880 unsigned count = p->count;
881
882 if (p->unbind) {
883 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL);
884 return;
885 }
886
887 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot);
888
889 for (unsigned i = 0; i < count; i++)
890 pipe_resource_reference(&p->slot[i].buffer, NULL);
891 }
892
893 static void
894 tc_set_shader_buffers(struct pipe_context *_pipe,
895 enum pipe_shader_type shader,
896 unsigned start, unsigned count,
897 const struct pipe_shader_buffer *buffers)
898 {
899 if (!count)
900 return;
901
902 struct threaded_context *tc = threaded_context(_pipe);
903 struct tc_shader_buffers *p =
904 tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
905 buffers ? count : 0);
906
907 p->shader = shader;
908 p->start = start;
909 p->count = count;
910 p->unbind = buffers == NULL;
911
912 if (buffers) {
913 for (unsigned i = 0; i < count; i++) {
914 struct pipe_shader_buffer *dst = &p->slot[i];
915 const struct pipe_shader_buffer *src = buffers + i;
916
917 tc_set_resource_reference(&dst->buffer, src->buffer);
918 dst->buffer_offset = src->buffer_offset;
919 dst->buffer_size = src->buffer_size;
920
921 if (src->buffer) {
922 struct threaded_resource *tres = threaded_resource(src->buffer);
923
924 util_range_add(&tres->valid_buffer_range, src->buffer_offset,
925 src->buffer_offset + src->buffer_size);
926 }
927 }
928 }
929 }
930
931 struct tc_vertex_buffers {
932 ubyte start, count;
933 bool unbind;
934 struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
935 };
936
937 static void
938 tc_call_set_vertex_buffers(struct pipe_context *pipe, union tc_payload *payload)
939 {
940 struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)payload;
941 unsigned count = p->count;
942
943 if (p->unbind) {
944 pipe->set_vertex_buffers(pipe, p->start, count, NULL);
945 return;
946 }
947
948 for (unsigned i = 0; i < count; i++)
949 tc_assert(!p->slot[i].is_user_buffer);
950
951 pipe->set_vertex_buffers(pipe, p->start, count, p->slot);
952 for (unsigned i = 0; i < count; i++)
953 pipe_resource_reference(&p->slot[i].buffer.resource, NULL);
954 }
955
956 static void
957 tc_set_vertex_buffers(struct pipe_context *_pipe,
958 unsigned start, unsigned count,
959 const struct pipe_vertex_buffer *buffers)
960 {
961 struct threaded_context *tc = threaded_context(_pipe);
962
963 if (!count)
964 return;
965
966 if (buffers) {
967 struct tc_vertex_buffers *p =
968 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
969 p->start = start;
970 p->count = count;
971 p->unbind = false;
972
973 for (unsigned i = 0; i < count; i++) {
974 struct pipe_vertex_buffer *dst = &p->slot[i];
975 const struct pipe_vertex_buffer *src = buffers + i;
976
977 tc_assert(!src->is_user_buffer);
978 dst->stride = src->stride;
979 dst->is_user_buffer = false;
980 tc_set_resource_reference(&dst->buffer.resource,
981 src->buffer.resource);
982 dst->buffer_offset = src->buffer_offset;
983 }
984 } else {
985 struct tc_vertex_buffers *p =
986 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
987 p->start = start;
988 p->count = count;
989 p->unbind = true;
990 }
991 }
992
993 struct tc_stream_outputs {
994 unsigned count;
995 struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
996 unsigned offsets[PIPE_MAX_SO_BUFFERS];
997 };
998
999 static void
1000 tc_call_set_stream_output_targets(struct pipe_context *pipe, union tc_payload *payload)
1001 {
1002 struct tc_stream_outputs *p = (struct tc_stream_outputs *)payload;
1003 unsigned count = p->count;
1004
1005 pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
1006 for (unsigned i = 0; i < count; i++)
1007 pipe_so_target_reference(&p->targets[i], NULL);
1008 }
1009
1010 static void
1011 tc_set_stream_output_targets(struct pipe_context *_pipe,
1012 unsigned count,
1013 struct pipe_stream_output_target **tgs,
1014 const unsigned *offsets)
1015 {
1016 struct threaded_context *tc = threaded_context(_pipe);
1017 struct tc_stream_outputs *p =
1018 tc_add_struct_typed_call(tc, TC_CALL_set_stream_output_targets,
1019 tc_stream_outputs);
1020
1021 for (unsigned i = 0; i < count; i++) {
1022 p->targets[i] = NULL;
1023 pipe_so_target_reference(&p->targets[i], tgs[i]);
1024 }
1025 p->count = count;
1026 memcpy(p->offsets, offsets, count * sizeof(unsigned));
1027 }
1028
1029 static void
1030 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1031 unsigned count, struct pipe_surface **resources)
1032 {
1033 struct threaded_context *tc = threaded_context(_pipe);
1034 struct pipe_context *pipe = tc->pipe;
1035
1036 tc_sync(tc);
1037 pipe->set_compute_resources(pipe, start, count, resources);
1038 }
1039
1040 static void
1041 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1042 unsigned count, struct pipe_resource **resources,
1043 uint32_t **handles)
1044 {
1045 struct threaded_context *tc = threaded_context(_pipe);
1046 struct pipe_context *pipe = tc->pipe;
1047
1048 tc_sync(tc);
1049 pipe->set_global_binding(pipe, first, count, resources, handles);
1050 }
1051
1052
1053 /********************************************************************
1054 * views
1055 */
1056
1057 static struct pipe_surface *
1058 tc_create_surface(struct pipe_context *_pipe,
1059 struct pipe_resource *resource,
1060 const struct pipe_surface *surf_tmpl)
1061 {
1062 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1063 struct pipe_surface *view =
1064 pipe->create_surface(pipe, resource, surf_tmpl);
1065
1066 if (view)
1067 view->context = _pipe;
1068 return view;
1069 }
1070
1071 static void
1072 tc_surface_destroy(struct pipe_context *_pipe,
1073 struct pipe_surface *surf)
1074 {
1075 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1076
1077 pipe->surface_destroy(pipe, surf);
1078 }
1079
1080 static struct pipe_sampler_view *
1081 tc_create_sampler_view(struct pipe_context *_pipe,
1082 struct pipe_resource *resource,
1083 const struct pipe_sampler_view *templ)
1084 {
1085 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1086 struct pipe_sampler_view *view =
1087 pipe->create_sampler_view(pipe, resource, templ);
1088
1089 if (view)
1090 view->context = _pipe;
1091 return view;
1092 }
1093
1094 static void
1095 tc_sampler_view_destroy(struct pipe_context *_pipe,
1096 struct pipe_sampler_view *view)
1097 {
1098 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1099
1100 pipe->sampler_view_destroy(pipe, view);
1101 }
1102
1103 static struct pipe_stream_output_target *
1104 tc_create_stream_output_target(struct pipe_context *_pipe,
1105 struct pipe_resource *res,
1106 unsigned buffer_offset,
1107 unsigned buffer_size)
1108 {
1109 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1110 struct threaded_resource *tres = threaded_resource(res);
1111 struct pipe_stream_output_target *view;
1112
1113 tc_sync(threaded_context(_pipe));
1114 util_range_add(&tres->valid_buffer_range, buffer_offset,
1115 buffer_offset + buffer_size);
1116
1117 view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1118 buffer_size);
1119 if (view)
1120 view->context = _pipe;
1121 return view;
1122 }
1123
1124 static void
1125 tc_stream_output_target_destroy(struct pipe_context *_pipe,
1126 struct pipe_stream_output_target *target)
1127 {
1128 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1129
1130 pipe->stream_output_target_destroy(pipe, target);
1131 }
1132
1133
1134 /********************************************************************
1135 * bindless
1136 */
1137
1138 static uint64_t
1139 tc_create_texture_handle(struct pipe_context *_pipe,
1140 struct pipe_sampler_view *view,
1141 const struct pipe_sampler_state *state)
1142 {
1143 struct threaded_context *tc = threaded_context(_pipe);
1144 struct pipe_context *pipe = tc->pipe;
1145
1146 tc_sync(tc);
1147 return pipe->create_texture_handle(pipe, view, state);
1148 }
1149
1150 static void
1151 tc_call_delete_texture_handle(struct pipe_context *pipe,
1152 union tc_payload *payload)
1153 {
1154 pipe->delete_texture_handle(pipe, payload->handle);
1155 }
1156
1157 static void
1158 tc_delete_texture_handle(struct pipe_context *_pipe, uint64_t handle)
1159 {
1160 struct threaded_context *tc = threaded_context(_pipe);
1161 union tc_payload *payload =
1162 tc_add_small_call(tc, TC_CALL_delete_texture_handle);
1163
1164 payload->handle = handle;
1165 }
1166
1167 struct tc_make_texture_handle_resident
1168 {
1169 uint64_t handle;
1170 bool resident;
1171 };
1172
1173 static void
1174 tc_call_make_texture_handle_resident(struct pipe_context *pipe,
1175 union tc_payload *payload)
1176 {
1177 struct tc_make_texture_handle_resident *p =
1178 (struct tc_make_texture_handle_resident *)payload;
1179
1180 pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1181 }
1182
1183 static void
1184 tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1185 bool resident)
1186 {
1187 struct threaded_context *tc = threaded_context(_pipe);
1188 struct tc_make_texture_handle_resident *p =
1189 tc_add_struct_typed_call(tc, TC_CALL_make_texture_handle_resident,
1190 tc_make_texture_handle_resident);
1191
1192 p->handle = handle;
1193 p->resident = resident;
1194 }
1195
1196 static uint64_t
1197 tc_create_image_handle(struct pipe_context *_pipe,
1198 const struct pipe_image_view *image)
1199 {
1200 struct threaded_context *tc = threaded_context(_pipe);
1201 struct pipe_context *pipe = tc->pipe;
1202
1203 tc_sync(tc);
1204 return pipe->create_image_handle(pipe, image);
1205 }
1206
1207 static void
1208 tc_call_delete_image_handle(struct pipe_context *pipe,
1209 union tc_payload *payload)
1210 {
1211 pipe->delete_image_handle(pipe, payload->handle);
1212 }
1213
1214 static void
1215 tc_delete_image_handle(struct pipe_context *_pipe, uint64_t handle)
1216 {
1217 struct threaded_context *tc = threaded_context(_pipe);
1218 union tc_payload *payload =
1219 tc_add_small_call(tc, TC_CALL_delete_image_handle);
1220
1221 payload->handle = handle;
1222 }
1223
1224 struct tc_make_image_handle_resident
1225 {
1226 uint64_t handle;
1227 unsigned access;
1228 bool resident;
1229 };
1230
1231 static void
1232 tc_call_make_image_handle_resident(struct pipe_context *pipe,
1233 union tc_payload *payload)
1234 {
1235 struct tc_make_image_handle_resident *p =
1236 (struct tc_make_image_handle_resident *)payload;
1237
1238 pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1239 }
1240
1241 static void
1242 tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1243 unsigned access, bool resident)
1244 {
1245 struct threaded_context *tc = threaded_context(_pipe);
1246 struct tc_make_image_handle_resident *p =
1247 tc_add_struct_typed_call(tc, TC_CALL_make_image_handle_resident,
1248 tc_make_image_handle_resident);
1249
1250 p->handle = handle;
1251 p->access = access;
1252 p->resident = resident;
1253 }
1254
1255
1256 /********************************************************************
1257 * transfer
1258 */
1259
1260 struct tc_replace_buffer_storage {
1261 struct pipe_resource *dst;
1262 struct pipe_resource *src;
1263 tc_replace_buffer_storage_func func;
1264 };
1265
1266 static void
1267 tc_call_replace_buffer_storage(struct pipe_context *pipe,
1268 union tc_payload *payload)
1269 {
1270 struct tc_replace_buffer_storage *p =
1271 (struct tc_replace_buffer_storage *)payload;
1272
1273 p->func(pipe, p->dst, p->src);
1274 pipe_resource_reference(&p->dst, NULL);
1275 pipe_resource_reference(&p->src, NULL);
1276 }
1277
1278 static bool
1279 tc_invalidate_buffer(struct threaded_context *tc,
1280 struct threaded_resource *tbuf)
1281 {
1282 /* We can't check if the buffer is idle, so we invalidate it
1283 * unconditionally. */
1284 struct pipe_screen *screen = tc->base.screen;
1285 struct pipe_resource *new_buf;
1286
1287 /* Shared, pinned, and sparse buffers can't be reallocated. */
1288 if (tbuf->is_shared ||
1289 tbuf->is_user_ptr ||
1290 tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE)
1291 return false;
1292
1293 /* Allocate a new one. */
1294 new_buf = screen->resource_create(screen, &tbuf->b);
1295 if (!new_buf)
1296 return false;
1297
1298 /* Replace the "latest" pointer. */
1299 if (tbuf->latest != &tbuf->b)
1300 pipe_resource_reference(&tbuf->latest, NULL);
1301
1302 tbuf->latest = new_buf;
1303 util_range_set_empty(&tbuf->valid_buffer_range);
1304
1305 /* The valid range should point to the original buffer. */
1306 threaded_resource(new_buf)->base_valid_buffer_range =
1307 &tbuf->valid_buffer_range;
1308
1309 /* Enqueue storage replacement of the original buffer. */
1310 struct tc_replace_buffer_storage *p =
1311 tc_add_struct_typed_call(tc, TC_CALL_replace_buffer_storage,
1312 tc_replace_buffer_storage);
1313
1314 p->func = tc->replace_buffer_storage;
1315 tc_set_resource_reference(&p->dst, &tbuf->b);
1316 tc_set_resource_reference(&p->src, new_buf);
1317 return true;
1318 }
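
/* Editor's note on the invalidation path above: the application thread
 * allocates fresh storage immediately and points tres->latest at it, so
 * subsequent maps in this thread can use the new buffer without waiting.
 * The driver thread later executes TC_CALL_replace_buffer_storage, which
 * makes the original pipe_resource use the new storage via the driver's
 * replace_buffer_storage callback. From the state tracker's point of view
 * the buffer object never changes identity.
 */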
1319
1320 static unsigned
1321 tc_improve_map_buffer_flags(struct threaded_context *tc,
1322 struct threaded_resource *tres, unsigned usage,
1323 unsigned offset, unsigned size)
1324 {
1325 /* Never invalidate inside the driver and never infer "unsynchronized". */
1326 unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
1327 TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
1328
1329 /* Prevent a reentry. */
1330 if (usage & tc_flags)
1331 return usage;
1332
1333 /* Use the staging upload if it's preferred. */
1334 if (usage & (PIPE_TRANSFER_DISCARD_RANGE |
1335 PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) &&
1336 !(usage & PIPE_TRANSFER_PERSISTENT) &&
1337 /* Try not to decrement the counter if it's not positive. Still racy,
1338 * but it makes it harder to wrap the counter from INT_MIN to INT_MAX. */
1339 tres->max_forced_staging_uploads > 0 &&
1340 p_atomic_dec_return(&tres->max_forced_staging_uploads) >= 0) {
1341 usage &= ~(PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
1342 PIPE_TRANSFER_UNSYNCHRONIZED);
1343
1344 return usage | tc_flags | PIPE_TRANSFER_DISCARD_RANGE;
1345 }
1346
1347 /* Sparse buffers can't be mapped directly and can't be reallocated
1348 * (fully invalidated). That may just be a radeonsi limitation, but
1349 * the threaded context must obey it with radeonsi.
1350 */
1351 if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) {
1352 /* We can use DISCARD_RANGE instead of full discard. This is the only
1353 * fast path for sparse buffers that doesn't need thread synchronization.
1354 */
1355 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
1356 usage |= PIPE_TRANSFER_DISCARD_RANGE;
1357
1358 /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
1359 * The threaded context doesn't do unsynchronized mappings and
1360 * invalidations of sparse buffers, therefore correct driver behavior
1361 * won't result in incorrect behavior with the threaded context.
1362 */
1363 return usage;
1364 }
1365
1366 usage |= tc_flags;
1367
1368 /* Handle CPU reads trivially. */
1369 if (usage & PIPE_TRANSFER_READ) {
1370 /* Drivers aren't allowed to do buffer invalidations. */
1371 return usage & ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
1372 }
1373
1374 /* See if the buffer range being mapped has never been initialized,
1375 * in which case it can be mapped unsynchronized. */
1376 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
1377 !tres->is_shared &&
1378 !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size))
1379 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1380
1381 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1382 /* If discarding the entire range, discard the whole resource instead. */
1383 if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
1384 offset == 0 && size == tres->b.width0)
1385 usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
1386
1387 /* Discard the whole resource if needed. */
1388 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
1389 if (tc_invalidate_buffer(tc, tres))
1390 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1391 else
1392 usage |= PIPE_TRANSFER_DISCARD_RANGE; /* fallback */
1393 }
1394 }
1395
1396 /* We won't need this flag anymore. */
1397 /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
1398 usage &= ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
1399
1400 /* GL_AMD_pinned_memory and persistent mappings can't use staging
1401 * buffers. */
1402 if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
1403 PIPE_TRANSFER_PERSISTENT) ||
1404 tres->is_user_ptr)
1405 usage &= ~PIPE_TRANSFER_DISCARD_RANGE;
1406
1407 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
1408 if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
1409 usage &= ~PIPE_TRANSFER_DISCARD_RANGE;
1410 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
1411 }
1412
1413 return usage;
1414 }
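
/* Editor's note: a rough summary of the outcomes of the function above
 * (assuming no sparse buffer and no explicit UNSYNCHRONIZED from the caller):
 *
 *  - READ: keep the sync; only DISCARD_WHOLE_RESOURCE is stripped.
 *  - WRITE to a range that has never been written: add UNSYNCHRONIZED.
 *  - DISCARD_WHOLE_RESOURCE (or DISCARD_RANGE covering the whole buffer):
 *    try tc_invalidate_buffer(); on success add UNSYNCHRONIZED, otherwise
 *    fall back to DISCARD_RANGE.
 *  - remaining DISCARD_RANGE (not persistent, not user memory): handled as
 *    a staging upload in tc_transfer_map().
 *  - UNSYNCHRONIZED: DISCARD_RANGE is dropped and
 *    TC_TRANSFER_MAP_THREADED_UNSYNC tells the driver not to sync either.
 */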
1415
1416 static void *
1417 tc_transfer_map(struct pipe_context *_pipe,
1418 struct pipe_resource *resource, unsigned level,
1419 unsigned usage, const struct pipe_box *box,
1420 struct pipe_transfer **transfer)
1421 {
1422 struct threaded_context *tc = threaded_context(_pipe);
1423 struct threaded_resource *tres = threaded_resource(resource);
1424 struct pipe_context *pipe = tc->pipe;
1425
1426 if (resource->target == PIPE_BUFFER) {
1427 usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
1428
1429 /* Do a staging transfer within the threaded context. The driver should
1430 * only get resource_copy_region.
1431 */
1432 if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
1433 struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers);
1434 uint8_t *map;
1435
1436 ttrans->staging = NULL;
1437
1438 u_upload_alloc(tc->base.stream_uploader, 0,
1439 box->width + (box->x % tc->map_buffer_alignment),
1440 64, &ttrans->offset, &ttrans->staging, (void**)&map);
1441 if (!map) {
1442 slab_free(&tc->pool_transfers, ttrans);
1443 return NULL;
1444 }
1445
1446 tc_set_resource_reference(&ttrans->b.resource, resource);
1447 ttrans->b.level = 0;
1448 ttrans->b.usage = usage;
1449 ttrans->b.box = *box;
1450 ttrans->b.stride = 0;
1451 ttrans->b.layer_stride = 0;
1452 *transfer = &ttrans->b;
1453 return map + (box->x % tc->map_buffer_alignment);
1454 }
1455 }
1456
1457 /* Unsynchronized buffer mappings don't have to synchronize the thread. */
1458 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
1459 tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" :
1460 usage & PIPE_TRANSFER_DISCARD_RANGE ? " discard_range" :
1461 usage & PIPE_TRANSFER_READ ? " read" : " ??");
1462
1463 return pipe->transfer_map(pipe, tres->latest ? tres->latest : resource,
1464 level, usage, box, transfer);
1465 }
1466
1467 struct tc_transfer_flush_region {
1468 struct pipe_transfer *transfer;
1469 struct pipe_box box;
1470 };
1471
1472 static void
1473 tc_call_transfer_flush_region(struct pipe_context *pipe,
1474 union tc_payload *payload)
1475 {
1476 struct tc_transfer_flush_region *p =
1477 (struct tc_transfer_flush_region *)payload;
1478
1479 pipe->transfer_flush_region(pipe, p->transfer, &p->box);
1480 }
1481
1482 struct tc_resource_copy_region {
1483 struct pipe_resource *dst;
1484 unsigned dst_level;
1485 unsigned dstx, dsty, dstz;
1486 struct pipe_resource *src;
1487 unsigned src_level;
1488 struct pipe_box src_box;
1489 };
1490
1491 static void
1492 tc_resource_copy_region(struct pipe_context *_pipe,
1493 struct pipe_resource *dst, unsigned dst_level,
1494 unsigned dstx, unsigned dsty, unsigned dstz,
1495 struct pipe_resource *src, unsigned src_level,
1496 const struct pipe_box *src_box);
1497
1498 static void
1499 tc_buffer_do_flush_region(struct threaded_context *tc,
1500 struct threaded_transfer *ttrans,
1501 const struct pipe_box *box)
1502 {
1503 struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
1504
1505 if (ttrans->staging) {
1506 struct pipe_box src_box;
1507
1508 u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
1509 box->width, &src_box);
1510
1511 /* Copy the staging buffer into the original one. */
1512 tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
1513 ttrans->staging, 0, &src_box);
1514 }
1515
1516 util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);
1517 }
1518
1519 static void
1520 tc_transfer_flush_region(struct pipe_context *_pipe,
1521 struct pipe_transfer *transfer,
1522 const struct pipe_box *rel_box)
1523 {
1524 struct threaded_context *tc = threaded_context(_pipe);
1525 struct threaded_transfer *ttrans = threaded_transfer(transfer);
1526 struct threaded_resource *tres = threaded_resource(transfer->resource);
1527 unsigned required_usage = PIPE_TRANSFER_WRITE |
1528 PIPE_TRANSFER_FLUSH_EXPLICIT;
1529
1530 if (tres->b.target == PIPE_BUFFER) {
1531 if ((transfer->usage & required_usage) == required_usage) {
1532 struct pipe_box box;
1533
1534 u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
1535 tc_buffer_do_flush_region(tc, ttrans, &box);
1536 }
1537
1538 /* Staging transfers don't send the call to the driver. */
1539 if (ttrans->staging)
1540 return;
1541 }
1542
1543 struct tc_transfer_flush_region *p =
1544 tc_add_struct_typed_call(tc, TC_CALL_transfer_flush_region,
1545 tc_transfer_flush_region);
1546 p->transfer = transfer;
1547 p->box = *rel_box;
1548 }
1549
1550 static void
1551 tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload)
1552 {
1553 pipe->transfer_unmap(pipe, payload->transfer);
1554 }
1555
1556 static void
1557 tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
1558 {
1559 struct threaded_context *tc = threaded_context(_pipe);
1560 struct threaded_transfer *ttrans = threaded_transfer(transfer);
1561 struct threaded_resource *tres = threaded_resource(transfer->resource);
1562
1563 if (tres->b.target == PIPE_BUFFER) {
1564 if (transfer->usage & PIPE_TRANSFER_WRITE &&
1565 !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1566 tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
1567
1568 /* Staging transfers don't send the call to the driver. */
1569 if (ttrans->staging) {
1570 pipe_resource_reference(&ttrans->staging, NULL);
1571 pipe_resource_reference(&ttrans->b.resource, NULL);
1572 slab_free(&tc->pool_transfers, ttrans);
1573 return;
1574 }
1575 }
1576
1577 tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer;
1578 }
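
/* Editor's note on the buffer staging path (DISCARD_RANGE maps): the map
 * returns a pointer into a u_upload_mgr allocation made in the application
 * thread, so no driver call is needed at map time. On flush_region/unmap,
 * tc_buffer_do_flush_region() enqueues a resource_copy_region from the
 * staging buffer into the real one; the driver thread therefore only ever
 * sees the copy, never the map itself.
 */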
1579
1580 struct tc_buffer_subdata {
1581 struct pipe_resource *resource;
1582 unsigned usage, offset, size;
1583 char slot[0]; /* more will be allocated if needed */
1584 };
1585
1586 static void
1587 tc_call_buffer_subdata(struct pipe_context *pipe, union tc_payload *payload)
1588 {
1589 struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)payload;
1590
1591 pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
1592 p->slot);
1593 pipe_resource_reference(&p->resource, NULL);
1594 }
1595
1596 static void
1597 tc_buffer_subdata(struct pipe_context *_pipe,
1598 struct pipe_resource *resource,
1599 unsigned usage, unsigned offset,
1600 unsigned size, const void *data)
1601 {
1602 struct threaded_context *tc = threaded_context(_pipe);
1603 struct threaded_resource *tres = threaded_resource(resource);
1604
1605 if (!size)
1606 return;
1607
1608 usage |= PIPE_TRANSFER_WRITE |
1609 PIPE_TRANSFER_DISCARD_RANGE;
1610
1611 usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
1612
1613 /* Unsynchronized and big transfers should use transfer_map. Also handle
1614 * full invalidations, because drivers aren't allowed to do them.
1615 */
1616 if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
1617 PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) ||
1618 size > TC_MAX_SUBDATA_BYTES) {
1619 struct pipe_transfer *transfer;
1620 struct pipe_box box;
1621 uint8_t *map = NULL;
1622
1623 u_box_1d(offset, size, &box);
1624
1625 map = tc_transfer_map(_pipe, resource, 0, usage, &box, &transfer);
1626 if (map) {
1627 memcpy(map, data, size);
1628 tc_transfer_unmap(_pipe, transfer);
1629 }
1630 return;
1631 }
1632
1633 util_range_add(&tres->valid_buffer_range, offset, offset + size);
1634
1635 /* The upload is small. Enqueue it. */
1636 struct tc_buffer_subdata *p =
1637 tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
1638
1639 tc_set_resource_reference(&p->resource, resource);
1640 p->usage = usage;
1641 p->offset = offset;
1642 p->size = size;
1643 memcpy(p->slot, data, size);
1644 }
1645
1646 struct tc_texture_subdata {
1647 struct pipe_resource *resource;
1648 unsigned level, usage, stride, layer_stride;
1649 struct pipe_box box;
1650 char slot[0]; /* more will be allocated if needed */
1651 };
1652
1653 static void
1654 tc_call_texture_subdata(struct pipe_context *pipe, union tc_payload *payload)
1655 {
1656 struct tc_texture_subdata *p = (struct tc_texture_subdata *)payload;
1657
1658 pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
1659 p->slot, p->stride, p->layer_stride);
1660 pipe_resource_reference(&p->resource, NULL);
1661 }
1662
1663 static void
1664 tc_texture_subdata(struct pipe_context *_pipe,
1665 struct pipe_resource *resource,
1666 unsigned level, unsigned usage,
1667 const struct pipe_box *box,
1668 const void *data, unsigned stride,
1669 unsigned layer_stride)
1670 {
1671 struct threaded_context *tc = threaded_context(_pipe);
1672 unsigned size;
1673
1674 assert(box->height >= 1);
1675 assert(box->depth >= 1);
1676
1677 size = (box->depth - 1) * layer_stride +
1678 (box->height - 1) * stride +
1679 box->width * util_format_get_blocksize(resource->format);
1680 if (!size)
1681 return;
1682
1683 /* Small uploads can be enqueued, big uploads must sync. */
1684 if (size <= TC_MAX_SUBDATA_BYTES) {
1685 struct tc_texture_subdata *p =
1686 tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
1687
1688 tc_set_resource_reference(&p->resource, resource);
1689 p->level = level;
1690 p->usage = usage;
1691 p->box = *box;
1692 p->stride = stride;
1693 p->layer_stride = layer_stride;
1694 memcpy(p->slot, data, size);
1695 } else {
1696 struct pipe_context *pipe = tc->pipe;
1697
1698 tc_sync(tc);
1699 pipe->texture_subdata(pipe, resource, level, usage, box, data,
1700 stride, layer_stride);
1701 }
1702 }
1703
1704
1705 /********************************************************************
1706 * miscellaneous
1707 */
1708
1709 #define TC_FUNC_SYNC_RET0(ret_type, func) \
1710 static ret_type \
1711 tc_##func(struct pipe_context *_pipe) \
1712 { \
1713 struct threaded_context *tc = threaded_context(_pipe); \
1714 struct pipe_context *pipe = tc->pipe; \
1715 tc_sync(tc); \
1716 return pipe->func(pipe); \
1717 }
1718
1719 TC_FUNC_SYNC_RET0(enum pipe_reset_status, get_device_reset_status)
1720 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
1721
1722 static void
1723 tc_get_sample_position(struct pipe_context *_pipe,
1724 unsigned sample_count, unsigned sample_index,
1725 float *out_value)
1726 {
1727 struct threaded_context *tc = threaded_context(_pipe);
1728 struct pipe_context *pipe = tc->pipe;
1729
1730 tc_sync(tc);
1731 pipe->get_sample_position(pipe, sample_count, sample_index,
1732 out_value);
1733 }
1734
1735 static void
1736 tc_set_device_reset_callback(struct pipe_context *_pipe,
1737 const struct pipe_device_reset_callback *cb)
1738 {
1739 struct threaded_context *tc = threaded_context(_pipe);
1740 struct pipe_context *pipe = tc->pipe;
1741
1742 tc_sync(tc);
1743 pipe->set_device_reset_callback(pipe, cb);
1744 }
1745
1746 struct tc_string_marker {
1747 int len;
1748 char slot[0]; /* more will be allocated if needed */
1749 };
1750
1751 static void
1752 tc_call_emit_string_marker(struct pipe_context *pipe, union tc_payload *payload)
1753 {
1754 struct tc_string_marker *p = (struct tc_string_marker *)payload;
1755 pipe->emit_string_marker(pipe, p->slot, p->len);
1756 }
1757
1758 static void
1759 tc_emit_string_marker(struct pipe_context *_pipe,
1760 const char *string, int len)
1761 {
1762 struct threaded_context *tc = threaded_context(_pipe);
1763
1764 if (len <= TC_MAX_STRING_MARKER_BYTES) {
1765 struct tc_string_marker *p =
1766 tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
1767
1768 memcpy(p->slot, string, len);
1769 p->len = len;
1770 } else {
1771 struct pipe_context *pipe = tc->pipe;
1772
1773 tc_sync(tc);
1774 pipe->emit_string_marker(pipe, string, len);
1775 }
1776 }
1777
1778 static void
1779 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
1780 unsigned flags)
1781 {
1782 struct threaded_context *tc = threaded_context(_pipe);
1783 struct pipe_context *pipe = tc->pipe;
1784
1785 tc_sync(tc);
1786 pipe->dump_debug_state(pipe, stream, flags);
1787 }
1788
1789 static void
1790 tc_set_debug_callback(struct pipe_context *_pipe,
1791 const struct pipe_debug_callback *cb)
1792 {
1793 struct threaded_context *tc = threaded_context(_pipe);
1794 struct pipe_context *pipe = tc->pipe;
1795
1796 /* Drop all synchronous debug callbacks. Drivers are expected to be OK
1797 * with this. shader-db will use an environment variable to disable
1798 * the threaded context.
1799 */
1800 if (cb && cb->debug_message && !cb->async)
1801 return;
1802
1803 tc_sync(tc);
1804 pipe->set_debug_callback(pipe, cb);
1805 }
1806
1807 static void
1808 tc_create_fence_fd(struct pipe_context *_pipe,
1809 struct pipe_fence_handle **fence, int fd)
1810 {
1811 struct threaded_context *tc = threaded_context(_pipe);
1812 struct pipe_context *pipe = tc->pipe;
1813
1814 tc_sync(tc);
1815 pipe->create_fence_fd(pipe, fence, fd);
1816 }
1817
1818 static void
1819 tc_fence_server_sync(struct pipe_context *_pipe,
1820 struct pipe_fence_handle *fence)
1821 {
1822 struct threaded_context *tc = threaded_context(_pipe);
1823 struct pipe_context *pipe = tc->pipe;
1824
1825 tc_sync(tc);
1826 pipe->fence_server_sync(pipe, fence);
1827 }
1828
1829 static struct pipe_video_codec *
1830 tc_create_video_codec(struct pipe_context *_pipe,
1831 const struct pipe_video_codec *templ)
1832 {
1833 unreachable("Threaded context should not be enabled for video APIs");
1834 return NULL;
1835 }
1836
1837 static struct pipe_video_buffer *
1838 tc_create_video_buffer(struct pipe_context *_pipe,
1839 const struct pipe_video_buffer *templ)
1840 {
1841 unreachable("Threaded context should not be enabled for video APIs");
1842 return NULL;
1843 }
1844
1845
1846 /********************************************************************
1847 * draw, launch, clear, blit, copy, flush
1848 */
1849
1850 struct tc_flush_payload {
1851 struct threaded_context *tc;
1852 struct pipe_fence_handle *fence;
1853 unsigned flags;
1854 };
1855
1856 static void
1857 tc_flush_queries(struct threaded_context *tc)
1858 {
1859 struct threaded_query *tq, *tmp;
1860 LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
1861 LIST_DEL(&tq->head_unflushed);
1862
1863 /* Memory release semantics: due to a possible race with
1864 * tc_get_query_result, we must ensure that the linked list changes
1865 * are visible before setting tq->flushed.
1866 */
1867 p_atomic_set(&tq->flushed, true);
1868 }
1869 }
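
/* Illustrative sketch of the reader side that the ordering above protects
 * (hedged: the actual consumer is tc_get_query_result elsewhere in this
 * file; this snippet is not part of the call table):
 *
 *    if (!p_atomic_read(&tq->flushed))
 *       tc_sync(tc); // the query may still sit in tc->unflushed_queries
 *    // tq->flushed == true means the flush already reached the driver,
 *    // so pipe->get_query_result() can be called without syncing.
 */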
1870
1871 static void
1872 tc_call_flush(struct pipe_context *pipe, union tc_payload *payload)
1873 {
1874 struct tc_flush_payload *p = (struct tc_flush_payload *)payload;
1875 struct pipe_screen *screen = pipe->screen;
1876
1877 pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
1878 screen->fence_reference(screen, &p->fence, NULL);
1879
1880 if (!(p->flags & PIPE_FLUSH_DEFERRED))
1881 tc_flush_queries(p->tc);
1882 }
1883
1884 static void
1885 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
1886 unsigned flags)
1887 {
1888 struct threaded_context *tc = threaded_context(_pipe);
1889 struct pipe_context *pipe = tc->pipe;
1890 struct pipe_screen *screen = pipe->screen;
1891 bool async = flags & PIPE_FLUSH_DEFERRED;
1892
1893 if (flags & PIPE_FLUSH_ASYNC) {
1894 struct tc_batch *last = &tc->batch_slots[tc->last];
1895
1896 /* Prefer to do the flush in the driver thread, but avoid the inter-thread
1897 * communication overhead if the driver thread is currently idle and the
1898 * caller is going to wait for the fence immediately anyway.
1899 */
1900 if (!(util_queue_fence_is_signalled(&last->fence) &&
1901 (flags & PIPE_FLUSH_HINT_FINISH)))
1902 async = true;
1903 }
1904
1905 if (async && tc->create_fence) {
1906 if (fence) {
1908 struct tc_batch *next = &tc->batch_slots[tc->next];
1909
1910 if (!next->token) {
1911 next->token = malloc(sizeof(*next->token));
1912 if (!next->token)
1913 goto out_of_memory;
1914
1915 pipe_reference_init(&next->token->ref, 1);
1916 next->token->tc = tc;
1917 }
1918
1919 screen->fence_reference(screen, fence, tc->create_fence(pipe, next->token));
1920 if (!*fence)
1921 goto out_of_memory;
1922 }
1923
1924 struct tc_flush_payload *p =
1925 tc_add_struct_typed_call(tc, TC_CALL_flush, tc_flush_payload);
1926 p->tc = tc;
1927 p->fence = fence ? *fence : NULL;
1928 p->flags = flags | TC_FLUSH_ASYNC;
1929
1930 if (!(flags & PIPE_FLUSH_DEFERRED))
1931 tc_batch_flush(tc);
1932 return;
1933 }
1934
1935 out_of_memory:
1936 tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
1937 flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
1938
1939 if (!(flags & PIPE_FLUSH_DEFERRED))
1940 tc_flush_queries(tc);
1941 pipe->flush(pipe, fence, flags);
1942 }
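
/* A rough sketch of the driver-side create_fence callback used above.
 * The mydrv_* names are hypothetical; the token type and its reference
 * helper are assumed to come from u_threaded_context.h:
 *
 *    static struct pipe_fence_handle *
 *    mydrv_create_fence(struct pipe_context *pipe,
 *                       struct tc_unflushed_batch_token *tc_token)
 *    {
 *       struct mydrv_fence *f = mydrv_fence_create_deferred(pipe);
 *
 *       // Keep a reference to the token. If the fence is waited on before
 *       // the batch containing this flush has been submitted, the driver
 *       // calls threaded_context_flush() with the saved token so the
 *       // queued TC_CALL_flush actually reaches pipe->flush.
 *       tc_unflushed_batch_token_reference(&f->tc_token, tc_token);
 *       return (struct pipe_fence_handle *)f;
 *    }
 */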
1943
1944 /* This is actually variable-sized, because indirect isn't allocated if it's
1945 * not needed. */
1946 struct tc_full_draw_info {
1947 struct pipe_draw_info draw;
1948 struct pipe_draw_indirect_info indirect;
1949 };
1950
1951 static void
1952 tc_call_draw_vbo(struct pipe_context *pipe, union tc_payload *payload)
1953 {
1954 struct tc_full_draw_info *info = (struct tc_full_draw_info*)payload;
1955
1956 pipe->draw_vbo(pipe, &info->draw);
1957 pipe_so_target_reference(&info->draw.count_from_stream_output, NULL);
1958 if (info->draw.index_size)
1959 pipe_resource_reference(&info->draw.index.resource, NULL);
1960 if (info->draw.indirect) {
1961 pipe_resource_reference(&info->indirect.buffer, NULL);
1962 pipe_resource_reference(&info->indirect.indirect_draw_count, NULL);
1963 }
1964 }
1965
1966 static struct tc_full_draw_info *
1967 tc_add_draw_vbo(struct pipe_context *_pipe, bool indirect)
1968 {
1969 return (struct tc_full_draw_info*)
1970 tc_add_sized_call(threaded_context(_pipe), TC_CALL_draw_vbo,
1971 indirect ? sizeof(struct tc_full_draw_info) :
1972 sizeof(struct pipe_draw_info));
1973 }
1974
1975 static void
1976 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info)
1977 {
1978 struct threaded_context *tc = threaded_context(_pipe);
1979 struct pipe_draw_indirect_info *indirect = info->indirect;
1980 unsigned index_size = info->index_size;
1981 bool has_user_indices = info->has_user_indices;
1982
1983 if (index_size && has_user_indices) {
1984 unsigned size = info->count * index_size;
1985 struct pipe_resource *buffer = NULL;
1986 unsigned offset;
1987
1988 tc_assert(!indirect);
1989
1990 /* This must be done before adding draw_vbo, because it could generate
1991 * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
1992 * to the driver if it was done afterwards.
1993 */
1994 u_upload_data(tc->base.stream_uploader, 0, size, 4, info->index.user,
1995 &offset, &buffer);
1996 if (unlikely(!buffer))
1997 return;
1998
1999 struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, false);
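/* Note: the reference is taken before the memcpy below on purpose.
 * memcpy copies the same pointer value from "info", so the reference
 * taken here ends up owned by p->draw.count_from_stream_output and is
 * released later in tc_call_draw_vbo.
 */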
2000 p->draw.count_from_stream_output = NULL;
2001 pipe_so_target_reference(&p->draw.count_from_stream_output,
2002 info->count_from_stream_output);
2003 memcpy(&p->draw, info, sizeof(*info));
2004 p->draw.has_user_indices = false;
2005 p->draw.index.resource = buffer;
2006 p->draw.start = offset / index_size;
2007 } else {
2008 /* Non-indexed call or indexed with a real index buffer. */
2009 struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, indirect != NULL);
2010 p->draw.count_from_stream_output = NULL;
2011 pipe_so_target_reference(&p->draw.count_from_stream_output,
2012 info->count_from_stream_output);
2013 if (index_size) {
2014 tc_set_resource_reference(&p->draw.index.resource,
2015 info->index.resource);
2016 }
2017 memcpy(&p->draw, info, sizeof(*info));
2018
2019 if (indirect) {
2020 tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
2021 tc_set_resource_reference(&p->indirect.indirect_draw_count,
2022 indirect->indirect_draw_count);
2023 memcpy(&p->indirect, indirect, sizeof(*indirect));
2024 p->draw.indirect = &p->indirect;
2025 }
2026 }
2027 }
2028
2029 static void
2030 tc_call_launch_grid(struct pipe_context *pipe, union tc_payload *payload)
2031 {
2032 struct pipe_grid_info *p = (struct pipe_grid_info *)payload;
2033
2034 pipe->launch_grid(pipe, p);
2035 pipe_resource_reference(&p->indirect, NULL);
2036 }
2037
2038 static void
2039 tc_launch_grid(struct pipe_context *_pipe,
2040 const struct pipe_grid_info *info)
2041 {
2042 struct threaded_context *tc = threaded_context(_pipe);
2043 struct pipe_grid_info *p = tc_add_struct_typed_call(tc, TC_CALL_launch_grid,
2044 pipe_grid_info);
2045 assert(info->input == NULL);
2046
2047 tc_set_resource_reference(&p->indirect, info->indirect);
2048 memcpy(p, info, sizeof(*info));
2049 }
2050
2051 static void
2052 tc_call_resource_copy_region(struct pipe_context *pipe, union tc_payload *payload)
2053 {
2054 struct tc_resource_copy_region *p = (struct tc_resource_copy_region *)payload;
2055
2056 pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
2057 p->dstz, p->src, p->src_level, &p->src_box);
2058 pipe_resource_reference(&p->dst, NULL);
2059 pipe_resource_reference(&p->src, NULL);
2060 }
2061
2062 static void
2063 tc_resource_copy_region(struct pipe_context *_pipe,
2064 struct pipe_resource *dst, unsigned dst_level,
2065 unsigned dstx, unsigned dsty, unsigned dstz,
2066 struct pipe_resource *src, unsigned src_level,
2067 const struct pipe_box *src_box)
2068 {
2069 struct threaded_context *tc = threaded_context(_pipe);
2070 struct threaded_resource *tdst = threaded_resource(dst);
2071 struct tc_resource_copy_region *p =
2072 tc_add_struct_typed_call(tc, TC_CALL_resource_copy_region,
2073 tc_resource_copy_region);
2074
2075 tc_set_resource_reference(&p->dst, dst);
2076 p->dst_level = dst_level;
2077 p->dstx = dstx;
2078 p->dsty = dsty;
2079 p->dstz = dstz;
2080 tc_set_resource_reference(&p->src, src);
2081 p->src_level = src_level;
2082 p->src_box = *src_box;
2083
2084 if (dst->target == PIPE_BUFFER)
2085 util_range_add(&tdst->valid_buffer_range, dstx, dstx + src_box->width);
2086 }
2087
2088 static void
2089 tc_call_blit(struct pipe_context *pipe, union tc_payload *payload)
2090 {
2091 struct pipe_blit_info *blit = (struct pipe_blit_info*)payload;
2092
2093 pipe->blit(pipe, blit);
2094 pipe_resource_reference(&blit->dst.resource, NULL);
2095 pipe_resource_reference(&blit->src.resource, NULL);
2096 }
2097
2098 static void
2099 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
2100 {
2101 struct threaded_context *tc = threaded_context(_pipe);
2102 struct pipe_blit_info *blit =
2103 tc_add_struct_typed_call(tc, TC_CALL_blit, pipe_blit_info);
2104
2105 tc_set_resource_reference(&blit->dst.resource, info->dst.resource);
2106 tc_set_resource_reference(&blit->src.resource, info->src.resource);
2107 memcpy(blit, info, sizeof(*info));
2108 }
2109
2110 struct tc_generate_mipmap {
2111 struct pipe_resource *res;
2112 enum pipe_format format;
2113 unsigned base_level;
2114 unsigned last_level;
2115 unsigned first_layer;
2116 unsigned last_layer;
2117 };
2118
2119 static void
2120 tc_call_generate_mipmap(struct pipe_context *pipe, union tc_payload *payload)
2121 {
2122 struct tc_generate_mipmap *p = (struct tc_generate_mipmap *)payload;
2123 bool MAYBE_UNUSED result = pipe->generate_mipmap(pipe, p->res, p->format,
2124 p->base_level,
2125 p->last_level,
2126 p->first_layer,
2127 p->last_layer);
2128 assert(result);
2129 pipe_resource_reference(&p->res, NULL);
2130 }
2131
2132 static boolean
2133 tc_generate_mipmap(struct pipe_context *_pipe,
2134 struct pipe_resource *res,
2135 enum pipe_format format,
2136 unsigned base_level,
2137 unsigned last_level,
2138 unsigned first_layer,
2139 unsigned last_layer)
2140 {
2141 struct threaded_context *tc = threaded_context(_pipe);
2142 struct pipe_context *pipe = tc->pipe;
2143 struct pipe_screen *screen = pipe->screen;
2144 unsigned bind = PIPE_BIND_SAMPLER_VIEW;
2145
2146 if (util_format_is_depth_or_stencil(format))
2147 bind = PIPE_BIND_DEPTH_STENCIL;
2148 else
2149 bind = PIPE_BIND_RENDER_TARGET;
2150
2151 if (!screen->is_format_supported(screen, format, res->target,
2152 res->nr_samples, bind))
2153 return false;
2154
2155 struct tc_generate_mipmap *p =
2156 tc_add_struct_typed_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
2157
2158 tc_set_resource_reference(&p->res, res);
2159 p->format = format;
2160 p->base_level = base_level;
2161 p->last_level = last_level;
2162 p->first_layer = first_layer;
2163 p->last_layer = last_layer;
2164 return true;
2165 }
2166
2167 static void
2168 tc_call_flush_resource(struct pipe_context *pipe, union tc_payload *payload)
2169 {
2170 pipe->flush_resource(pipe, payload->resource);
2171 pipe_resource_reference(&payload->resource, NULL);
2172 }
2173
2174 static void
2175 tc_flush_resource(struct pipe_context *_pipe,
2176 struct pipe_resource *resource)
2177 {
2178 struct threaded_context *tc = threaded_context(_pipe);
2179 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_flush_resource);
2180
2181 tc_set_resource_reference(&payload->resource, resource);
2182 }
2183
2184 static void
2185 tc_call_invalidate_resource(struct pipe_context *pipe, union tc_payload *payload)
2186 {
2187 pipe->invalidate_resource(pipe, payload->resource);
2188 pipe_resource_reference(&payload->resource, NULL);
2189 }
2190
2191 static void
2192 tc_invalidate_resource(struct pipe_context *_pipe,
2193 struct pipe_resource *resource)
2194 {
2195 struct threaded_context *tc = threaded_context(_pipe);
2196
2197 if (resource->target == PIPE_BUFFER) {
2198 tc_invalidate_buffer(tc, threaded_resource(resource));
2199 return;
2200 }
2201
2202 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_invalidate_resource);
2203 tc_set_resource_reference(&payload->resource, resource);
2204 }
2205
2206 struct tc_clear {
2207 unsigned buffers;
2208 union pipe_color_union color;
2209 double depth;
2210 unsigned stencil;
2211 };
2212
2213 static void
2214 tc_call_clear(struct pipe_context *pipe, union tc_payload *payload)
2215 {
2216 struct tc_clear *p = (struct tc_clear *)payload;
2217 pipe->clear(pipe, p->buffers, &p->color, p->depth, p->stencil);
2218 }
2219
2220 static void
2221 tc_clear(struct pipe_context *_pipe, unsigned buffers,
2222 const union pipe_color_union *color, double depth,
2223 unsigned stencil)
2224 {
2225 struct threaded_context *tc = threaded_context(_pipe);
2226 struct tc_clear *p = tc_add_struct_typed_call(tc, TC_CALL_clear, tc_clear);
2227
2228 p->buffers = buffers;
2229 p->color = *color;
2230 p->depth = depth;
2231 p->stencil = stencil;
2232 }
2233
2234 static void
2235 tc_clear_render_target(struct pipe_context *_pipe,
2236 struct pipe_surface *dst,
2237 const union pipe_color_union *color,
2238 unsigned dstx, unsigned dsty,
2239 unsigned width, unsigned height,
2240 bool render_condition_enabled)
2241 {
2242 struct threaded_context *tc = threaded_context(_pipe);
2243 struct pipe_context *pipe = tc->pipe;
2244
2245 tc_sync(tc);
2246 pipe->clear_render_target(pipe, dst, color, dstx, dsty, width, height,
2247 render_condition_enabled);
2248 }
2249
2250 static void
2251 tc_clear_depth_stencil(struct pipe_context *_pipe,
2252 struct pipe_surface *dst, unsigned clear_flags,
2253 double depth, unsigned stencil, unsigned dstx,
2254 unsigned dsty, unsigned width, unsigned height,
2255 bool render_condition_enabled)
2256 {
2257 struct threaded_context *tc = threaded_context(_pipe);
2258 struct pipe_context *pipe = tc->pipe;
2259
2260 tc_sync(tc);
2261 pipe->clear_depth_stencil(pipe, dst, clear_flags, depth, stencil,
2262 dstx, dsty, width, height,
2263 render_condition_enabled);
2264 }
2265
2266 struct tc_clear_buffer {
2267 struct pipe_resource *res;
2268 unsigned offset;
2269 unsigned size;
2270 char clear_value[16];
2271 int clear_value_size;
2272 };
2273
2274 static void
2275 tc_call_clear_buffer(struct pipe_context *pipe, union tc_payload *payload)
2276 {
2277 struct tc_clear_buffer *p = (struct tc_clear_buffer *)payload;
2278
2279 pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
2280 p->clear_value_size);
2281 pipe_resource_reference(&p->res, NULL);
2282 }
2283
2284 static void
2285 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
2286 unsigned offset, unsigned size,
2287 const void *clear_value, int clear_value_size)
2288 {
2289 struct threaded_context *tc = threaded_context(_pipe);
2290 struct threaded_resource *tres = threaded_resource(res);
2291 struct tc_clear_buffer *p =
2292 tc_add_struct_typed_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
2293
2294 tc_set_resource_reference(&p->res, res);
2295 p->offset = offset;
2296 p->size = size;
2297 memcpy(p->clear_value, clear_value, clear_value_size);
2298 p->clear_value_size = clear_value_size;
2299
2300 util_range_add(&tres->valid_buffer_range, offset, offset + size);
2301 }
2302
2303 struct tc_clear_texture {
2304 struct pipe_resource *res;
2305 unsigned level;
2306 struct pipe_box box;
2307 char data[16];
2308 };
2309
2310 static void
2311 tc_call_clear_texture(struct pipe_context *pipe, union tc_payload *payload)
2312 {
2313 struct tc_clear_texture *p = (struct tc_clear_texture *)payload;
2314
2315 pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
2316 pipe_resource_reference(&p->res, NULL);
2317 }
2318
2319 static void
2320 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
2321 unsigned level, const struct pipe_box *box, const void *data)
2322 {
2323 struct threaded_context *tc = threaded_context(_pipe);
2324 struct tc_clear_texture *p =
2325 tc_add_struct_typed_call(tc, TC_CALL_clear_texture, tc_clear_texture);
2326
2327 tc_set_resource_reference(&p->res, res);
2328 p->level = level;
2329 p->box = *box;
2330 memcpy(p->data, data,
2331 util_format_get_blocksize(res->format));
2332 }
2333
2334 struct tc_resource_commit {
2335 struct pipe_resource *res;
2336 unsigned level;
2337 struct pipe_box box;
2338 bool commit;
2339 };
2340
2341 static void
2342 tc_call_resource_commit(struct pipe_context *pipe, union tc_payload *payload)
2343 {
2344 struct tc_resource_commit *p = (struct tc_resource_commit *)payload;
2345
2346 pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
2347 pipe_resource_reference(&p->res, NULL);
2348 }
2349
2350 static bool
2351 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
2352 unsigned level, struct pipe_box *box, bool commit)
2353 {
2354 struct threaded_context *tc = threaded_context(_pipe);
2355 struct tc_resource_commit *p =
2356 tc_add_struct_typed_call(tc, TC_CALL_resource_commit, tc_resource_commit);
2357
2358 tc_set_resource_reference(&p->res, res);
2359 p->level = level;
2360 p->box = *box;
2361 p->commit = commit;
2362 return true; /* we don't care about the return value for this call */
2363 }
2364
2365
2366 /********************************************************************
2367 * create & destroy
2368 */
2369
2370 static void
2371 tc_destroy(struct pipe_context *_pipe)
2372 {
2373 struct threaded_context *tc = threaded_context(_pipe);
2374 struct pipe_context *pipe = tc->pipe;
2375
2376 if (tc->base.const_uploader &&
2377 tc->base.stream_uploader != tc->base.const_uploader)
2378 u_upload_destroy(tc->base.const_uploader);
2379
2380 if (tc->base.stream_uploader)
2381 u_upload_destroy(tc->base.stream_uploader);
2382
2383 tc_sync(tc);
2384
2385 if (util_queue_is_initialized(&tc->queue)) {
2386 util_queue_destroy(&tc->queue);
2387
2388 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
2389 util_queue_fence_destroy(&tc->batch_slots[i].fence);
2390 assert(!tc->batch_slots[i].token);
2391 }
2392 }
2393
2394 slab_destroy_child(&tc->pool_transfers);
2395 assert(tc->batch_slots[tc->next].num_total_call_slots == 0);
2396 pipe->destroy(pipe);
2397 os_free_aligned(tc);
2398 }
2399
2400 static const tc_execute execute_func[TC_NUM_CALLS] = {
2401 #define CALL(name) tc_call_##name,
2402 #include "u_threaded_context_calls.h"
2403 #undef CALL
2404 };
2405
2406 /**
2407 * Wrap an existing pipe_context into a threaded_context.
2408 *
2409 * \param pipe pipe_context to wrap
2410 * \param parent_transfer_pool parent slab pool set up for creating pipe_-
2411 * transfer objects; the driver should have one
2412 * in pipe_screen.
2413 * \param replace_buffer callback for replacing a pipe_resource's storage
2414 * with another pipe_resource's storage.
2415 * \param out if successful, the threaded_context will be returned here in
2416 * addition to the return value if "out" != NULL
2417 */
2418 struct pipe_context *
2419 threaded_context_create(struct pipe_context *pipe,
2420 struct slab_parent_pool *parent_transfer_pool,
2421 tc_replace_buffer_storage_func replace_buffer,
2422 tc_create_fence_func create_fence,
2423 struct threaded_context **out)
2424 {
2425 struct threaded_context *tc;
2426
2427 STATIC_ASSERT(sizeof(union tc_payload) <= 8);
2428 STATIC_ASSERT(sizeof(struct tc_call) <= 16);
2429
2430 if (!pipe)
2431 return NULL;
2432
2433 util_cpu_detect();
2434
2435 if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1))
2436 return pipe;
2437
2438 tc = os_malloc_aligned(sizeof(struct threaded_context), 16);
2439 if (!tc) {
2440 pipe->destroy(pipe);
2441 return NULL;
2442 }
2443 memset(tc, 0, sizeof(*tc));
2444
2445 assert((uintptr_t)tc % 16 == 0);
2446 /* These should be static asserts, but they don't work with MSVC */
2447 assert(offsetof(struct threaded_context, batch_slots) % 16 == 0);
2448 assert(offsetof(struct threaded_context, batch_slots[0].call) % 16 == 0);
2449 assert(offsetof(struct threaded_context, batch_slots[0].call[1]) % 16 == 0);
2450 assert(offsetof(struct threaded_context, batch_slots[1].call) % 16 == 0);
2451
2452 /* The driver context isn't wrapped, so set its "priv" to NULL. */
2453 pipe->priv = NULL;
2454
2455 tc->pipe = pipe;
2456 tc->replace_buffer_storage = replace_buffer;
2457 tc->create_fence = create_fence;
2458 tc->map_buffer_alignment =
2459 pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
2460 tc->base.priv = pipe; /* priv points to the wrapped driver context */
2461 tc->base.screen = pipe->screen;
2462 tc->base.destroy = tc_destroy;
2463
2464 tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
2465 if (pipe->stream_uploader == pipe->const_uploader)
2466 tc->base.const_uploader = tc->base.stream_uploader;
2467 else
2468 tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
2469
2470 if (!tc->base.stream_uploader || !tc->base.const_uploader)
2471 goto fail;
2472
2473 /* The queue size is the number of batches "waiting". Batches are removed
2474 * from the queue before being executed, so keep one tc_batch slot for that
2475 * execution. Also, keep one unused slot for an unflushed batch.
2476 */
2477 if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1, 0))
2478 goto fail;
2479
2480 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
2481 tc->batch_slots[i].sentinel = TC_SENTINEL;
2482 tc->batch_slots[i].pipe = pipe;
2483 util_queue_fence_init(&tc->batch_slots[i].fence);
2484 }
2485
2486 LIST_INITHEAD(&tc->unflushed_queries);
2487
2488 slab_create_child(&tc->pool_transfers, parent_transfer_pool);
2489
2490 #define CTX_INIT(_member) \
2491 tc->base._member = tc->pipe->_member ? tc_##_member : NULL
2492
2493 CTX_INIT(flush);
2494 CTX_INIT(draw_vbo);
2495 CTX_INIT(launch_grid);
2496 CTX_INIT(resource_copy_region);
2497 CTX_INIT(blit);
2498 CTX_INIT(clear);
2499 CTX_INIT(clear_render_target);
2500 CTX_INIT(clear_depth_stencil);
2501 CTX_INIT(clear_buffer);
2502 CTX_INIT(clear_texture);
2503 CTX_INIT(flush_resource);
2504 CTX_INIT(generate_mipmap);
2505 CTX_INIT(render_condition);
2506 CTX_INIT(create_query);
2507 CTX_INIT(create_batch_query);
2508 CTX_INIT(destroy_query);
2509 CTX_INIT(begin_query);
2510 CTX_INIT(end_query);
2511 CTX_INIT(get_query_result);
2512 CTX_INIT(get_query_result_resource);
2513 CTX_INIT(set_active_query_state);
2514 CTX_INIT(create_blend_state);
2515 CTX_INIT(bind_blend_state);
2516 CTX_INIT(delete_blend_state);
2517 CTX_INIT(create_sampler_state);
2518 CTX_INIT(bind_sampler_states);
2519 CTX_INIT(delete_sampler_state);
2520 CTX_INIT(create_rasterizer_state);
2521 CTX_INIT(bind_rasterizer_state);
2522 CTX_INIT(delete_rasterizer_state);
2523 CTX_INIT(create_depth_stencil_alpha_state);
2524 CTX_INIT(bind_depth_stencil_alpha_state);
2525 CTX_INIT(delete_depth_stencil_alpha_state);
2526 CTX_INIT(create_fs_state);
2527 CTX_INIT(bind_fs_state);
2528 CTX_INIT(delete_fs_state);
2529 CTX_INIT(create_vs_state);
2530 CTX_INIT(bind_vs_state);
2531 CTX_INIT(delete_vs_state);
2532 CTX_INIT(create_gs_state);
2533 CTX_INIT(bind_gs_state);
2534 CTX_INIT(delete_gs_state);
2535 CTX_INIT(create_tcs_state);
2536 CTX_INIT(bind_tcs_state);
2537 CTX_INIT(delete_tcs_state);
2538 CTX_INIT(create_tes_state);
2539 CTX_INIT(bind_tes_state);
2540 CTX_INIT(delete_tes_state);
2541 CTX_INIT(create_compute_state);
2542 CTX_INIT(bind_compute_state);
2543 CTX_INIT(delete_compute_state);
2544 CTX_INIT(create_vertex_elements_state);
2545 CTX_INIT(bind_vertex_elements_state);
2546 CTX_INIT(delete_vertex_elements_state);
2547 CTX_INIT(set_blend_color);
2548 CTX_INIT(set_stencil_ref);
2549 CTX_INIT(set_sample_mask);
2550 CTX_INIT(set_min_samples);
2551 CTX_INIT(set_clip_state);
2552 CTX_INIT(set_constant_buffer);
2553 CTX_INIT(set_framebuffer_state);
2554 CTX_INIT(set_polygon_stipple);
2555 CTX_INIT(set_scissor_states);
2556 CTX_INIT(set_viewport_states);
2557 CTX_INIT(set_window_rectangles);
2558 CTX_INIT(set_sampler_views);
2559 CTX_INIT(set_tess_state);
2560 CTX_INIT(set_shader_buffers);
2561 CTX_INIT(set_shader_images);
2562 CTX_INIT(set_vertex_buffers);
2563 CTX_INIT(create_stream_output_target);
2564 CTX_INIT(stream_output_target_destroy);
2565 CTX_INIT(set_stream_output_targets);
2566 CTX_INIT(create_sampler_view);
2567 CTX_INIT(sampler_view_destroy);
2568 CTX_INIT(create_surface);
2569 CTX_INIT(surface_destroy);
2570 CTX_INIT(transfer_map);
2571 CTX_INIT(transfer_flush_region);
2572 CTX_INIT(transfer_unmap);
2573 CTX_INIT(buffer_subdata);
2574 CTX_INIT(texture_subdata);
2575 CTX_INIT(texture_barrier);
2576 CTX_INIT(memory_barrier);
2577 CTX_INIT(resource_commit);
2578 CTX_INIT(create_video_codec);
2579 CTX_INIT(create_video_buffer);
2580 CTX_INIT(set_compute_resources);
2581 CTX_INIT(set_global_binding);
2582 CTX_INIT(get_sample_position);
2583 CTX_INIT(invalidate_resource);
2584 CTX_INIT(get_device_reset_status);
2585 CTX_INIT(set_device_reset_callback);
2586 CTX_INIT(dump_debug_state);
2587 CTX_INIT(emit_string_marker);
2588 CTX_INIT(set_debug_callback);
2589 CTX_INIT(create_fence_fd);
2590 CTX_INIT(fence_server_sync);
2591 CTX_INIT(get_timestamp);
2592 CTX_INIT(create_texture_handle);
2593 CTX_INIT(delete_texture_handle);
2594 CTX_INIT(make_texture_handle_resident);
2595 CTX_INIT(create_image_handle);
2596 CTX_INIT(delete_image_handle);
2597 CTX_INIT(make_image_handle_resident);
2598 #undef CTX_INIT
2599
2600 if (out)
2601 *out = tc;
2602
2603 return &tc->base;
2604
2605 fail:
2606 tc_destroy(&tc->base);
2607 return NULL;
2608 }
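
/* Typical driver-side usage (illustrative sketch only; the mydrv_* names are
 * hypothetical and not part of this file). threaded_context_create() returns
 * "pipe" unchanged when threading is disabled, so the result can be handed
 * to the state tracker as-is; create_fence may be NULL if the driver does
 * not support creating fences for deferred/asynchronous flushes:
 *
 *    struct pipe_context *
 *    mydrv_context_create(struct pipe_screen *screen, void *priv, unsigned flags)
 *    {
 *       struct pipe_context *pipe =
 *          mydrv_context_create_unwrapped(screen, priv, flags);
 *
 *       return threaded_context_create(pipe,
 *                                      &mydrv_screen(screen)->pool_transfers,
 *                                      mydrv_replace_buffer_storage,
 *                                      mydrv_create_fence,
 *                                      NULL);
 *    }
 */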