gallium/u_threaded: align batches and call slots to 16 bytes
[mesa.git] src/gallium/auxiliary/util/u_threaded_context.c
1 /**************************************************************************
2 *
3 * Copyright 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * on the rights to use, copy, modify, merge, publish, distribute, sub
10 * license, and/or sell copies of the Software, and to permit persons to whom
11 * the Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
21 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
22 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
23 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 *
25 **************************************************************************/
26
27 #include "util/u_threaded_context.h"
28 #include "util/u_cpu_detect.h"
29 #include "util/u_format.h"
30 #include "util/u_inlines.h"
31 #include "util/u_memory.h"
32 #include "util/u_upload_mgr.h"
33
34 /* 0 = disabled, 1 = assertions, 2 = printfs */
35 #define TC_DEBUG 0
36
37 #if TC_DEBUG >= 1
38 #define tc_assert assert
39 #else
40 #define tc_assert(x)
41 #endif
42
43 #if TC_DEBUG >= 2
44 #define tc_printf printf
45 #define tc_asprintf asprintf
46 #define tc_strcmp strcmp
47 #else
48 #define tc_printf(...)
49 #define tc_asprintf(...) 0
50 #define tc_strcmp(...) 0
51 #endif
52
53 #define TC_SENTINEL 0x5ca1ab1e
54
55 enum tc_call_id {
56 #define CALL(name) TC_CALL_##name,
57 #include "u_threaded_context_calls.h"
58 #undef CALL
59 TC_NUM_CALLS,
60 };
61
62 typedef void (*tc_execute)(struct pipe_context *pipe, union tc_payload *payload);
63
64 static const tc_execute execute_func[TC_NUM_CALLS];
65
66 static void
67 tc_batch_check(struct tc_batch *batch)
68 {
69 tc_assert(batch->sentinel == TC_SENTINEL);
70 tc_assert(batch->sentinel2 == TC_SENTINEL);
71 tc_assert(batch->num_total_call_slots <= TC_CALLS_PER_BATCH);
72 }
73
74 static void
75 tc_debug_check(struct threaded_context *tc)
76 {
77 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
78 tc_batch_check(&tc->batch_slots[i]);
79 tc_assert(tc->batch_slots[i].pipe == tc->pipe);
80 }
81 }
82
83 static void
84 tc_batch_execute(void *job, int thread_index)
85 {
86 struct tc_batch *batch = job;
87 struct pipe_context *pipe = batch->pipe;
88 struct tc_call *last = &batch->call[batch->num_total_call_slots];
89
90 tc_batch_check(batch);
91
92 for (struct tc_call *iter = batch->call; iter != last;
93 iter += iter->num_call_slots) {
94 tc_assert(iter->sentinel == TC_SENTINEL);
95 execute_func[iter->call_id](pipe, &iter->payload);
96 }
97
98 tc_batch_check(batch);
99 batch->num_total_call_slots = 0;
100 }
101
102 static void
103 tc_batch_flush(struct threaded_context *tc)
104 {
105 struct tc_batch *next = &tc->batch_slots[tc->next];
106
107 tc_assert(next->num_total_call_slots != 0);
108 tc_batch_check(next);
109 tc_debug_check(tc);
110 p_atomic_add(&tc->num_offloaded_slots, next->num_total_call_slots);
111
112 util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
113 NULL);
114 tc->last = tc->next;
115 tc->next = (tc->next + 1) % TC_MAX_BATCHES;
116 }
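/* Illustrative note: batches form a small ring of TC_MAX_BATCHES entries
 * (defined in u_threaded_context.h). tc_batch_flush() hands the current batch
 * to the worker thread via util_queue_add_job() and advances tc->next, while
 * tc->last remembers the most recently submitted batch; because the queue
 * executes jobs in submission order, waiting on that single fence in
 * _tc_sync() is enough to know that all earlier batches have finished too.
 */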
117
118 /* This is the function that adds variable-sized calls into the current
119  * batch. It also flushes the batch if there is not enough space in it.
120  * All other higher-level "add" functions use it.
121  */
122 static union tc_payload *
123 tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
124 unsigned payload_size)
125 {
126 struct tc_batch *next = &tc->batch_slots[tc->next];
127 unsigned total_size = offsetof(struct tc_call, payload) + payload_size;
128 unsigned num_call_slots = DIV_ROUND_UP(total_size, sizeof(struct tc_call));
129
130 tc_debug_check(tc);
131
132 if (unlikely(next->num_total_call_slots + num_call_slots > TC_CALLS_PER_BATCH)) {
133 tc_batch_flush(tc);
134 next = &tc->batch_slots[tc->next];
135 tc_assert(next->num_total_call_slots == 0);
136 }
137
138 tc_assert(util_queue_fence_is_signalled(&next->fence));
139
140 struct tc_call *call = &next->call[next->num_total_call_slots];
141 next->num_total_call_slots += num_call_slots;
142
143 call->sentinel = TC_SENTINEL;
144 call->call_id = id;
145 call->num_call_slots = num_call_slots;
146
147 tc_debug_check(tc);
148 return &call->payload;
149 }
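/* Illustrative sizing example (the numbers assume a 16-byte struct tc_call
 * with an 8-byte header; the real layout lives in u_threaded_context.h):
 *
 *    payload_size   = 24
 *    total_size     = offsetof(struct tc_call, payload) + 24 = 32
 *    num_call_slots = DIV_ROUND_UP(32, 16) = 2
 *
 * The call then occupies two consecutive entries of batch->call[], and
 * tc_batch_execute() skips both at once by advancing iter by
 * iter->num_call_slots.
 */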
150
151 #define tc_add_struct_typed_call(tc, execute, type) \
152 ((struct type*)tc_add_sized_call(tc, execute, sizeof(struct type)))
153
154 #define tc_add_slot_based_call(tc, execute, type, num_slots) \
155 ((struct type*)tc_add_sized_call(tc, execute, \
156 sizeof(struct type) + \
157 sizeof(((struct type*)NULL)->slot[0]) * \
158 (num_slots)))
159
160 static union tc_payload *
161 tc_add_small_call(struct threaded_context *tc, enum tc_call_id id)
162 {
163 return tc_add_sized_call(tc, id, 0);
164 }
165
166 static void
167 _tc_sync(struct threaded_context *tc, const char *info, const char *func)
168 {
169 struct tc_batch *last = &tc->batch_slots[tc->last];
170 struct tc_batch *next = &tc->batch_slots[tc->next];
171 bool synced = false;
172
173 tc_debug_check(tc);
174
175 /* Only wait for queued calls... */
176 if (!util_queue_fence_is_signalled(&last->fence)) {
177 util_queue_fence_wait(&last->fence);
178 synced = true;
179 }
180
181 tc_debug_check(tc);
182
183    /* ... and execute unflushed calls directly. */
184 if (next->num_total_call_slots) {
185 p_atomic_add(&tc->num_direct_slots, next->num_total_call_slots);
186 tc_batch_execute(next, 0);
187 synced = true;
188 }
189
190 if (synced) {
191 p_atomic_inc(&tc->num_syncs);
192
193 if (tc_strcmp(func, "tc_destroy") != 0)
194 tc_printf("sync %s %s\n", func, info);
195 }
196
197 tc_debug_check(tc);
198 }
199
200 #define tc_sync(tc) _tc_sync(tc, "", __func__)
201 #define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)
202
203 static void
204 tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
205 {
206 *dst = NULL;
207 pipe_resource_reference(dst, src);
208 }
209
210 void
211 threaded_resource_init(struct pipe_resource *res)
212 {
213 struct threaded_resource *tres = threaded_resource(res);
214
215 tres->latest = &tres->b;
216 util_range_init(&tres->valid_buffer_range);
217 tres->base_valid_buffer_range = &tres->valid_buffer_range;
218 tres->is_shared = false;
219 tres->is_user_ptr = false;
220 }
221
222 void
223 threaded_resource_deinit(struct pipe_resource *res)
224 {
225 struct threaded_resource *tres = threaded_resource(res);
226
227 if (tres->latest != &tres->b)
228 pipe_resource_reference(&tres->latest, NULL);
229 util_range_destroy(&tres->valid_buffer_range);
230 }
231
232 struct pipe_context *
233 threaded_context_unwrap_sync(struct pipe_context *pipe)
234 {
235 if (!pipe || !pipe->priv)
236 return pipe;
237
238 tc_sync(threaded_context(pipe));
239 return (struct pipe_context*)pipe->priv;
240 }
241
242
243 /********************************************************************
244 * simple functions
245 */
246
247 #define TC_FUNC1(func, m_payload, qualifier, type, deref, deref2) \
248 static void \
249 tc_call_##func(struct pipe_context *pipe, union tc_payload *payload) \
250 { \
251 pipe->func(pipe, deref2((type*)payload)); \
252 } \
253 \
254 static void \
255 tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
256 { \
257 struct threaded_context *tc = threaded_context(_pipe); \
258 type *p = (type*)tc_add_sized_call(tc, TC_CALL_##func, sizeof(type)); \
259 *p = deref(param); \
260 }
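/* Roughly, TC_FUNC1(set_sample_mask, sample_mask, , unsigned, , *) expands to
 * something like the sketch below (illustrative only; see the macro above for
 * the exact form):
 *
 *    static void
 *    tc_call_set_sample_mask(struct pipe_context *pipe, union tc_payload *payload)
 *    {
 *       pipe->set_sample_mask(pipe, *(unsigned*)payload);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       unsigned *p = (unsigned*)
 *          tc_add_sized_call(tc, TC_CALL_set_sample_mask, sizeof(unsigned));
 *       *p = param;
 *    }
 */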
261
262 TC_FUNC1(set_active_query_state, flags, , boolean, , *)
263
264 TC_FUNC1(set_blend_color, blend_color, const, struct pipe_blend_color, *, )
265 TC_FUNC1(set_stencil_ref, stencil_ref, const, struct pipe_stencil_ref, *, )
266 TC_FUNC1(set_clip_state, clip_state, const, struct pipe_clip_state, *, )
267 TC_FUNC1(set_sample_mask, sample_mask, , unsigned, , *)
268 TC_FUNC1(set_min_samples, min_samples, , unsigned, , *)
269 TC_FUNC1(set_polygon_stipple, polygon_stipple, const, struct pipe_poly_stipple, *, )
270
271 TC_FUNC1(texture_barrier, flags, , unsigned, , *)
272 TC_FUNC1(memory_barrier, flags, , unsigned, , *)
273
274
275 /********************************************************************
276 * queries
277 */
278
279 static struct pipe_query *
280 tc_create_query(struct pipe_context *_pipe, unsigned query_type,
281 unsigned index)
282 {
283 struct threaded_context *tc = threaded_context(_pipe);
284 struct pipe_context *pipe = tc->pipe;
285
286 return pipe->create_query(pipe, query_type, index);
287 }
288
289 static struct pipe_query *
290 tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
291 unsigned *query_types)
292 {
293 struct threaded_context *tc = threaded_context(_pipe);
294 struct pipe_context *pipe = tc->pipe;
295
296 return pipe->create_batch_query(pipe, num_queries, query_types);
297 }
298
299 static void
300 tc_call_destroy_query(struct pipe_context *pipe, union tc_payload *payload)
301 {
302 pipe->destroy_query(pipe, payload->query);
303 }
304
305 static void
306 tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
307 {
308 struct threaded_context *tc = threaded_context(_pipe);
309 struct threaded_query *tq = threaded_query(query);
310
311 if (tq->head_unflushed.next)
312 LIST_DEL(&tq->head_unflushed);
313
314 tc_add_small_call(tc, TC_CALL_destroy_query)->query = query;
315 }
316
317 static void
318 tc_call_begin_query(struct pipe_context *pipe, union tc_payload *payload)
319 {
320 pipe->begin_query(pipe, payload->query);
321 }
322
323 static boolean
324 tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
325 {
326 struct threaded_context *tc = threaded_context(_pipe);
327 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_begin_query);
328
329 payload->query = query;
330 return true; /* we don't care about the return value for this call */
331 }
332
333 static void
334 tc_call_end_query(struct pipe_context *pipe, union tc_payload *payload)
335 {
336 pipe->end_query(pipe, payload->query);
337 }
338
339 static bool
340 tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
341 {
342 struct threaded_context *tc = threaded_context(_pipe);
343 struct threaded_query *tq = threaded_query(query);
344 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_end_query);
345
346 payload->query = query;
347
348 tq->flushed = false;
349 if (!tq->head_unflushed.next)
350 LIST_ADD(&tq->head_unflushed, &tc->unflushed_queries);
351
352 return true; /* we don't care about the return value for this call */
353 }
354
355 static boolean
356 tc_get_query_result(struct pipe_context *_pipe,
357 struct pipe_query *query, boolean wait,
358 union pipe_query_result *result)
359 {
360 struct threaded_context *tc = threaded_context(_pipe);
361 struct threaded_query *tq = threaded_query(query);
362 struct pipe_context *pipe = tc->pipe;
363
364 if (!tq->flushed)
365 tc_sync_msg(tc, wait ? "wait" : "nowait");
366
367 bool success = pipe->get_query_result(pipe, query, wait, result);
368
369 if (success) {
370 tq->flushed = true;
371 if (tq->head_unflushed.next)
372 LIST_DEL(&tq->head_unflushed);
373 }
374 return success;
375 }
376
377 struct tc_query_result_resource {
378 struct pipe_query *query;
379 boolean wait;
380 enum pipe_query_value_type result_type;
381 int index;
382 struct pipe_resource *resource;
383 unsigned offset;
384 };
385
386 static void
387 tc_call_get_query_result_resource(struct pipe_context *pipe,
388 union tc_payload *payload)
389 {
390 struct tc_query_result_resource *p = (struct tc_query_result_resource *)payload;
391
392 pipe->get_query_result_resource(pipe, p->query, p->wait, p->result_type,
393 p->index, p->resource, p->offset);
394 pipe_resource_reference(&p->resource, NULL);
395 }
396
397 static void
398 tc_get_query_result_resource(struct pipe_context *_pipe,
399 struct pipe_query *query, boolean wait,
400 enum pipe_query_value_type result_type, int index,
401 struct pipe_resource *resource, unsigned offset)
402 {
403 struct threaded_context *tc = threaded_context(_pipe);
404 struct tc_query_result_resource *p =
405 tc_add_struct_typed_call(tc, TC_CALL_get_query_result_resource,
406 tc_query_result_resource);
407
408 p->query = query;
409 p->wait = wait;
410 p->result_type = result_type;
411 p->index = index;
412 tc_set_resource_reference(&p->resource, resource);
413 p->offset = offset;
414 }
415
416 struct tc_render_condition {
417 struct pipe_query *query;
418 bool condition;
419 unsigned mode;
420 };
421
422 static void
423 tc_call_render_condition(struct pipe_context *pipe, union tc_payload *payload)
424 {
425 struct tc_render_condition *p = (struct tc_render_condition *)payload;
426 pipe->render_condition(pipe, p->query, p->condition, p->mode);
427 }
428
429 static void
430 tc_render_condition(struct pipe_context *_pipe,
431 struct pipe_query *query, boolean condition,
432 uint mode)
433 {
434 struct threaded_context *tc = threaded_context(_pipe);
435 struct tc_render_condition *p =
436 tc_add_struct_typed_call(tc, TC_CALL_render_condition, tc_render_condition);
437
438 p->query = query;
439 p->condition = condition;
440 p->mode = mode;
441 }
442
443
444 /********************************************************************
445 * constant (immutable) states
446 */
447
448 #define TC_CSO_CREATE(name, sname) \
449 static void * \
450 tc_create_##name##_state(struct pipe_context *_pipe, \
451 const struct pipe_##sname##_state *state) \
452 { \
453 struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
454 return pipe->create_##name##_state(pipe, state); \
455 }
456
457 #define TC_CSO_BIND(name) TC_FUNC1(bind_##name##_state, cso, , void *, , *)
458 #define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, cso, , void *, , *)
459
460 #define TC_CSO_WHOLE2(name, sname) \
461 TC_CSO_CREATE(name, sname) \
462 TC_CSO_BIND(name) \
463 TC_CSO_DELETE(name)
464
465 #define TC_CSO_WHOLE(name) TC_CSO_WHOLE2(name, name)
466
467 TC_CSO_WHOLE(blend)
468 TC_CSO_WHOLE(rasterizer)
469 TC_CSO_WHOLE(depth_stencil_alpha)
470 TC_CSO_WHOLE(compute)
471 TC_CSO_WHOLE2(fs, shader)
472 TC_CSO_WHOLE2(vs, shader)
473 TC_CSO_WHOLE2(gs, shader)
474 TC_CSO_WHOLE2(tcs, shader)
475 TC_CSO_WHOLE2(tes, shader)
476 TC_CSO_CREATE(sampler, sampler)
477 TC_CSO_DELETE(sampler)
478 TC_CSO_BIND(vertex_elements)
479 TC_CSO_DELETE(vertex_elements)
480
481 static void *
482 tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
483 const struct pipe_vertex_element *elems)
484 {
485 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
486
487 return pipe->create_vertex_elements_state(pipe, count, elems);
488 }
489
490 struct tc_sampler_states {
491 ubyte shader, start, count;
492 void *slot[0]; /* more will be allocated if needed */
493 };
494
495 static void
496 tc_call_bind_sampler_states(struct pipe_context *pipe, union tc_payload *payload)
497 {
498 struct tc_sampler_states *p = (struct tc_sampler_states *)payload;
499 pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
500 }
501
502 static void
503 tc_bind_sampler_states(struct pipe_context *_pipe,
504 enum pipe_shader_type shader,
505 unsigned start, unsigned count, void **states)
506 {
507 if (!count)
508 return;
509
510 struct threaded_context *tc = threaded_context(_pipe);
511 struct tc_sampler_states *p =
512 tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);
513
514 p->shader = shader;
515 p->start = start;
516 p->count = count;
517 memcpy(p->slot, states, count * sizeof(states[0]));
518 }
519
520
521 /********************************************************************
522 * immediate states
523 */
524
525 static void
526 tc_call_set_framebuffer_state(struct pipe_context *pipe, union tc_payload *payload)
527 {
528 struct pipe_framebuffer_state *p = (struct pipe_framebuffer_state *)payload;
529
530 pipe->set_framebuffer_state(pipe, p);
531
532 unsigned nr_cbufs = p->nr_cbufs;
533 for (unsigned i = 0; i < nr_cbufs; i++)
534 pipe_surface_reference(&p->cbufs[i], NULL);
535 pipe_surface_reference(&p->zsbuf, NULL);
536 }
537
538 static void
539 tc_set_framebuffer_state(struct pipe_context *_pipe,
540 const struct pipe_framebuffer_state *fb)
541 {
542 struct threaded_context *tc = threaded_context(_pipe);
543 struct pipe_framebuffer_state *p =
544 tc_add_struct_typed_call(tc, TC_CALL_set_framebuffer_state,
545 pipe_framebuffer_state);
546 unsigned nr_cbufs = fb->nr_cbufs;
547
548 p->width = fb->width;
549 p->height = fb->height;
550 p->samples = fb->samples;
551 p->layers = fb->layers;
552 p->nr_cbufs = nr_cbufs;
553
554 for (unsigned i = 0; i < nr_cbufs; i++) {
555 p->cbufs[i] = NULL;
556 pipe_surface_reference(&p->cbufs[i], fb->cbufs[i]);
557 }
558 p->zsbuf = NULL;
559 pipe_surface_reference(&p->zsbuf, fb->zsbuf);
560 }
561
562 static void
563 tc_call_set_tess_state(struct pipe_context *pipe, union tc_payload *payload)
564 {
565 float *p = (float*)payload;
566 pipe->set_tess_state(pipe, p, p + 4);
567 }
568
569 static void
570 tc_set_tess_state(struct pipe_context *_pipe,
571 const float default_outer_level[4],
572 const float default_inner_level[2])
573 {
574 struct threaded_context *tc = threaded_context(_pipe);
575 float *p = (float*)tc_add_sized_call(tc, TC_CALL_set_tess_state,
576 sizeof(float) * 6);
577
578 memcpy(p, default_outer_level, 4 * sizeof(float));
579 memcpy(p + 4, default_inner_level, 2 * sizeof(float));
580 }
581
582 struct tc_constant_buffer {
583 ubyte shader, index;
584 struct pipe_constant_buffer cb;
585 };
586
587 static void
588 tc_call_set_constant_buffer(struct pipe_context *pipe, union tc_payload *payload)
589 {
590 struct tc_constant_buffer *p = (struct tc_constant_buffer *)payload;
591
592 pipe->set_constant_buffer(pipe,
593 p->shader,
594 p->index,
595 &p->cb);
596 pipe_resource_reference(&p->cb.buffer, NULL);
597 }
598
599 static void
600 tc_set_constant_buffer(struct pipe_context *_pipe,
601 uint shader, uint index,
602 const struct pipe_constant_buffer *cb)
603 {
604 struct threaded_context *tc = threaded_context(_pipe);
605 struct pipe_resource *buffer = NULL;
606 unsigned offset;
607
608 /* This must be done before adding set_constant_buffer, because it could
609 * generate e.g. transfer_unmap and flush partially-uninitialized
610 * set_constant_buffer to the driver if it was done afterwards.
611 */
612 if (cb && cb->user_buffer) {
613 u_upload_data(tc->base.const_uploader, 0, cb->buffer_size, 64,
614 cb->user_buffer, &offset, &buffer);
615 }
616
617 struct tc_constant_buffer *p =
618 tc_add_struct_typed_call(tc, TC_CALL_set_constant_buffer,
619 tc_constant_buffer);
620 p->shader = shader;
621 p->index = index;
622
623 if (cb) {
624 if (cb->user_buffer) {
625 p->cb.buffer_size = cb->buffer_size;
626 p->cb.user_buffer = NULL;
627 p->cb.buffer_offset = offset;
628 p->cb.buffer = buffer;
629 } else {
630 tc_set_resource_reference(&p->cb.buffer,
631 cb->buffer);
632 memcpy(&p->cb, cb, sizeof(*cb));
633 }
634 } else {
635 memset(&p->cb, 0, sizeof(*cb));
636 }
637 }
638
639 struct tc_scissors {
640 ubyte start, count;
641 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
642 };
643
644 static void
645 tc_call_set_scissor_states(struct pipe_context *pipe, union tc_payload *payload)
646 {
647 struct tc_scissors *p = (struct tc_scissors *)payload;
648 pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
649 }
650
651 static void
652 tc_set_scissor_states(struct pipe_context *_pipe,
653 unsigned start, unsigned count,
654 const struct pipe_scissor_state *states)
655 {
656 struct threaded_context *tc = threaded_context(_pipe);
657 struct tc_scissors *p =
658 tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);
659
660 p->start = start;
661 p->count = count;
662 memcpy(&p->slot, states, count * sizeof(states[0]));
663 }
664
665 struct tc_viewports {
666 ubyte start, count;
667 struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
668 };
669
670 static void
671 tc_call_set_viewport_states(struct pipe_context *pipe, union tc_payload *payload)
672 {
673 struct tc_viewports *p = (struct tc_viewports *)payload;
674 pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
675 }
676
677 static void
678 tc_set_viewport_states(struct pipe_context *_pipe,
679 unsigned start, unsigned count,
680 const struct pipe_viewport_state *states)
681 {
682 if (!count)
683 return;
684
685 struct threaded_context *tc = threaded_context(_pipe);
686 struct tc_viewports *p =
687 tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);
688
689 p->start = start;
690 p->count = count;
691 memcpy(&p->slot, states, count * sizeof(states[0]));
692 }
693
694 struct tc_window_rects {
695 bool include;
696 ubyte count;
697 struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
698 };
699
700 static void
701 tc_call_set_window_rectangles(struct pipe_context *pipe,
702 union tc_payload *payload)
703 {
704 struct tc_window_rects *p = (struct tc_window_rects *)payload;
705 pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
706 }
707
708 static void
709 tc_set_window_rectangles(struct pipe_context *_pipe, boolean include,
710 unsigned count,
711 const struct pipe_scissor_state *rects)
712 {
713 struct threaded_context *tc = threaded_context(_pipe);
714 struct tc_window_rects *p =
715 tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);
716
717 p->include = include;
718 p->count = count;
719 memcpy(p->slot, rects, count * sizeof(rects[0]));
720 }
721
722 struct tc_sampler_views {
723 ubyte shader, start, count;
724 struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
725 };
726
727 static void
728 tc_call_set_sampler_views(struct pipe_context *pipe, union tc_payload *payload)
729 {
730 struct tc_sampler_views *p = (struct tc_sampler_views *)payload;
731 unsigned count = p->count;
732
733 pipe->set_sampler_views(pipe, p->shader, p->start, p->count, p->slot);
734 for (unsigned i = 0; i < count; i++)
735 pipe_sampler_view_reference(&p->slot[i], NULL);
736 }
737
738 static void
739 tc_set_sampler_views(struct pipe_context *_pipe,
740 enum pipe_shader_type shader,
741 unsigned start, unsigned count,
742 struct pipe_sampler_view **views)
743 {
744 if (!count)
745 return;
746
747 struct threaded_context *tc = threaded_context(_pipe);
748 struct tc_sampler_views *p =
749 tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views, count);
750
751 p->shader = shader;
752 p->start = start;
753 p->count = count;
754
755 if (views) {
756 for (unsigned i = 0; i < count; i++) {
757 p->slot[i] = NULL;
758 pipe_sampler_view_reference(&p->slot[i], views[i]);
759 }
760 } else {
761 memset(p->slot, 0, count * sizeof(views[0]));
762 }
763 }
764
765 struct tc_shader_images {
766 ubyte shader, start, count;
767 bool unbind;
768 struct pipe_image_view slot[0]; /* more will be allocated if needed */
769 };
770
771 static void
772 tc_call_set_shader_images(struct pipe_context *pipe, union tc_payload *payload)
773 {
774 struct tc_shader_images *p = (struct tc_shader_images *)payload;
775 unsigned count = p->count;
776
777 if (p->unbind) {
778 pipe->set_shader_images(pipe, p->shader, p->start, p->count, NULL);
779 return;
780 }
781
782 pipe->set_shader_images(pipe, p->shader, p->start, p->count, p->slot);
783
784 for (unsigned i = 0; i < count; i++)
785 pipe_resource_reference(&p->slot[i].resource, NULL);
786 }
787
788 static void
789 tc_set_shader_images(struct pipe_context *_pipe,
790 enum pipe_shader_type shader,
791 unsigned start, unsigned count,
792 const struct pipe_image_view *images)
793 {
794 if (!count)
795 return;
796
797 struct threaded_context *tc = threaded_context(_pipe);
798 struct tc_shader_images *p =
799 tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
800 images ? count : 0);
801
802 p->shader = shader;
803 p->start = start;
804 p->count = count;
805 p->unbind = images == NULL;
806
807 if (images) {
808 for (unsigned i = 0; i < count; i++) {
809 tc_set_resource_reference(&p->slot[i].resource, images[i].resource);
810
811 if (images[i].access & PIPE_IMAGE_ACCESS_WRITE &&
812 images[i].resource &&
813 images[i].resource->target == PIPE_BUFFER) {
814 struct threaded_resource *tres =
815 threaded_resource(images[i].resource);
816
817 util_range_add(&tres->valid_buffer_range, images[i].u.buf.offset,
818 images[i].u.buf.offset + images[i].u.buf.size);
819 }
820 }
821 memcpy(p->slot, images, count * sizeof(images[0]));
822 }
823 }
824
825 struct tc_shader_buffers {
826 ubyte shader, start, count;
827 bool unbind;
828 struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
829 };
830
831 static void
832 tc_call_set_shader_buffers(struct pipe_context *pipe, union tc_payload *payload)
833 {
834 struct tc_shader_buffers *p = (struct tc_shader_buffers *)payload;
835 unsigned count = p->count;
836
837 if (p->unbind) {
838 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL);
839 return;
840 }
841
842 pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot);
843
844 for (unsigned i = 0; i < count; i++)
845 pipe_resource_reference(&p->slot[i].buffer, NULL);
846 }
847
848 static void
849 tc_set_shader_buffers(struct pipe_context *_pipe, unsigned shader,
850 unsigned start, unsigned count,
851 const struct pipe_shader_buffer *buffers)
852 {
853 if (!count)
854 return;
855
856 struct threaded_context *tc = threaded_context(_pipe);
857 struct tc_shader_buffers *p =
858 tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
859 buffers ? count : 0);
860
861 p->shader = shader;
862 p->start = start;
863 p->count = count;
864 p->unbind = buffers == NULL;
865
866 if (buffers) {
867 for (unsigned i = 0; i < count; i++) {
868 struct pipe_shader_buffer *dst = &p->slot[i];
869 const struct pipe_shader_buffer *src = buffers + i;
870
871 tc_set_resource_reference(&dst->buffer, src->buffer);
872 dst->buffer_offset = src->buffer_offset;
873 dst->buffer_size = src->buffer_size;
874
875 if (src->buffer) {
876 struct threaded_resource *tres = threaded_resource(src->buffer);
877
878 util_range_add(&tres->valid_buffer_range, src->buffer_offset,
879 src->buffer_offset + src->buffer_size);
880 }
881 }
882 }
883 }
884
885 struct tc_vertex_buffers {
886 ubyte start, count;
887 bool unbind;
888 struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
889 };
890
891 static void
892 tc_call_set_vertex_buffers(struct pipe_context *pipe, union tc_payload *payload)
893 {
894 struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)payload;
895 unsigned count = p->count;
896
897 if (p->unbind) {
898 pipe->set_vertex_buffers(pipe, p->start, count, NULL);
899 return;
900 }
901
902 for (unsigned i = 0; i < count; i++)
903 tc_assert(!p->slot[i].is_user_buffer);
904
905 pipe->set_vertex_buffers(pipe, p->start, count, p->slot);
906 for (unsigned i = 0; i < count; i++)
907 pipe_resource_reference(&p->slot[i].buffer.resource, NULL);
908 }
909
910 static void
911 tc_set_vertex_buffers(struct pipe_context *_pipe,
912 unsigned start, unsigned count,
913 const struct pipe_vertex_buffer *buffers)
914 {
915 struct threaded_context *tc = threaded_context(_pipe);
916
917 if (!count)
918 return;
919
920 if (buffers) {
921 struct tc_vertex_buffers *p =
922 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
923 p->start = start;
924 p->count = count;
925 p->unbind = false;
926
927 for (unsigned i = 0; i < count; i++) {
928 struct pipe_vertex_buffer *dst = &p->slot[i];
929 const struct pipe_vertex_buffer *src = buffers + i;
930
931 tc_assert(!src->is_user_buffer);
932 dst->stride = src->stride;
933 dst->is_user_buffer = false;
934 tc_set_resource_reference(&dst->buffer.resource,
935 src->buffer.resource);
936 dst->buffer_offset = src->buffer_offset;
937 }
938 } else {
939 struct tc_vertex_buffers *p =
940 tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
941 p->start = start;
942 p->count = count;
943 p->unbind = true;
944 }
945 }
946
947 struct tc_stream_outputs {
948 unsigned count;
949 struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
950 unsigned offsets[PIPE_MAX_SO_BUFFERS];
951 };
952
953 static void
954 tc_call_set_stream_output_targets(struct pipe_context *pipe, union tc_payload *payload)
955 {
956 struct tc_stream_outputs *p = (struct tc_stream_outputs *)payload;
957 unsigned count = p->count;
958
959 pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
960 for (unsigned i = 0; i < count; i++)
961 pipe_so_target_reference(&p->targets[i], NULL);
962 }
963
964 static void
965 tc_set_stream_output_targets(struct pipe_context *_pipe,
966 unsigned count,
967 struct pipe_stream_output_target **tgs,
968 const unsigned *offsets)
969 {
970 struct threaded_context *tc = threaded_context(_pipe);
971 struct tc_stream_outputs *p =
972 tc_add_struct_typed_call(tc, TC_CALL_set_stream_output_targets,
973 tc_stream_outputs);
974
975 for (unsigned i = 0; i < count; i++) {
976 p->targets[i] = NULL;
977 pipe_so_target_reference(&p->targets[i], tgs[i]);
978 }
979 p->count = count;
980 memcpy(p->offsets, offsets, count * sizeof(unsigned));
981 }
982
983 static void
984 tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
985 unsigned count, struct pipe_surface **resources)
986 {
987 struct threaded_context *tc = threaded_context(_pipe);
988 struct pipe_context *pipe = tc->pipe;
989
990 tc_sync(tc);
991 pipe->set_compute_resources(pipe, start, count, resources);
992 }
993
994 static void
995 tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
996 unsigned count, struct pipe_resource **resources,
997 uint32_t **handles)
998 {
999 struct threaded_context *tc = threaded_context(_pipe);
1000 struct pipe_context *pipe = tc->pipe;
1001
1002 tc_sync(tc);
1003 pipe->set_global_binding(pipe, first, count, resources, handles);
1004 }
1005
1006
1007 /********************************************************************
1008 * views
1009 */
1010
1011 static struct pipe_surface *
1012 tc_create_surface(struct pipe_context *_pipe,
1013 struct pipe_resource *resource,
1014 const struct pipe_surface *surf_tmpl)
1015 {
1016 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1017 struct pipe_surface *view =
1018 pipe->create_surface(pipe, resource, surf_tmpl);
1019
1020 if (view)
1021 view->context = _pipe;
1022 return view;
1023 }
1024
1025 static void
1026 tc_surface_destroy(struct pipe_context *_pipe,
1027 struct pipe_surface *surf)
1028 {
1029 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1030
1031 pipe->surface_destroy(pipe, surf);
1032 }
1033
1034 static struct pipe_sampler_view *
1035 tc_create_sampler_view(struct pipe_context *_pipe,
1036 struct pipe_resource *resource,
1037 const struct pipe_sampler_view *templ)
1038 {
1039 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1040 struct pipe_sampler_view *view =
1041 pipe->create_sampler_view(pipe, resource, templ);
1042
1043 if (view)
1044 view->context = _pipe;
1045 return view;
1046 }
1047
1048 static void
1049 tc_sampler_view_destroy(struct pipe_context *_pipe,
1050 struct pipe_sampler_view *view)
1051 {
1052 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1053
1054 pipe->sampler_view_destroy(pipe, view);
1055 }
1056
1057 static struct pipe_stream_output_target *
1058 tc_create_stream_output_target(struct pipe_context *_pipe,
1059 struct pipe_resource *res,
1060 unsigned buffer_offset,
1061 unsigned buffer_size)
1062 {
1063 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1064 struct threaded_resource *tres = threaded_resource(res);
1065 struct pipe_stream_output_target *view;
1066
1067 tc_sync(threaded_context(_pipe));
1068 util_range_add(&tres->valid_buffer_range, buffer_offset,
1069 buffer_offset + buffer_size);
1070
1071 view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1072 buffer_size);
1073 if (view)
1074 view->context = _pipe;
1075 return view;
1076 }
1077
1078 static void
1079 tc_stream_output_target_destroy(struct pipe_context *_pipe,
1080 struct pipe_stream_output_target *target)
1081 {
1082 struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1083
1084 pipe->stream_output_target_destroy(pipe, target);
1085 }
1086
1087
1088 /********************************************************************
1089 * transfer
1090 */
1091
1092 struct tc_replace_buffer_storage {
1093 struct pipe_resource *dst;
1094 struct pipe_resource *src;
1095 tc_replace_buffer_storage_func func;
1096 };
1097
1098 static void
1099 tc_call_replace_buffer_storage(struct pipe_context *pipe,
1100 union tc_payload *payload)
1101 {
1102 struct tc_replace_buffer_storage *p =
1103 (struct tc_replace_buffer_storage *)payload;
1104
1105 p->func(pipe, p->dst, p->src);
1106 pipe_resource_reference(&p->dst, NULL);
1107 pipe_resource_reference(&p->src, NULL);
1108 }
1109
1110 static bool
1111 tc_invalidate_buffer(struct threaded_context *tc,
1112 struct threaded_resource *tbuf)
1113 {
1114 /* We can't check if the buffer is idle, so we invalidate it
1115 * unconditionally. */
1116 struct pipe_screen *screen = tc->base.screen;
1117 struct pipe_resource *new_buf;
1118
1119 /* Shared, pinned, and sparse buffers can't be reallocated. */
1120 if (tbuf->is_shared ||
1121 tbuf->is_user_ptr ||
1122 tbuf->b.flags & PIPE_RESOURCE_FLAG_SPARSE)
1123 return false;
1124
1125 /* Allocate a new one. */
1126 new_buf = screen->resource_create(screen, &tbuf->b);
1127 if (!new_buf)
1128 return false;
1129
1130 /* Replace the "latest" pointer. */
1131 if (tbuf->latest != &tbuf->b)
1132 pipe_resource_reference(&tbuf->latest, NULL);
1133
1134 tbuf->latest = new_buf;
1135 util_range_set_empty(&tbuf->valid_buffer_range);
1136
1137 /* The valid range should point to the original buffer. */
1138 threaded_resource(new_buf)->base_valid_buffer_range =
1139 &tbuf->valid_buffer_range;
1140
1141 /* Enqueue storage replacement of the original buffer. */
1142 struct tc_replace_buffer_storage *p =
1143 tc_add_struct_typed_call(tc, TC_CALL_replace_buffer_storage,
1144 tc_replace_buffer_storage);
1145
1146 p->func = tc->replace_buffer_storage;
1147 tc_set_resource_reference(&p->dst, &tbuf->b);
1148 tc_set_resource_reference(&p->src, new_buf);
1149 return true;
1150 }
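/* Illustrative flow: when a buffer is mapped with DISCARD_WHOLE_RESOURCE
 * (see tc_improve_map_buffer_flags below) while the GPU might still be using
 * it, tc_invalidate_buffer() allocates fresh storage, points tbuf->latest at
 * it so that subsequent threaded calls use the new storage right away, and
 * queues tc->replace_buffer_storage so the driver later swaps the storage of
 * the original pipe_resource as well.
 */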
1151
1152 static unsigned
1153 tc_improve_map_buffer_flags(struct threaded_context *tc,
1154 struct threaded_resource *tres, unsigned usage,
1155 unsigned offset, unsigned size)
1156 {
1157 /* Sparse buffers can't be mapped directly and can't be reallocated
1158 * (fully invalidated). That may just be a radeonsi limitation, but
1159 * the threaded context must obey it with radeonsi.
1160 */
1161 if (tres->b.flags & PIPE_RESOURCE_FLAG_SPARSE) {
1162 /* We can use DISCARD_RANGE instead of full discard. This is the only
1163 * fast path for sparse buffers that doesn't need thread synchronization.
1164 */
1165 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE)
1166 usage |= PIPE_TRANSFER_DISCARD_RANGE;
1167
1168       /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
1169        * The threaded context doesn't do unsynchronized mappings and
1170        * invalidations of sparse buffers itself, so correct driver behavior
1171        * can't lead to incorrect behavior with the threaded context.
1172        */
1173 return usage;
1174 }
1175
1176 /* Handle CPU reads trivially. */
1177 if (usage & PIPE_TRANSFER_READ) {
1178       /* Drivers aren't allowed to do buffer invalidations. */
1179 return (usage & ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) |
1180 TC_TRANSFER_MAP_NO_INVALIDATE |
1181 TC_TRANSFER_MAP_IGNORE_VALID_RANGE;
1182 }
1183
1184 /* See if the buffer range being mapped has never been initialized,
1185 * in which case it can be mapped unsynchronized. */
1186 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
1187 !tres->is_shared &&
1188 !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size))
1189 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1190
1191 if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
1192 /* If discarding the entire range, discard the whole resource instead. */
1193 if (usage & PIPE_TRANSFER_DISCARD_RANGE &&
1194 offset == 0 && size == tres->b.width0)
1195 usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
1196
1197 /* Discard the whole resource if needed. */
1198 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
1199 if (tc_invalidate_buffer(tc, tres))
1200 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1201 else
1202 usage |= PIPE_TRANSFER_DISCARD_RANGE; /* fallback */
1203 }
1204 }
1205
1206 /* We won't need this flag anymore. */
1207 /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
1208 usage &= ~PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
1209
1210 /* GL_AMD_pinned_memory and persistent mappings can't use staging
1211 * buffers. */
1212 if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
1213 PIPE_TRANSFER_PERSISTENT) ||
1214 tres->is_user_ptr)
1215 usage &= ~PIPE_TRANSFER_DISCARD_RANGE;
1216
1217    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
1218 if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
1219 usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
1220
1221 /* Never invalidate inside the driver and never infer "unsynchronized". */
1222 return usage |
1223 TC_TRANSFER_MAP_NO_INVALIDATE |
1224 TC_TRANSFER_MAP_IGNORE_VALID_RANGE;
1225 }
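/* Worked example (illustrative): a PIPE_TRANSFER_WRITE map of a range that
 * was never written (no intersection with valid_buffer_range) on a non-shared,
 * non-sparse buffer gains PIPE_TRANSFER_UNSYNCHRONIZED and
 * TC_TRANSFER_MAP_THREADED_UNSYNC, so tc_transfer_map() below can call the
 * driver without syncing with the worker thread. A plain WRITE map of an
 * already-valid range gets neither flag and has to sync the thread before
 * mapping.
 */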
1226
1227 static void *
1228 tc_transfer_map(struct pipe_context *_pipe,
1229 struct pipe_resource *resource, unsigned level,
1230 unsigned usage, const struct pipe_box *box,
1231 struct pipe_transfer **transfer)
1232 {
1233 struct threaded_context *tc = threaded_context(_pipe);
1234 struct threaded_resource *tres = threaded_resource(resource);
1235 struct pipe_context *pipe = tc->pipe;
1236
1237 if (resource->target == PIPE_BUFFER) {
1238 usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
1239
1240 /* Do a staging transfer within the threaded context. The driver should
1241 * only get resource_copy_region.
1242 */
1243 if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
1244 struct threaded_transfer *ttrans = slab_alloc(&tc->pool_transfers);
1245 uint8_t *map;
1246
1247 ttrans->staging = NULL;
1248
1249 u_upload_alloc(tc->base.stream_uploader, 0,
1250 box->width + (box->x % tc->map_buffer_alignment),
1251 64, &ttrans->offset, &ttrans->staging, (void**)&map);
1252 if (!map) {
1253 slab_free(&tc->pool_transfers, ttrans);
1254 return NULL;
1255 }
1256
1257 tc_set_resource_reference(&ttrans->b.resource, resource);
1258 ttrans->b.level = 0;
1259 ttrans->b.usage = usage;
1260 ttrans->b.box = *box;
1261 ttrans->b.stride = 0;
1262 ttrans->b.layer_stride = 0;
1263 *transfer = &ttrans->b;
1264 return map + (box->x % tc->map_buffer_alignment);
1265 }
1266 }
1267
1268    /* Unsynchronized buffer mappings don't have to synchronize the thread. */
1269 if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
1270 tc_sync_msg(tc, resource->target != PIPE_BUFFER ? " texture" :
1271 usage & PIPE_TRANSFER_DISCARD_RANGE ? " discard_range" :
1272 usage & PIPE_TRANSFER_READ ? " read" : " ??");
1273
1274 return pipe->transfer_map(pipe, tres->latest ? tres->latest : resource,
1275 level, usage, box, transfer);
1276 }
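/* Note on the staging path above (illustrative): the pointer returned for a
 * DISCARD_RANGE buffer map points into a u_upload_mgr allocation owned by the
 * threaded context, not into the real resource. The written data only reaches
 * the driver later, when tc_buffer_do_flush_region() enqueues a
 * resource_copy_region from the staging buffer into the original one (at
 * transfer_flush_region or transfer_unmap time).
 */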
1277
1278 struct tc_transfer_flush_region {
1279 struct pipe_transfer *transfer;
1280 struct pipe_box box;
1281 };
1282
1283 static void
1284 tc_call_transfer_flush_region(struct pipe_context *pipe,
1285 union tc_payload *payload)
1286 {
1287 struct tc_transfer_flush_region *p =
1288 (struct tc_transfer_flush_region *)payload;
1289
1290 pipe->transfer_flush_region(pipe, p->transfer, &p->box);
1291 }
1292
1293 struct tc_resource_copy_region {
1294 struct pipe_resource *dst;
1295 unsigned dst_level;
1296 unsigned dstx, dsty, dstz;
1297 struct pipe_resource *src;
1298 unsigned src_level;
1299 struct pipe_box src_box;
1300 };
1301
1302 static void
1303 tc_resource_copy_region(struct pipe_context *_pipe,
1304 struct pipe_resource *dst, unsigned dst_level,
1305 unsigned dstx, unsigned dsty, unsigned dstz,
1306 struct pipe_resource *src, unsigned src_level,
1307 const struct pipe_box *src_box);
1308
1309 static void
1310 tc_buffer_do_flush_region(struct threaded_context *tc,
1311 struct threaded_transfer *ttrans,
1312 const struct pipe_box *box)
1313 {
1314 struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
1315
1316 if (ttrans->staging) {
1317 struct pipe_box src_box;
1318
1319 u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
1320 box->width, &src_box);
1321
1322 /* Copy the staging buffer into the original one. */
1323 tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
1324 ttrans->staging, 0, &src_box);
1325 }
1326
1327 util_range_add(tres->base_valid_buffer_range, box->x, box->x + box->width);
1328 }
1329
1330 static void
1331 tc_transfer_flush_region(struct pipe_context *_pipe,
1332 struct pipe_transfer *transfer,
1333 const struct pipe_box *rel_box)
1334 {
1335 struct threaded_context *tc = threaded_context(_pipe);
1336 struct threaded_transfer *ttrans = threaded_transfer(transfer);
1337 struct threaded_resource *tres = threaded_resource(transfer->resource);
1338 unsigned required_usage = PIPE_TRANSFER_WRITE |
1339 PIPE_TRANSFER_FLUSH_EXPLICIT;
1340
1341 if (tres->b.target == PIPE_BUFFER) {
1342 if ((transfer->usage & required_usage) == required_usage) {
1343 struct pipe_box box;
1344
1345 u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
1346 tc_buffer_do_flush_region(tc, ttrans, &box);
1347 }
1348
1349 /* Staging transfers don't send the call to the driver. */
1350 if (ttrans->staging)
1351 return;
1352 }
1353
1354 struct tc_transfer_flush_region *p =
1355 tc_add_struct_typed_call(tc, TC_CALL_transfer_flush_region,
1356 tc_transfer_flush_region);
1357 p->transfer = transfer;
1358 p->box = *rel_box;
1359 }
1360
1361 static void
1362 tc_call_transfer_unmap(struct pipe_context *pipe, union tc_payload *payload)
1363 {
1364 pipe->transfer_unmap(pipe, payload->transfer);
1365 }
1366
1367 static void
1368 tc_transfer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
1369 {
1370 struct threaded_context *tc = threaded_context(_pipe);
1371 struct threaded_transfer *ttrans = threaded_transfer(transfer);
1372 struct threaded_resource *tres = threaded_resource(transfer->resource);
1373
1374 if (tres->b.target == PIPE_BUFFER) {
1375 if (transfer->usage & PIPE_TRANSFER_WRITE &&
1376 !(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
1377 tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
1378
1379 /* Staging transfers don't send the call to the driver. */
1380 if (ttrans->staging) {
1381 pipe_resource_reference(&ttrans->staging, NULL);
1382 pipe_resource_reference(&ttrans->b.resource, NULL);
1383 slab_free(&tc->pool_transfers, ttrans);
1384 return;
1385 }
1386 }
1387
1388 tc_add_small_call(tc, TC_CALL_transfer_unmap)->transfer = transfer;
1389 }
1390
1391 struct tc_buffer_subdata {
1392 struct pipe_resource *resource;
1393 unsigned usage, offset, size;
1394 char slot[0]; /* more will be allocated if needed */
1395 };
1396
1397 static void
1398 tc_call_buffer_subdata(struct pipe_context *pipe, union tc_payload *payload)
1399 {
1400 struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)payload;
1401
1402 pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
1403 p->slot);
1404 pipe_resource_reference(&p->resource, NULL);
1405 }
1406
1407 static void
1408 tc_buffer_subdata(struct pipe_context *_pipe,
1409 struct pipe_resource *resource,
1410 unsigned usage, unsigned offset,
1411 unsigned size, const void *data)
1412 {
1413 struct threaded_context *tc = threaded_context(_pipe);
1414 struct threaded_resource *tres = threaded_resource(resource);
1415
1416 if (!size)
1417 return;
1418
1419 usage |= PIPE_TRANSFER_WRITE |
1420 PIPE_TRANSFER_DISCARD_RANGE;
1421
1422 usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
1423
1424    /* Unsynchronized and big transfers should use transfer_map. Also handle
1425 * full invalidations, because drivers aren't allowed to do them.
1426 */
1427 if (usage & (PIPE_TRANSFER_UNSYNCHRONIZED |
1428 PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) ||
1429 size > TC_MAX_SUBDATA_BYTES) {
1430 struct pipe_transfer *transfer;
1431 struct pipe_box box;
1432 uint8_t *map = NULL;
1433
1434 u_box_1d(offset, size, &box);
1435
1436 map = tc_transfer_map(_pipe, resource, 0, usage, &box, &transfer);
1437 if (map) {
1438 memcpy(map, data, size);
1439 tc_transfer_unmap(_pipe, transfer);
1440 }
1441 return;
1442 }
1443
1444 util_range_add(&tres->valid_buffer_range, offset, offset + size);
1445
1446 /* The upload is small. Enqueue it. */
1447 struct tc_buffer_subdata *p =
1448 tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
1449
1450 tc_set_resource_reference(&p->resource, resource);
1451 p->usage = usage;
1452 p->offset = offset;
1453 p->size = size;
1454 memcpy(p->slot, data, size);
1455 }
1456
1457 struct tc_texture_subdata {
1458 struct pipe_resource *resource;
1459 unsigned level, usage, stride, layer_stride;
1460 struct pipe_box box;
1461 char slot[0]; /* more will be allocated if needed */
1462 };
1463
1464 static void
1465 tc_call_texture_subdata(struct pipe_context *pipe, union tc_payload *payload)
1466 {
1467 struct tc_texture_subdata *p = (struct tc_texture_subdata *)payload;
1468
1469 pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
1470 p->slot, p->stride, p->layer_stride);
1471 pipe_resource_reference(&p->resource, NULL);
1472 }
1473
1474 static void
1475 tc_texture_subdata(struct pipe_context *_pipe,
1476 struct pipe_resource *resource,
1477 unsigned level, unsigned usage,
1478 const struct pipe_box *box,
1479 const void *data, unsigned stride,
1480 unsigned layer_stride)
1481 {
1482 struct threaded_context *tc = threaded_context(_pipe);
1483 unsigned size;
1484
1485 assert(box->height >= 1);
1486 assert(box->depth >= 1);
1487
1488 size = (box->depth - 1) * layer_stride +
1489 (box->height - 1) * stride +
1490 box->width * util_format_get_blocksize(resource->format);
1491 if (!size)
1492 return;
1493
1494 /* Small uploads can be enqueued, big uploads must sync. */
1495 if (size <= TC_MAX_SUBDATA_BYTES) {
1496 struct tc_texture_subdata *p =
1497 tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
1498
1499 tc_set_resource_reference(&p->resource, resource);
1500 p->level = level;
1501 p->usage = usage;
1502 p->box = *box;
1503 p->stride = stride;
1504 p->layer_stride = layer_stride;
1505 memcpy(p->slot, data, size);
1506 } else {
1507 struct pipe_context *pipe = tc->pipe;
1508
1509 tc_sync(tc);
1510 pipe->texture_subdata(pipe, resource, level, usage, box, data,
1511 stride, layer_stride);
1512 }
1513 }
1514
1515
1516 /********************************************************************
1517 * miscellaneous
1518 */
1519
1520 #define TC_FUNC_SYNC_RET0(ret_type, func) \
1521 static ret_type \
1522 tc_##func(struct pipe_context *_pipe) \
1523 { \
1524 struct threaded_context *tc = threaded_context(_pipe); \
1525 struct pipe_context *pipe = tc->pipe; \
1526 tc_sync(tc); \
1527 return pipe->func(pipe); \
1528 }
1529
1530 TC_FUNC_SYNC_RET0(enum pipe_reset_status, get_device_reset_status)
1531 TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
1532
1533 static void
1534 tc_get_sample_position(struct pipe_context *_pipe,
1535 unsigned sample_count, unsigned sample_index,
1536 float *out_value)
1537 {
1538 struct threaded_context *tc = threaded_context(_pipe);
1539 struct pipe_context *pipe = tc->pipe;
1540
1541 tc_sync(tc);
1542 pipe->get_sample_position(pipe, sample_count, sample_index,
1543 out_value);
1544 }
1545
1546 static void
1547 tc_set_device_reset_callback(struct pipe_context *_pipe,
1548 const struct pipe_device_reset_callback *cb)
1549 {
1550 struct threaded_context *tc = threaded_context(_pipe);
1551 struct pipe_context *pipe = tc->pipe;
1552
1553 tc_sync(tc);
1554 pipe->set_device_reset_callback(pipe, cb);
1555 }
1556
1557 struct tc_string_marker {
1558 int len;
1559 char slot[0]; /* more will be allocated if needed */
1560 };
1561
1562 static void
1563 tc_call_emit_string_marker(struct pipe_context *pipe, union tc_payload *payload)
1564 {
1565 struct tc_string_marker *p = (struct tc_string_marker *)payload;
1566 pipe->emit_string_marker(pipe, p->slot, p->len);
1567 }
1568
1569 static void
1570 tc_emit_string_marker(struct pipe_context *_pipe,
1571 const char *string, int len)
1572 {
1573 struct threaded_context *tc = threaded_context(_pipe);
1574
1575 if (len <= TC_MAX_STRING_MARKER_BYTES) {
1576 struct tc_string_marker *p =
1577 tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
1578
1579 memcpy(p->slot, string, len);
1580 p->len = len;
1581 } else {
1582 struct pipe_context *pipe = tc->pipe;
1583
1584 tc_sync(tc);
1585 pipe->emit_string_marker(pipe, string, len);
1586 }
1587 }
1588
1589 static void
1590 tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
1591 unsigned flags)
1592 {
1593 struct threaded_context *tc = threaded_context(_pipe);
1594 struct pipe_context *pipe = tc->pipe;
1595
1596 tc_sync(tc);
1597 pipe->dump_debug_state(pipe, stream, flags);
1598 }
1599
1600 static void
1601 tc_set_debug_callback(struct pipe_context *_pipe,
1602 const struct pipe_debug_callback *cb)
1603 {
1604 struct threaded_context *tc = threaded_context(_pipe);
1605 struct pipe_context *pipe = tc->pipe;
1606
1607 /* Drop all synchronous debug callbacks. Drivers are expected to be OK
1608 * with this. shader-db will use an environment variable to disable
1609 * the threaded context.
1610 */
1611 if (cb && cb->debug_message && !cb->async)
1612 return;
1613
1614 tc_sync(tc);
1615 pipe->set_debug_callback(pipe, cb);
1616 }
1617
1618 static void
1619 tc_create_fence_fd(struct pipe_context *_pipe,
1620 struct pipe_fence_handle **fence, int fd)
1621 {
1622 struct threaded_context *tc = threaded_context(_pipe);
1623 struct pipe_context *pipe = tc->pipe;
1624
1625 tc_sync(tc);
1626 pipe->create_fence_fd(pipe, fence, fd);
1627 }
1628
1629 static void
1630 tc_fence_server_sync(struct pipe_context *_pipe,
1631 struct pipe_fence_handle *fence)
1632 {
1633 struct threaded_context *tc = threaded_context(_pipe);
1634 struct pipe_context *pipe = tc->pipe;
1635
1636 tc_sync(tc);
1637 pipe->fence_server_sync(pipe, fence);
1638 }
1639
1640 static struct pipe_video_codec *
1641 tc_create_video_codec(struct pipe_context *_pipe,
1642 const struct pipe_video_codec *templ)
1643 {
1644 unreachable("Threaded context should not be enabled for video APIs");
1645 return NULL;
1646 }
1647
1648 static struct pipe_video_buffer *
1649 tc_create_video_buffer(struct pipe_context *_pipe,
1650 const struct pipe_video_buffer *templ)
1651 {
1652 unreachable("Threaded context should not be enabled for video APIs");
1653 return NULL;
1654 }
1655
1656
1657 /********************************************************************
1658 * draw, launch, clear, blit, copy, flush
1659 */
1660
1661 static void
1662 tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
1663 unsigned flags)
1664 {
1665 struct threaded_context *tc = threaded_context(_pipe);
1666 struct pipe_context *pipe = tc->pipe;
1667 struct threaded_query *tq, *tmp;
1668
1669 LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
1670 tq->flushed = true;
1671 LIST_DEL(&tq->head_unflushed);
1672 }
1673
1674 /* TODO: deferred flushes? */
1675 tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
1676 flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
1677 pipe->flush(pipe, fence, flags);
1678 }
1679
1680 /* This is actually variable-sized, because indirect isn't allocated if it's
1681 * not needed. */
1682 struct tc_full_draw_info {
1683 struct pipe_draw_info draw;
1684 struct pipe_draw_indirect_info indirect;
1685 };
1686
1687 static void
1688 tc_call_draw_vbo(struct pipe_context *pipe, union tc_payload *payload)
1689 {
1690 struct tc_full_draw_info *info = (struct tc_full_draw_info*)payload;
1691
1692 pipe->draw_vbo(pipe, &info->draw);
1693 pipe_so_target_reference(&info->draw.count_from_stream_output, NULL);
1694 if (info->draw.index_size)
1695 pipe_resource_reference(&info->draw.index.resource, NULL);
1696 if (info->draw.indirect) {
1697 pipe_resource_reference(&info->indirect.buffer, NULL);
1698 pipe_resource_reference(&info->indirect.indirect_draw_count, NULL);
1699 }
1700 }
1701
1702 static struct tc_full_draw_info *
1703 tc_add_draw_vbo(struct pipe_context *_pipe, bool indirect)
1704 {
1705 return (struct tc_full_draw_info*)
1706 tc_add_sized_call(threaded_context(_pipe), TC_CALL_draw_vbo,
1707 indirect ? sizeof(struct tc_full_draw_info) :
1708 sizeof(struct pipe_draw_info));
1709 }
1710
1711 static void
1712 tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info)
1713 {
1714 struct threaded_context *tc = threaded_context(_pipe);
1715 struct pipe_draw_indirect_info *indirect = info->indirect;
1716 unsigned index_size = info->index_size;
1717 bool has_user_indices = info->has_user_indices;
1718
1719 if (index_size && has_user_indices) {
1720 unsigned size = info->count * index_size;
1721 struct pipe_resource *buffer = NULL;
1722 unsigned offset;
1723
1724 tc_assert(!indirect);
1725
1726 /* This must be done before adding draw_vbo, because it could generate
1727 * e.g. transfer_unmap and flush partially-uninitialized draw_vbo
1728 * to the driver if it was done afterwards.
1729 */
1730 u_upload_data(tc->base.stream_uploader, 0, size, 4, info->index.user,
1731 &offset, &buffer);
1732 if (unlikely(!buffer))
1733 return;
1734
1735 struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, false);
1736 p->draw.count_from_stream_output = NULL;
1737 pipe_so_target_reference(&p->draw.count_from_stream_output,
1738 info->count_from_stream_output);
1739 memcpy(&p->draw, info, sizeof(*info));
1740 p->draw.has_user_indices = false;
1741 p->draw.index.resource = buffer;
1742 p->draw.start = offset / index_size;
1743 } else {
1744 /* Non-indexed call or indexed with a real index buffer. */
1745 struct tc_full_draw_info *p = tc_add_draw_vbo(_pipe, indirect != NULL);
1746 p->draw.count_from_stream_output = NULL;
1747 pipe_so_target_reference(&p->draw.count_from_stream_output,
1748 info->count_from_stream_output);
1749 if (index_size) {
1750 tc_set_resource_reference(&p->draw.index.resource,
1751 info->index.resource);
1752 }
1753 memcpy(&p->draw, info, sizeof(*info));
1754
1755 if (indirect) {
1756 tc_set_resource_reference(&p->draw.indirect->buffer, indirect->buffer);
1757 tc_set_resource_reference(&p->indirect.indirect_draw_count,
1758 indirect->indirect_draw_count);
1759 memcpy(&p->indirect, indirect, sizeof(*indirect));
1760 p->draw.indirect = &p->indirect;
1761 }
1762 }
1763 }
1764
1765 static void
1766 tc_call_launch_grid(struct pipe_context *pipe, union tc_payload *payload)
1767 {
1768 struct pipe_grid_info *p = (struct pipe_grid_info *)payload;
1769
1770 pipe->launch_grid(pipe, p);
1771 pipe_resource_reference(&p->indirect, NULL);
1772 }
1773
1774 static void
1775 tc_launch_grid(struct pipe_context *_pipe,
1776 const struct pipe_grid_info *info)
1777 {
1778 struct threaded_context *tc = threaded_context(_pipe);
1779 struct pipe_grid_info *p = tc_add_struct_typed_call(tc, TC_CALL_launch_grid,
1780 pipe_grid_info);
1781 assert(info->input == NULL);
1782
1783 tc_set_resource_reference(&p->indirect, info->indirect);
1784 memcpy(p, info, sizeof(*info));
1785 }
1786
1787 static void
1788 tc_call_resource_copy_region(struct pipe_context *pipe, union tc_payload *payload)
1789 {
1790 struct tc_resource_copy_region *p = (struct tc_resource_copy_region *)payload;
1791
1792 pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
1793 p->dstz, p->src, p->src_level, &p->src_box);
1794 pipe_resource_reference(&p->dst, NULL);
1795 pipe_resource_reference(&p->src, NULL);
1796 }
1797
1798 static void
1799 tc_resource_copy_region(struct pipe_context *_pipe,
1800 struct pipe_resource *dst, unsigned dst_level,
1801 unsigned dstx, unsigned dsty, unsigned dstz,
1802 struct pipe_resource *src, unsigned src_level,
1803 const struct pipe_box *src_box)
1804 {
1805 struct threaded_context *tc = threaded_context(_pipe);
1806 struct threaded_resource *tdst = threaded_resource(dst);
1807 struct tc_resource_copy_region *p =
1808 tc_add_struct_typed_call(tc, TC_CALL_resource_copy_region,
1809 tc_resource_copy_region);
1810
1811 tc_set_resource_reference(&p->dst, dst);
1812 p->dst_level = dst_level;
1813 p->dstx = dstx;
1814 p->dsty = dsty;
1815 p->dstz = dstz;
1816 tc_set_resource_reference(&p->src, src);
1817 p->src_level = src_level;
1818 p->src_box = *src_box;
1819
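/* For buffer copies, grow the destination's range of valid (initialized)
* data, which later buffer mappings consult.
*/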
1820 if (dst->target == PIPE_BUFFER)
1821 util_range_add(&tdst->valid_buffer_range, dstx, dstx + src_box->width);
1822 }
1823
1824 static void
1825 tc_call_blit(struct pipe_context *pipe, union tc_payload *payload)
1826 {
1827 struct pipe_blit_info *blit = (struct pipe_blit_info*)payload;
1828
1829 pipe->blit(pipe, blit);
1830 pipe_resource_reference(&blit->dst.resource, NULL);
1831 pipe_resource_reference(&blit->src.resource, NULL);
1832 }
1833
1834 static void
1835 tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
1836 {
1837 struct threaded_context *tc = threaded_context(_pipe);
1838 struct pipe_blit_info *blit =
1839 tc_add_struct_typed_call(tc, TC_CALL_blit, pipe_blit_info);
1840
1841 tc_set_resource_reference(&blit->dst.resource, info->dst.resource);
1842 tc_set_resource_reference(&blit->src.resource, info->src.resource);
1843 memcpy(blit, info, sizeof(*info));
1844 }
1845
1846 struct tc_generate_mipmap {
1847 struct pipe_resource *res;
1848 enum pipe_format format;
1849 unsigned base_level;
1850 unsigned last_level;
1851 unsigned first_layer;
1852 unsigned last_layer;
1853 };
1854
1855 static void
1856 tc_call_generate_mipmap(struct pipe_context *pipe, union tc_payload *payload)
1857 {
1858 struct tc_generate_mipmap *p = (struct tc_generate_mipmap *)payload;
1859 bool result = pipe->generate_mipmap(pipe, p->res, p->format, p->base_level,
1860 p->last_level, p->first_layer,
1861 p->last_layer);
1862 assert(result); (void)result; /* avoid an unused-variable warning when asserts are disabled */
1863 pipe_resource_reference(&p->res, NULL);
1864 }
1865
1866 static boolean
1867 tc_generate_mipmap(struct pipe_context *_pipe,
1868 struct pipe_resource *res,
1869 enum pipe_format format,
1870 unsigned base_level,
1871 unsigned last_level,
1872 unsigned first_layer,
1873 unsigned last_layer)
1874 {
1875 struct threaded_context *tc = threaded_context(_pipe);
1876 struct pipe_context *pipe = tc->pipe;
1877 struct pipe_screen *screen = pipe->screen;
1878 unsigned bind = PIPE_BIND_SAMPLER_VIEW;
1879
1880 if (util_format_is_depth_or_stencil(format))
1881 bind = PIPE_BIND_DEPTH_STENCIL;
1882 else
1883 bind = PIPE_BIND_RENDER_TARGET;
1884
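/* The screen is used directly from the application thread here; drivers
* using u_threaded_context are expected to have a thread-safe pipe_screen.
*/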
1885 if (!screen->is_format_supported(screen, format, res->target,
1886 res->nr_samples, bind))
1887 return false;
1888
1889 struct tc_generate_mipmap *p =
1890 tc_add_struct_typed_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
1891
1892 tc_set_resource_reference(&p->res, res);
1893 p->format = format;
1894 p->base_level = base_level;
1895 p->last_level = last_level;
1896 p->first_layer = first_layer;
1897 p->last_layer = last_layer;
1898 return true;
1899 }
1900
1901 static void
1902 tc_call_flush_resource(struct pipe_context *pipe, union tc_payload *payload)
1903 {
1904 pipe->flush_resource(pipe, payload->resource);
1905 pipe_resource_reference(&payload->resource, NULL);
1906 }
1907
1908 static void
1909 tc_flush_resource(struct pipe_context *_pipe,
1910 struct pipe_resource *resource)
1911 {
1912 struct threaded_context *tc = threaded_context(_pipe);
1913 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_flush_resource);
1914
1915 tc_set_resource_reference(&payload->resource, resource);
1916 }
1917
1918 static void
1919 tc_call_invalidate_resource(struct pipe_context *pipe, union tc_payload *payload)
1920 {
1921 pipe->invalidate_resource(pipe, payload->resource);
1922 pipe_resource_reference(&payload->resource, NULL);
1923 }
1924
1925 static void
1926 tc_invalidate_resource(struct pipe_context *_pipe,
1927 struct pipe_resource *resource)
1928 {
1929 struct threaded_context *tc = threaded_context(_pipe);
1930
1931 if (resource->target == PIPE_BUFFER) {
1932 tc_invalidate_buffer(tc, threaded_resource(resource));
1933 return;
1934 }
1935
1936 union tc_payload *payload = tc_add_small_call(tc, TC_CALL_invalidate_resource);
1937 tc_set_resource_reference(&payload->resource, resource);
1938 }
1939
1940 struct tc_clear {
1941 unsigned buffers;
1942 union pipe_color_union color;
1943 double depth;
1944 unsigned stencil;
1945 };
1946
1947 static void
1948 tc_call_clear(struct pipe_context *pipe, union tc_payload *payload)
1949 {
1950 struct tc_clear *p = (struct tc_clear *)payload;
1951 pipe->clear(pipe, p->buffers, &p->color, p->depth, p->stencil);
1952 }
1953
1954 static void
1955 tc_clear(struct pipe_context *_pipe, unsigned buffers,
1956 const union pipe_color_union *color, double depth,
1957 unsigned stencil)
1958 {
1959 struct threaded_context *tc = threaded_context(_pipe);
1960 struct tc_clear *p = tc_add_struct_typed_call(tc, TC_CALL_clear, tc_clear);
1961
1962 p->buffers = buffers;
1963 p->color = *color;
1964 p->depth = depth;
1965 p->stencil = stencil;
1966 }
1967
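/* clear_render_target and clear_depth_stencil are handled synchronously:
* tc_sync drains the queue and the driver is called directly.
*/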
1968 static void
1969 tc_clear_render_target(struct pipe_context *_pipe,
1970 struct pipe_surface *dst,
1971 const union pipe_color_union *color,
1972 unsigned dstx, unsigned dsty,
1973 unsigned width, unsigned height,
1974 bool render_condition_enabled)
1975 {
1976 struct threaded_context *tc = threaded_context(_pipe);
1977 struct pipe_context *pipe = tc->pipe;
1978
1979 tc_sync(tc);
1980 pipe->clear_render_target(pipe, dst, color, dstx, dsty, width, height,
1981 render_condition_enabled);
1982 }
1983
1984 static void
1985 tc_clear_depth_stencil(struct pipe_context *_pipe,
1986 struct pipe_surface *dst, unsigned clear_flags,
1987 double depth, unsigned stencil, unsigned dstx,
1988 unsigned dsty, unsigned width, unsigned height,
1989 bool render_condition_enabled)
1990 {
1991 struct threaded_context *tc = threaded_context(_pipe);
1992 struct pipe_context *pipe = tc->pipe;
1993
1994 tc_sync(tc);
1995 pipe->clear_depth_stencil(pipe, dst, clear_flags, depth, stencil,
1996 dstx, dsty, width, height,
1997 render_condition_enabled);
1998 }
1999
2000 struct tc_clear_buffer {
2001 struct pipe_resource *res;
2002 unsigned offset;
2003 unsigned size;
2004 char clear_value[16];
2005 int clear_value_size;
2006 };
2007
2008 static void
2009 tc_call_clear_buffer(struct pipe_context *pipe, union tc_payload *payload)
2010 {
2011 struct tc_clear_buffer *p = (struct tc_clear_buffer *)payload;
2012
2013 pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
2014 p->clear_value_size);
2015 pipe_resource_reference(&p->res, NULL);
2016 }
2017
2018 static void
2019 tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
2020 unsigned offset, unsigned size,
2021 const void *clear_value, int clear_value_size)
2022 {
2023 struct threaded_context *tc = threaded_context(_pipe);
2024 struct threaded_resource *tres = threaded_resource(res);
2025 struct tc_clear_buffer *p =
2026 tc_add_struct_typed_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
2027
2028 tc_set_resource_reference(&p->res, res);
2029 p->offset = offset;
2030 p->size = size;
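/* The payload holds at most 16 bytes of clear value; clear_value_size is
* assumed not to exceed that.
*/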
2031 memcpy(p->clear_value, clear_value, clear_value_size);
2032 p->clear_value_size = clear_value_size;
2033
2034 util_range_add(&tres->valid_buffer_range, offset, offset + size);
2035 }
2036
2037 struct tc_clear_texture {
2038 struct pipe_resource *res;
2039 unsigned level;
2040 struct pipe_box box;
2041 char data[16];
2042 };
2043
2044 static void
2045 tc_call_clear_texture(struct pipe_context *pipe, union tc_payload *payload)
2046 {
2047 struct tc_clear_texture *p = (struct tc_clear_texture *)payload;
2048
2049 pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
2050 pipe_resource_reference(&p->res, NULL);
2051 }
2052
2053 static void
2054 tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
2055 unsigned level, const struct pipe_box *box, const void *data)
2056 {
2057 struct threaded_context *tc = threaded_context(_pipe);
2058 struct tc_clear_texture *p =
2059 tc_add_struct_typed_call(tc, TC_CALL_clear_texture, tc_clear_texture);
2060
2061 tc_set_resource_reference(&p->res, res);
2062 p->level = level;
2063 p->box = *box;
2064 memcpy(p->data, data,
2065 util_format_get_blocksize(res->format));
2066 }
2067
2068 struct tc_resource_commit {
2069 struct pipe_resource *res;
2070 unsigned level;
2071 struct pipe_box box;
2072 bool commit;
2073 };
2074
2075 static void
2076 tc_call_resource_commit(struct pipe_context *pipe, union tc_payload *payload)
2077 {
2078 struct tc_resource_commit *p = (struct tc_resource_commit *)payload;
2079
2080 pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
2081 pipe_resource_reference(&p->res, NULL);
2082 }
2083
2084 static bool
2085 tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
2086 unsigned level, struct pipe_box *box, bool commit)
2087 {
2088 struct threaded_context *tc = threaded_context(_pipe);
2089 struct tc_resource_commit *p =
2090 tc_add_struct_typed_call(tc, TC_CALL_resource_commit, tc_resource_commit);
2091
2092 tc_set_resource_reference(&p->res, res);
2093 p->level = level;
2094 p->box = *box;
2095 p->commit = commit;
2096 return true; /* we don't care about the return value for this call */
2097 }
2098
2099
2100 /********************************************************************
2101 * create & destroy
2102 */
2103
2104 static void
2105 tc_destroy(struct pipe_context *_pipe)
2106 {
2107 struct threaded_context *tc = threaded_context(_pipe);
2108 struct pipe_context *pipe = tc->pipe;
2109
2110 tc_sync(tc);
2111
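/* The queue may not be initialized if we get here through the "fail"
* path of threaded_context_create.
*/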
2112 if (util_queue_is_initialized(&tc->queue)) {
2113 util_queue_destroy(&tc->queue);
2114
2115 for (unsigned i = 0; i < TC_MAX_BATCHES; i++)
2116 util_queue_fence_destroy(&tc->batch_slots[i].fence);
2117 }
2118
2119 if (tc->base.const_uploader &&
2120 tc->base.stream_uploader != tc->base.const_uploader)
2121 u_upload_destroy(tc->base.const_uploader);
2122
2123 if (tc->base.stream_uploader)
2124 u_upload_destroy(tc->base.stream_uploader);
2125
2126 slab_destroy_child(&tc->pool_transfers);
2127 pipe->destroy(pipe);
2128 os_free_aligned(tc);
2129 }
2130
2131 static const tc_execute execute_func[TC_NUM_CALLS] = {
2132 #define CALL(name) tc_call_##name,
2133 #include "u_threaded_context_calls.h"
2134 #undef CALL
2135 };
2136
2137 /**
2138 * Wrap an existing pipe_context into a threaded_context.
2139 *
2140 * \param pipe pipe_context to wrap
2141 * \param parent_transfer_pool parent slab pool set up for creating
2142 * pipe_transfer objects; the driver should have
2143 * one in pipe_screen.
2144 * \param replace_buffer callback for replacing a pipe_resource's storage
2145 * with another pipe_resource's storage.
2146 * \param out if non-NULL, the new threaded_context is also stored here,
2147 * in addition to being returned.
2148 */
2149 struct pipe_context *
2150 threaded_context_create(struct pipe_context *pipe,
2151 struct slab_parent_pool *parent_transfer_pool,
2152 tc_replace_buffer_storage_func replace_buffer,
2153 struct threaded_context **out)
2154 {
2155 struct threaded_context *tc;
2156
2157 STATIC_ASSERT(sizeof(union tc_payload) <= 8);
2158 STATIC_ASSERT(sizeof(struct tc_call) <= 16);
2159
2160 if (!pipe)
2161 return NULL;
2162
2163 util_cpu_detect();
2164
2165 if (!debug_get_bool_option("GALLIUM_THREAD", util_cpu_caps.nr_cpus > 1))
2166 return pipe;
2167
2168 tc = os_malloc_aligned(sizeof(struct threaded_context), 16);
2169 if (!tc) {
2170 pipe->destroy(pipe);
2171 return NULL;
2172 }
2173 memset(tc, 0, sizeof(*tc));
2174
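/* Check that the allocation and the struct layout give the context,
* every batch, and every call slot 16-byte alignment.
*/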
2175 assert((uintptr_t)tc % 16 == 0);
2176 STATIC_ASSERT(offsetof(struct threaded_context, batch_slots[0]) % 16 == 0);
2177 STATIC_ASSERT(offsetof(struct threaded_context, batch_slots[0].call[0]) % 16 == 0);
2178 STATIC_ASSERT(offsetof(struct threaded_context, batch_slots[0].call[1]) % 16 == 0);
2179 STATIC_ASSERT(offsetof(struct threaded_context, batch_slots[1].call[0]) % 16 == 0);
2180
2181 /* The driver context isn't wrapped, so set its "priv" to NULL. */
2182 pipe->priv = NULL;
2183
2184 tc->pipe = pipe;
2185 tc->replace_buffer_storage = replace_buffer;
2186 tc->map_buffer_alignment =
2187 pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
2188 tc->base.priv = pipe; /* priv points to the wrapped driver context */
2189 tc->base.screen = pipe->screen;
2190 tc->base.destroy = tc_destroy;
2191
2192 tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
2193 if (pipe->stream_uploader == pipe->const_uploader)
2194 tc->base.const_uploader = tc->base.stream_uploader;
2195 else
2196 tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
2197
2198 if (!tc->base.stream_uploader || !tc->base.const_uploader)
2199 goto fail;
2200
2201 /* The queue size is the number of batches "waiting". Batches are removed
2202 * from the queue before being executed, so keep one tc_batch slot for that
2203 * execution. Also, keep one unused slot for an unflushed batch.
2204 */
2205 if (!util_queue_init(&tc->queue, "gallium_drv", TC_MAX_BATCHES - 2, 1))
2206 goto fail;
2207
2208 for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
2209 tc->batch_slots[i].sentinel = TC_SENTINEL;
2210 tc->batch_slots[i].sentinel2 = TC_SENTINEL;
2211 tc->batch_slots[i].pipe = pipe;
2212 util_queue_fence_init(&tc->batch_slots[i].fence);
2213 }
2214
2215 LIST_INITHEAD(&tc->unflushed_queries);
2216
2217 slab_create_child(&tc->pool_transfers, parent_transfer_pool);
2218
2219 #define CTX_INIT(_member) \
2220 tc->base._member = tc->pipe->_member ? tc_##_member : NULL
2221
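/* Install a threaded wrapper for every entry point the driver implements;
* hooks the driver leaves NULL stay NULL in the wrapper too.
*/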
2222 CTX_INIT(flush);
2223 CTX_INIT(draw_vbo);
2224 CTX_INIT(launch_grid);
2225 CTX_INIT(resource_copy_region);
2226 CTX_INIT(blit);
2227 CTX_INIT(clear);
2228 CTX_INIT(clear_render_target);
2229 CTX_INIT(clear_depth_stencil);
2230 CTX_INIT(clear_buffer);
2231 CTX_INIT(clear_texture);
2232 CTX_INIT(flush_resource);
2233 CTX_INIT(generate_mipmap);
2234 CTX_INIT(render_condition);
2235 CTX_INIT(create_query);
2236 CTX_INIT(create_batch_query);
2237 CTX_INIT(destroy_query);
2238 CTX_INIT(begin_query);
2239 CTX_INIT(end_query);
2240 CTX_INIT(get_query_result);
2241 CTX_INIT(get_query_result_resource);
2242 CTX_INIT(set_active_query_state);
2243 CTX_INIT(create_blend_state);
2244 CTX_INIT(bind_blend_state);
2245 CTX_INIT(delete_blend_state);
2246 CTX_INIT(create_sampler_state);
2247 CTX_INIT(bind_sampler_states);
2248 CTX_INIT(delete_sampler_state);
2249 CTX_INIT(create_rasterizer_state);
2250 CTX_INIT(bind_rasterizer_state);
2251 CTX_INIT(delete_rasterizer_state);
2252 CTX_INIT(create_depth_stencil_alpha_state);
2253 CTX_INIT(bind_depth_stencil_alpha_state);
2254 CTX_INIT(delete_depth_stencil_alpha_state);
2255 CTX_INIT(create_fs_state);
2256 CTX_INIT(bind_fs_state);
2257 CTX_INIT(delete_fs_state);
2258 CTX_INIT(create_vs_state);
2259 CTX_INIT(bind_vs_state);
2260 CTX_INIT(delete_vs_state);
2261 CTX_INIT(create_gs_state);
2262 CTX_INIT(bind_gs_state);
2263 CTX_INIT(delete_gs_state);
2264 CTX_INIT(create_tcs_state);
2265 CTX_INIT(bind_tcs_state);
2266 CTX_INIT(delete_tcs_state);
2267 CTX_INIT(create_tes_state);
2268 CTX_INIT(bind_tes_state);
2269 CTX_INIT(delete_tes_state);
2270 CTX_INIT(create_compute_state);
2271 CTX_INIT(bind_compute_state);
2272 CTX_INIT(delete_compute_state);
2273 CTX_INIT(create_vertex_elements_state);
2274 CTX_INIT(bind_vertex_elements_state);
2275 CTX_INIT(delete_vertex_elements_state);
2276 CTX_INIT(set_blend_color);
2277 CTX_INIT(set_stencil_ref);
2278 CTX_INIT(set_sample_mask);
2279 CTX_INIT(set_min_samples);
2280 CTX_INIT(set_clip_state);
2281 CTX_INIT(set_constant_buffer);
2282 CTX_INIT(set_framebuffer_state);
2283 CTX_INIT(set_polygon_stipple);
2284 CTX_INIT(set_scissor_states);
2285 CTX_INIT(set_viewport_states);
2286 CTX_INIT(set_window_rectangles);
2287 CTX_INIT(set_sampler_views);
2288 CTX_INIT(set_tess_state);
2289 CTX_INIT(set_shader_buffers);
2290 CTX_INIT(set_shader_images);
2291 CTX_INIT(set_vertex_buffers);
2292 CTX_INIT(create_stream_output_target);
2293 CTX_INIT(stream_output_target_destroy);
2294 CTX_INIT(set_stream_output_targets);
2295 CTX_INIT(create_sampler_view);
2296 CTX_INIT(sampler_view_destroy);
2297 CTX_INIT(create_surface);
2298 CTX_INIT(surface_destroy);
2299 CTX_INIT(transfer_map);
2300 CTX_INIT(transfer_flush_region);
2301 CTX_INIT(transfer_unmap);
2302 CTX_INIT(buffer_subdata);
2303 CTX_INIT(texture_subdata);
2304 CTX_INIT(texture_barrier);
2305 CTX_INIT(memory_barrier);
2306 CTX_INIT(resource_commit);
2307 CTX_INIT(create_video_codec);
2308 CTX_INIT(create_video_buffer);
2309 CTX_INIT(set_compute_resources);
2310 CTX_INIT(set_global_binding);
2311 CTX_INIT(get_sample_position);
2312 CTX_INIT(invalidate_resource);
2313 CTX_INIT(get_device_reset_status);
2314 CTX_INIT(set_device_reset_callback);
2315 CTX_INIT(dump_debug_state);
2316 CTX_INIT(emit_string_marker);
2317 CTX_INIT(set_debug_callback);
2318 CTX_INIT(create_fence_fd);
2319 CTX_INIT(fence_server_sync);
2320 CTX_INIT(get_timestamp);
2321 #undef CTX_INIT
2322
2323 if (out)
2324 *out = tc;
2325
2326 return &tc->base;
2327
2328 fail:
2329 tc_destroy(&tc->base);
2330 return NULL;
2331 }
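
/* Illustrative sketch (not part of the original file): how a driver might
 * wrap its context with threaded_context_create at context-creation time.
 * The "mydrv_*" names, the screen's transfer pool field, and the
 * replace-buffer-storage callback are hypothetical placeholders; see the
 * radeonsi driver for a real user of this interface.
 */
#if 0
static struct pipe_context *
mydrv_create_context(struct pipe_screen *screen, void *priv)
{
   struct mydrv_screen *ms = mydrv_screen(screen);

   /* Create the real (unwrapped) driver context first. */
   struct pipe_context *ctx = mydrv_create_context_unwrapped(screen, priv);
   if (!ctx)
      return NULL;

   /* threaded_context_create returns "ctx" unwrapped if threading is
    * disabled, destroys "ctx" and returns NULL on failure, and otherwise
    * returns the wrapping threaded_context as a pipe_context.
    */
   return threaded_context_create(ctx, &ms->pool_transfers,
                                  mydrv_replace_buffer_storage,
                                  NULL /* out: tc pointer not needed here */);
}
#endif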