/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         if (!src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource,
                                    src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
      *enabled_buffers |= bitmask << start_slot;
   } else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}
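
/* Usage sketch: a driver's pipe_context::set_vertex_buffers hook might wrap
 * the helper like this. "struct my_context" and its fields are hypothetical.
 *
 *    static void
 *    my_set_vertex_buffers(struct pipe_context *pipe, unsigned start_slot,
 *                          unsigned count,
 *                          const struct pipe_vertex_buffer *buffers)
 *    {
 *       struct my_context *mctx = (struct my_context *)pipe;
 *
 *       util_set_vertex_buffers_mask(mctx->vertex_buffers,
 *                                    &mctx->enabled_vb_mask,
 *                                    buffers, start_slot, count);
 *       mctx->dirty |= MY_DIRTY_VERTEX_BUFFERS;
 *    }
 */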

/**
 * Same as util_set_vertex_buffers_mask, but it only returns the number
 * of bound buffers.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count);

   *dst_count = util_last_bit(enabled_buffers);
}
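
/* Usage sketch: drivers that track a plain buffer count instead of an
 * enabled-slot mask can use this variant; "mctx" and its nr_vertex_buffers
 * field are hypothetical.
 *
 *    util_set_vertex_buffers_count(mctx->vertex_buffers,
 *                                  &mctx->nr_vertex_buffers,
 *                                  buffers, start_slot, count);
 */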

/**
 * Given a draw call that reads a user index buffer, upload the used range
 * of the buffer and return the resulting GPU resource and offset.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset)
{
   unsigned start_offset = info->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 info->count * info->index_size, 4,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}
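
/* Usage sketch: a hypothetical draw_vbo implementation can turn a user index
 * pointer into a GPU buffer before drawing. Because the helper subtracts
 * start_offset from the returned offset, the draw can keep using info->start
 * unchanged.
 *
 *    struct pipe_resource *indexbuf = NULL;
 *    unsigned offset;
 *
 *    if (info->has_user_indices) {
 *       if (!util_upload_index_buffer(pipe, info, &indexbuf, &offset))
 *          return; // allocation failed, drop the draw
 *       // ... emit the draw using indexbuf at offset ...
 *       pipe_resource_reference(&indexbuf, NULL);
 *    }
 */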

#ifdef HAVE_PTHREAD_SETAFFINITY

static unsigned L3_cache_number;
static once_flag thread_pinning_once_flag = ONCE_FLAG_INIT;

static void
util_set_full_cpu_affinity(void)
{
   cpu_set_t cpuset;

   CPU_ZERO(&cpuset);
   for (unsigned i = 0; i < CPU_SETSIZE; i++)
      CPU_SET(i, &cpuset);

   pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
}

static void
util_init_thread_pinning(void)
{
   /* Get a semi-random number. */
   int64_t t = os_time_get_nano();
   L3_cache_number = (t ^ (t >> 8) ^ (t >> 16));

   /* Reset thread affinity for all child processes to prevent them from
    * inheriting the current thread's affinity.
    *
    * XXX: If the driver is unloaded after this, and the app later calls
    * fork(), the child process will likely crash before fork() returns,
    * because the address where util_set_full_cpu_affinity was located
    * will either be unmapped or point to random other contents.
    */
   pthread_atfork(NULL, NULL, util_set_full_cpu_affinity);
}

#endif

/**
 * Called by MakeCurrent. Used to notify the driver that the application
 * thread may have been changed.
 *
 * The function pins the current thread and driver threads to a group of
 * CPU cores that share the same L3 cache. This is needed for good multi-
 * threading performance on AMD Zen CPUs.
 *
 * \param upper_thread  thread in the state tracker that also needs to be
 *                      pinned.
 */
void
util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
{
#ifdef HAVE_PTHREAD_SETAFFINITY
   /* If pinning has no effect, don't do anything. */
   if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
      return;

   thrd_t current = thrd_current();
   int cache = util_get_L3_for_pinned_thread(current,
                                             util_cpu_caps.cores_per_L3);

   call_once(&thread_pinning_once_flag, util_init_thread_pinning);

   /* If the main thread is not pinned, choose the L3 cache. */
   if (cache == -1) {
      unsigned num_L3_caches = util_cpu_caps.nr_cpus /
                               util_cpu_caps.cores_per_L3;

      /* Choose a different L3 cache for each subsequent MakeCurrent. */
      cache = p_atomic_inc_return(&L3_cache_number) % num_L3_caches;
      util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3);
   }

   /* Tell the driver to pin its threads to the same L3 cache. */
   if (ctx->set_context_param) {
      ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                             cache);
   }

   /* Do the same for the upper level thread if there is any (e.g. glthread) */
   if (upper_thread)
      util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
#endif
}
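
/* Usage sketch: a state tracker would typically call this from its
 * MakeCurrent path, e.g.
 *
 *    util_context_thread_changed(st->pipe, NULL);
 *
 * and pass the glthread application thread's thrd_t instead of NULL when
 * such an upper-level thread exists, so it lands on the same L3 cache as
 * the driver threads. ("st" is a hypothetical state-tracker context.)
 */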

/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);
235 " ia_vertices = %"PRIu64
"\n"
236 " ia_primitives = %"PRIu64
"\n"
237 " vs_invocations = %"PRIu64
"\n"
238 " gs_invocations = %"PRIu64
"\n"
239 " gs_primitives = %"PRIu64
"\n"
240 " c_invocations = %"PRIu64
"\n"
241 " c_primitives = %"PRIu64
"\n"
242 " ps_invocations = %"PRIu64
"\n"
243 " hs_invocations = %"PRIu64
"\n"
244 " ds_invocations = %"PRIu64
"\n"
245 " cs_invocations = %"PRIu64
"\n",
246 p_atomic_inc_return(&counter
),
249 stats
.vs_invocations
,
250 stats
.gs_invocations
,
254 stats
.ps_invocations
,
255 stats
.hs_invocations
,
256 stats
.ds_invocations
,
257 stats
.cs_invocations
);
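
/* Usage sketch: during bring-up, a suspect draw call can be bracketed with
 * the two helpers above to print the pipeline statistics it generated:
 *
 *    struct pipe_query *q = util_begin_pipestat_query(ctx);
 *    ctx->draw_vbo(ctx, info);
 *    if (q)
 *       util_end_pipestat_query(ctx, q, stderr);
 */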

/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
}
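
/* Usage sketch: when narrowing down a GPU hang during bring-up, inserting
 * util_wait_for_idle(ctx) after each suspect draw serializes the CPU and
 * GPU, so the first call that never returns points at the culprit.
 */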

void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}
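
/* Usage sketch: a driver that throttles texture uploads might embed a
 * util_throttle in its context and charge each upload against it. The
 * 100 MB limit and the "mctx" field are hypothetical.
 *
 *    util_throttle_init(&mctx->throttle, 100 * 1024 * 1024);
 *    ...
 *    util_throttle_memory_usage(pipe, &mctx->throttle,
 *                               (uint64_t)width * height * depth * bpp);
 *    // ... do the upload that allocates that much staging memory ...
 *    ...
 *    util_throttle_deinit(pipe->screen, &mctx->throttle);
 */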