util: Make util_context_thread_changed a no-op on Windows.
[mesa.git] / src / gallium / auxiliary / util / u_helpers.c
/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         if (!src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}
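
/*
 * Usage sketch (illustrative, not part of upstream): a driver's
 * set_vertex_buffers callback can forward directly to
 * util_set_vertex_buffers_mask. "my_context", "vertex_buffers" and
 * "enabled_vb_mask" below are hypothetical placeholder names.
 *
 *    static void
 *    my_set_vertex_buffers(struct pipe_context *pipe, unsigned start_slot,
 *                          unsigned count,
 *                          const struct pipe_vertex_buffer *buffers)
 *    {
 *       struct my_context *ctx = (struct my_context *)pipe;
 *
 *       util_set_vertex_buffers_mask(ctx->vertex_buffers,
 *                                    &ctx->enabled_vb_mask,
 *                                    buffers, start_slot, count);
 *    }
 */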

/**
 * Same as util_set_vertex_buffers_mask, but it only returns the number
 * of bound buffers.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count);

   *dst_count = util_last_bit(enabled_buffers);
}

/**
 * Given a draw info with a user index buffer, upload the user indices and
 * return the hardware buffer and offset that should be used in their place.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset)
{
   unsigned start_offset = info->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 info->count * info->index_size, 4,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}
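
/*
 * Usage sketch (illustrative only): in a driver's draw_vbo path, user index
 * buffers can be uploaded to a GPU-visible buffer before emitting the draw.
 * The local variable names are hypothetical and error handling is minimal.
 *
 *    struct pipe_resource *indexbuf = NULL;
 *    unsigned index_offset = 0;
 *
 *    if (info->index_size && info->has_user_indices) {
 *       if (!util_upload_index_buffer(pipe, info, &indexbuf, &index_offset))
 *          return; // upload/allocation failed
 *    }
 *    // ... emit the draw using indexbuf and index_offset ...
 *    pipe_resource_reference(&indexbuf, NULL);
 */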

/**
 * Called by MakeCurrent. Used to notify the driver that the application
 * thread may have changed.
 *
 * The function pins the current thread and driver threads to a group of
 * CPU cores that share the same L3 cache. This is needed for good multi-
 * threading performance on AMD Zen CPUs.
 *
 * On platforms without pthreads (e.g. Windows), thread pinning is not
 * supported and this function is a no-op.
 *
 * \param upper_thread  thread in the state tracker that also needs to be
 *                      pinned.
 */
void
util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
{
#ifdef HAVE_PTHREAD
   thrd_t current = thrd_current();
   int cache = util_get_L3_for_pinned_thread(current,
                                             util_cpu_caps.cores_per_L3);

   /* If the main thread is not pinned, choose the L3 cache. */
   if (cache == -1) {
      unsigned num_caches = util_cpu_caps.nr_cpus /
                            util_cpu_caps.cores_per_L3;
      static unsigned last_cache;

      /* Choose a different L3 cache for each subsequent MakeCurrent. */
      cache = p_atomic_inc_return(&last_cache) % num_caches;
      util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3);
   }

   /* Tell the driver to pin its threads to the same L3 cache. */
   if (ctx->set_context_param) {
      ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                             cache);
   }

   /* Do the same for the upper level thread if there is any (e.g. glthread). */
   if (upper_thread)
      util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
#endif
}
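
/*
 * Usage sketch (illustrative only): a state tracker would call this from its
 * MakeCurrent implementation, passing the glthread worker thread if one
 * exists. "st" and its fields below are hypothetical placeholders.
 *
 *    util_context_thread_changed(st->pipe,
 *                                st->glthread_enabled ?
 *                                   &st->glthread_worker : NULL);
 */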

/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}
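
/*
 * Usage sketch (illustrative only): during bring-up, a single draw call can
 * be bracketed with these helpers to dump its pipeline statistics.
 *
 *    struct pipe_query *q = util_begin_pipestat_query(ctx);
 *    ctx->draw_vbo(ctx, info);
 *    if (q)
 *       util_end_pipestat_query(ctx, q, stderr);
 */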

/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
}

void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}
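
/*
 * Usage sketch (illustrative only): a driver can embed a util_throttle in
 * its context to cap the memory kept alive by in-flight texture uploads.
 * The names below are hypothetical placeholders.
 *
 *    util_throttle_init(&ctx->tex_upload_throttle,
 *                       512 * 1024 * 1024); // allow ~512 MB in flight
 *
 *    // Before each upload that allocates "size" bytes of staging memory:
 *    util_throttle_memory_usage(pipe, &ctx->tex_upload_throttle, size);
 *    // ... perform the texture upload ...
 *
 *    // At context destruction:
 *    util_throttle_deinit(pipe->screen, &ctx->tex_upload_throttle);
 */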