mesa: avoid warning on Windows
[mesa.git] src/gallium/auxiliary/util/u_helpers.c
/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         if (!src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}
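
/* Usage sketch (illustrative, not built): a hypothetical driver's
 * pipe_context::set_vertex_buffers hook can forward directly to the helper
 * above; "struct my_context", its fields, and MY_DIRTY_VERTEX_BUFFERS are
 * illustrative only. The same pattern applies to
 * util_set_vertex_buffers_count and util_set_shader_buffers_mask.
 */
#if 0
static void
my_set_vertex_buffers(struct pipe_context *pctx, unsigned start_slot,
                      unsigned count,
                      const struct pipe_vertex_buffer *buffers)
{
   struct my_context *ctx = (struct my_context *)pctx;

   /* Copy/reference the incoming buffers and keep the enabled-slot mask
    * in sync with what is actually bound.
    */
   util_set_vertex_buffers_mask(ctx->vertex_buffers,
                                &ctx->enabled_vb_mask,
                                buffers, start_slot, count);
   ctx->dirty |= MY_DIRTY_VERTEX_BUFFERS;
}
#endif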

/**
 * Same as util_set_vertex_buffers_mask, but it updates *dst_count with the
 * number of bound buffers instead of keeping a bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count);

   *dst_count = util_last_bit(enabled_buffers);
}

/**
 * This function is used to copy an array of pipe_shader_buffer structures,
 * while properly referencing the pipe_shader_buffer::buffer member.
 *
 * \sa util_set_vertex_buffers_mask
 */
void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_shader_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         pipe_resource_reference(&dst[i].buffer, src[i].buffer);

         if (src[i].buffer)
            *enabled_buffers |= (1ull << (start_slot + i));
         else
            *enabled_buffers &= ~(1ull << (start_slot + i));
      }

      /* Copy over the other members of pipe_shader_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst[i].buffer, NULL);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}

/**
 * Given a draw that uses a user index buffer, upload the index data and
 * return the uploaded buffer and the offset of the first index within it.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset)
{
   unsigned start_offset = info->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 info->count * info->index_size, 4,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}
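
/* Usage sketch (illustrative, not built): a hypothetical draw_vbo
 * implementation that cannot read indices from user memory can upload them
 * first and redirect the draw to the uploaded copy; "my_draw_indexed" and
 * the patched pipe_draw_info are illustrative only.
 */
#if 0
static void
my_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
{
   struct pipe_draw_info new_info = *info;
   struct pipe_resource *indexbuf = NULL;
   unsigned offset;

   if (info->index_size && info->has_user_indices) {
      if (!util_upload_index_buffer(pctx, info, &indexbuf, &offset))
         return; /* out of memory */

      /* Point the draw at the uploaded indices instead of user memory. */
      new_info.has_user_indices = false;
      new_info.index.resource = indexbuf;
      new_info.start = offset / info->index_size;
   }

   my_draw_indexed(pctx, &new_info);
   pipe_resource_reference(&indexbuf, NULL);
}
#endif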

/**
 * Called by MakeCurrent. Used to notify the driver that the application
 * thread may have changed.
 *
 * The function pins the current thread and driver threads to a group of
 * CPU cores that share the same L3 cache. This is needed for good multi-
 * threading performance on AMD Zen CPUs.
 *
 * \param upper_thread  thread in the state tracker that also needs to be
 *                      pinned.
 */
void
util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
                                     thrd_t *upper_thread)
{
   /* If pinning has no effect, don't do anything. */
   if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
      return;

   unsigned num_L3_caches = util_cpu_caps.nr_cpus /
                            util_cpu_caps.cores_per_L3;

   /* Get a semi-random number. */
   int64_t t = os_time_get_nano();
   unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;

   /* Tell the driver to pin its threads to the selected L3 cache. */
   if (ctx->set_context_param) {
      ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
                             cache);
   }

   /* Do the same for the upper level thread if there is any (e.g. glthread) */
   if (upper_thread)
      util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
}
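
/* Usage sketch (illustrative, not built): a state tracker would typically
 * call this from its MakeCurrent path, passing its glthread worker if one
 * exists; "struct my_state_tracker" and its fields are illustrative only.
 */
#if 0
static void
my_make_current(struct my_state_tracker *st)
{
   /* Pin only threads we own; pass NULL when glthread is disabled. */
   thrd_t *glthread = st->glthread_enabled ? &st->glthread_worker : NULL;

   util_pin_driver_threads_to_random_L3(st->pipe, glthread);
}
#endif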

/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           (unsigned)p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}
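
/* Usage sketch (illustrative, not built): during hardware bring-up, a draw
 * call can be bracketed by the two helpers above to dump per-draw pipeline
 * statistics; "my_debug_draw" and "dump_file" are illustrative only.
 */
#if 0
static void
my_debug_draw(struct pipe_context *ctx, const struct pipe_draw_info *info,
              FILE *dump_file)
{
   struct pipe_query *q = util_begin_pipestat_query(ctx);

   ctx->draw_vbo(ctx, info);

   /* Waits for the result and prints one "Draw call N:" block. */
   if (q)
      util_end_pipestat_query(ctx, q, dump_file);
}
#endif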

/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
}

void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}
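
/* Usage sketch (illustrative, not built): a driver that streams texture
 * uploads can bound the amount of in-flight upload memory by calling the
 * throttle before each upload; the 256 MB limit and the "my_*" names are
 * illustrative only.
 */
#if 0
struct my_context {
   struct pipe_context *pipe;
   struct util_throttle throttle;
};

static void
my_context_init(struct my_context *ctx)
{
   /* Limit in-flight upload memory to 256 MB (illustrative value). */
   util_throttle_init(&ctx->throttle, 256 * 1024 * 1024);
}

static void
my_texture_upload(struct my_context *ctx, uint64_t upload_size)
{
   /* Block (by waiting on older fences) if this upload would push the
    * tracked memory usage above the limit set at init time.
    */
   util_throttle_memory_usage(ctx->pipe, &ctx->throttle, upload_size);

   /* ... perform the actual upload here ... */
}

static void
my_context_destroy(struct my_context *ctx)
{
   util_throttle_deinit(ctx->pipe->screen, &ctx->throttle);
}
#endif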