vbo: disable the minmax cache when the hit rate is low
[mesa.git] / src / mesa / vbo / vbo_minmax_index.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright 2003 VMware, Inc.
5 * Copyright 2009 VMware, Inc.
6 * All Rights Reserved.
7 * Copyright (C) 2016 Advanced Micro Devices, Inc.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the next
17 * paragraph) shall be included in all copies or substantial portions of the
18 * Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
24 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
25 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
26 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 */
28
29 #include "main/glheader.h"
30 #include "main/context.h"
31 #include "main/varray.h"
32 #include "main/macros.h"
33 #include "main/sse_minmax.h"
34 #include "x86/common_x86_asm.h"
35 #include "util/hash_table.h"
36
37
38 struct minmax_cache_key {
39 GLintptr offset;
40 GLuint count;
41 GLenum type;
42 };
43
44
45 struct minmax_cache_entry {
46 struct minmax_cache_key key;
47 GLuint min;
48 GLuint max;
49 };
50
51
52 static uint32_t
53 vbo_minmax_cache_hash(const struct minmax_cache_key *key)
54 {
55 return _mesa_hash_data(key, sizeof(*key));
56 }
57
58
59 static bool
60 vbo_minmax_cache_key_equal(const struct minmax_cache_key *a,
61 const struct minmax_cache_key *b)
62 {
63 return (a->offset == b->offset) && (a->count == b->count) && (a->type == b->type);
64 }
65
66
67 static void
68 vbo_minmax_cache_delete_entry(struct hash_entry *entry)
69 {
70 free(entry->data);
71 }
72
73
74 static GLboolean
75 vbo_use_minmax_cache(struct gl_buffer_object *bufferObj)
76 {
77 if (bufferObj->UsageHistory & (USAGE_TEXTURE_BUFFER |
78 USAGE_ATOMIC_COUNTER_BUFFER |
79 USAGE_SHADER_STORAGE_BUFFER |
80 USAGE_TRANSFORM_FEEDBACK_BUFFER |
81 USAGE_PIXEL_PACK_BUFFER |
82 USAGE_DISABLE_MINMAX_CACHE))
83 return GL_FALSE;
84
85 if ((bufferObj->Mappings[MAP_USER].AccessFlags &
86 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT)) ==
87 (GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT))
88 return GL_FALSE;
89
90 return GL_TRUE;
91 }
92
93
94 void
95 vbo_delete_minmax_cache(struct gl_buffer_object *bufferObj)
96 {
97 _mesa_hash_table_destroy(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
98 bufferObj->MinMaxCache = NULL;
99 }
100
101
102 static GLboolean
103 vbo_get_minmax_cached(struct gl_buffer_object *bufferObj,
104 GLenum type, GLintptr offset, GLuint count,
105 GLuint *min_index, GLuint *max_index)
106 {
107 GLboolean found = GL_FALSE;
108 struct minmax_cache_key key;
109 uint32_t hash;
110 struct hash_entry *result;
111
112 if (!bufferObj->MinMaxCache)
113 return GL_FALSE;
114 if (!vbo_use_minmax_cache(bufferObj))
115 return GL_FALSE;
116
117 mtx_lock(&bufferObj->Mutex);
118
119 if (bufferObj->MinMaxCacheDirty) {
120 /* Disable the cache permanently for this BO if the number of hits
121 * is asymptotically less than the number of misses. This happens when
122 * applications use the BO for streaming.
123 *
124 * However, some initial optimism allows applications that interleave
125 * draw calls with glBufferSubData during warmup.
126 */
127 unsigned optimism = bufferObj->Size;
128 if (bufferObj->MinMaxCacheMissIndices > optimism &&
129 bufferObj->MinMaxCacheHitIndices < bufferObj->MinMaxCacheMissIndices - optimism) {
130 bufferObj->UsageHistory |= USAGE_DISABLE_MINMAX_CACHE;
131 vbo_delete_minmax_cache(bufferObj);
132 goto out_disable;
133 }
134
135 _mesa_hash_table_clear(bufferObj->MinMaxCache, vbo_minmax_cache_delete_entry);
136 bufferObj->MinMaxCacheDirty = false;
137 goto out_invalidate;
138 }
139
140 key.type = type;
141 key.offset = offset;
142 key.count = count;
143 hash = vbo_minmax_cache_hash(&key);
144 result = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache, hash, &key);
145 if (result) {
146 struct minmax_cache_entry *entry = result->data;
147 *min_index = entry->min;
148 *max_index = entry->max;
149 found = GL_TRUE;
150 }
151
152 out_invalidate:
153 if (found) {
154 /* The hit counter saturates so that we don't accidently disable the
155 * cache in a long-running program.
156 */
157 unsigned new_hit_count = bufferObj->MinMaxCacheHitIndices + count;
158
159 if (new_hit_count >= bufferObj->MinMaxCacheHitIndices)
160 bufferObj->MinMaxCacheHitIndices = new_hit_count;
161 else
162 bufferObj->MinMaxCacheHitIndices = ~(unsigned)0;
163 } else {
164 bufferObj->MinMaxCacheMissIndices += count;
165 }
166
167 out_disable:
168 mtx_unlock(&bufferObj->Mutex);
169 return found;
170 }
171
172
173 static void
174 vbo_minmax_cache_store(struct gl_context *ctx,
175 struct gl_buffer_object *bufferObj,
176 GLenum type, GLintptr offset, GLuint count,
177 GLuint min, GLuint max)
178 {
179 struct minmax_cache_entry *entry;
180 struct hash_entry *table_entry;
181 uint32_t hash;
182
183 if (!vbo_use_minmax_cache(bufferObj))
184 return;
185
186 mtx_lock(&bufferObj->Mutex);
187
188 if (!bufferObj->MinMaxCache) {
189 bufferObj->MinMaxCache =
190 _mesa_hash_table_create(NULL,
191 (uint32_t (*)(const void *))vbo_minmax_cache_hash,
192 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal);
193 if (!bufferObj->MinMaxCache)
194 goto out;
195 }
196
197 entry = MALLOC_STRUCT(minmax_cache_entry);
198 if (!entry)
199 goto out;
200
201 entry->key.offset = offset;
202 entry->key.count = count;
203 entry->key.type = type;
204 entry->min = min;
205 entry->max = max;
206 hash = vbo_minmax_cache_hash(&entry->key);
207
208 table_entry = _mesa_hash_table_search_pre_hashed(bufferObj->MinMaxCache,
209 hash, &entry->key);
210 if (table_entry) {
211 /* It seems like this could happen when two contexts are rendering using
212 * the same buffer object from multiple threads.
213 */
214 _mesa_debug(ctx, "duplicate entry in minmax cache\n");
215 free(entry);
216 goto out;
217 }
218
219 table_entry = _mesa_hash_table_insert_pre_hashed(bufferObj->MinMaxCache,
220 hash, &entry->key, entry);
221 if (!table_entry)
222 free(entry);
223
224 out:
225 mtx_unlock(&bufferObj->Mutex);
226 }
227
228
229 /**
230 * Compute min and max elements by scanning the index buffer for
231 * glDraw[Range]Elements() calls.
232 * If primitive restart is enabled, we need to ignore restart
233 * indexes when computing min/max.
234 */
235 static void
236 vbo_get_minmax_index(struct gl_context *ctx,
237 const struct _mesa_prim *prim,
238 const struct _mesa_index_buffer *ib,
239 GLuint *min_index, GLuint *max_index,
240 const GLuint count)
241 {
242 const GLboolean restart = ctx->Array._PrimitiveRestart;
243 const GLuint restartIndex = _mesa_primitive_restart_index(ctx, ib->type);
244 const int index_size = vbo_sizeof_ib_type(ib->type);
245 const char *indices;
246 GLuint i;
247
248 indices = (char *) ib->ptr + prim->start * index_size;
249 if (_mesa_is_bufferobj(ib->obj)) {
250 GLsizeiptr size = MIN2(count * index_size, ib->obj->Size);
251
252 if (vbo_get_minmax_cached(ib->obj, ib->type, (GLintptr) indices, count,
253 min_index, max_index))
254 return;
255
256 indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
257 GL_MAP_READ_BIT, ib->obj,
258 MAP_INTERNAL);
259 }
260
261 switch (ib->type) {
262 case GL_UNSIGNED_INT: {
263 const GLuint *ui_indices = (const GLuint *)indices;
264 GLuint max_ui = 0;
265 GLuint min_ui = ~0U;
266 if (restart) {
267 for (i = 0; i < count; i++) {
268 if (ui_indices[i] != restartIndex) {
269 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
270 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
271 }
272 }
273 }
274 else {
275 #if defined(USE_SSE41)
276 if (cpu_has_sse4_1) {
277 _mesa_uint_array_min_max(ui_indices, &min_ui, &max_ui, count);
278 }
279 else
280 #endif
281 for (i = 0; i < count; i++) {
282 if (ui_indices[i] > max_ui) max_ui = ui_indices[i];
283 if (ui_indices[i] < min_ui) min_ui = ui_indices[i];
284 }
285 }
286 *min_index = min_ui;
287 *max_index = max_ui;
288 break;
289 }
290 case GL_UNSIGNED_SHORT: {
291 const GLushort *us_indices = (const GLushort *)indices;
292 GLuint max_us = 0;
293 GLuint min_us = ~0U;
294 if (restart) {
295 for (i = 0; i < count; i++) {
296 if (us_indices[i] != restartIndex) {
297 if (us_indices[i] > max_us) max_us = us_indices[i];
298 if (us_indices[i] < min_us) min_us = us_indices[i];
299 }
300 }
301 }
302 else {
303 for (i = 0; i < count; i++) {
304 if (us_indices[i] > max_us) max_us = us_indices[i];
305 if (us_indices[i] < min_us) min_us = us_indices[i];
306 }
307 }
308 *min_index = min_us;
309 *max_index = max_us;
310 break;
311 }
312 case GL_UNSIGNED_BYTE: {
313 const GLubyte *ub_indices = (const GLubyte *)indices;
314 GLuint max_ub = 0;
315 GLuint min_ub = ~0U;
316 if (restart) {
317 for (i = 0; i < count; i++) {
318 if (ub_indices[i] != restartIndex) {
319 if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
320 if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
321 }
322 }
323 }
324 else {
325 for (i = 0; i < count; i++) {
326 if (ub_indices[i] > max_ub) max_ub = ub_indices[i];
327 if (ub_indices[i] < min_ub) min_ub = ub_indices[i];
328 }
329 }
330 *min_index = min_ub;
331 *max_index = max_ub;
332 break;
333 }
334 default:
335 unreachable("not reached");
336 }
337
338 if (_mesa_is_bufferobj(ib->obj)) {
339 vbo_minmax_cache_store(ctx, ib->obj, ib->type, prim->start, count,
340 *min_index, *max_index);
341 ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
342 }
343 }
344
345 /**
346 * Compute min and max elements for nr_prims
347 */
348 void
349 vbo_get_minmax_indices(struct gl_context *ctx,
350 const struct _mesa_prim *prims,
351 const struct _mesa_index_buffer *ib,
352 GLuint *min_index,
353 GLuint *max_index,
354 GLuint nr_prims)
355 {
356 GLuint tmp_min, tmp_max;
357 GLuint i;
358 GLuint count;
359
360 *min_index = ~0;
361 *max_index = 0;
362
363 for (i = 0; i < nr_prims; i++) {
364 const struct _mesa_prim *start_prim;
365
366 start_prim = &prims[i];
367 count = start_prim->count;
368 /* Do combination if possible to reduce map/unmap count */
369 while ((i + 1 < nr_prims) &&
370 (prims[i].start + prims[i].count == prims[i+1].start)) {
371 count += prims[i+1].count;
372 i++;
373 }
374 vbo_get_minmax_index(ctx, start_prim, ib, &tmp_min, &tmp_max, count);
375 *min_index = MIN2(*min_index, tmp_min);
376 *max_index = MAX2(*max_index, tmp_max);
377 }
378 }