/*
 * Mesa 3-D graphics library
 *
 * Copyright 2003 VMware, Inc.
 * Copyright 2009 VMware, Inc.
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <stdlib.h>

#include "main/glheader.h"
#include "main/context.h"
#include "main/varray.h"
#include "main/macros.h"
#include "main/sse_minmax.h"
#include "x86/common_x86_asm.h"
#include "util/hash_table.h"
38 struct minmax_cache_key
{
45 struct minmax_cache_entry
{
46 struct minmax_cache_key key
;
53 vbo_minmax_cache_hash(const struct minmax_cache_key
*key
)
55 return _mesa_hash_data(key
, sizeof(*key
));
60 vbo_minmax_cache_key_equal(const struct minmax_cache_key
*a
,
61 const struct minmax_cache_key
*b
)
63 return (a
->offset
== b
->offset
) && (a
->count
== b
->count
) &&
64 (a
->index_size
== b
->index_size
);
69 vbo_minmax_cache_delete_entry(struct hash_entry
*entry
)
76 vbo_use_minmax_cache(struct gl_buffer_object
*bufferObj
)
78 if (bufferObj
->UsageHistory
& (USAGE_TEXTURE_BUFFER
|
79 USAGE_ATOMIC_COUNTER_BUFFER
|
80 USAGE_SHADER_STORAGE_BUFFER
|
81 USAGE_TRANSFORM_FEEDBACK_BUFFER
|
82 USAGE_PIXEL_PACK_BUFFER
|
83 USAGE_DISABLE_MINMAX_CACHE
))
86 if ((bufferObj
->Mappings
[MAP_USER
].AccessFlags
&
87 (GL_MAP_PERSISTENT_BIT
| GL_MAP_WRITE_BIT
)) ==
88 (GL_MAP_PERSISTENT_BIT
| GL_MAP_WRITE_BIT
))
96 vbo_delete_minmax_cache(struct gl_buffer_object
*bufferObj
)
98 _mesa_hash_table_destroy(bufferObj
->MinMaxCache
, vbo_minmax_cache_delete_entry
);
99 bufferObj
->MinMaxCache
= NULL
;
104 vbo_get_minmax_cached(struct gl_buffer_object
*bufferObj
,
105 unsigned index_size
, GLintptr offset
, GLuint count
,
106 GLuint
*min_index
, GLuint
*max_index
)
108 GLboolean found
= GL_FALSE
;
109 struct minmax_cache_key key
;
111 struct hash_entry
*result
;
113 if (!bufferObj
->MinMaxCache
)
115 if (!vbo_use_minmax_cache(bufferObj
))
118 simple_mtx_lock(&bufferObj
->MinMaxCacheMutex
);
120 if (bufferObj
->MinMaxCacheDirty
) {
121 /* Disable the cache permanently for this BO if the number of hits
122 * is asymptotically less than the number of misses. This happens when
123 * applications use the BO for streaming.
125 * However, some initial optimism allows applications that interleave
126 * draw calls with glBufferSubData during warmup.
128 unsigned optimism
= bufferObj
->Size
;
129 if (bufferObj
->MinMaxCacheMissIndices
> optimism
&&
130 bufferObj
->MinMaxCacheHitIndices
< bufferObj
->MinMaxCacheMissIndices
- optimism
) {
131 bufferObj
->UsageHistory
|= USAGE_DISABLE_MINMAX_CACHE
;
132 vbo_delete_minmax_cache(bufferObj
);
136 _mesa_hash_table_clear(bufferObj
->MinMaxCache
, vbo_minmax_cache_delete_entry
);
137 bufferObj
->MinMaxCacheDirty
= false;
141 key
.index_size
= index_size
;
144 hash
= vbo_minmax_cache_hash(&key
);
145 result
= _mesa_hash_table_search_pre_hashed(bufferObj
->MinMaxCache
, hash
, &key
);
147 struct minmax_cache_entry
*entry
= result
->data
;
148 *min_index
= entry
->min
;
149 *max_index
= entry
->max
;
155 /* The hit counter saturates so that we don't accidently disable the
156 * cache in a long-running program.
158 unsigned new_hit_count
= bufferObj
->MinMaxCacheHitIndices
+ count
;
160 if (new_hit_count
>= bufferObj
->MinMaxCacheHitIndices
)
161 bufferObj
->MinMaxCacheHitIndices
= new_hit_count
;
163 bufferObj
->MinMaxCacheHitIndices
= ~(unsigned)0;
165 bufferObj
->MinMaxCacheMissIndices
+= count
;
169 simple_mtx_unlock(&bufferObj
->MinMaxCacheMutex
);
175 vbo_minmax_cache_store(struct gl_context
*ctx
,
176 struct gl_buffer_object
*bufferObj
,
177 unsigned index_size
, GLintptr offset
, GLuint count
,
178 GLuint min
, GLuint max
)
180 struct minmax_cache_entry
*entry
;
181 struct hash_entry
*table_entry
;
184 if (!vbo_use_minmax_cache(bufferObj
))
187 simple_mtx_lock(&bufferObj
->MinMaxCacheMutex
);
189 if (!bufferObj
->MinMaxCache
) {
190 bufferObj
->MinMaxCache
=
191 _mesa_hash_table_create(NULL
,
192 (uint32_t (*)(const void *))vbo_minmax_cache_hash
,
193 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal
);
194 if (!bufferObj
->MinMaxCache
)
198 entry
= MALLOC_STRUCT(minmax_cache_entry
);
202 entry
->key
.offset
= offset
;
203 entry
->key
.count
= count
;
204 entry
->key
.index_size
= index_size
;
207 hash
= vbo_minmax_cache_hash(&entry
->key
);
209 table_entry
= _mesa_hash_table_search_pre_hashed(bufferObj
->MinMaxCache
,
212 /* It seems like this could happen when two contexts are rendering using
213 * the same buffer object from multiple threads.
215 _mesa_debug(ctx
, "duplicate entry in minmax cache\n");
220 table_entry
= _mesa_hash_table_insert_pre_hashed(bufferObj
->MinMaxCache
,
221 hash
, &entry
->key
, entry
);
226 simple_mtx_unlock(&bufferObj
->MinMaxCacheMutex
);
231 vbo_get_minmax_index_mapped(unsigned count
, unsigned index_size
,
232 unsigned restartIndex
, bool restart
,
234 unsigned *min_index
, unsigned *max_index
)
236 switch (index_size
) {
238 const GLuint
*ui_indices
= (const GLuint
*)indices
;
242 for (unsigned i
= 0; i
< count
; i
++) {
243 if (ui_indices
[i
] != restartIndex
) {
244 if (ui_indices
[i
] > max_ui
) max_ui
= ui_indices
[i
];
245 if (ui_indices
[i
] < min_ui
) min_ui
= ui_indices
[i
];
250 #if defined(USE_SSE41)
251 if (cpu_has_sse4_1
) {
252 _mesa_uint_array_min_max(ui_indices
, &min_ui
, &max_ui
, count
);
256 for (unsigned i
= 0; i
< count
; i
++) {
257 if (ui_indices
[i
] > max_ui
) max_ui
= ui_indices
[i
];
258 if (ui_indices
[i
] < min_ui
) min_ui
= ui_indices
[i
];
266 const GLushort
*us_indices
= (const GLushort
*)indices
;
270 for (unsigned i
= 0; i
< count
; i
++) {
271 if (us_indices
[i
] != restartIndex
) {
272 if (us_indices
[i
] > max_us
) max_us
= us_indices
[i
];
273 if (us_indices
[i
] < min_us
) min_us
= us_indices
[i
];
278 for (unsigned i
= 0; i
< count
; i
++) {
279 if (us_indices
[i
] > max_us
) max_us
= us_indices
[i
];
280 if (us_indices
[i
] < min_us
) min_us
= us_indices
[i
];
288 const GLubyte
*ub_indices
= (const GLubyte
*)indices
;
292 for (unsigned i
= 0; i
< count
; i
++) {
293 if (ub_indices
[i
] != restartIndex
) {
294 if (ub_indices
[i
] > max_ub
) max_ub
= ub_indices
[i
];
295 if (ub_indices
[i
] < min_ub
) min_ub
= ub_indices
[i
];
300 for (unsigned i
= 0; i
< count
; i
++) {
301 if (ub_indices
[i
] > max_ub
) max_ub
= ub_indices
[i
];
302 if (ub_indices
[i
] < min_ub
) min_ub
= ub_indices
[i
];
310 unreachable("not reached");
316 * Compute min and max elements by scanning the index buffer for
317 * glDraw[Range]Elements() calls.
318 * If primitive restart is enabled, we need to ignore restart
319 * indexes when computing min/max.
322 vbo_get_minmax_index(struct gl_context
*ctx
,
323 const struct _mesa_prim
*prim
,
324 const struct _mesa_index_buffer
*ib
,
325 GLuint
*min_index
, GLuint
*max_index
,
328 const GLboolean restart
= ctx
->Array
._PrimitiveRestart
;
329 const GLuint restartIndex
=
330 ctx
->Array
._RestartIndex
[(1 << ib
->index_size_shift
) - 1];
334 indices
= (char *) ib
->ptr
+ (prim
->start
<< ib
->index_size_shift
);
336 GLsizeiptr size
= MIN2(count
<< ib
->index_size_shift
, ib
->obj
->Size
);
338 if (vbo_get_minmax_cached(ib
->obj
, 1 << ib
->index_size_shift
, (GLintptr
) indices
,
339 count
, min_index
, max_index
))
342 offset
= (GLintptr
) indices
;
343 indices
= ctx
->Driver
.MapBufferRange(ctx
, offset
, size
,
344 GL_MAP_READ_BIT
, ib
->obj
,
348 vbo_get_minmax_index_mapped(count
, 1 << ib
->index_size_shift
, restartIndex
,
349 restart
, indices
, min_index
, max_index
);
352 vbo_minmax_cache_store(ctx
, ib
->obj
, 1 << ib
->index_size_shift
, offset
,
353 count
, *min_index
, *max_index
);
354 ctx
->Driver
.UnmapBuffer(ctx
, ib
->obj
, MAP_INTERNAL
);
359 * Compute min and max elements for nr_prims
362 vbo_get_minmax_indices(struct gl_context
*ctx
,
363 const struct _mesa_prim
*prims
,
364 const struct _mesa_index_buffer
*ib
,
369 GLuint tmp_min
, tmp_max
;
376 for (i
= 0; i
< nr_prims
; i
++) {
377 const struct _mesa_prim
*start_prim
;
379 start_prim
= &prims
[i
];
380 count
= start_prim
->count
;
381 /* Do combination if possible to reduce map/unmap count */
382 while ((i
+ 1 < nr_prims
) &&
383 (prims
[i
].start
+ prims
[i
].count
== prims
[i
+1].start
)) {
384 count
+= prims
[i
+1].count
;
387 vbo_get_minmax_index(ctx
, start_prim
, ib
, &tmp_min
, &tmp_max
, count
);
388 *min_index
= MIN2(*min_index
, tmp_min
);
389 *max_index
= MAX2(*max_index
, tmp_max
);