/*
 * Mesa 3-D graphics library
 *
 * Copyright 2003 VMware, Inc.
 * Copyright 2009 VMware, Inc.
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#include "main/glheader.h"
#include "main/context.h"
#include "main/varray.h"
#include "main/macros.h"
#include "main/sse_minmax.h"
#include "x86/common_x86_asm.h"
#include "util/hash_table.h"

38 struct minmax_cache_key
{
45 struct minmax_cache_entry
{
46 struct minmax_cache_key key
;
53 vbo_minmax_cache_hash(const struct minmax_cache_key
*key
)
55 return _mesa_hash_data(key
, sizeof(*key
));
60 vbo_minmax_cache_key_equal(const struct minmax_cache_key
*a
,
61 const struct minmax_cache_key
*b
)
63 return (a
->offset
== b
->offset
) && (a
->count
== b
->count
) &&
64 (a
->index_size
== b
->index_size
);
69 vbo_minmax_cache_delete_entry(struct hash_entry
*entry
)
76 vbo_use_minmax_cache(struct gl_buffer_object
*bufferObj
)
78 if (bufferObj
->UsageHistory
& (USAGE_TEXTURE_BUFFER
|
79 USAGE_ATOMIC_COUNTER_BUFFER
|
80 USAGE_SHADER_STORAGE_BUFFER
|
81 USAGE_TRANSFORM_FEEDBACK_BUFFER
|
82 USAGE_PIXEL_PACK_BUFFER
|
83 USAGE_DISABLE_MINMAX_CACHE
))
86 if ((bufferObj
->Mappings
[MAP_USER
].AccessFlags
&
87 (GL_MAP_PERSISTENT_BIT
| GL_MAP_WRITE_BIT
)) ==
88 (GL_MAP_PERSISTENT_BIT
| GL_MAP_WRITE_BIT
))
96 vbo_delete_minmax_cache(struct gl_buffer_object
*bufferObj
)
98 _mesa_hash_table_destroy(bufferObj
->MinMaxCache
, vbo_minmax_cache_delete_entry
);
99 bufferObj
->MinMaxCache
= NULL
;
104 vbo_get_minmax_cached(struct gl_buffer_object
*bufferObj
,
105 unsigned index_size
, GLintptr offset
, GLuint count
,
106 GLuint
*min_index
, GLuint
*max_index
)
108 GLboolean found
= GL_FALSE
;
109 struct minmax_cache_key key
;
111 struct hash_entry
*result
;
113 if (!bufferObj
->MinMaxCache
)
115 if (!vbo_use_minmax_cache(bufferObj
))
118 simple_mtx_lock(&bufferObj
->MinMaxCacheMutex
);
120 if (bufferObj
->MinMaxCacheDirty
) {
121 /* Disable the cache permanently for this BO if the number of hits
122 * is asymptotically less than the number of misses. This happens when
123 * applications use the BO for streaming.
125 * However, some initial optimism allows applications that interleave
126 * draw calls with glBufferSubData during warmup.
128 unsigned optimism
= bufferObj
->Size
;
129 if (bufferObj
->MinMaxCacheMissIndices
> optimism
&&
130 bufferObj
->MinMaxCacheHitIndices
< bufferObj
->MinMaxCacheMissIndices
- optimism
) {
131 bufferObj
->UsageHistory
|= USAGE_DISABLE_MINMAX_CACHE
;
132 vbo_delete_minmax_cache(bufferObj
);
136 _mesa_hash_table_clear(bufferObj
->MinMaxCache
, vbo_minmax_cache_delete_entry
);
137 bufferObj
->MinMaxCacheDirty
= false;
141 key
.index_size
= index_size
;
144 hash
= vbo_minmax_cache_hash(&key
);
145 result
= _mesa_hash_table_search_pre_hashed(bufferObj
->MinMaxCache
, hash
, &key
);
147 struct minmax_cache_entry
*entry
= result
->data
;
148 *min_index
= entry
->min
;
149 *max_index
= entry
->max
;
155 /* The hit counter saturates so that we don't accidently disable the
156 * cache in a long-running program.
158 unsigned new_hit_count
= bufferObj
->MinMaxCacheHitIndices
+ count
;
160 if (new_hit_count
>= bufferObj
->MinMaxCacheHitIndices
)
161 bufferObj
->MinMaxCacheHitIndices
= new_hit_count
;
163 bufferObj
->MinMaxCacheHitIndices
= ~(unsigned)0;
165 bufferObj
->MinMaxCacheMissIndices
+= count
;
169 simple_mtx_unlock(&bufferObj
->MinMaxCacheMutex
);
175 vbo_minmax_cache_store(struct gl_context
*ctx
,
176 struct gl_buffer_object
*bufferObj
,
177 unsigned index_size
, GLintptr offset
, GLuint count
,
178 GLuint min
, GLuint max
)
180 struct minmax_cache_entry
*entry
;
181 struct hash_entry
*table_entry
;
184 if (!vbo_use_minmax_cache(bufferObj
))
187 simple_mtx_lock(&bufferObj
->MinMaxCacheMutex
);
189 if (!bufferObj
->MinMaxCache
) {
190 bufferObj
->MinMaxCache
=
191 _mesa_hash_table_create(NULL
,
192 (uint32_t (*)(const void *))vbo_minmax_cache_hash
,
193 (bool (*)(const void *, const void *))vbo_minmax_cache_key_equal
);
194 if (!bufferObj
->MinMaxCache
)
198 entry
= MALLOC_STRUCT(minmax_cache_entry
);
202 entry
->key
.offset
= offset
;
203 entry
->key
.count
= count
;
204 entry
->key
.index_size
= index_size
;
207 hash
= vbo_minmax_cache_hash(&entry
->key
);
209 table_entry
= _mesa_hash_table_search_pre_hashed(bufferObj
->MinMaxCache
,
212 /* It seems like this could happen when two contexts are rendering using
213 * the same buffer object from multiple threads.
215 _mesa_debug(ctx
, "duplicate entry in minmax cache\n");
220 table_entry
= _mesa_hash_table_insert_pre_hashed(bufferObj
->MinMaxCache
,
221 hash
, &entry
->key
, entry
);
226 simple_mtx_unlock(&bufferObj
->MinMaxCacheMutex
);
231 * Compute min and max elements by scanning the index buffer for
232 * glDraw[Range]Elements() calls.
233 * If primitive restart is enabled, we need to ignore restart
234 * indexes when computing min/max.
237 vbo_get_minmax_index(struct gl_context
*ctx
,
238 const struct _mesa_prim
*prim
,
239 const struct _mesa_index_buffer
*ib
,
240 GLuint
*min_index
, GLuint
*max_index
,
243 const GLboolean restart
= ctx
->Array
._PrimitiveRestart
;
244 const GLuint restartIndex
=
245 _mesa_primitive_restart_index(ctx
, ib
->index_size
);
250 indices
= (char *) ib
->ptr
+ prim
->start
* ib
->index_size
;
251 if (_mesa_is_bufferobj(ib
->obj
)) {
252 GLsizeiptr size
= MIN2(count
* ib
->index_size
, ib
->obj
->Size
);
254 if (vbo_get_minmax_cached(ib
->obj
, ib
->index_size
, (GLintptr
) indices
,
255 count
, min_index
, max_index
))
258 offset
= (GLintptr
) indices
;
259 indices
= ctx
->Driver
.MapBufferRange(ctx
, offset
, size
,
260 GL_MAP_READ_BIT
, ib
->obj
,
264 switch (ib
->index_size
) {
266 const GLuint
*ui_indices
= (const GLuint
*)indices
;
270 for (i
= 0; i
< count
; i
++) {
271 if (ui_indices
[i
] != restartIndex
) {
272 if (ui_indices
[i
] > max_ui
) max_ui
= ui_indices
[i
];
273 if (ui_indices
[i
] < min_ui
) min_ui
= ui_indices
[i
];
278 #if defined(USE_SSE41)
279 if (cpu_has_sse4_1
) {
280 _mesa_uint_array_min_max(ui_indices
, &min_ui
, &max_ui
, count
);
284 for (i
= 0; i
< count
; i
++) {
285 if (ui_indices
[i
] > max_ui
) max_ui
= ui_indices
[i
];
286 if (ui_indices
[i
] < min_ui
) min_ui
= ui_indices
[i
];
294 const GLushort
*us_indices
= (const GLushort
*)indices
;
298 for (i
= 0; i
< count
; i
++) {
299 if (us_indices
[i
] != restartIndex
) {
300 if (us_indices
[i
] > max_us
) max_us
= us_indices
[i
];
301 if (us_indices
[i
] < min_us
) min_us
= us_indices
[i
];
306 for (i
= 0; i
< count
; i
++) {
307 if (us_indices
[i
] > max_us
) max_us
= us_indices
[i
];
308 if (us_indices
[i
] < min_us
) min_us
= us_indices
[i
];
316 const GLubyte
*ub_indices
= (const GLubyte
*)indices
;
320 for (i
= 0; i
< count
; i
++) {
321 if (ub_indices
[i
] != restartIndex
) {
322 if (ub_indices
[i
] > max_ub
) max_ub
= ub_indices
[i
];
323 if (ub_indices
[i
] < min_ub
) min_ub
= ub_indices
[i
];
328 for (i
= 0; i
< count
; i
++) {
329 if (ub_indices
[i
] > max_ub
) max_ub
= ub_indices
[i
];
330 if (ub_indices
[i
] < min_ub
) min_ub
= ub_indices
[i
];
338 unreachable("not reached");
341 if (_mesa_is_bufferobj(ib
->obj
)) {
342 vbo_minmax_cache_store(ctx
, ib
->obj
, ib
->index_size
, offset
,
343 count
, *min_index
, *max_index
);
344 ctx
->Driver
.UnmapBuffer(ctx
, ib
->obj
, MAP_INTERNAL
);
349 * Compute min and max elements for nr_prims
352 vbo_get_minmax_indices(struct gl_context
*ctx
,
353 const struct _mesa_prim
*prims
,
354 const struct _mesa_index_buffer
*ib
,
359 GLuint tmp_min
, tmp_max
;
366 for (i
= 0; i
< nr_prims
; i
++) {
367 const struct _mesa_prim
*start_prim
;
369 start_prim
= &prims
[i
];
370 count
= start_prim
->count
;
371 /* Do combination if possible to reduce map/unmap count */
372 while ((i
+ 1 < nr_prims
) &&
373 (prims
[i
].start
+ prims
[i
].count
== prims
[i
+1].start
)) {
374 count
+= prims
[i
+1].count
;
377 vbo_get_minmax_index(ctx
, start_prim
, ib
, &tmp_min
, &tmp_max
, count
);
378 *min_index
= MIN2(*min_index
, tmp_min
);
379 *max_index
= MAX2(*max_index
, tmp_max
);