#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
+#include "util/os_time.h"
#include <inttypes.h>
/**
dst += start_slot;
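+ /* u_bit_consecutive(start, n) builds a mask of n consecutive bits
+  * starting at bit "start", so this clears the enable bits for slots
+  * [start_slot, start_slot + count) before they are re-set below. */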
+ *enabled_buffers &= ~u_bit_consecutive(start_slot, count);
+
if (src) {
for (i = 0; i < count; i++) {
if (src[i].buffer.resource)
/* Copy over the other members of pipe_vertex_buffer. */
memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));
- *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
*enabled_buffers |= bitmask << start_slot;
}
else {
/* Unreference the buffers. */
for (i = 0; i < count; i++)
pipe_vertex_buffer_unreference(&dst[i]);
-
- *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
}
}
*dst_count = util_last_bit(enabled_buffers);
}
+/**
+ * This function is used to copy an array of pipe_shader_buffer structures,
+ * while properly referencing the pipe_shader_buffer::buffer member.
+ *
+ * \sa util_set_vertex_buffers_mask
+ */
+void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
+ uint32_t *enabled_buffers,
+ const struct pipe_shader_buffer *src,
+ unsigned start_slot, unsigned count)
+{
+ unsigned i;
+
+ dst += start_slot;
+
+ if (src) {
+ for (i = 0; i < count; i++) {
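+ /* Refcounted assignment: take a reference on src[i].buffer and release
+  * the reference previously held by dst[i].buffer (either may be NULL). */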
+ pipe_resource_reference(&dst[i].buffer, src[i].buffer);
+
+ if (src[i].buffer)
+ *enabled_buffers |= (1ull << (start_slot + i));
+ else
+ *enabled_buffers &= ~(1ull << (start_slot + i));
+ }
+
+ /* Copy over the other members of pipe_shader_buffer. */
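+ /* (This rewrites dst[i].buffer with the same pointers referenced in the
+  *  loop above, so reference counts remain correct.) */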
+ memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
+ }
+ else {
+ /* Unreference the buffers. */
+ for (i = 0; i < count; i++)
+ pipe_resource_reference(&dst[i].buffer, NULL);
+
+ *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
+ }
+}
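+
+/* Example of intended usage (hypothetical caller, for illustration only;
+ * "ssbos" and "ssbo_mask" are made-up driver fields):
+ *
+ *    util_set_shader_buffers_mask(st->ssbos, &st->ssbo_mask,
+ *                                 bufs, start_slot, count);
+ *
+ * Afterwards, bit i of ssbo_mask is set iff slot i holds a buffer.
+ */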
+
/**
* Given a user index buffer, save the structure to "saved", and upload it.
*/
util_upload_index_buffer(struct pipe_context *pipe,
const struct pipe_draw_info *info,
struct pipe_resource **out_buffer,
- unsigned *out_offset)
+ unsigned *out_offset, unsigned alignment)
{
unsigned start_offset = info->start * info->index_size;
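+
+ /* "alignment" specifies the required alignment of the returned upload
+  * offset; it replaces the previously hardcoded value of 4. */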
u_upload_data(pipe->stream_uploader, start_offset,
- info->count * info->index_size, 4,
+ info->count * info->index_size, alignment,
(char*)info->index.user + start_offset,
out_offset, out_buffer);
u_upload_unmap(pipe->stream_uploader);
* pinned.
*/
void
-util_context_thread_changed(struct pipe_context *ctx, thrd_t *upper_thread)
+util_pin_driver_threads_to_random_L3(struct pipe_context *ctx,
+ thrd_t *upper_thread)
{
-#ifdef HAVE_PTHREAD
- thrd_t current = thrd_current();
- int cache = util_get_L3_for_pinned_thread(current,
-                                           util_cpu_caps.cores_per_L3);
-
- /* If the main thread is not pinned, choose the L3 cache. */
- if (cache == -1) {
- unsigned num_caches = util_cpu_caps.nr_cpus /
-                       util_cpu_caps.cores_per_L3;
+ /* If pinning has no effect, don't do anything. */
+ if (util_cpu_caps.nr_cpus == util_cpu_caps.cores_per_L3)
+    return;
+
+ unsigned num_L3_caches = util_cpu_caps.nr_cpus /
+                          util_cpu_caps.cores_per_L3;
- static unsigned last_cache;
- /* Choose a different L3 cache for each subsequent MakeCurrent. */
- cache = p_atomic_inc_return(&last_cache) % num_caches;
- util_pin_thread_to_L3(current, cache, util_cpu_caps.cores_per_L3);
- }
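+ /* A timer-based pick replaces the static counter removed above, avoiding
+  * shared state between contexts; perfect uniformity is not required. */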
+ /* Get a semi-random number. */
+ int64_t t = os_time_get_nano();
+ unsigned cache = (t ^ (t >> 8) ^ (t >> 16)) % num_L3_caches;
- /* Tell the driver to pin its threads to the same L3 cache. */
+ /* Tell the driver to pin its threads to the selected L3 cache. */
if (ctx->set_context_param) {
ctx->set_context_param(ctx, PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE,
cache);
/* Do the same for the upper level thread if there is any (e.g. glthread) */
if (upper_thread)
util_pin_thread_to_L3(*upper_thread, cache, util_cpu_caps.cores_per_L3);
+
+ /* Optionally pin the application thread to the same L3 to get maximum
+  * performance with glthread on AMD Zen (this function is only called
+  * when glthread is in use). Pinning the app thread this way is used to
+  * estimate and remove the overhead of Infinity Fabric between L3 caches.
+  */
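+ /* debug_get_bool_option() reads the environment variable of the same
+  * name, so this pinning is opt-in via pin_app_thread=true. */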
+#if defined(HAVE_PTHREAD)
+ if (debug_get_bool_option("pin_app_thread", false))
+ util_pin_thread_to_L3(pthread_self(), cache, util_cpu_caps.cores_per_L3);
#endif
}
" hs_invocations = %"PRIu64"\n"
" ds_invocations = %"PRIu64"\n"
" cs_invocations = %"PRIu64"\n",
- p_atomic_inc_return(&counter),
+ (unsigned)p_atomic_inc_return(&counter),
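+ /* p_atomic_inc_return() may have a platform-dependent return type; the
+  * cast keeps the argument in sync with the integer conversion used in
+  * the format string. */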
stats.ia_vertices,
stats.ia_primitives,
stats.vs_invocations,