}
}
-static LLVMValueRef get_instance_index_for_fetch(
- struct si_shader_context *ctx,
- unsigned param_start_instance, LLVMValueRef divisor)
-{
- LLVMValueRef result = ctx->abi.instance_id;
-
- /* The division must be done before START_INSTANCE is added. */
- if (divisor != ctx->i32_1)
- result = LLVMBuildUDiv(ctx->ac.builder, result, divisor, "");
-
- return LLVMBuildAdd(ctx->ac.builder, result,
- LLVMGetParam(ctx->main_fn, param_start_instance), "");
-}
-
/* Bitcast <4 x float> to <2 x double>, extract the component, and convert
* to float. */
static LLVMValueRef extract_double_to_float(struct si_shader_context *ctx,
key->vs_prolog.states.instance_divisor_is_one & (1u << i);
bool divisor_is_fetched =
key->vs_prolog.states.instance_divisor_is_fetched & (1u << i);
- LLVMValueRef index;
-
- if (divisor_is_one || divisor_is_fetched) {
- LLVMValueRef divisor = ctx->i32_1;
-
- if (divisor_is_fetched) {
- divisor = buffer_load_const(ctx, instance_divisor_constbuf,
- LLVMConstInt(ctx->i32, i * 4, 0));
- divisor = ac_to_integer(&ctx->ac, divisor);
+ LLVMValueRef index = NULL;
+
+ if (divisor_is_one) {
+ index = ctx->abi.instance_id;
+ } else if (divisor_is_fetched) {
+ LLVMValueRef udiv_factors[4];
+
+ for (unsigned j = 0; j < 4; j++) {
+ udiv_factors[j] =
+ buffer_load_const(ctx, instance_divisor_constbuf,
+ LLVMConstInt(ctx->i32, i*16 + j*4, 0));
+ udiv_factors[j] = ac_to_integer(&ctx->ac, udiv_factors[j]);
}
+ /* The faster NUW version doesn't work when InstanceID == UINT_MAX.
+ * Such InstanceID might not be achievable in a reasonable time though.
+ */
+ index = ac_build_fast_udiv_nuw(&ctx->ac, ctx->abi.instance_id,
+ udiv_factors[0], udiv_factors[1],
+ udiv_factors[2], udiv_factors[3]);
+ }
- /* InstanceID / Divisor + StartInstance */
- index = get_instance_index_for_fetch(ctx,
- user_sgpr_base +
- SI_SGPR_START_INSTANCE,
- divisor);
+ if (divisor_is_one || divisor_is_fetched) {
+ /* Add StartInstance. */
+ index = LLVMBuildAdd(ctx->ac.builder, index,
+ LLVMGetParam(ctx->main_fn, user_sgpr_base +
+ SI_SGPR_START_INSTANCE), "");
} else {
/* VertexID + BaseVertex */
index = LLVMBuildAdd(ctx->ac.builder,
#include "util/u_memory.h"
#include "util/u_resource.h"
#include "util/u_upload_mgr.h"
+#include "util/fast_idiv_by_const.h"
static unsigned si_map_swizzle(unsigned swizzle)
{
* Vertex elements & buffers
*/
+/* 32-bit-only variant of util_fast_udiv_info.
+ *
+ * One entry per vertex attribute is copied byte-for-byte into the
+ * instance-divisor constant buffer, and the vertex shader prolog reads each
+ * entry back as four consecutive dwords (byte offset i*16 + j*4). The layout
+ * therefore has to be exactly 4 x 32 bits, which is why fixed-width types are
+ * used here instead of plain unsigned/int.
+ */
+struct util_fast_udiv_info32 {
+    uint32_t multiplier; /* the "magic number" multiplier */
+    uint32_t pre_shift;  /* shift for the dividend before multiplying */
+    uint32_t post_shift; /* shift for the dividend after multiplying */
+    int32_t increment;   /* 0 or 1; if set then increment the numerator,
+                          * using one of the two strategies */
+};
+
+/* Compute the fast unsigned-division factors for divisor D and narrow them
+ * into the 32-bit-only struct.
+ *
+ * D        - the divisor to build factors for
+ * num_bits - forwarded unchanged to util_compute_fast_udiv_info
+ *
+ * NOTE(review): the trailing 32 passed to the generic helper presumably
+ * limits the multiplier to 32 bits so the narrowing below is lossless -
+ * confirm against util/fast_idiv_by_const.h.
+ */
+static struct util_fast_udiv_info32
+util_compute_fast_udiv_info32(uint32_t D, unsigned num_bits)
+{
+    const struct util_fast_udiv_info wide =
+        util_compute_fast_udiv_info(D, num_bits, 32);
+
+    return (struct util_fast_udiv_info32) {
+        .multiplier = wide.multiplier,
+        .pre_shift = wide.pre_shift,
+        .post_shift = wide.post_shift,
+        .increment = wide.increment,
+    };
+}
+
static void *si_create_vertex_elements(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements)
struct si_screen *sscreen = (struct si_screen*)ctx->screen;
struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements);
bool used[SI_NUM_VERTEX_BUFFERS] = {};
+ struct util_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {};
+ STATIC_ASSERT(sizeof(struct util_fast_udiv_info32) == 16);
+ STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4);
+ STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4);
+ STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4);
+ STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4);
int i;
assert(count <= SI_MAX_ATTRIBS);
return NULL;
}
- if (elements[i].instance_divisor) {
+ unsigned instance_divisor = elements[i].instance_divisor;
+ if (instance_divisor) {
v->uses_instance_divisors = true;
- v->instance_divisors[i] = elements[i].instance_divisor;
- if (v->instance_divisors[i] == 1)
+ if (instance_divisor == 1) {
v->instance_divisor_is_one |= 1u << i;
- else
+ } else {
v->instance_divisor_is_fetched |= 1u << i;
+ divisor_factors[i] =
+ util_compute_fast_udiv_info32(instance_divisor, 32);
+ }
}
if (!used[vbo_index]) {
S_008F0C_NUM_FORMAT(num_format) |
S_008F0C_DATA_FORMAT(data_format);
}
+
+ if (v->instance_divisor_is_fetched) {
+     /* Upload the precomputed division factors for every attribute slot up
+      * to the last one whose divisor is fetched.
+      * NOTE(review): assumes util_last_bit returns (highest set bit + 1) -
+      * confirm.
+      */
+     unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched);
+     unsigned factors_size = num_divisors * sizeof(divisor_factors[0]);
+
+     v->instance_divisor_factor_buffer =
+         (struct r600_resource*)
+         pipe_buffer_create(&sscreen->b, 0, PIPE_USAGE_DEFAULT,
+                            factors_size);
+     if (!v->instance_divisor_factor_buffer) {
+         FREE(v);
+         return NULL;
+     }
+
+     void *map = sscreen->ws->buffer_map(v->instance_divisor_factor_buffer->buf,
+                                         NULL, PIPE_TRANSFER_WRITE);
+     if (!map) {
+         /* Never memcpy through a failed mapping: drop the buffer that was
+          * just created and fail the whole state creation instead.
+          */
+         r600_resource_reference(&v->instance_divisor_factor_buffer, NULL);
+         FREE(v);
+         return NULL;
+     }
+     memcpy(map, divisor_factors, factors_size);
+ }
return v;
}
if (v && v->instance_divisor_is_fetched) {
struct pipe_constant_buffer cb;
- cb.buffer = NULL;
- cb.user_buffer = v->instance_divisors;
+ cb.buffer = &v->instance_divisor_factor_buffer->b.b;
+ cb.user_buffer = NULL;
cb.buffer_offset = 0;
- cb.buffer_size = sizeof(uint32_t) * v->count;
+ cb.buffer_size = 0xffffffff;
si_set_rw_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb);
}
}
+/* Destroy a vertex-elements state object.
+ *
+ * If the state being deleted is the one stored in the context's
+ * vertex_elements pointer, that pointer is cleared first. The state's
+ * instance_divisor_factor_buffer is then handed to r600_resource_reference
+ * with a NULL replacement, and finally the state itself is freed.
+ */
static void si_delete_vertex_element(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
+ struct si_vertex_elements *v = (struct si_vertex_elements*)state;
if (sctx->vertex_elements == state)
sctx->vertex_elements = NULL;
+ r600_resource_reference(&v->instance_divisor_factor_buffer, NULL);
FREE(state);
}