assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+#if UTIL_ARCH_LITTLE_ENDIAN
shuffles[i] = lp_build_const_int32(gallivm, i / 2);
shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+#else
+ shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+ shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
+#endif
}
res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
static void
-emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
- LLVMValueRef chan_ptr,
- LLVMValueRef chan_ptr2,
- LLVMValueRef value)
+emit_store_64bit_split(struct lp_build_nir_context *bld_base,
+ LLVMValueRef value,
+ LLVMValueRef split_values[2])
{
- struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
- struct lp_build_context *float_bld = &bld_base->base;
unsigned i;
- LLVMValueRef temp, temp2;
LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
int len = bld_base->base.type.length * 2;
value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
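+   /* View the 64-bit lanes as pairs of 32-bit floats, then de-interleave: split_values[0] collects the low words, split_values[1] the high words (order swaps on big-endian hosts). */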
for (i = 0; i < bld_base->base.type.length; i++) {
+#if UTIL_ARCH_LITTLE_ENDIAN
shuffles[i] = lp_build_const_int32(gallivm, i * 2);
shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+#else
+ shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+ shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
+#endif
}
- temp = LLVMBuildShuffleVector(builder, value,
+ split_values[0] = LLVMBuildShuffleVector(builder, value,
LLVMGetUndef(LLVMTypeOf(value)),
LLVMConstVector(shuffles,
bld_base->base.type.length),
"");
- temp2 = LLVMBuildShuffleVector(builder, value,
+ split_values[1] = LLVMBuildShuffleVector(builder, value,
LLVMGetUndef(LLVMTypeOf(value)),
LLVMConstVector(shuffles2,
bld_base->base.type.length),
"");
+}
- lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
- lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
+static void
+emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
+ LLVMValueRef chan_ptr,
+ LLVMValueRef chan_ptr2,
+ LLVMValueRef value)
+{
+ struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ struct lp_build_context *float_bld = &bld_base->base;
+ LLVMValueRef split_vals[2];
+
+ emit_store_64bit_split(bld_base, value, split_vals);
+
+ lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
+ lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
}
static LLVMValueRef
unsigned bit_size,
nir_variable *var,
unsigned vertex_index,
+ LLVMValueRef indir_vertex_index,
unsigned const_index,
LLVMValueRef indir_index,
- LLVMValueRef result[4])
+ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
int dmul = bit_size == 64 ? 2 : 1;
+ unsigned location = var->data.driver_location;
+ unsigned location_frac = var->data.location_frac;
+
+ if (!var->data.compact && !indir_index)
+ location += const_index;
+ else if (var->data.compact) {
+ location += const_index / 4;
+ location_frac += const_index % 4;
+ const_index = 0;
+ }
switch (deref_mode) {
- case nir_var_shader_in: {
+ case nir_var_shader_in:
for (unsigned i = 0; i < num_components; i++) {
- int idx = (i * dmul) + var->data.location_frac;
+ int idx = (i * dmul) + location_frac;
+
if (bld->gs_iface) {
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
- LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+ LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, location);
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
LLVMValueRef result2;
+
result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
if (bit_size == 64) {
false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
}
+ } else if (bld->tes_iface) {
+ LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+ LLVMValueRef attrib_index_val;
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+ LLVMValueRef result2;
+
+ if (indir_index) {
+ if (var->data.compact) {
+ swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
+ attrib_index_val = lp_build_const_int32(gallivm, location);
+ } else
+ attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+ } else
+ attrib_index_val = lp_build_const_int32(gallivm, location);
+
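+        /* Patch inputs are fetched once per patch; everything else goes through the per-vertex fetch with the (possibly indirect) vertex index. */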
+ if (var->data.patch) {
+ result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val);
+ if (bit_size == 64) {
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+ result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val);
+ result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+ }
+ }
+ else {
+ result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
+ indir_vertex_index ? true : false,
+ indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ (indir_index && !var->data.compact) ? true : false, attrib_index_val,
+ (indir_index && var->data.compact) ? true : false, swizzle_index_val);
+ if (bit_size == 64) {
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+ result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
+ indir_vertex_index ? true : false,
+ indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
+ result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+ }
+ }
+ } else if (bld->tcs_iface) {
+ LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+ LLVMValueRef attrib_index_val;
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+
+ if (indir_index) {
+ if (var->data.compact) {
+ swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
+ attrib_index_val = lp_build_const_int32(gallivm, location);
+ } else
+ attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+ } else
+ attrib_index_val = lp_build_const_int32(gallivm, location);
+ result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
+ indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ (indir_index && !var->data.compact) ? true : false, attrib_index_val,
+ (indir_index && var->data.compact) ? true : false, swizzle_index_val);
+ if (bit_size == 64) {
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+ LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
+ indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val,
+ false, swizzle_index_val);
+ result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+ }
} else {
if (indir_index) {
LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
} else {
if (bld->indirects & nir_var_shader_in) {
LLVMValueRef lindex = lp_build_const_int32(gallivm,
- var->data.driver_location * 4 + idx);
+ location * 4 + idx);
LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
bld->inputs_array, lindex);
if (bit_size == 64) {
LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
- var->data.driver_location * 4 + (idx + 1));
+ location * 4 + (idx + 1));
LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
bld->inputs_array, lindex2);
result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
} else {
if (bit_size == 64) {
LLVMValueRef tmp[2];
- tmp[0] = bld->inputs[var->data.driver_location + const_index][idx];
- tmp[1] = bld->inputs[var->data.driver_location + const_index][idx + 1];
+ tmp[0] = bld->inputs[location][idx];
+ tmp[1] = bld->inputs[location][idx + 1];
result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
} else {
- result[i] = bld->inputs[var->data.driver_location + const_index][idx];
+ result[i] = bld->inputs[location][idx];
}
}
}
}
}
- }
+ break;
+ case nir_var_shader_out:
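+   /* Output reads: fragment shaders use framebuffer fetch, tessellation control shaders read back through emit_fetch_output. */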
+ if (bld->fs_iface && bld->fs_iface->fb_fetch) {
+ bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result);
+ return;
+ }
+ for (unsigned i = 0; i < num_components; i++) {
+ int idx = (i * dmul) + location_frac;
+ if (bld->tcs_iface) {
+ LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+ LLVMValueRef attrib_index_val;
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+
+ if (indir_index)
+ attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+ else
+ attrib_index_val = lp_build_const_int32(gallivm, location);
+
+ result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
+ indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ (indir_index && !var->data.compact) ? true : false, attrib_index_val,
+ (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
+ if (bit_size == 64) {
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+ LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
+ indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val,
+ false, swizzle_index_val, 0);
+ result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+ }
+ }
+ }
+ break;
default:
break;
}
}
}
+static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
+ bool is_compact,
+ unsigned bit_size,
+ unsigned location,
+ unsigned const_index,
+ LLVMValueRef indir_vertex_index,
+ LLVMValueRef indir_index,
+ unsigned comp,
+ unsigned chan,
+ LLVMValueRef chan_val)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ unsigned swizzle = chan;
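+   /* A 64-bit channel occupies two 32-bit slots, so double the index and move to the next attribute slot when it crosses the vec4 boundary. */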
+ if (bit_size == 64) {
+ swizzle *= 2;
+ swizzle += comp;
+ if (swizzle >= 4) {
+ swizzle -= 4;
+ location++;
+ }
+ } else
+ swizzle += comp;
+ LLVMValueRef attrib_index_val;
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
+
+ if (indir_index) {
+ if (is_compact) {
+ swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
+ attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
+ } else
+ attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
+ } else
+ attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
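+   /* 64-bit values are split into two 32-bit halves and written with two masked stores to consecutive swizzle slots. */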
+ if (bit_size == 64) {
+ LLVMValueRef split_vals[2];
+ LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
+ emit_store_64bit_split(bld_base, chan_val, split_vals);
+ bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+ indir_vertex_index ? true : false,
+ indir_vertex_index,
+ indir_index ? true : false,
+ attrib_index_val,
+ false, swizzle_index_val,
+ split_vals[0], mask_vec(bld_base));
+ bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+ indir_vertex_index ? true : false,
+ indir_vertex_index,
+ indir_index ? true : false,
+ attrib_index_val,
+ false, swizzle_index_val2,
+ split_vals[1], mask_vec(bld_base));
+ } else {
+ chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
+ bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+ indir_vertex_index ? true : false,
+ indir_vertex_index,
+ indir_index && !is_compact ? true : false,
+ attrib_index_val,
+ indir_index && is_compact ? true : false,
+ swizzle_index_val,
+ chan_val, mask_vec(bld_base));
+ }
+}
+
static void emit_store_var(struct lp_build_nir_context *bld_base,
nir_variable_mode deref_mode,
- unsigned bit_size,
unsigned num_components,
+ unsigned bit_size,
+ nir_variable *var,
unsigned writemask,
+ LLVMValueRef indir_vertex_index,
unsigned const_index,
- nir_variable *var, LLVMValueRef dst)
+ LLVMValueRef indir_index,
+ LLVMValueRef dst)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
switch (deref_mode) {
case nir_var_shader_out: {
- unsigned location = var->data.driver_location + const_index;
+ unsigned location = var->data.driver_location;
unsigned comp = var->data.location_frac;
if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
if (var->data.location == FRAG_RESULT_STENCIL)
else if (var->data.location == FRAG_RESULT_DEPTH)
comp = 2;
}
+
+ if (var->data.compact) {
+ location += const_index / 4;
+ comp += const_index % 4;
+ const_index = 0;
+ }
+
for (unsigned chan = 0; chan < num_components; chan++) {
if (writemask & (1u << chan)) {
LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
- emit_store_chan(bld_base, deref_mode, bit_size, location, comp, chan, chan_val);
+ if (bld->tcs_iface) {
+ emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
+ } else
+ emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
}
}
break;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
int nc = reg->reg->num_components;
- LLVMValueRef vals[4];
+ LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
struct lp_build_context *uint_bld = &bld_base->uint_bld;
if (reg->reg->num_array_elems) {
LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
unsigned writemask,
LLVMValueRef indir_src,
LLVMValueRef reg_storage,
- LLVMValueRef dst[4])
+ LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
}
}
+static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
+ unsigned nc,
+ unsigned bit_size,
+ unsigned offset_bit_size,
+ bool offset_is_uniform,
+ LLVMValueRef offset,
+ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
+{
+ struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
+ LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
+ unsigned size_shift = 0;
+ struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
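+   /* The incoming offset is in bytes; shift it down to an element index matching the load width. */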
+ if (bit_size == 16)
+ size_shift = 1;
+ else if (bit_size == 32)
+ size_shift = 2;
+ else if (bit_size == 64)
+ size_shift = 3;
+ if (size_shift)
+ offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
+
+ LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
+ kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
+
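+   /* Only uniform offsets are handled here: each scalar argument is loaded once and broadcast across the SIMD lanes. */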
+ if (offset_is_uniform) {
+ offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");
+
+ for (unsigned c = 0; c < nc; c++) {
+ LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
+
+ LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
+ result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
+ }
+ }
+}
+
+static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
+{
+ LLVMBuilderRef builder = gallivm->builder;
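+   /* Per-lane global addresses are plain integers; convert the extracted scalar to a pointer of the access width. */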
+ switch (bit_size) {
+ case 8:
+ addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
+ break;
+ case 16:
+ addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
+ break;
+ case 32:
+ default:
+ addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
+ break;
+ case 64:
+ addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
+ break;
+ }
+ return addr_ptr;
+}
+
+static void emit_load_global(struct lp_build_nir_context *bld_base,
+ unsigned nc,
+ unsigned bit_size,
+ unsigned addr_bit_size,
+ LLVMValueRef addr,
+ LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ struct lp_build_context *res_bld;
+
+ res_bld = get_int_bld(bld_base, true, bit_size);
+
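+   /* Addresses may diverge across lanes, so loop over the lanes and gather one scalar per iteration into the result vector. */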
+ for (unsigned c = 0; c < nc; c++) {
+ LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
+
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+ LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
+ loop_state.counter, "");
+ addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
+
+ LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));
+
+ LLVMValueRef temp_res;
+ temp_res = LLVMBuildLoad(builder, result, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, result);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ outval[c] = LLVMBuildLoad(builder, result, "");
+ }
+}
+
+static void emit_store_global(struct lp_build_nir_context *bld_base,
+ unsigned writemask,
+ unsigned nc, unsigned bit_size,
+ unsigned addr_bit_size,
+ LLVMValueRef addr,
+ LLVMValueRef dst)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+
+ for (unsigned c = 0; c < nc; c++) {
+ if (!(writemask & (1u << c)))
+ continue;
+ LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
+
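+      /* Scatter one lane at a time, and only where the execution mask is set. */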
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
+ loop_state.counter, "");
+
+ LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
+ loop_state.counter, "");
+ addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
+ switch (bit_size) {
+ case 32:
+ value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
+ break;
+ case 64:
+ value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
+ break;
+ default:
+ break;
+ }
+ struct lp_build_if_state ifthen;
+
+ LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+ lp_build_if(&ifthen, gallivm, cond);
+ lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ }
+}
+
+static void emit_atomic_global(struct lp_build_nir_context *bld_base,
+ nir_intrinsic_op nir_op,
+ unsigned addr_bit_size,
+ LLVMValueRef addr,
+ LLVMValueRef val, LLVMValueRef val2,
+ LLVMValueRef *result)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+
+ LLVMValueRef atom_res = lp_build_alloca(gallivm,
+ uint_bld->vec_type, "");
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ struct lp_build_loop_state loop_state;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
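+   /* Atomics are performed lane by lane: active lanes do the operation, inactive lanes contribute 0 to the returned vector. */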
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
+ loop_state.counter, "");
+
+ LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
+ loop_state.counter, "");
+ addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
+ struct lp_build_if_state ifthen;
+ LLVMValueRef cond, temp_res;
+ LLVMValueRef scalar;
+ cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
+ cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
+ lp_build_if(&ifthen, gallivm, cond);
+
+ if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
+ LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
+ loop_state.counter, "");
+ cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
+ scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
+ cas_src_ptr,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ false);
+ scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
+ } else {
+ LLVMAtomicRMWBinOp op;
+ switch (nir_op) {
+ case nir_intrinsic_global_atomic_add:
+ op = LLVMAtomicRMWBinOpAdd;
+ break;
+ case nir_intrinsic_global_atomic_exchange:
+ op = LLVMAtomicRMWBinOpXchg;
+ break;
+ case nir_intrinsic_global_atomic_and:
+ op = LLVMAtomicRMWBinOpAnd;
+ break;
+ case nir_intrinsic_global_atomic_or:
+ op = LLVMAtomicRMWBinOpOr;
+ break;
+ case nir_intrinsic_global_atomic_xor:
+ op = LLVMAtomicRMWBinOpXor;
+ break;
+ case nir_intrinsic_global_atomic_umin:
+ op = LLVMAtomicRMWBinOpUMin;
+ break;
+ case nir_intrinsic_global_atomic_umax:
+ op = LLVMAtomicRMWBinOpUMax;
+ break;
+ case nir_intrinsic_global_atomic_imin:
+ op = LLVMAtomicRMWBinOpMin;
+ break;
+ case nir_intrinsic_global_atomic_imax:
+ op = LLVMAtomicRMWBinOpMax;
+ break;
+ default:
+ unreachable("unknown atomic op");
+ }
+
+ scalar = LLVMBuildAtomicRMW(builder, op,
+ addr_ptr, value_ptr,
+ LLVMAtomicOrderingSequentiallyConsistent,
+ false);
+ }
+ temp_res = LLVMBuildLoad(builder, atom_res, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, atom_res);
+ lp_build_else(&ifthen);
+ temp_res = LLVMBuildLoad(builder, atom_res, "");
+ temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
+ LLVMBuildStore(builder, temp_res, atom_res);
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
+ NULL, LLVMIntUGE);
+ *result = LLVMBuildLoad(builder, atom_res, "");
+}
+
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
unsigned nc,
unsigned bit_size,
bool offset_is_uniform,
LLVMValueRef index,
LLVMValueRef offset,
- LLVMValueRef result[4])
+ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMValueRef overflow_mask;
LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);
- num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 4), "");
num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
for (unsigned c = 0; c < nc; c++) {
LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
this_offset, num_consts);
-
result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
}
}
unsigned bit_size,
LLVMValueRef index,
LLVMValueRef offset,
- LLVMValueRef outval[4])
+ LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
LLVMValueRef ssbo_ptr;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMAtomicRMWBinOp op;
LLVMValueRef ssbo_limit = NULL;
if (index) {
} else
ssbo_ptr = bld->shared_ptr;
- switch (nir_op) {
- case nir_intrinsic_shared_atomic_add:
- case nir_intrinsic_ssbo_atomic_add:
- op = LLVMAtomicRMWBinOpAdd;
- break;
- case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_exchange:
- op = LLVMAtomicRMWBinOpXchg;
- break;
- case nir_intrinsic_shared_atomic_and:
- case nir_intrinsic_ssbo_atomic_and:
- op = LLVMAtomicRMWBinOpAnd;
- break;
- case nir_intrinsic_shared_atomic_or:
- case nir_intrinsic_ssbo_atomic_or:
- op = LLVMAtomicRMWBinOpOr;
- break;
- case nir_intrinsic_shared_atomic_xor:
- case nir_intrinsic_ssbo_atomic_xor:
- op = LLVMAtomicRMWBinOpXor;
- break;
- case nir_intrinsic_shared_atomic_umin:
- case nir_intrinsic_ssbo_atomic_umin:
- op = LLVMAtomicRMWBinOpUMin;
- break;
- case nir_intrinsic_shared_atomic_umax:
- case nir_intrinsic_ssbo_atomic_umax:
- op = LLVMAtomicRMWBinOpUMax;
- break;
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_shared_atomic_imin:
- op = LLVMAtomicRMWBinOpMin;
- break;
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_shared_atomic_imax:
- op = LLVMAtomicRMWBinOpMax;
- break;
- default:
- break;
- }
-
offset = lp_build_shr_imm(uint_bld, offset, 2);
LLVMValueRef atom_res = lp_build_alloca(gallivm,
uint_bld->vec_type, "");
false);
scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
} else {
+ LLVMAtomicRMWBinOp op;
+
+ switch (nir_op) {
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_ssbo_atomic_add:
+ op = LLVMAtomicRMWBinOpAdd;
+ break;
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ op = LLVMAtomicRMWBinOpXchg;
+ break;
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_ssbo_atomic_and:
+ op = LLVMAtomicRMWBinOpAnd;
+ break;
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_ssbo_atomic_or:
+ op = LLVMAtomicRMWBinOpOr;
+ break;
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_xor:
+ op = LLVMAtomicRMWBinOpXor;
+ break;
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ op = LLVMAtomicRMWBinOpUMin;
+ break;
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ op = LLVMAtomicRMWBinOpUMax;
+ break;
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_shared_atomic_imin:
+ op = LLVMAtomicRMWBinOpMin;
+ break;
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_shared_atomic_imax:
+ op = LLVMAtomicRMWBinOpMax;
+ break;
+ default:
+ unreachable("unknown atomic op");
+ }
scalar = LLVMBuildAtomicRMW(builder, op,
scalar_ptr, value_ptr,
LLVMAtomicOrderingSequentiallyConsistent,
static LLVMValueRef emit_get_buffer_size(struct lp_build_nir_context *bld_base,
LLVMValueRef index)
{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
struct lp_build_context *bld_broad = &bld_base->uint_bld;
- LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, bld_broad->zero, ""));
+ LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
+ LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
return lp_build_broadcast_scalar(bld_broad, size_ptr);
}
struct lp_img_params *params)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+
params->type = bld_base->base.type;
params->context_ptr = bld->context_ptr;
params->thread_data_ptr = bld->thread_data_ptr;
params->exec_mask = mask_vec(bld_base);
+
+ if (params->image_index_offset)
+ params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
+ lp_build_const_int32(gallivm, 0), "");
+
bld->image->emit_op(bld->image,
bld->bld_base.base.gallivm,
params);
struct lp_sampler_size_query_params *params)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
params->int_type = bld_base->int_bld.type;
params->context_ptr = bld->context_ptr;
+ if (params->texture_unit_offset)
+ params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
+ lp_build_const_int32(gallivm, 0), "");
bld->image->emit_size_query(bld->image,
bld->bld_base.base.gallivm,
params);
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
+ if (!bld->outputs)
+ return;
for (unsigned comp = sc; comp < slots + sc; comp++) {
unsigned this_loc = var->data.driver_location + (comp / 4);
unsigned this_chan = comp % 4;
struct lp_sampler_params *params)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
params->type = bld_base->base.type;
params->context_ptr = bld->context_ptr;
params->thread_data_ptr = bld->thread_data_ptr;
+ if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
+      /* This is horrible, but the texture index can be dynamic (divergent) here: sample one lane at a time with scalar coords and index, then reassemble the result vectors. */
+ LLVMValueRef coords[5];
+ LLVMValueRef *orig_texel_ptr;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
+ LLVMGetUndef(bld_base->base.vec_type),
+ LLVMGetUndef(bld_base->base.vec_type),
+ LLVMGetUndef(bld_base->base.vec_type) };
+ LLVMValueRef texel[4], orig_offset;
+ unsigned i;
+ orig_texel_ptr = params->texel;
+
+ for (i = 0; i < 5; i++) {
+ coords[i] = params->coords[i];
+ }
+ orig_offset = params->texture_index_offset;
+
+ for (unsigned v = 0; v < uint_bld->type.length; v++) {
+ LLVMValueRef idx = lp_build_const_int32(gallivm, v);
+ LLVMValueRef new_coords[5];
+ for (i = 0; i < 5; i++) {
+ new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
+ coords[i], idx, "");
+ }
+ params->coords = new_coords;
+ params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
+ orig_offset,
+ idx, "");
+ params->type = lp_elem_type(bld_base->base.type);
+
+ params->texel = texel;
+ bld->sampler->emit_tex_sample(bld->sampler,
+ gallivm,
+ params);
+
+ for (i = 0; i < 4; i++) {
+ result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
+ }
+ }
+ for (i = 0; i < 4; i++) {
+ orig_texel_ptr[i] = result[i];
+ }
+ return;
+ }
+
+ if (params->texture_index_offset)
+ params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
+ params->texture_index_offset,
+ lp_build_const_int32(bld_base->base.gallivm, 0), "");
+
+ params->type = bld_base->base.type;
bld->sampler->emit_tex_sample(bld->sampler,
bld->bld_base.base.gallivm,
params);
params->int_type = bld_base->int_bld.type;
params->context_ptr = bld->context_ptr;
+ if (params->texture_unit_offset)
+ params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
+ params->texture_unit_offset,
+ lp_build_const_int32(bld_base->base.gallivm, 0), "");
bld->sampler->emit_size_query(bld->sampler,
bld->bld_base.base.gallivm,
params);
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr,
- LLVMValueRef result[4])
+ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
case nir_intrinsic_load_instance_id:
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
break;
+ case nir_intrinsic_load_base_instance:
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
+ break;
+ case nir_intrinsic_load_base_vertex:
+ result[0] = bld->system_values.basevertex;
+ break;
case nir_intrinsic_load_vertex_id:
result[0] = bld->system_values.vertex_id;
break;
result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), ""));
break;
case nir_intrinsic_load_invocation_id:
- result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
+ if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
+ result[0] = bld->system_values.invocation_id;
+ else
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
+ break;
+ case nir_intrinsic_load_front_face:
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
+ break;
+ case nir_intrinsic_load_draw_id:
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
break;
default:
break;
+ case nir_intrinsic_load_local_group_size:
+ for (unsigned i = 0; i < 3; i++)
+ result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
+ break;
+ case nir_intrinsic_load_work_dim:
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
+ break;
+ case nir_intrinsic_load_tess_coord:
+ for (unsigned i = 0; i < 3; i++) {
+ result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
+ }
+ break;
+ case nir_intrinsic_load_tess_level_outer:
+ for (unsigned i = 0; i < 4; i++)
+ result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
+ break;
+ case nir_intrinsic_load_tess_level_inner:
+ for (unsigned i = 0; i < 2; i++)
+ result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
+ break;
+ case nir_intrinsic_load_patch_vertices_in:
+ result[0] = bld->system_values.vertices_in;
+ break;
+ case nir_intrinsic_load_sample_id:
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
+ break;
+ case nir_intrinsic_load_sample_pos:
+ for (unsigned i = 0; i < 2; i++) {
+ LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
+ idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
+ LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
+ result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
+ }
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ result[0] = bld->system_values.sample_mask_in;
+ break;
}
}
+static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
+ LLVMValueRef *dst)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
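+   /* A lane whose execution-mask element is not all-ones is a helper invocation. */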
+ *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
+}
+
static void bgnloop(struct lp_build_nir_context *bld_base)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
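+   /* Vertices emitted to a stream the shader does not output are ignored. */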
+ if (stream_id >= bld->gs_vertex_streams)
+ return;
assert(bld->gs_iface->emit_vertex);
LLVMValueRef total_emitted_vertices_vec =
- LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+ LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
LLVMValueRef mask = mask_vec(bld_base);
mask = clamp_mask_to_max_output_vertices(bld, mask,
total_emitted_vertices_vec);
bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
bld->outputs,
total_emitted_vertices_vec,
+ mask,
lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
- increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
+ increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
mask);
- increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
+ increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
mask);
}
static void
end_primitive_masked(struct lp_build_nir_context * bld_base,
- LLVMValueRef mask)
+ LLVMValueRef mask, uint32_t stream_id)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ if (stream_id >= bld->gs_vertex_streams)
+ return;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
LLVMValueRef emitted_vertices_vec =
- LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
+ LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
LLVMValueRef emitted_prims_vec =
- LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
+ LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
LLVMValueRef total_emitted_vertices_vec =
- LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+ LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
PIPE_FUNC_NOTEQUAL,
uint_bld->zero);
mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
- total_emitted_vertices_vec,
- emitted_vertices_vec, emitted_prims_vec, mask_vec(bld_base));
- increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
+ total_emitted_vertices_vec,
+ emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
+ increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
mask);
- clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
+ clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
mask);
}
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
- struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
assert(bld->gs_iface->end_primitive);
LLVMValueRef mask = mask_vec(bld_base);
- end_primitive_masked(bld_base, mask);
+ end_primitive_masked(bld_base, mask, stream_id);
}
static void
emit_prologue(struct lp_build_nir_soa_context *bld)
{
struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
- if (bld->indirects & nir_var_shader_in && !bld->gs_iface) {
+ if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
unsigned index, chan;
LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
}
}
+static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
+{
+ struct gallivm_state * gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+
+ LLVMValueRef exec_mask = mask_vec(bld_base);
+ struct lp_build_loop_state loop_state;
+
+ LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
+
+ LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
+ LLVMValueRef init_val = NULL;
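+   /* vote_any accumulates with OR starting from 0, vote_all with AND starting from ~0. */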
+ if (instr->intrinsic == nir_intrinsic_vote_ieq) {
+      /* for vote_ieq we unfortunately have to loop and find the first active lane's value to compare the rest against. */
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+ LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
+
+ struct lp_build_if_state ifthen;
+ lp_build_if(&ifthen, gallivm, if_cond);
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
+ loop_state.counter, "");
+ LLVMBuildStore(builder, value_ptr, res_store);
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
+ NULL, LLVMIntUGE);
+ init_val = LLVMBuildLoad(builder, res_store, "");
+ } else {
+ LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
+ }
+
+ LLVMValueRef res;
+ lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+ LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
+ loop_state.counter, "");
+ struct lp_build_if_state ifthen;
+ LLVMValueRef if_cond;
+ if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
+
+ lp_build_if(&ifthen, gallivm, if_cond);
+ res = LLVMBuildLoad(builder, res_store, "");
+
+ if (instr->intrinsic == nir_intrinsic_vote_ieq) {
+ LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
+ tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
+ res = LLVMBuildOr(builder, res, tmp, "");
+ } else if (instr->intrinsic == nir_intrinsic_vote_any)
+ res = LLVMBuildOr(builder, res, value_ptr, "");
+ else
+ res = LLVMBuildAnd(builder, res, value_ptr, "");
+ LLVMBuildStore(builder, res, res_store);
+ lp_build_endif(&ifthen);
+ lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
+ NULL, LLVMIntUGE);
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
+}
+
+static void
+emit_interp_at(struct lp_build_nir_context *bld_base,
+ unsigned num_components,
+ nir_variable *var,
+ bool centroid,
+ bool sample,
+ unsigned const_index,
+ LLVMValueRef indir_index,
+ LLVMValueRef offsets[2],
+ LLVMValueRef dst[4])
+{
+ struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+
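+   /* Evaluate each component through the fragment shader interface's interpolation callback. */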
+ for (unsigned i = 0; i < num_components; i++) {
+ dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
+ const_index + var->data.driver_location, i + var->data.location_frac,
+ centroid, sample, indir_index, offsets);
+ }
+}
+
void lp_build_nir_soa(struct gallivm_state *gallivm,
struct nir_shader *shader,
const struct lp_build_tgsi_params *params,
int64_type.width *= 2;
lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
}
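+   /* The 16- and 8-bit contexts keep the same lane count as the native 32-bit type, just with narrower elements. */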
+ {
+ struct lp_type uint16_type;
+ uint16_type = lp_uint_type(type);
+ uint16_type.width /= 2;
+ lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
+ }
+ {
+ struct lp_type int16_type;
+ int16_type = lp_int_type(type);
+ int16_type.width /= 2;
+ lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
+ }
+ {
+ struct lp_type uint8_type;
+ uint8_type = lp_uint_type(type);
+ uint8_type.width /= 4;
+ lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
+ }
+ {
+ struct lp_type int8_type;
+ int8_type = lp_int_type(type);
+ int8_type.width /= 4;
+ lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
+ }
bld.bld_base.load_var = emit_load_var;
bld.bld_base.store_var = emit_store_var;
bld.bld_base.load_reg = emit_load_reg;
bld.bld_base.store_reg = emit_store_reg;
bld.bld_base.emit_var_decl = emit_var_decl;
bld.bld_base.load_ubo = emit_load_ubo;
+ bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
+ bld.bld_base.load_global = emit_load_global;
+ bld.bld_base.store_global = emit_store_global;
+ bld.bld_base.atomic_global = emit_atomic_global;
bld.bld_base.tex = emit_tex;
bld.bld_base.tex_size = emit_tex_size;
bld.bld_base.bgnloop = bgnloop;
bld.bld_base.barrier = emit_barrier;
bld.bld_base.image_op = emit_image_op;
bld.bld_base.image_size = emit_image_size;
+ bld.bld_base.vote = emit_vote;
+ bld.bld_base.helper_invocation = emit_helper_invocation;
+ bld.bld_base.interp_at = emit_interp_at;
bld.mask = params->mask;
bld.inputs = params->inputs;
bld.image = params->image;
bld.shared_ptr = params->shared_ptr;
bld.coro = params->coro;
-
+ bld.kernel_args_ptr = params->kernel_args;
bld.indirects = 0;
if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
bld.indirects |= nir_var_shader_in;
bld.gs_iface = params->gs_iface;
+ bld.tcs_iface = params->tcs_iface;
+ bld.tes_iface = params->tes_iface;
+ bld.fs_iface = params->fs_iface;
if (bld.gs_iface) {
struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
+ bld.gs_vertex_streams = params->gs_vertex_streams;
bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
shader->info.gs.vertices_out);
- bld.emitted_prims_vec_ptr =
- lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
- bld.emitted_vertices_vec_ptr =
- lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
- bld.total_emitted_vertices_vec_ptr =
- lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
+ for (int i = 0; i < params->gs_vertex_streams; i++) {
+ bld.emitted_prims_vec_ptr[i] =
+ lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
+ bld.emitted_vertices_vec_ptr[i] =
+ lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
+ bld.total_emitted_vertices_vec_ptr[i] =
+ lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
+ }
}
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
LLVMValueRef total_emitted_vertices_vec;
LLVMValueRef emitted_prims_vec;
- end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask));
- total_emitted_vertices_vec =
- LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr, "");
- emitted_prims_vec =
- LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr, "");
-
- bld.gs_iface->gs_epilogue(bld.gs_iface,
- total_emitted_vertices_vec,
- emitted_prims_vec);
+
+ for (int i = 0; i < params->gs_vertex_streams; i++) {
+ end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
+
+ total_emitted_vertices_vec =
+ LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");
+
+ emitted_prims_vec =
+ LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
+ bld.gs_iface->gs_epilogue(bld.gs_iface,
+ total_emitted_vertices_vec,
+ emitted_prims_vec, i);
+ }
}
lp_exec_mask_fini(&bld.exec_mask);
}