assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+#if UTIL_ARCH_LITTLE_ENDIAN
shuffles[i] = lp_build_const_int32(gallivm, i / 2);
shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+#else
+ shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+ shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
+#endif
}
res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
static void
-emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
-                      LLVMValueRef chan_ptr,
-                      LLVMValueRef chan_ptr2,
-                      LLVMValueRef value)
+emit_store_64bit_split(struct lp_build_nir_context *bld_base,
+                       LLVMValueRef value,
+                       LLVMValueRef split_values[2])
{
+   /* Split a 64-bit-per-component SoA value into two 32-bit SoA vectors:
+    * split_values[0] gets one 32-bit word of each component and
+    * split_values[1] the other, with the low/high word order selected by
+    * host endianness below. */
-   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
-   struct lp_build_context *float_bld = &bld_base->base;
   unsigned i;
-   LLVMValueRef temp, temp2;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
+   /* Reinterpret the value as 2 * length 32-bit floats, then shuffle the
+    * even words into one vector and the odd words into the other. */
   value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
   for (i = 0; i < bld_base->base.type.length; i++) {
+#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+#else
+      shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+      shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
+#endif
   }
-   temp = LLVMBuildShuffleVector(builder, value,
+   split_values[0] = LLVMBuildShuffleVector(builder, value,
                                 LLVMGetUndef(LLVMTypeOf(value)),
                                 LLVMConstVector(shuffles,
                                                 bld_base->base.type.length),
                                 "");
-   temp2 = LLVMBuildShuffleVector(builder, value,
+   split_values[1] = LLVMBuildShuffleVector(builder, value,
                                  LLVMGetUndef(LLVMTypeOf(value)),
                                  LLVMConstVector(shuffles2,
                                                  bld_base->base.type.length),
                                  "");
+}
+
+/* Store a 64-bit-per-component value to two 32-bit output channels:
+ * the value is split with emit_store_64bit_split() and each 32-bit half
+ * is written with an exec-masked store to chan_ptr / chan_ptr2. */
+static void
+emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
+                      LLVMValueRef chan_ptr,
+                      LLVMValueRef chan_ptr2,
+                      LLVMValueRef value)
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   struct lp_build_context *float_bld = &bld_base->base;
+   LLVMValueRef split_vals[2];
+
+   emit_store_64bit_split(bld_base, value, split_vals);
-   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
-   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
+   lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
+   lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
}
static LLVMValueRef
unsigned bit_size,
nir_variable *var,
unsigned vertex_index,
+ LLVMValueRef indir_vertex_index,
unsigned const_index,
LLVMValueRef indir_index,
- LLVMValueRef result[4])
+ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
int dmul = bit_size == 64 ? 2 : 1;
switch (deref_mode) {
- case nir_var_shader_in: {
+ case nir_var_shader_in:
for (unsigned i = 0; i < num_components; i++) {
int idx = (i * dmul) + var->data.location_frac;
if (bld->gs_iface) {
false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
}
+ } else if (bld->tes_iface) {
+ LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+ LLVMValueRef attrib_index_val;
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+ LLVMValueRef result2;
+
+ if (indir_index)
+ attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+ else
+ attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+ if (var->data.patch) {
+ result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val);
+ if (bit_size == 64) {
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+ result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val);
+ result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+ }
+ }
+ else {
+ result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
+ indir_vertex_index ? true : false,
+ indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val);
+ if (bit_size == 64) {
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+ result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
+ indir_vertex_index ? true : false,
+ indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val);
+ result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+ }
+ }
+ } else if (bld->tcs_iface) {
+ LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+ LLVMValueRef attrib_index_val;
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+
+ if (indir_index)
+ attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+ else
+ attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+ result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
+ indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val);
+ if (bit_size == 64) {
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+ LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
+ indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val);
+ result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+ }
} else {
if (indir_index) {
LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
}
}
}
- }
+ break;
+ case nir_var_shader_out:
+ if (bld->fs_iface && bld->fs_iface->fb_fetch) {
+ bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result);
+ return;
+ }
+ for (unsigned i = 0; i < num_components; i++) {
+ int idx = (i * dmul) + var->data.location_frac;
+ if (bld->tcs_iface) {
+ LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
+ LLVMValueRef attrib_index_val;
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
+
+ if (indir_index)
+ attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
+ else
+ attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location);
+
+ result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
+ indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
+ if (bit_size == 64) {
+ LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
+ LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
+ indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
+ indir_index ? true : false, attrib_index_val, swizzle_index_val, 0);
+ result[i] = emit_fetch_64bit(bld_base, result[i], result2);
+ }
+ }
+ }
+ break;
default:
break;
}
}
}
+/* Store a single channel (chan) of a TCS output through the TCS
+ * interface's emit_store_output() callback, under the current exec mask.
+ * 64-bit components occupy two 32-bit output slots: the swizzle is
+ * doubled (wrapping into the next attribute location past slot 3) and
+ * the value is split into two 32-bit halves written with two separate
+ * emit_store_output() calls. */
+static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
+                                unsigned bit_size,
+                                unsigned location,
+                                unsigned const_index,
+                                LLVMValueRef indir_vertex_index,
+                                LLVMValueRef indir_index,
+                                unsigned comp,
+                                unsigned chan,
+                                LLVMValueRef chan_val)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   unsigned swizzle = chan;
+   if (bit_size == 64) {
+      /* Two 32-bit slots per 64-bit component; spill into the next
+       * location when we run past the 4 slots of this one. */
+      swizzle *= 2;
+      swizzle += comp;
+      if (swizzle >= 4) {
+         swizzle -= 4;
+         location++;
+      }
+   } else
+      swizzle += comp;
+   LLVMValueRef attrib_index_val;
+   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
+
+   /* Attribute index is either a per-lane indirect value or a constant. */
+   if (indir_index)
+      attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
+   else
+      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
+   if (bit_size == 64) {
+      LLVMValueRef split_vals[2];
+      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
+      emit_store_64bit_split(bld_base, chan_val, split_vals);
+      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+                                        indir_vertex_index ? true : false,
+                                        indir_vertex_index,
+                                        indir_index ? true : false,
+                                        attrib_index_val, swizzle_index_val,
+                                        split_vals[0], mask_vec(bld_base));
+      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+                                        indir_vertex_index ? true : false,
+                                        indir_vertex_index,
+                                        indir_index ? true : false,
+                                        attrib_index_val, swizzle_index_val2,
+                                        split_vals[1], mask_vec(bld_base));
+   } else {
+      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
+      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
+                                        indir_vertex_index ? true : false,
+                                        indir_vertex_index,
+                                        indir_index ? true : false,
+                                        attrib_index_val, swizzle_index_val,
+                                        chan_val, mask_vec(bld_base));
+   }
+}
+
static void emit_store_var(struct lp_build_nir_context *bld_base,
nir_variable_mode deref_mode,
- unsigned bit_size,
unsigned num_components,
+ unsigned bit_size,
+ nir_variable *var,
unsigned writemask,
+ LLVMValueRef indir_vertex_index,
unsigned const_index,
- nir_variable *var, LLVMValueRef dst)
+ LLVMValueRef indir_index,
+ LLVMValueRef dst)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
switch (deref_mode) {
case nir_var_shader_out: {
- unsigned location = var->data.driver_location + const_index;
+ unsigned location = var->data.driver_location;
unsigned comp = var->data.location_frac;
if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
if (var->data.location == FRAG_RESULT_STENCIL)
else if (var->data.location == FRAG_RESULT_DEPTH)
comp = 2;
}
+
for (unsigned chan = 0; chan < num_components; chan++) {
if (writemask & (1u << chan)) {
LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
- emit_store_chan(bld_base, deref_mode, bit_size, location, comp, chan, chan_val);
+ if (bld->tcs_iface) {
+ emit_store_tcs_chan(bld_base, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
+ } else
+ emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
}
}
break;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
int nc = reg->reg->num_components;
- LLVMValueRef vals[4];
+ LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
struct lp_build_context *uint_bld = &bld_base->uint_bld;
if (reg->reg->num_array_elems) {
LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
unsigned writemask,
LLVMValueRef indir_src,
LLVMValueRef reg_storage,
- LLVMValueRef dst[4])
+ LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
unsigned offset_bit_size,
bool offset_is_uniform,
LLVMValueRef offset,
- LLVMValueRef result[4])
+ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
unsigned bit_size,
unsigned addr_bit_size,
LLVMValueRef addr,
- LLVMValueRef outval[4])
+ LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMAtomicRMWBinOp op;
- switch (nir_op) {
- case nir_intrinsic_global_atomic_add:
- op = LLVMAtomicRMWBinOpAdd;
- break;
- case nir_intrinsic_global_atomic_exchange:
- op = LLVMAtomicRMWBinOpXchg;
- break;
- case nir_intrinsic_global_atomic_and:
- op = LLVMAtomicRMWBinOpAnd;
- break;
- case nir_intrinsic_global_atomic_or:
- op = LLVMAtomicRMWBinOpOr;
- break;
- case nir_intrinsic_global_atomic_xor:
- op = LLVMAtomicRMWBinOpXor;
- break;
- case nir_intrinsic_global_atomic_umin:
- op = LLVMAtomicRMWBinOpUMin;
- break;
- case nir_intrinsic_global_atomic_umax:
- op = LLVMAtomicRMWBinOpUMax;
- break;
- case nir_intrinsic_global_atomic_imin:
- op = LLVMAtomicRMWBinOpMin;
- break;
- case nir_intrinsic_global_atomic_imax:
- op = LLVMAtomicRMWBinOpMax;
- break;
- default:
- break;
- }
LLVMValueRef atom_res = lp_build_alloca(gallivm,
uint_bld->vec_type, "");
false);
scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
} else {
+ LLVMAtomicRMWBinOp op;
+ switch (nir_op) {
+ case nir_intrinsic_global_atomic_add:
+ op = LLVMAtomicRMWBinOpAdd;
+ break;
+ case nir_intrinsic_global_atomic_exchange:
+ op = LLVMAtomicRMWBinOpXchg;
+ break;
+ case nir_intrinsic_global_atomic_and:
+ op = LLVMAtomicRMWBinOpAnd;
+ break;
+ case nir_intrinsic_global_atomic_or:
+ op = LLVMAtomicRMWBinOpOr;
+ break;
+ case nir_intrinsic_global_atomic_xor:
+ op = LLVMAtomicRMWBinOpXor;
+ break;
+ case nir_intrinsic_global_atomic_umin:
+ op = LLVMAtomicRMWBinOpUMin;
+ break;
+ case nir_intrinsic_global_atomic_umax:
+ op = LLVMAtomicRMWBinOpUMax;
+ break;
+ case nir_intrinsic_global_atomic_imin:
+ op = LLVMAtomicRMWBinOpMin;
+ break;
+ case nir_intrinsic_global_atomic_imax:
+ op = LLVMAtomicRMWBinOpMax;
+ break;
+ default:
+ unreachable("unknown atomic op");
+ }
+
scalar = LLVMBuildAtomicRMW(builder, op,
addr_ptr, value_ptr,
LLVMAtomicOrderingSequentiallyConsistent,
bool offset_is_uniform,
LLVMValueRef index,
LLVMValueRef offset,
- LLVMValueRef result[4])
+ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
unsigned bit_size,
LLVMValueRef index,
LLVMValueRef offset,
- LLVMValueRef outval[4])
+ LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
LLVMValueRef ssbo_ptr;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
- LLVMAtomicRMWBinOp op;
LLVMValueRef ssbo_limit = NULL;
if (index) {
} else
ssbo_ptr = bld->shared_ptr;
- switch (nir_op) {
- case nir_intrinsic_shared_atomic_add:
- case nir_intrinsic_ssbo_atomic_add:
- op = LLVMAtomicRMWBinOpAdd;
- break;
- case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_exchange:
- op = LLVMAtomicRMWBinOpXchg;
- break;
- case nir_intrinsic_shared_atomic_and:
- case nir_intrinsic_ssbo_atomic_and:
- op = LLVMAtomicRMWBinOpAnd;
- break;
- case nir_intrinsic_shared_atomic_or:
- case nir_intrinsic_ssbo_atomic_or:
- op = LLVMAtomicRMWBinOpOr;
- break;
- case nir_intrinsic_shared_atomic_xor:
- case nir_intrinsic_ssbo_atomic_xor:
- op = LLVMAtomicRMWBinOpXor;
- break;
- case nir_intrinsic_shared_atomic_umin:
- case nir_intrinsic_ssbo_atomic_umin:
- op = LLVMAtomicRMWBinOpUMin;
- break;
- case nir_intrinsic_shared_atomic_umax:
- case nir_intrinsic_ssbo_atomic_umax:
- op = LLVMAtomicRMWBinOpUMax;
- break;
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_shared_atomic_imin:
- op = LLVMAtomicRMWBinOpMin;
- break;
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_shared_atomic_imax:
- op = LLVMAtomicRMWBinOpMax;
- break;
- default:
- break;
- }
-
offset = lp_build_shr_imm(uint_bld, offset, 2);
LLVMValueRef atom_res = lp_build_alloca(gallivm,
uint_bld->vec_type, "");
false);
scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
} else {
+ LLVMAtomicRMWBinOp op;
+
+ switch (nir_op) {
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_ssbo_atomic_add:
+ op = LLVMAtomicRMWBinOpAdd;
+ break;
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ op = LLVMAtomicRMWBinOpXchg;
+ break;
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_ssbo_atomic_and:
+ op = LLVMAtomicRMWBinOpAnd;
+ break;
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_ssbo_atomic_or:
+ op = LLVMAtomicRMWBinOpOr;
+ break;
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_xor:
+ op = LLVMAtomicRMWBinOpXor;
+ break;
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ op = LLVMAtomicRMWBinOpUMin;
+ break;
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ op = LLVMAtomicRMWBinOpUMax;
+ break;
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_shared_atomic_imin:
+ op = LLVMAtomicRMWBinOpMin;
+ break;
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_shared_atomic_imax:
+ op = LLVMAtomicRMWBinOpMax;
+ break;
+ default:
+ unreachable("unknown atomic op");
+ }
scalar = LLVMBuildAtomicRMW(builder, op,
scalar_ptr, value_ptr,
LLVMAtomicOrderingSequentiallyConsistent,
static LLVMValueRef emit_get_buffer_size(struct lp_build_nir_context *bld_base,
                                         LLVMValueRef index)
{
+   /* Look up the size of the SSBO selected by lane 0 of the index vector
+    * and broadcast that scalar size to all lanes. */
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *bld_broad = &bld_base->uint_bld;
-   LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, bld_broad->zero, ""));
+   LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,
+                                              LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
   return lp_build_broadcast_scalar(bld_broad, size_ptr);
}
struct lp_img_params *params)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+
params->type = bld_base->base.type;
params->context_ptr = bld->context_ptr;
params->thread_data_ptr = bld->thread_data_ptr;
params->exec_mask = mask_vec(bld_base);
+
+ if (params->image_index_offset)
+ params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
+ lp_build_const_int32(gallivm, 0), "");
+
bld->image->emit_op(bld->image,
bld->bld_base.base.gallivm,
params);
struct lp_sampler_size_query_params *params)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
params->int_type = bld_base->int_bld.type;
params->context_ptr = bld->context_ptr;
+ if (params->texture_unit_offset)
+ params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
+ lp_build_const_int32(gallivm, 0), "");
bld->image->emit_size_query(bld->image,
bld->bld_base.base.gallivm,
params);
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
+ if (!bld->outputs)
+ return;
for (unsigned comp = sc; comp < slots + sc; comp++) {
unsigned this_loc = var->data.driver_location + (comp / 4);
unsigned this_chan = comp % 4;
struct lp_sampler_params *params)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
params->type = bld_base->base.type;
params->context_ptr = bld->context_ptr;
params->thread_data_ptr = bld->thread_data_ptr;
+ if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
+ /* this is horrible but this can be dynamic */
+ LLVMValueRef coords[5];
+ LLVMValueRef *orig_texel_ptr;
+ struct lp_build_context *uint_bld = &bld_base->uint_bld;
+ LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
+ LLVMGetUndef(bld_base->base.vec_type),
+ LLVMGetUndef(bld_base->base.vec_type),
+ LLVMGetUndef(bld_base->base.vec_type) };
+ LLVMValueRef texel[4], orig_offset;
+ unsigned i;
+ orig_texel_ptr = params->texel;
+
+ for (i = 0; i < 5; i++) {
+ coords[i] = params->coords[i];
+ }
+ orig_offset = params->texture_index_offset;
+
+ for (unsigned v = 0; v < uint_bld->type.length; v++) {
+ LLVMValueRef idx = lp_build_const_int32(gallivm, v);
+ LLVMValueRef new_coords[5];
+ for (i = 0; i < 5; i++) {
+ new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
+ coords[i], idx, "");
+ }
+ params->coords = new_coords;
+ params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
+ orig_offset,
+ idx, "");
+ params->type = lp_elem_type(bld_base->base.type);
+
+ params->texel = texel;
+ bld->sampler->emit_tex_sample(bld->sampler,
+ gallivm,
+ params);
+
+ for (i = 0; i < 4; i++) {
+ result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
+ }
+ }
+ for (i = 0; i < 4; i++) {
+ orig_texel_ptr[i] = result[i];
+ }
+ return;
+ }
+
+ if (params->texture_index_offset)
+ params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
+ params->texture_index_offset,
+ lp_build_const_int32(bld_base->base.gallivm, 0), "");
+
+ params->type = bld_base->base.type;
bld->sampler->emit_tex_sample(bld->sampler,
bld->bld_base.base.gallivm,
params);
params->int_type = bld_base->int_bld.type;
params->context_ptr = bld->context_ptr;
+ if (params->texture_unit_offset)
+ params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
+ params->texture_unit_offset,
+ lp_build_const_int32(bld_base->base.gallivm, 0), "");
bld->sampler->emit_size_query(bld->sampler,
bld->bld_base.base.gallivm,
params);
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr,
- LLVMValueRef result[4])
+ LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct gallivm_state *gallivm = bld_base->base.gallivm;
result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), ""));
break;
case nir_intrinsic_load_invocation_id:
- result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
+ if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
+ result[0] = bld->system_values.invocation_id;
+ else
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
break;
case nir_intrinsic_load_front_face:
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
case nir_intrinsic_load_work_dim:
result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
break;
+ case nir_intrinsic_load_tess_coord:
+ for (unsigned i = 0; i < 3; i++) {
+ result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
+ }
+ break;
+ case nir_intrinsic_load_tess_level_outer:
+ for (unsigned i = 0; i < 4; i++)
+ result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
+ break;
+ case nir_intrinsic_load_tess_level_inner:
+ for (unsigned i = 0; i < 2; i++)
+ result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
+ break;
+ case nir_intrinsic_load_patch_vertices_in:
+ result[0] = bld->system_values.vertices_in;
+ break;
+ case nir_intrinsic_load_sample_id:
+ result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
+ break;
+ case nir_intrinsic_load_sample_pos:
+ for (unsigned i = 0; i < 2; i++) {
+ LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
+ idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
+ LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
+ result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
+ }
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ result[0] = bld->system_values.sample_mask_in;
+ break;
}
}
+/* NIR load_helper_invocation: a lane counts as a helper invocation when
+ * its exec-mask lane is not all-ones (i.e. the lane is not active). */
+static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
+                                   LLVMValueRef *dst)
+{
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   struct lp_build_context *uint_bld = &bld_base->uint_bld;
+   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
+}
+
static void bgnloop(struct lp_build_nir_context *bld_base)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ if (stream_id >= bld->gs_vertex_streams)
+ return;
assert(bld->gs_iface->emit_vertex);
LLVMValueRef total_emitted_vertices_vec =
- LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+ LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
LLVMValueRef mask = mask_vec(bld_base);
mask = clamp_mask_to_max_output_vertices(bld, mask,
total_emitted_vertices_vec);
bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
bld->outputs,
total_emitted_vertices_vec,
+ mask,
lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
- increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
+ increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
mask);
- increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
+ increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
mask);
}
static void
end_primitive_masked(struct lp_build_nir_context * bld_base,
- LLVMValueRef mask)
+ LLVMValueRef mask, uint32_t stream_id)
{
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+ if (stream_id >= bld->gs_vertex_streams)
+ return;
struct lp_build_context *uint_bld = &bld_base->uint_bld;
LLVMValueRef emitted_vertices_vec =
- LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
+ LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
LLVMValueRef emitted_prims_vec =
- LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
+ LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
LLVMValueRef total_emitted_vertices_vec =
- LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
+ LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
PIPE_FUNC_NOTEQUAL,
uint_bld->zero);
mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
- total_emitted_vertices_vec,
- emitted_vertices_vec, emitted_prims_vec, mask_vec(bld_base));
- increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
+ total_emitted_vertices_vec,
+ emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
+ increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
mask);
- clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
+ clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
mask);
}
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
+   /* Close the in-progress primitive on the given GS vertex stream under
+    * the current exec mask; bld is only needed for the assert. */
-   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+   ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   assert(bld->gs_iface->end_primitive);
   LLVMValueRef mask = mask_vec(bld_base);
-   end_primitive_masked(bld_base, mask);
+   end_primitive_masked(bld_base, mask, stream_id);
}
static void
emit_prologue(struct lp_build_nir_soa_context *bld)
{
struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
- if (bld->indirects & nir_var_shader_in && !bld->gs_iface) {
+ if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
unsigned index, chan;
LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
}
}
+/* Implement the NIR vote_any / vote_all / vote_ieq intrinsics by
+ * scalarizing: loop over the SIMD lanes, skip lanes whose exec-mask lane
+ * is zero, fold each active lane's value into a single scalar in
+ * res_store, then broadcast that scalar to all lanes of result[0].
+ * For vote_ieq a first loop extracts a reference value from an active
+ * lane which the second loop compares the other lanes against. */
+static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
+{
+   struct gallivm_state * gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+
+   LLVMValueRef exec_mask = mask_vec(bld_base);
+   struct lp_build_loop_state loop_state;
+
+   /* Per-lane condition: lane participates iff its exec-mask lane != 0. */
+   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
+
+   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
+   LLVMValueRef init_val = NULL;
+   if (instr->intrinsic == nir_intrinsic_vote_ieq) {
+      /* for equal we unfortunately have to loop and find the first valid one. */
+      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
+
+      struct lp_build_if_state ifthen;
+      lp_build_if(&ifthen, gallivm, if_cond);
+      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
+                                                       loop_state.counter, "");
+      LLVMBuildStore(builder, value_ptr, res_store);
+      lp_build_endif(&ifthen);
+      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
+                             NULL, LLVMIntUGE);
+      init_val = LLVMBuildLoad(builder, res_store, "");
+      /* NOTE(review): for vote_ieq the accumulator (res_store) still holds
+       * the reference lane value itself here, not 0, when the second loop
+       * starts OR-ing in comparison results — verify this is intended. */
+   } else {
+      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
+   }
+
+   /* Accumulation loop: fold each active lane into res_store. */
+   LLVMValueRef res;
+   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
+                                                    loop_state.counter, "");
+   struct lp_build_if_state ifthen;
+   LLVMValueRef if_cond;
+   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
+
+   lp_build_if(&ifthen, gallivm, if_cond);
+   res = LLVMBuildLoad(builder, res_store, "");
+
+   if (instr->intrinsic == nir_intrinsic_vote_ieq) {
+      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
+      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
+      res = LLVMBuildOr(builder, res, tmp, "");
+   } else if (instr->intrinsic == nir_intrinsic_vote_any)
+      res = LLVMBuildOr(builder, res, value_ptr, "");
+   else
+      res = LLVMBuildAnd(builder, res, value_ptr, "");
+   LLVMBuildStore(builder, res, res_store);
+   lp_build_endif(&ifthen);
+   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
+                          NULL, LLVMIntUGE);
+   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
+}
+
+/* NIR interpolate_at_* support: delegate each component of the input
+ * variable to the fragment-shader interface's interp_fn callback with
+ * the requested centroid / sample / offset interpolation mode. */
+static void
+emit_interp_at(struct lp_build_nir_context *bld_base,
+               unsigned num_components,
+               nir_variable *var,
+               bool centroid,
+               bool sample,
+               unsigned const_index,
+               LLVMValueRef indir_index,
+               LLVMValueRef offsets[2],
+               LLVMValueRef dst[4])
+{
+   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
+
+   for (unsigned i = 0; i < num_components; i++) {
+      dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
+                                        const_index + var->data.driver_location, i + var->data.location_frac,
+                                        centroid, sample, indir_index, offsets);
+   }
+}
+
void lp_build_nir_soa(struct gallivm_state *gallivm,
struct nir_shader *shader,
const struct lp_build_tgsi_params *params,
bld.bld_base.barrier = emit_barrier;
bld.bld_base.image_op = emit_image_op;
bld.bld_base.image_size = emit_image_size;
+ bld.bld_base.vote = emit_vote;
+ bld.bld_base.helper_invocation = emit_helper_invocation;
+ bld.bld_base.interp_at = emit_interp_at;
bld.mask = params->mask;
bld.inputs = params->inputs;
bld.indirects |= nir_var_shader_in;
bld.gs_iface = params->gs_iface;
+ bld.tcs_iface = params->tcs_iface;
+ bld.tes_iface = params->tes_iface;
+ bld.fs_iface = params->fs_iface;
if (bld.gs_iface) {
struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
+ bld.gs_vertex_streams = params->gs_vertex_streams;
bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
shader->info.gs.vertices_out);
- bld.emitted_prims_vec_ptr =
- lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
- bld.emitted_vertices_vec_ptr =
- lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
- bld.total_emitted_vertices_vec_ptr =
- lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
+ for (int i = 0; i < params->gs_vertex_streams; i++) {
+ bld.emitted_prims_vec_ptr[i] =
+ lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
+ bld.emitted_vertices_vec_ptr[i] =
+ lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
+ bld.total_emitted_vertices_vec_ptr[i] =
+ lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
+ }
}
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
LLVMValueRef total_emitted_vertices_vec;
LLVMValueRef emitted_prims_vec;
- end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask));
- total_emitted_vertices_vec =
- LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr, "");
- emitted_prims_vec =
- LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr, "");
-
- bld.gs_iface->gs_epilogue(bld.gs_iface,
- total_emitted_vertices_vec,
- emitted_prims_vec);
+
+ for (int i = 0; i < params->gs_vertex_streams; i++) {
+ end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
+
+ total_emitted_vertices_vec =
+ LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");
+
+ emitted_prims_vec =
+ LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
+ bld.gs_iface->gs_epilogue(bld.gs_iface,
+ total_emitted_vertices_vec,
+ emitted_prims_vec, i);
+ }
}
lp_exec_mask_fini(&bld.exec_mask);
}