LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
int len = bld_base->base.type.length * 2;
for (unsigned i = 0; i < bld_base->base.type.length; i++) {
+#if UTIL_ARCH_LITTLE_ENDIAN
shuffles[i] = lp_build_const_int32(gallivm, i * 2);
shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+#else
+ shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
+ shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
+#endif
}
src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
+#if UTIL_ARCH_LITTLE_ENDIAN
shuffles[i] = lp_build_const_int32(gallivm, i / 2);
shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+#else
+ shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
+ shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
+#endif
}
return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}
return LLVMBuildOr(builder, div_mask, result, "");
}
+/*
+ * Implement nir_op_fquantize2f16: round a float vector down to half
+ * precision and widen it back to the original float vector type, so the
+ * result carries only the precision/range representable in f16.
+ *
+ * NOTE(review): SPIR-V's OpQuantizeToF16 permits flushing f16 denormals
+ * to zero; this trunc/ext sequence preserves them — confirm that is
+ * acceptable for this backend.
+ */
+static LLVMValueRef
+do_quantize_to_f16(struct lp_build_nir_context *bld_base,
+ LLVMValueRef src)
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef result;
+ /* Truncate to a <length x half> vector... */
+ result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
+ /* ...then extend back to the full-width float vector type. */
+ result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");
+ return result;
+}
+
static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
case nir_op_fmax:
result = lp_build_max(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
break;
- case nir_op_fne32:
+ case nir_op_fneu32:
result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
break;
case nir_op_fneg:
case nir_op_fpow:
result = lp_build_pow(&bld_base->base, src[0], src[1]);
break;
+ case nir_op_fquantize2f16:
+ result = do_quantize_to_f16(bld_base, src[0]);
+ break;
case nir_op_frcp:
result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
src[0], src[1]);
break;
+ case nir_op_imod:
case nir_op_irem:
result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
break;
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
for (unsigned i = 0; i < instr->def.num_components; i++)
- result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->value[i].u64);
+ result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
assign_ssa_dest(bld_base, &instr->def, result);
}
uint32_t const_offset = 0;
LLVMValueRef offset = NULL;
- if (var->data.compact) {
+ if (var->data.compact && nir_src_is_const(instr->arr.index)) {
assert(instr->deref_type == nir_deref_type_array);
const_offset = nir_src_as_uint(instr->arr.index);
goto out;
offset_is_uniform, idx, offset, result);
}
+/*
+ * Handle nir_intrinsic_load_push_constant by forwarding it to the UBO
+ * load path with a hard-coded constant-buffer index of 0.
+ *
+ * NOTE(review): assumes the driver maps the push-constant block to
+ * constant buffer 0 — verify against the state tracker's binding.
+ */
+static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
+ nir_intrinsic_instr *instr,
+ LLVMValueRef result[4])
+{
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ /* Byte offset into the push-constant block, taken from src[0]. */
+ LLVMValueRef offset = get_src(bld_base, instr->src[0]);
+ LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
+ /* A dynamically-uniform offset lets load_ubo emit a scalar fetch. */
+ bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]);
+
+ bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
+ offset_is_uniform, idx, offset, result);
+}
+
static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr,
LLVMValueRef coords[5];
struct lp_img_params params;
const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned const_index;
+ LLVMValueRef indir_index;
+ get_deref_offset(bld_base, deref, false, NULL, NULL,
+ &const_index, &indir_index);
memset(¶ms, 0, sizeof(params));
params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
params.img_op = LP_IMG_LOAD;
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
params.ms_index = get_src(bld_base, instr->src[2]);
- params.image_index = var->data.binding;
+ params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+ params.image_index_offset = indir_index;
bld_base->image_op(bld_base, ¶ms);
}
LLVMValueRef coords[5];
struct lp_img_params params;
const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned const_index;
+ LLVMValueRef indir_index;
+ get_deref_offset(bld_base, deref, false, NULL, NULL,
+ &const_index, &indir_index);
memset(¶ms, 0, sizeof(params));
params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
params.ms_index = get_src(bld_base, instr->src[2]);
params.img_op = LP_IMG_STORE;
- params.image_index = var->data.binding;
+ params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+ params.image_index_offset = indir_index;
if (params.target == PIPE_TEXTURE_1D_ARRAY)
coords[2] = coords[1];
LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
LLVMValueRef coords[5];
const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned const_index;
+ LLVMValueRef indir_index;
+ get_deref_offset(bld_base, deref, false, NULL, NULL,
+ &const_index, &indir_index);
memset(¶ms, 0, sizeof(params));
params.outdata = result;
params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
- params.image_index = var->data.binding;
+ params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+ params.image_index_offset = indir_index;
bld_base->image_op(bld_base, ¶ms);
}
nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
nir_variable *var = nir_deref_instr_get_variable(deref);
struct lp_sampler_size_query_params params = { 0 };
- params.texture_unit = var->data.binding;
- params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(var->type), glsl_sampler_type_is_array(var->type));
+ unsigned const_index;
+ LLVMValueRef indir_index;
+ const struct glsl_type *type = glsl_without_array(var->type);
+ get_deref_offset(bld_base, deref, false, NULL, NULL,
+ &const_index, &indir_index);
+ params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
+ params.texture_unit_offset = indir_index;
+ params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
params.sizes_out = result;
bld_base->image_size(bld_base, ¶ms);
nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
nir_variable *var = nir_deref_instr_get_variable(deref);
struct lp_sampler_size_query_params params = { 0 };
- params.texture_unit = var->data.binding;
- params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(var->type), glsl_sampler_type_is_array(var->type));
+ unsigned const_index;
+ LLVMValueRef indir_index;
+ const struct glsl_type *type = glsl_without_array(var->type);
+ get_deref_offset(bld_base, deref, false, NULL, NULL,
+ &const_index, &indir_index);
+
+ params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
+ params.texture_unit_offset = indir_index;
+ params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
params.sizes_out = result;
params.samples_only = true;
case nir_intrinsic_load_ubo:
visit_load_ubo(bld_base, instr, result);
break;
+ case nir_intrinsic_load_push_constant:
+ visit_load_push_constant(bld_base, instr, result);
+ break;
case nir_intrinsic_load_ssbo:
visit_load_ssbo(bld_base, instr, result);
break;
case nir_intrinsic_control_barrier:
visit_barrier(bld_base);
break;
+ case nir_intrinsic_group_memory_barrier:
case nir_intrinsic_memory_barrier:
case nir_intrinsic_memory_barrier_shared:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_global_atomic_exchange:
case nir_intrinsic_global_atomic_comp_swap:
visit_global_atomic(bld_base, instr, result);
+ break;
case nir_intrinsic_vote_all:
case nir_intrinsic_vote_any:
case nir_intrinsic_vote_ieq:
struct lp_sampler_size_query_params params = { 0 };
LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
LLVMValueRef explicit_lod = NULL;
-
+ LLVMValueRef texture_unit_offset = NULL;
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
case nir_tex_src_lod:
explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
break;
+ case nir_tex_src_texture_offset:
+ texture_unit_offset = get_src(bld_base, instr->src[i].src);
+ break;
default:
break;
}
params.is_sviewinfo = TRUE;
params.sizes_out = sizes_out;
params.samples_only = (instr->op == nir_texop_texture_samples);
+ params.texture_unit_offset = texture_unit_offset;
if (instr->op == nir_texop_query_levels)
params.explicit_lod = bld_base->uint_bld.zero;
unsigned sample_key = 0;
nir_deref_instr *texture_deref_instr = NULL;
nir_deref_instr *sampler_deref_instr = NULL;
+ LLVMValueRef texture_unit_offset = NULL;
LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
unsigned lod_src = 0;
LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
sample_key |= LP_SAMPLER_FETCH_MS;
ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
break;
+
+ case nir_tex_src_texture_offset:
+ texture_unit_offset = get_src(bld_base, instr->src[i].src);
+ break;
+ case nir_tex_src_sampler_offset:
+ break;
default:
assert(0);
break;
coords[4] = lp_build_mul(&bld_base->base, coords[4], projector);
}
- uint32_t base_index = 0;
- if (!texture_deref_instr) {
+ uint32_t samp_base_index = 0, tex_base_index = 0;
+ if (!sampler_deref_instr) {
int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
if (samp_src_index == -1) {
- base_index = instr->sampler_index;
+ samp_base_index = instr->sampler_index;
+ }
+ }
+ if (!texture_deref_instr) {
+ int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
+ if (tex_src_index == -1) {
+ tex_base_index = instr->texture_index;
}
}
sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
params.sample_key = sample_key;
params.offsets = offsets;
- params.texture_index = base_index;
- params.sampler_index = base_index;
+ params.texture_index = tex_base_index;
+ params.texture_index_offset = texture_unit_offset;
+ params.sampler_index = samp_base_index;
params.coords = coords;
params.texel = texel;
params.lod = explicit_lod;
nir_remove_dead_derefs(nir);
nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
- nir_foreach_variable(variable, &nir->outputs)
+ nir_foreach_shader_out_variable(variable, nir)
handle_shader_output_decl(bld_base, nir, variable);
bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
void lp_build_opt_nir(struct nir_shader *nir)
{
bool progress;
+
+ static const struct nir_lower_tex_options lower_tex_options = {
+ .lower_tg4_offsets = true,
+ };
+ NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
+ NIR_PASS_V(nir, nir_lower_frexp);
+
do {
progress = false;
NIR_PASS_V(nir, nir_opt_constant_folding);