return LLVMBuildOr(builder, div_mask, result, "");
}
+static LLVMValueRef
+do_quantize_to_f16(struct lp_build_nir_context *bld_base,
+ LLVMValueRef src)
+{ /*
+ * Round-trip src through half precision: truncate it to a vector of
+ * LLVM half floats, then extend it back to the base vector type, and
+ * return the extended value. Used for nir_op_fquantize2f16.
+ *
+ * bld_base supplies the gallivm state (builder and LLVM context) and
+ * the base vector type; src is the value to quantize.
+ *
+ * NOTE(review): src is presumably a float vector with
+ * bld_base->base.type.length lanes, since the half vector is built
+ * with that lane count -- confirm against callers.
+ * NOTE(review): nothing here explicitly handles values that become
+ * denormal in half precision; confirm whether the op requires
+ * flushing them to zero.
+ */
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef result;
+ result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), ""); /* narrow to a half vector with base.type.length lanes */
+ result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, ""); /* widen back to the base vector type */
+ return result;
+}
+
static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
case nir_op_fmax:
result = lp_build_max(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
break;
- case nir_op_fne32:
+ case nir_op_fneu32:
result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
break;
case nir_op_fneg:
case nir_op_fpow:
result = lp_build_pow(&bld_base->base, src[0], src[1]);
break;
+ case nir_op_fquantize2f16:
+ result = do_quantize_to_f16(bld_base, src[0]);
+ break;
case nir_op_frcp:
result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
src[0], src[1]);
break;
+ case nir_op_imod:
case nir_op_irem:
result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
break;
uint32_t const_offset = 0;
LLVMValueRef offset = NULL;
- if (var->data.compact) {
+ if (var->data.compact && nir_src_is_const(instr->arr.index)) {
assert(instr->deref_type == nir_deref_type_array);
const_offset = nir_src_as_uint(instr->arr.index);
goto out;
offset_is_uniform, idx, offset, result);
}
+static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
+ nir_intrinsic_instr *instr,
+ LLVMValueRef result[4])
+{ /*
+ * Handle nir_intrinsic_load_push_constant by forwarding the load to
+ * the bld_base->load_ubo callback with a constant buffer index of 0.
+ *
+ * instr->src[0] supplies the offset; the component count and bit
+ * size are taken from instr->dest. result is passed straight to
+ * load_ubo, which presumably fills it with the loaded components.
+ *
+ * NOTE(review): this relies on push constants being reachable
+ * through buffer index 0 of the load_ubo path -- confirm with the
+ * driver that sets up the bindings.
+ */
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMValueRef offset = get_src(bld_base, instr->src[0]);
+ LLVMValueRef idx = lp_build_const_int32(gallivm, 0); /* buffer index is always the constant 0 */
+ bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]); /* passed through to load_ubo unchanged */
+
+ bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
+ offset_is_uniform, idx, offset, result);
+}
+
static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
nir_intrinsic_instr *instr,
case nir_intrinsic_load_ubo:
visit_load_ubo(bld_base, instr, result);
break;
+ case nir_intrinsic_load_push_constant:
+ visit_load_push_constant(bld_base, instr, result);
+ break;
case nir_intrinsic_load_ssbo:
visit_load_ssbo(bld_base, instr, result);
break;
coords[4] = lp_build_mul(&bld_base->base, coords[4], projector);
}
- uint32_t base_index = 0;
- if (!texture_deref_instr) {
+ uint32_t samp_base_index = 0, tex_base_index = 0;
+ if (!sampler_deref_instr) {
int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
if (samp_src_index == -1) {
- base_index = instr->sampler_index;
+ samp_base_index = instr->sampler_index;
+ }
+ }
+ if (!texture_deref_instr) {
+ int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
+ if (tex_src_index == -1) {
+ tex_base_index = instr->texture_index;
}
}
sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
params.sample_key = sample_key;
params.offsets = offsets;
- params.texture_index = base_index;
+ params.texture_index = tex_base_index;
params.texture_index_offset = texture_unit_offset;
- params.sampler_index = base_index;
+ params.sampler_index = samp_base_index;
params.coords = coords;
params.texel = texel;
params.lod = explicit_lod;
nir_remove_dead_derefs(nir);
nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
- nir_foreach_variable(variable, &nir->outputs)
+ nir_foreach_shader_out_variable(variable, nir)
handle_shader_output_decl(bld_base, nir, variable);
bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
void lp_build_opt_nir(struct nir_shader *nir)
{
bool progress;
+
+ static const struct nir_lower_tex_options lower_tex_options = {
+ .lower_tg4_offsets = true,
+ };
+ NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
+ NIR_PASS_V(nir, nir_lower_frexp);
+
do {
progress = false;
NIR_PASS_V(nir, nir_opt_constant_folding);