gallivm/nir: lower frexp/ldexp
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_nir.c
index 734e0098f14ffe5e2cfbf2d2ccb6cda7398a30b9..d4d35a1159413640c4cc713da2722bcdec7febfc 100644 (file)
@@ -441,6 +441,18 @@ do_int_mod(struct lp_build_nir_context *bld_base,
    return LLVMBuildOr(builder, div_mask, result, "");
 }
 
+static LLVMValueRef
+do_quantize_to_f16(struct lp_build_nir_context *bld_base,
+                   LLVMValueRef src)
+{
+   /* Truncate src to a half-float vector, then extend it to the base vector type. */
+   struct gallivm_state *gallivm = bld_base->base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef f16_vec = LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length);
+   LLVMValueRef as_half = LLVMBuildFPTrunc(builder, src, f16_vec, "");
+   return LLVMBuildFPExt(builder, as_half, bld_base->base.vec_type, "");
+}
+
 static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
                                   nir_op op, unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS], LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
 {
@@ -576,7 +588,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
    case nir_op_fmax:
       result = lp_build_max(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
       break;
-   case nir_op_fne32:
+   case nir_op_fneu32:
       result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
       break;
    case nir_op_fneg:
@@ -585,6 +597,9 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
    case nir_op_fpow:
       result = lp_build_pow(&bld_base->base, src[0], src[1]);
       break;
+   case nir_op_fquantize2f16:
+      result = do_quantize_to_f16(bld_base, src[0]);
+      break;
    case nir_op_frcp:
       result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
       break;
@@ -689,6 +704,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
       result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
       break;
+   case nir_op_imod:
    case nir_op_irem:
       result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
       break;
@@ -906,7 +922,7 @@ get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
    uint32_t const_offset = 0;
    LLVMValueRef offset = NULL;
 
-   if (var->data.compact) {
+   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
       assert(instr->deref_type == nir_deref_type_array);
       const_offset = nir_src_as_uint(instr->arr.index);
       goto out;
@@ -1019,6 +1035,19 @@ static void visit_load_ubo(struct lp_build_nir_context *bld_base,
                       offset_is_uniform, idx, offset, result);
 }
 
+static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
+                                     nir_intrinsic_instr *instr,
+                                     LLVMValueRef result[4])
+{
+   /* Push constants are read through the load_ubo hook with a constant index of 0. */
+   LLVMValueRef offset_val = get_src(bld_base, instr->src[0]);
+   LLVMValueRef zero_idx = lp_build_const_int32(bld_base->base.gallivm, 0);
+   bool uniform_offset = nir_src_is_dynamically_uniform(instr->src[0]);
+   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
+                      nir_dest_bit_size(instr->dest),
+                      uniform_offset, zero_idx, offset_val, result);
+}
+
 
 static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr,
@@ -1077,6 +1106,10 @@ static void visit_load_image(struct lp_build_nir_context *bld_base,
    LLVMValueRef coords[5];
    struct lp_img_params params;
    const struct glsl_type *type = glsl_without_array(var->type);
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
 
    memset(&params, 0, sizeof(params));
    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
@@ -1090,7 +1123,8 @@ static void visit_load_image(struct lp_build_nir_context *bld_base,
    params.img_op = LP_IMG_LOAD;
    if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
       params.ms_index = get_src(bld_base, instr->src[2]);
-   params.image_index = var->data.binding;
+   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+   params.image_index_offset = indir_index;
    bld_base->image_op(bld_base, &params);
 }
 
@@ -1106,6 +1140,10 @@ static void visit_store_image(struct lp_build_nir_context *bld_base,
    LLVMValueRef coords[5];
    struct lp_img_params params;
    const struct glsl_type *type = glsl_without_array(var->type);
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
 
    memset(&params, 0, sizeof(params));
    params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
@@ -1122,7 +1160,8 @@ static void visit_store_image(struct lp_build_nir_context *bld_base,
    if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
       params.ms_index = get_src(bld_base, instr->src[2]);
    params.img_op = LP_IMG_STORE;
-   params.image_index = var->data.binding;
+   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+   params.image_index_offset = indir_index;
 
    if (params.target == PIPE_TEXTURE_1D_ARRAY)
       coords[2] = coords[1];
@@ -1142,6 +1181,10 @@ static void visit_atomic_image(struct lp_build_nir_context *bld_base,
    LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
    LLVMValueRef coords[5];
    const struct glsl_type *type = glsl_without_array(var->type);
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
 
    memset(&params, 0, sizeof(params));
 
@@ -1194,7 +1237,8 @@ static void visit_atomic_image(struct lp_build_nir_context *bld_base,
 
    params.outdata = result;
    params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
-   params.image_index = var->data.binding;
+   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
+   params.image_index_offset = indir_index;
 
    bld_base->image_op(bld_base, &params);
 }
@@ -1207,8 +1251,14 @@ static void visit_image_size(struct lp_build_nir_context *bld_base,
    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
    nir_variable *var = nir_deref_instr_get_variable(deref);
    struct lp_sampler_size_query_params params = { 0 };
-   params.texture_unit = var->data.binding;
-   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(var->type), glsl_sampler_type_is_array(var->type));
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   const struct glsl_type *type = glsl_without_array(var->type);
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
+   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
+   params.texture_unit_offset = indir_index;
+   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
    params.sizes_out = result;
 
    bld_base->image_size(bld_base, &params);
@@ -1221,8 +1271,15 @@ static void visit_image_samples(struct lp_build_nir_context *bld_base,
    nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
    nir_variable *var = nir_deref_instr_get_variable(deref);
    struct lp_sampler_size_query_params params = { 0 };
-   params.texture_unit = var->data.binding;
-   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(var->type), glsl_sampler_type_is_array(var->type));
+   unsigned const_index;
+   LLVMValueRef indir_index;
+   const struct glsl_type *type = glsl_without_array(var->type);
+   get_deref_offset(bld_base, deref, false, NULL, NULL,
+                    &const_index, &indir_index);
+
+   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
+   params.texture_unit_offset = indir_index;
+   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
    params.sizes_out = result;
    params.samples_only = true;
 
@@ -1368,6 +1425,9 @@ static void visit_intrinsic(struct lp_build_nir_context *bld_base,
    case nir_intrinsic_load_ubo:
       visit_load_ubo(bld_base, instr, result);
       break;
+   case nir_intrinsic_load_push_constant:
+      visit_load_push_constant(bld_base, instr, result);
+      break;
    case nir_intrinsic_load_ssbo:
       visit_load_ssbo(bld_base, instr, result);
       break;
@@ -1730,11 +1790,17 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
          coords[4] = lp_build_mul(&bld_base->base, coords[4], projector);
    }
 
-   uint32_t base_index = 0;
-   if (!texture_deref_instr) {
+   uint32_t samp_base_index = 0, tex_base_index = 0;
+   if (!sampler_deref_instr) {
       int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
       if (samp_src_index == -1) {
-         base_index = instr->sampler_index;
+         samp_base_index = instr->sampler_index;
+      }
+   }
+   if (!texture_deref_instr) {
+      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
+      if (tex_src_index == -1) {
+         tex_base_index = instr->texture_index;
       }
    }
 
@@ -1753,9 +1819,9 @@ static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *inst
    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
    params.sample_key = sample_key;
    params.offsets = offsets;
-   params.texture_index = base_index;
+   params.texture_index = tex_base_index;
    params.texture_index_offset = texture_unit_offset;
-   params.sampler_index = base_index;
+   params.sampler_index = samp_base_index;
    params.coords = coords;
    params.texel = texel;
    params.lod = explicit_lod;
@@ -1935,7 +2001,7 @@ bool lp_build_nir_llvm(
    nir_remove_dead_derefs(nir);
    nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
 
-   nir_foreach_variable(variable, &nir->outputs)
+   nir_foreach_shader_out_variable(variable, nir)
       handle_shader_output_decl(bld_base, nir, variable);
 
    bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
@@ -1965,6 +2031,13 @@ bool lp_build_nir_llvm(
 void lp_build_opt_nir(struct nir_shader *nir)
 {
    bool progress;
+
+   static const struct nir_lower_tex_options lower_tex_options = {
+      .lower_tg4_offsets = true,
+   };
+   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
+   NIR_PASS_V(nir, nir_lower_frexp);
+
    do {
       progress = false;
       NIR_PASS_V(nir, nir_opt_constant_folding);