X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fgallivm%2Flp_bld_nir_soa.c;h=b83acd0b58fda00a4ca07fef64ed5c917194307d;hb=b31e8460a6bef37063bb9dfb55e4df3298cd533f;hp=7ac9697011694834bfbe09745d42c061b13cc43c;hpb=44a6b0107b37ad9644d3435cf6d2d29b6779654f;p=mesa.git diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index 7ac96970116..b83acd0b58f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -69,8 +69,13 @@ emit_fetch_64bit( assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32))); for (i = 0; i < bld_base->base.type.length * 2; i+=2) { +#if UTIL_ARCH_LITTLE_ENDIAN shuffles[i] = lp_build_const_int32(gallivm, i / 2); shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); +#else + shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); + shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2); +#endif } res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), ""); @@ -78,40 +83,54 @@ emit_fetch_64bit( } static void -emit_store_64bit_chan(struct lp_build_nir_context *bld_base, - LLVMValueRef chan_ptr, - LLVMValueRef chan_ptr2, - LLVMValueRef value) +emit_store_64bit_split(struct lp_build_nir_context *bld_base, + LLVMValueRef value, + LLVMValueRef split_values[2]) { - struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; - struct lp_build_context *float_bld = &bld_base->base; unsigned i; - LLVMValueRef temp, temp2; LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32]; LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32]; int len = bld_base->base.type.length * 2; value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), ""); for (i = 0; i < bld_base->base.type.length; i++) { +#if UTIL_ARCH_LITTLE_ENDIAN shuffles[i] = lp_build_const_int32(gallivm, i * 2); shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1); +#else + shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1); + shuffles2[i] = lp_build_const_int32(gallivm, i * 2); +#endif } - temp = LLVMBuildShuffleVector(builder, value, + split_values[0] = LLVMBuildShuffleVector(builder, value, LLVMGetUndef(LLVMTypeOf(value)), LLVMConstVector(shuffles, bld_base->base.type.length), ""); - temp2 = LLVMBuildShuffleVector(builder, value, + split_values[1] = LLVMBuildShuffleVector(builder, value, LLVMGetUndef(LLVMTypeOf(value)), LLVMConstVector(shuffles2, bld_base->base.type.length), ""); +} - lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr); - lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2); +static void +emit_store_64bit_chan(struct lp_build_nir_context *bld_base, + LLVMValueRef chan_ptr, + LLVMValueRef chan_ptr2, + LLVMValueRef value) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct lp_build_context *float_bld = &bld_base->base; + LLVMValueRef split_vals[2]; + + emit_store_64bit_split(bld_base, value, split_vals); + + lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr); + lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2); } static LLVMValueRef @@ -280,22 +299,35 @@ static void emit_load_var(struct lp_build_nir_context *bld_base, unsigned bit_size, nir_variable *var, unsigned vertex_index, + LLVMValueRef indir_vertex_index, unsigned const_index, LLVMValueRef indir_index, - LLVMValueRef result[4]) + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; int dmul = bit_size == 64 ? 2 : 1; + unsigned location = var->data.driver_location; + unsigned location_frac = var->data.location_frac; + + if (!var->data.compact && !indir_index) + location += const_index; + else if (var->data.compact) { + location += const_index / 4; + location_frac += const_index % 4; + const_index = 0; + } switch (deref_mode) { - case nir_var_shader_in: { + case nir_var_shader_in: for (unsigned i = 0; i < num_components; i++) { - int idx = (i * dmul) + var->data.location_frac; + int idx = (i * dmul) + location_frac; + if (bld->gs_iface) { LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index); - LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, const_index + var->data.driver_location); + LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, location); LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx); LLVMValueRef result2; + result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base, false, vertex_index_val, 0, attrib_index_val, swizzle_index_val); if (bit_size == 64) { @@ -304,6 +336,71 @@ static void emit_load_var(struct lp_build_nir_context *bld_base, false, vertex_index_val, 0, attrib_index_val, swizzle_index_val); result[i] = emit_fetch_64bit(bld_base, result[i], result2); } + } else if (bld->tes_iface) { + LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index); + LLVMValueRef attrib_index_val; + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx); + LLVMValueRef result2; + + if (indir_index) { + if (var->data.compact) { + swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx)); + attrib_index_val = lp_build_const_int32(gallivm, location); + } else + attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location)); + } else + attrib_index_val = lp_build_const_int32(gallivm, location); + + if (var->data.patch) { + result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base, + indir_index ? true : false, attrib_index_val, swizzle_index_val); + if (bit_size == 64) { + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1); + result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base, + indir_index ? true : false, attrib_index_val, swizzle_index_val); + result[i] = emit_fetch_64bit(bld_base, result[i], result2); + } + } + else { + result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base, + indir_vertex_index ? true : false, + indir_vertex_index ? indir_vertex_index : vertex_index_val, + (indir_index && !var->data.compact) ? true : false, attrib_index_val, + (indir_index && var->data.compact) ? true : false, swizzle_index_val); + if (bit_size == 64) { + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1); + result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base, + indir_vertex_index ? true : false, + indir_vertex_index ? indir_vertex_index : vertex_index_val, + indir_index ? true : false, attrib_index_val, false, swizzle_index_val); + result[i] = emit_fetch_64bit(bld_base, result[i], result2); + } + } + } else if (bld->tcs_iface) { + LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index); + LLVMValueRef attrib_index_val; + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx); + + if (indir_index) { + if (var->data.compact) { + swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx)); + attrib_index_val = lp_build_const_int32(gallivm, location); + } else + attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location)); + } else + attrib_index_val = lp_build_const_int32(gallivm, location); + result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base, + indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val, + (indir_index && !var->data.compact) ? true : false, attrib_index_val, + (indir_index && var->data.compact) ? true : false, swizzle_index_val); + if (bit_size == 64) { + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1); + LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base, + indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val, + indir_index ? true : false, attrib_index_val, + false, swizzle_index_val); + result[i] = emit_fetch_64bit(bld_base, result[i], result2); + } } else { if (indir_index) { LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location)); @@ -325,12 +422,12 @@ static void emit_load_var(struct lp_build_nir_context *bld_base, } else { if (bld->indirects & nir_var_shader_in) { LLVMValueRef lindex = lp_build_const_int32(gallivm, - var->data.driver_location * 4 + idx); + location * 4 + idx); LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder, bld->inputs_array, lindex); if (bit_size == 64) { LLVMValueRef lindex2 = lp_build_const_int32(gallivm, - var->data.driver_location * 4 + (idx + 1)); + location * 4 + (idx + 1)); LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder, bld->inputs_array, lindex2); result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2); @@ -340,17 +437,49 @@ static void emit_load_var(struct lp_build_nir_context *bld_base, } else { if (bit_size == 64) { LLVMValueRef tmp[2]; - tmp[0] = bld->inputs[var->data.driver_location + const_index][idx]; - tmp[1] = bld->inputs[var->data.driver_location + const_index][idx + 1]; + tmp[0] = bld->inputs[location][idx]; + tmp[1] = bld->inputs[location][idx + 1]; result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]); } else { - result[i] = bld->inputs[var->data.driver_location + const_index][idx]; + result[i] = bld->inputs[location][idx]; } } } } } - } + break; + case nir_var_shader_out: + if (bld->fs_iface && bld->fs_iface->fb_fetch) { + bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result); + return; + } + for (unsigned i = 0; i < num_components; i++) { + int idx = (i * dmul) + location_frac; + if (bld->tcs_iface) { + LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index); + LLVMValueRef attrib_index_val; + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx); + + if (indir_index) + attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location)); + else + attrib_index_val = lp_build_const_int32(gallivm, location); + + result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base, + indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val, + (indir_index && !var->data.compact) ? true : false, attrib_index_val, + (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0); + if (bit_size == 64) { + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1); + LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base, + indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val, + indir_index ? true : false, attrib_index_val, + false, swizzle_index_val, 0); + result[i] = emit_fetch_64bit(bld_base, result[i], result2); + } + } + } + break; default: break; } @@ -383,19 +512,88 @@ static void emit_store_chan(struct lp_build_nir_context *bld_base, } } +static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base, + bool is_compact, + unsigned bit_size, + unsigned location, + unsigned const_index, + LLVMValueRef indir_vertex_index, + LLVMValueRef indir_index, + unsigned comp, + unsigned chan, + LLVMValueRef chan_val) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + unsigned swizzle = chan; + if (bit_size == 64) { + swizzle *= 2; + swizzle += comp; + if (swizzle >= 4) { + swizzle -= 4; + location++; + } + } else + swizzle += comp; + LLVMValueRef attrib_index_val; + LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle); + + if (indir_index) { + if (is_compact) { + swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle)); + attrib_index_val = lp_build_const_int32(gallivm, const_index + location); + } else + attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location)); + } else + attrib_index_val = lp_build_const_int32(gallivm, const_index + location); + if (bit_size == 64) { + LLVMValueRef split_vals[2]; + LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1); + emit_store_64bit_split(bld_base, chan_val, split_vals); + bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index ? true : false, + attrib_index_val, + false, swizzle_index_val, + split_vals[0], mask_vec(bld_base)); + bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index ? true : false, + attrib_index_val, + false, swizzle_index_val2, + split_vals[1], mask_vec(bld_base)); + } else { + chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, ""); + bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0, + indir_vertex_index ? true : false, + indir_vertex_index, + indir_index && !is_compact ? true : false, + attrib_index_val, + indir_index && is_compact ? true : false, + swizzle_index_val, + chan_val, mask_vec(bld_base)); + } +} + static void emit_store_var(struct lp_build_nir_context *bld_base, nir_variable_mode deref_mode, - unsigned bit_size, unsigned num_components, + unsigned bit_size, + nir_variable *var, unsigned writemask, + LLVMValueRef indir_vertex_index, unsigned const_index, - nir_variable *var, LLVMValueRef dst) + LLVMValueRef indir_index, + LLVMValueRef dst) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; switch (deref_mode) { case nir_var_shader_out: { - unsigned location = var->data.driver_location + const_index; + unsigned location = var->data.driver_location; unsigned comp = var->data.location_frac; if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) { if (var->data.location == FRAG_RESULT_STENCIL) @@ -403,10 +601,20 @@ static void emit_store_var(struct lp_build_nir_context *bld_base, else if (var->data.location == FRAG_RESULT_DEPTH) comp = 2; } + + if (var->data.compact) { + location += const_index / 4; + comp += const_index % 4; + const_index = 0; + } + for (unsigned chan = 0; chan < num_components; chan++) { if (writemask & (1u << chan)) { LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, ""); - emit_store_chan(bld_base, deref_mode, bit_size, location, comp, chan, chan_val); + if (bld->tcs_iface) { + emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val); + } else + emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val); } } break; @@ -425,7 +633,7 @@ static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base, struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMBuilderRef builder = gallivm->builder; int nc = reg->reg->num_components; - LLVMValueRef vals[4]; + LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL }; struct lp_build_context *uint_bld = &bld_base->uint_bld; if (reg->reg->num_array_elems) { LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset); @@ -455,7 +663,7 @@ static void emit_store_reg(struct lp_build_nir_context *bld_base, unsigned writemask, LLVMValueRef indir_src, LLVMValueRef reg_storage, - LLVMValueRef dst[4]) + LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS]) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -488,13 +696,248 @@ static void emit_store_reg(struct lp_build_nir_context *bld_base, } } +static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + unsigned offset_bit_size, + bool offset_is_uniform, + LLVMValueRef offset, + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size); + LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr; + unsigned size_shift = 0; + struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size); + if (bit_size == 16) + size_shift = 1; + else if (bit_size == 32) + size_shift = 2; + else if (bit_size == 64) + size_shift = 3; + if (size_shift) + offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift)); + + LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0); + kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, ""); + + if (offset_is_uniform) { + offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), ""); + + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), ""); + + LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset); + result[c] = lp_build_broadcast_scalar(bld_broad, scalar); + } + } +} + +static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size) +{ + LLVMBuilderRef builder = gallivm->builder; + switch (bit_size) { + case 8: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), ""); + break; + case 16: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), ""); + break; + case 32: + default: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + break; + case 64: + addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), ""); + break; + } + return addr_ptr; +} + +static void emit_load_global(struct lp_build_nir_context *bld_base, + unsigned nc, + unsigned bit_size, + unsigned addr_bit_size, + LLVMValueRef addr, + LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + struct lp_build_context *res_bld; + + res_bld = get_int_bld(bld_base, true, bit_size); + + for (unsigned c = 0; c < nc; c++) { + LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, ""); + + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, + loop_state.counter, ""); + addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size); + + LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c)); + + LLVMValueRef temp_res; + temp_res = LLVMBuildLoad(builder, result, ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, ""); + LLVMBuildStore(builder, temp_res, result); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + outval[c] = LLVMBuildLoad(builder, result, ""); + } +} + +static void emit_store_global(struct lp_build_nir_context *bld_base, + unsigned writemask, + unsigned nc, unsigned bit_size, + unsigned addr_bit_size, + LLVMValueRef addr, + LLVMValueRef dst) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + + for (unsigned c = 0; c < nc; c++) { + if (!(writemask & (1u << c))) + continue; + LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, ""); + + LLVMValueRef exec_mask = mask_vec(bld_base); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, + loop_state.counter, ""); + + LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, + loop_state.counter, ""); + addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size); + switch (bit_size) { + case 32: + value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), ""); + break; + case 64: + value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), ""); + break; + default: + break; + } + struct lp_build_if_state ifthen; + + LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + lp_build_if(&ifthen, gallivm, cond); + lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + } +} + +static void emit_atomic_global(struct lp_build_nir_context *bld_base, + nir_intrinsic_op nir_op, + unsigned addr_bit_size, + LLVMValueRef addr, + LLVMValueRef val, LLVMValueRef val2, + LLVMValueRef *result) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + + LLVMValueRef atom_res = lp_build_alloca(gallivm, + uint_bld->vec_type, ""); + LLVMValueRef exec_mask = mask_vec(bld_base); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val, + loop_state.counter, ""); + + LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr, + loop_state.counter, ""); + addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32); + struct lp_build_if_state ifthen; + LLVMValueRef cond, temp_res; + LLVMValueRef scalar; + cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + lp_build_if(&ifthen, gallivm, cond); + + if (nir_op == nir_intrinsic_global_atomic_comp_swap) { + LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2, + loop_state.counter, ""); + cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, ""); + scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr, + cas_src_ptr, + LLVMAtomicOrderingSequentiallyConsistent, + LLVMAtomicOrderingSequentiallyConsistent, + false); + scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, ""); + } else { + LLVMAtomicRMWBinOp op; + switch (nir_op) { + case nir_intrinsic_global_atomic_add: + op = LLVMAtomicRMWBinOpAdd; + break; + case nir_intrinsic_global_atomic_exchange: + op = LLVMAtomicRMWBinOpXchg; + break; + case nir_intrinsic_global_atomic_and: + op = LLVMAtomicRMWBinOpAnd; + break; + case nir_intrinsic_global_atomic_or: + op = LLVMAtomicRMWBinOpOr; + break; + case nir_intrinsic_global_atomic_xor: + op = LLVMAtomicRMWBinOpXor; + break; + case nir_intrinsic_global_atomic_umin: + op = LLVMAtomicRMWBinOpUMin; + break; + case nir_intrinsic_global_atomic_umax: + op = LLVMAtomicRMWBinOpUMax; + break; + case nir_intrinsic_global_atomic_imin: + op = LLVMAtomicRMWBinOpMin; + break; + case nir_intrinsic_global_atomic_imax: + op = LLVMAtomicRMWBinOpMax; + break; + default: + unreachable("unknown atomic op"); + } + + scalar = LLVMBuildAtomicRMW(builder, op, + addr_ptr, value_ptr, + LLVMAtomicOrderingSequentiallyConsistent, + false); + } + temp_res = LLVMBuildLoad(builder, atom_res, ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, ""); + LLVMBuildStore(builder, temp_res, atom_res); + lp_build_else(&ifthen); + temp_res = LLVMBuildLoad(builder, atom_res, ""); + temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, ""); + LLVMBuildStore(builder, temp_res, atom_res); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length), + NULL, LLVMIntUGE); + *result = LLVMBuildLoad(builder, atom_res, ""); +} + static void emit_load_ubo(struct lp_build_nir_context *bld_base, unsigned nc, unsigned bit_size, bool offset_is_uniform, LLVMValueRef index, LLVMValueRef offset, - LLVMValueRef result[4]) + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -527,13 +970,11 @@ static void emit_load_ubo(struct lp_build_nir_context *bld_base, LLVMValueRef overflow_mask; LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index); - num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 4), ""); num_consts = lp_build_broadcast_scalar(uint_bld, num_consts); for (unsigned c = 0; c < nc; c++) { LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c)); overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL, this_offset, num_consts); - result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL); } } @@ -545,7 +986,7 @@ static void emit_load_mem(struct lp_build_nir_context *bld_base, unsigned bit_size, LLVMValueRef index, LLVMValueRef offset, - LLVMValueRef outval[4]) + LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS]) { struct gallivm_state *gallivm = bld_base->base.gallivm; struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; @@ -688,7 +1129,6 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; LLVMValueRef ssbo_ptr; struct lp_build_context *uint_bld = &bld_base->uint_bld; - LLVMAtomicRMWBinOp op; LLVMValueRef ssbo_limit = NULL; if (index) { @@ -699,47 +1139,6 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, } else ssbo_ptr = bld->shared_ptr; - switch (nir_op) { - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_ssbo_atomic_add: - op = LLVMAtomicRMWBinOpAdd; - break; - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_ssbo_atomic_exchange: - op = LLVMAtomicRMWBinOpXchg; - break; - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_ssbo_atomic_and: - op = LLVMAtomicRMWBinOpAnd; - break; - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_ssbo_atomic_or: - op = LLVMAtomicRMWBinOpOr; - break; - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_ssbo_atomic_xor: - op = LLVMAtomicRMWBinOpXor; - break; - case nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_ssbo_atomic_umin: - op = LLVMAtomicRMWBinOpUMin; - break; - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_ssbo_atomic_umax: - op = LLVMAtomicRMWBinOpUMax; - break; - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_shared_atomic_imin: - op = LLVMAtomicRMWBinOpMin; - break; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_shared_atomic_imax: - op = LLVMAtomicRMWBinOpMax; - break; - default: - break; - } - offset = lp_build_shr_imm(uint_bld, offset, 2); LLVMValueRef atom_res = lp_build_alloca(gallivm, uint_bld->vec_type, ""); @@ -781,6 +1180,48 @@ static void emit_atomic_mem(struct lp_build_nir_context *bld_base, false); scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, ""); } else { + LLVMAtomicRMWBinOp op; + + switch (nir_op) { + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_ssbo_atomic_add: + op = LLVMAtomicRMWBinOpAdd; + break; + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_ssbo_atomic_exchange: + op = LLVMAtomicRMWBinOpXchg; + break; + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_ssbo_atomic_and: + op = LLVMAtomicRMWBinOpAnd; + break; + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_ssbo_atomic_or: + op = LLVMAtomicRMWBinOpOr; + break; + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_ssbo_atomic_xor: + op = LLVMAtomicRMWBinOpXor; + break; + case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_ssbo_atomic_umin: + op = LLVMAtomicRMWBinOpUMin; + break; + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_ssbo_atomic_umax: + op = LLVMAtomicRMWBinOpUMax; + break; + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_shared_atomic_imin: + op = LLVMAtomicRMWBinOpMin; + break; + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_shared_atomic_imax: + op = LLVMAtomicRMWBinOpMax; + break; + default: + unreachable("unknown atomic op"); + } scalar = LLVMBuildAtomicRMW(builder, op, scalar_ptr, value_ptr, LLVMAtomicOrderingSequentiallyConsistent, @@ -814,10 +1255,12 @@ static void emit_barrier(struct lp_build_nir_context *bld_base) static LLVMValueRef emit_get_buffer_size(struct lp_build_nir_context *bld_base, LLVMValueRef index) { + struct gallivm_state *gallivm = bld_base->base.gallivm; struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; struct lp_build_context *bld_broad = &bld_base->uint_bld; - LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, bld_broad->zero, "")); + LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr, + LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), "")); return lp_build_broadcast_scalar(bld_broad, size_ptr); } @@ -825,10 +1268,17 @@ static void emit_image_op(struct lp_build_nir_context *bld_base, struct lp_img_params *params) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; + params->type = bld_base->base.type; params->context_ptr = bld->context_ptr; params->thread_data_ptr = bld->thread_data_ptr; params->exec_mask = mask_vec(bld_base); + + if (params->image_index_offset) + params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset, + lp_build_const_int32(gallivm, 0), ""); + bld->image->emit_op(bld->image, bld->bld_base.base.gallivm, params); @@ -839,10 +1289,14 @@ static void emit_image_size(struct lp_build_nir_context *bld_base, struct lp_sampler_size_query_params *params) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; params->int_type = bld_base->int_bld.type; params->context_ptr = bld->context_ptr; + if (params->texture_unit_offset) + params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset, + lp_build_const_int32(gallivm, 0), ""); bld->image->emit_size_query(bld->image, bld->bld_base.base.gallivm, params); @@ -855,6 +1309,8 @@ static void init_var_slots(struct lp_build_nir_context *bld_base, struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; unsigned slots = glsl_count_attribute_slots(var->type, false) * 4; + if (!bld->outputs) + return; for (unsigned comp = sc; comp < slots + sc; comp++) { unsigned this_loc = var->data.driver_location + (comp / 4); unsigned this_chan = comp % 4; @@ -889,11 +1345,64 @@ static void emit_tex(struct lp_build_nir_context *bld_base, struct lp_sampler_params *params) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + struct gallivm_state *gallivm = bld_base->base.gallivm; params->type = bld_base->base.type; params->context_ptr = bld->context_ptr; params->thread_data_ptr = bld->thread_data_ptr; + if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) { + /* this is horrible but this can be dynamic */ + LLVMValueRef coords[5]; + LLVMValueRef *orig_texel_ptr; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type), + LLVMGetUndef(bld_base->base.vec_type), + LLVMGetUndef(bld_base->base.vec_type), + LLVMGetUndef(bld_base->base.vec_type) }; + LLVMValueRef texel[4], orig_offset; + unsigned i; + orig_texel_ptr = params->texel; + + for (i = 0; i < 5; i++) { + coords[i] = params->coords[i]; + } + orig_offset = params->texture_index_offset; + + for (unsigned v = 0; v < uint_bld->type.length; v++) { + LLVMValueRef idx = lp_build_const_int32(gallivm, v); + LLVMValueRef new_coords[5]; + for (i = 0; i < 5; i++) { + new_coords[i] = LLVMBuildExtractElement(gallivm->builder, + coords[i], idx, ""); + } + params->coords = new_coords; + params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder, + orig_offset, + idx, ""); + params->type = lp_elem_type(bld_base->base.type); + + params->texel = texel; + bld->sampler->emit_tex_sample(bld->sampler, + gallivm, + params); + + for (i = 0; i < 4; i++) { + result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, ""); + } + } + for (i = 0; i < 4; i++) { + orig_texel_ptr[i] = result[i]; + } + return; + } + + if (params->texture_index_offset) + params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder, + params->texture_index_offset, + lp_build_const_int32(bld_base->base.gallivm, 0), ""); + + params->type = bld_base->base.type; bld->sampler->emit_tex_sample(bld->sampler, bld->bld_base.base.gallivm, params); @@ -907,6 +1416,10 @@ static void emit_tex_size(struct lp_build_nir_context *bld_base, params->int_type = bld_base->int_bld.type; params->context_ptr = bld->context_ptr; + if (params->texture_unit_offset) + params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder, + params->texture_unit_offset, + lp_build_const_int32(bld_base->base.gallivm, 0), ""); bld->sampler->emit_size_query(bld->sampler, bld->bld_base.base.gallivm, params); @@ -914,7 +1427,7 @@ static void emit_tex_size(struct lp_build_nir_context *bld_base, static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, nir_intrinsic_instr *instr, - LLVMValueRef result[4]) + LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; @@ -922,6 +1435,12 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, case nir_intrinsic_load_instance_id: result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id); break; + case nir_intrinsic_load_base_instance: + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance); + break; + case nir_intrinsic_load_base_vertex: + result[0] = bld->system_values.basevertex; + break; case nir_intrinsic_load_vertex_id: result[0] = bld->system_values.vertex_id; break; @@ -941,13 +1460,67 @@ static void emit_sysval_intrin(struct lp_build_nir_context *bld_base, result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "")); break; case nir_intrinsic_load_invocation_id: - result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id); + if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL) + result[0] = bld->system_values.invocation_id; + else + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id); + break; + case nir_intrinsic_load_front_face: + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing); + break; + case nir_intrinsic_load_draw_id: + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id); break; default: break; + case nir_intrinsic_load_local_group_size: + for (unsigned i = 0; i < 3; i++) + result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), "")); + break; + case nir_intrinsic_load_work_dim: + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim); + break; + case nir_intrinsic_load_tess_coord: + for (unsigned i = 0; i < 3; i++) { + result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, ""); + } + break; + case nir_intrinsic_load_tess_level_outer: + for (unsigned i = 0; i < 4; i++) + result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, "")); + break; + case nir_intrinsic_load_tess_level_inner: + for (unsigned i = 0; i < 2; i++) + result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, "")); + break; + case nir_intrinsic_load_patch_vertices_in: + result[0] = bld->system_values.vertices_in; + break; + case nir_intrinsic_load_sample_id: + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id); + break; + case nir_intrinsic_load_sample_pos: + for (unsigned i = 0; i < 2; i++) { + LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), ""); + idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), ""); + LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx); + result[i] = lp_build_broadcast_scalar(&bld_base->base, val); + } + break; + case nir_intrinsic_load_sample_mask_in: + result[0] = bld->system_values.sample_mask_in; + break; } } +static void emit_helper_invocation(struct lp_build_nir_context *bld_base, + LLVMValueRef *dst) +{ + struct gallivm_state *gallivm = bld_base->base.gallivm; + struct lp_build_context *uint_bld = &bld_base->uint_bld; + *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1)); +} + static void bgnloop(struct lp_build_nir_context *bld_base) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; @@ -1063,37 +1636,42 @@ static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_i struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + if (stream_id >= bld->gs_vertex_streams) + return; assert(bld->gs_iface->emit_vertex); LLVMValueRef total_emitted_vertices_vec = - LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); + LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], ""); LLVMValueRef mask = mask_vec(bld_base); mask = clamp_mask_to_max_output_vertices(bld, mask, total_emitted_vertices_vec); bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base, bld->outputs, total_emitted_vertices_vec, + mask, lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id)); - increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr, + increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id], mask); - increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr, + increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id], mask); } static void end_primitive_masked(struct lp_build_nir_context * bld_base, - LLVMValueRef mask) + LLVMValueRef mask, uint32_t stream_id) { struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + if (stream_id >= bld->gs_vertex_streams) + return; struct lp_build_context *uint_bld = &bld_base->uint_bld; LLVMValueRef emitted_vertices_vec = - LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""); + LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], ""); LLVMValueRef emitted_prims_vec = - LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""); + LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], ""); LLVMValueRef total_emitted_vertices_vec = - LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, ""); + LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], ""); LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, @@ -1101,29 +1679,29 @@ end_primitive_masked(struct lp_build_nir_context * bld_base, uint_bld->zero); mask = LLVMBuildAnd(builder, mask, emitted_mask, ""); bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base, - total_emitted_vertices_vec, - emitted_vertices_vec, emitted_prims_vec, mask_vec(bld_base)); - increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr, + total_emitted_vertices_vec, + emitted_vertices_vec, emitted_prims_vec, mask, stream_id); + increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id], mask); - clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr, + clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id], mask); } static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id) { - struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; assert(bld->gs_iface->end_primitive); LLVMValueRef mask = mask_vec(bld_base); - end_primitive_masked(bld_base, mask); + end_primitive_masked(bld_base, mask, stream_id); } static void emit_prologue(struct lp_build_nir_soa_context *bld) { struct gallivm_state * gallivm = bld->bld_base.base.gallivm; - if (bld->indirects & nir_var_shader_in && !bld->gs_iface) { + if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) { uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read); unsigned index, chan; LLVMTypeRef vec_type = bld->bld_base.base.vec_type; @@ -1147,6 +1725,82 @@ emit_prologue(struct lp_build_nir_soa_context *bld) } } +static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4]) +{ + struct gallivm_state * gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + + LLVMValueRef exec_mask = mask_vec(bld_base); + struct lp_build_loop_state loop_state; + + LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, ""); + + LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, ""); + LLVMValueRef init_val = NULL; + if (instr->intrinsic == nir_intrinsic_vote_ieq) { + /* for equal we unfortunately have to loop and find the first valid one. */ + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, ""); + + struct lp_build_if_state ifthen; + lp_build_if(&ifthen, gallivm, if_cond); + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src, + loop_state.counter, ""); + LLVMBuildStore(builder, value_ptr, res_store); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length), + NULL, LLVMIntUGE); + init_val = LLVMBuildLoad(builder, res_store, ""); + } else { + LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store); + } + + LLVMValueRef res; + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src, + loop_state.counter, ""); + struct lp_build_if_state ifthen; + LLVMValueRef if_cond; + if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, ""); + + lp_build_if(&ifthen, gallivm, if_cond); + res = LLVMBuildLoad(builder, res_store, ""); + + if (instr->intrinsic == nir_intrinsic_vote_ieq) { + LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, ""); + tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, ""); + res = LLVMBuildOr(builder, res, tmp, ""); + } else if (instr->intrinsic == nir_intrinsic_vote_any) + res = LLVMBuildOr(builder, res, value_ptr, ""); + else + res = LLVMBuildAnd(builder, res, value_ptr, ""); + LLVMBuildStore(builder, res, res_store); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length), + NULL, LLVMIntUGE); + result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, "")); +} + +static void +emit_interp_at(struct lp_build_nir_context *bld_base, + unsigned num_components, + nir_variable *var, + bool centroid, + bool sample, + unsigned const_index, + LLVMValueRef indir_index, + LLVMValueRef offsets[2], + LLVMValueRef dst[4]) +{ + struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base; + + for (unsigned i = 0; i < num_components; i++) { + dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base, + const_index + var->data.driver_location, i + var->data.location_frac, + centroid, sample, indir_index, offsets); + } +} + void lp_build_nir_soa(struct gallivm_state *gallivm, struct nir_shader *shader, const struct lp_build_tgsi_params *params, @@ -1187,12 +1841,40 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, int64_type.width *= 2; lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type); } + { + struct lp_type uint16_type; + uint16_type = lp_uint_type(type); + uint16_type.width /= 2; + lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type); + } + { + struct lp_type int16_type; + int16_type = lp_int_type(type); + int16_type.width /= 2; + lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type); + } + { + struct lp_type uint8_type; + uint8_type = lp_uint_type(type); + uint8_type.width /= 4; + lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type); + } + { + struct lp_type int8_type; + int8_type = lp_int_type(type); + int8_type.width /= 4; + lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type); + } bld.bld_base.load_var = emit_load_var; bld.bld_base.store_var = emit_store_var; bld.bld_base.load_reg = emit_load_reg; bld.bld_base.store_reg = emit_store_reg; bld.bld_base.emit_var_decl = emit_var_decl; bld.bld_base.load_ubo = emit_load_ubo; + bld.bld_base.load_kernel_arg = emit_load_kernel_arg; + bld.bld_base.load_global = emit_load_global; + bld.bld_base.store_global = emit_store_global; + bld.bld_base.atomic_global = emit_atomic_global; bld.bld_base.tex = emit_tex; bld.bld_base.tex_size = emit_tex_size; bld.bld_base.bgnloop = bgnloop; @@ -1213,6 +1895,9 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, bld.bld_base.barrier = emit_barrier; bld.bld_base.image_op = emit_image_op; bld.bld_base.image_size = emit_image_size; + bld.bld_base.vote = emit_vote; + bld.bld_base.helper_invocation = emit_helper_invocation; + bld.bld_base.interp_at = emit_interp_at; bld.mask = params->mask; bld.inputs = params->inputs; @@ -1229,23 +1914,29 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, bld.image = params->image; bld.shared_ptr = params->shared_ptr; bld.coro = params->coro; - + bld.kernel_args_ptr = params->kernel_args; bld.indirects = 0; if (params->info->indirect_files & (1 << TGSI_FILE_INPUT)) bld.indirects |= nir_var_shader_in; bld.gs_iface = params->gs_iface; + bld.tcs_iface = params->tcs_iface; + bld.tes_iface = params->tes_iface; + bld.fs_iface = params->fs_iface; if (bld.gs_iface) { struct lp_build_context *uint_bld = &bld.bld_base.uint_bld; + bld.gs_vertex_streams = params->gs_vertex_streams; bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type, shader->info.gs.vertices_out); - bld.emitted_prims_vec_ptr = - lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr"); - bld.emitted_vertices_vec_ptr = - lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr"); - bld.total_emitted_vertices_vec_ptr = - lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr"); + for (int i = 0; i < params->gs_vertex_streams; i++) { + bld.emitted_prims_vec_ptr[i] = + lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr"); + bld.emitted_vertices_vec_ptr[i] = + lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr"); + bld.total_emitted_vertices_vec_ptr[i] = + lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr"); + } } lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld); @@ -1260,15 +1951,19 @@ void lp_build_nir_soa(struct gallivm_state *gallivm, LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder; LLVMValueRef total_emitted_vertices_vec; LLVMValueRef emitted_prims_vec; - end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask)); - total_emitted_vertices_vec = - LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr, ""); - emitted_prims_vec = - LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr, ""); - - bld.gs_iface->gs_epilogue(bld.gs_iface, - total_emitted_vertices_vec, - emitted_prims_vec); + + for (int i = 0; i < params->gs_vertex_streams; i++) { + end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i); + + total_emitted_vertices_vec = + LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], ""); + + emitted_prims_vec = + LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], ""); + bld.gs_iface->gs_epilogue(bld.gs_iface, + total_emitted_vertices_vec, + emitted_prims_vec, i); + } } lp_exec_mask_fini(&bld.exec_mask); }