+
+/**
+ * Emit a per-lane atomic operation on a 32-bit single-channel image.
+ *
+ * Only PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_SINT and PIPE_FORMAT_R32_FLOAT
+ * are handled. For any other format no memory access is emitted and
+ * atomic_result[0] is simply set to zero.
+ *
+ * The generated code loops over the type.length lanes. A lane is processed
+ * only when its exec_mask element is non-zero and its out_of_bounds element
+ * is clear; for such a lane one sequentially consistent atomic instruction
+ * is issued on (base_ptr + offset[lane]) and the value that instruction
+ * yields is written to the same lane of the result vector.
+ *
+ * @param gallivm        code generation state
+ * @param format_desc    description of the image format
+ * @param type           vector type of the operands (type.length lanes)
+ * @param exec_mask      per-lane execution mask, must not be NULL
+ * @param base_ptr       base address of the image data
+ * @param offset         per-lane byte offsets relative to base_ptr
+ * @param out_of_bounds  per-lane mask of lanes that must not touch memory
+ * @param img_op         LP_IMG_ATOMIC_CAS selects compare-and-swap, any
+ *                       other value selects an atomic RMW using @p op
+ * @param op             RMW operation (unused for compare-and-swap)
+ * @param rgba_in        rgba_in[0] is the RMW operand, or the compare value
+ *                       handed to the cmpxchg instruction
+ * @param rgba2_in       rgba2_in[0] is the new value handed to cmpxchg
+ *                       (only read for LP_IMG_ATOMIC_CAS)
+ * @param atomic_result  atomic_result[0] receives an i32 vector with the
+ *                       per-lane results; other elements are not written
+ *
+ * NOTE(review): lanes that are skipped keep whatever lp_build_alloca left in
+ * the temporary - presumably zero, confirm against lp_build_alloca.
+ */
+static void
+lp_build_do_atomic_soa(struct gallivm_state *gallivm,
+                       const struct util_format_description *format_desc,
+                       struct lp_type type,
+                       LLVMValueRef exec_mask,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef offset,
+                       LLVMValueRef out_of_bounds,
+                       unsigned img_op,
+                       LLVMAtomicRMWBinOp op,
+                       const LLVMValueRef rgba_in[4],
+                       const LLVMValueRef rgba2_in[4],
+                       LLVMValueRef atomic_result[4])
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   enum pipe_format format = format_desc->format;
+
+   if (format != PIPE_FORMAT_R32_UINT &&
+       format != PIPE_FORMAT_R32_SINT &&
+       format != PIPE_FORMAT_R32_FLOAT) {
+      atomic_result[0] = lp_build_zero(gallivm, type);
+      return;
+   }
+
+   /* Check the mask before it is handed to any builder call. */
+   assert(exec_mask);
+
+   LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
+   LLVMValueRef atom_res = lp_build_alloca(gallivm,
+                                           LLVMVectorType(i32_type, type.length), "");
+
+   /* Turn the per-lane offsets into per-lane addresses. */
+   offset = LLVMBuildGEP(builder, base_ptr, &offset, 1, "");
+
+   /*
+    * The set of lanes allowed to touch memory does not depend on the loop
+    * counter, so compute it once in front of the loop.
+    */
+   LLVMValueRef should_store_mask =
+      LLVMBuildAnd(builder, exec_mask,
+                   LLVMBuildNot(builder, out_of_bounds, ""), "store_mask");
+   LLVMValueRef lane_active =
+      LLVMBuildICmp(builder, LLVMIntNE, should_store_mask,
+                    lp_build_const_int_vec(gallivm, type, 0), "");
+
+   struct lp_build_loop_state loop_state;
+   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
+
+   struct lp_build_if_state ifthen;
+   LLVMValueRef cond = LLVMBuildExtractElement(builder, lane_active,
+                                               loop_state.counter, "");
+   lp_build_if(&ifthen, gallivm, cond);
+
+   /* Scalar operand and i32 pointer for the current lane. */
+   LLVMValueRef data = LLVMBuildExtractElement(builder, rgba_in[0],
+                                               loop_state.counter, "");
+   LLVMValueRef cast_base_ptr = LLVMBuildExtractElement(builder, offset,
+                                                        loop_state.counter, "");
+   cast_base_ptr = LLVMBuildBitCast(builder, cast_base_ptr,
+                                    LLVMPointerType(i32_type, 0), "");
+   data = LLVMBuildBitCast(builder, data, i32_type, "");
+
+   if (img_op == LP_IMG_ATOMIC_CAS) {
+      LLVMValueRef cas_src = LLVMBuildExtractElement(builder, rgba2_in[0],
+                                                     loop_state.counter, "");
+      cas_src = LLVMBuildBitCast(builder, cas_src, i32_type, "");
+      data = LLVMBuildAtomicCmpXchg(builder, cast_base_ptr, data,
+                                    cas_src,
+                                    LLVMAtomicOrderingSequentiallyConsistent,
+                                    LLVMAtomicOrderingSequentiallyConsistent,
+                                    false);
+      /* cmpxchg yields a { value, success } pair; keep the value. */
+      data = LLVMBuildExtractValue(builder, data, 0, "");
+   } else {
+      data = LLVMBuildAtomicRMW(builder, op,
+                                cast_base_ptr, data,
+                                LLVMAtomicOrderingSequentiallyConsistent,
+                                false);
+   }
+
+   /* Merge this lane's result into the result vector held in memory. */
+   LLVMValueRef temp_res = LLVMBuildLoad(builder, atom_res, "");
+   temp_res = LLVMBuildInsertElement(builder, temp_res, data, loop_state.counter, "");
+   LLVMBuildStore(builder, temp_res, atom_res);
+
+   lp_build_endif(&ifthen);
+   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length),
+                          NULL, LLVMIntUGE);
+   atomic_result[0] = LLVMBuildLoad(builder, atom_res, "");
+}
+
+/**
+ * Produce the result of an image operation on an image with no format.
+ *
+ * With nothing bound the format is NONE, and d3d10 mandates an all-zero
+ * result in that case. Stores produce no result and emit nothing; loads
+ * get four zeroed channels, every other operation gets one.
+ */
+static void
+lp_build_img_op_no_format(struct gallivm_state *gallivm,
+                          const struct lp_img_params *params,
+                          LLVMValueRef outdata[4])
+{
+   if (params->img_op == LP_IMG_STORE) {
+      return;
+   }
+
+   LLVMValueRef zero_vec = lp_build_zero(gallivm, params->type);
+   const unsigned num_chans = (params->img_op == LP_IMG_LOAD) ? 4 : 1;
+   unsigned c = 0;
+
+   while (c < num_chans) {
+      outdata[c++] = zero_vec;
+   }
+}
+
+/**
+ * Generate code for an image load, store or atomic operation.
+ *
+ * The image dimensions, strides and base pointer are fetched through
+ * dynamic_state. The coordinates in params->coords (and params->ms_index
+ * when present) are compared against those dimensions to build a per-lane
+ * out-of-bounds mask, and a per-lane byte offset is computed. Depending on
+ * params->img_op the function then emits
+ *   - LP_IMG_LOAD:  a texel fetch whose out-of-bounds lanes read offset 0 and
+ *                   are replaced by zero in all four outdata channels,
+ *   - LP_IMG_STORE: a store of params->indata, passing exec_mask and the
+ *                   out-of-bounds mask on to lp_build_store_rgba_soa,
+ *   - otherwise:    an atomic via lp_build_do_atomic_soa, whose result is
+ *                   returned through outdata.
+ *
+ * If the static format is PIPE_FORMAT_NONE the operation is delegated to
+ * lp_build_img_op_no_format and no memory access is generated.
+ *
+ * @param static_texture_state  static state of the image (format)
+ * @param dynamic_state         accessors for the run-time image state
+ * @param gallivm               code generation state
+ * @param params                operation, coordinates, masks and input data
+ * @param outdata               result channels (not written for stores)
+ */
+void
+lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
+                    struct lp_sampler_dynamic_state *dynamic_state,
+                    struct gallivm_state *gallivm,
+                    const struct lp_img_params *params,
+                    LLVMValueRef outdata[4])
+{
+   unsigned target = params->target;
+   unsigned dims = texture_dims(target);
+   struct lp_type int_coord_type;
+   struct lp_build_context int_coord_bld;
+   const struct util_format_description *format_desc = util_format_description(static_texture_state->format);
+   LLVMValueRef x = params->coords[0], y = params->coords[1], z = params->coords[2];
+   LLVMValueRef ms_index = params->ms_index;
+   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
+   int_coord_type = lp_int_type(params->type);
+   lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);
+
+   if (static_texture_state->format == PIPE_FORMAT_NONE) {
+      lp_build_img_op_no_format(gallivm, params, outdata);
+      return;
+   }
+   LLVMValueRef offset, i, j;
+
+   LLVMValueRef row_stride = dynamic_state->row_stride(dynamic_state, gallivm,
+                                                       params->context_ptr, params->image_index, NULL);
+   LLVMValueRef img_stride = dynamic_state->img_stride(dynamic_state, gallivm,
+                                                       params->context_ptr, params->image_index, NULL);
+   LLVMValueRef base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
+                                                   params->context_ptr, params->image_index, NULL);
+   LLVMValueRef width = dynamic_state->width(dynamic_state, gallivm,
+                                             params->context_ptr, params->image_index, NULL);
+   LLVMValueRef height = dynamic_state->height(dynamic_state, gallivm,
+                                               params->context_ptr, params->image_index, NULL);
+   LLVMValueRef depth = dynamic_state->depth(dynamic_state, gallivm,
+                                             params->context_ptr, params->image_index, NULL);
+   LLVMValueRef num_samples = NULL, sample_stride = NULL;
+   if (ms_index) {
+      num_samples = dynamic_state->num_samples(dynamic_state, gallivm,
+                                               params->context_ptr, params->image_index, NULL);
+      sample_stride = dynamic_state->sample_stride(dynamic_state, gallivm,
+                                                   params->context_ptr, params->image_index, NULL);
+   }
+
+   boolean layer_coord = has_layer_coord(target);
+
+   /* Broadcast the scalar sizes/strides that this target actually uses. */
+   width = lp_build_broadcast_scalar(&int_coord_bld, width);
+   if (dims >= 2) {
+      height = lp_build_broadcast_scalar(&int_coord_bld, height);
+      row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
+   }
+   if (dims >= 3 || layer_coord) {
+      depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
+      img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
+   }
+
+   /* Accumulate a mask of lanes whose coordinates fall outside the image. */
+   LLVMValueRef out_of_bounds = int_coord_bld.zero;
+   LLVMValueRef out1;
+   out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
+   out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+
+   if (dims >= 2) {
+      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
+      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+   }
+   /*
+    * The z coordinate feeds the offset computation whenever img_stride_vec
+    * is set up above, i.e. also for layered targets, so it has to be range
+    * checked under the same condition; otherwise an out-of-range layer
+    * index would reach the load/store/atomic unchecked.
+    * NOTE(review): assumes the dynamic depth value is the layer count for
+    * layered targets - confirm against the image state setup.
+    */
+   if (dims >= 3 || layer_coord) {
+      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
+      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+   }
+   lp_build_sample_offset(&int_coord_bld,
+                          format_desc,
+                          x, y, z, row_stride_vec, img_stride_vec,
+                          &offset, &i, &j);
+
+   if (ms_index) {
+      /* Multisampled image: check the sample index and step to the sample. */
+      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, lp_build_broadcast_scalar(&int_coord_bld, num_samples));
+      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
+
+      offset = lp_build_add(&int_coord_bld, offset,
+                            lp_build_mul(&int_coord_bld, lp_build_broadcast_scalar(&int_coord_bld, sample_stride),
+                                         ms_index));
+   }
+   if (params->img_op == LP_IMG_LOAD) {
+      struct lp_type texel_type = params->type;
+      /* Pure integer RGB formats are fetched as (u)int vectors. */
+      if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
+          format_desc->channel[0].pure_integer) {
+         if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+            texel_type = lp_type_int_vec(params->type.width, params->type.width * params->type.length);
+         } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+            texel_type = lp_type_uint_vec(params->type.width, params->type.width * params->type.length);
+         }
+      }
+
+      /* Clear the offset of out-of-bounds lanes before fetching. */
+      offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
+      struct lp_build_context texel_bld;
+      lp_build_context_init(&texel_bld, gallivm, texel_type);
+      lp_build_fetch_rgba_soa(gallivm,
+                              format_desc,
+                              texel_type, TRUE,
+                              base_ptr, offset,
+                              i, j,
+                              NULL,
+                              outdata);
+
+      /* Out-of-bounds lanes read back as zero in every channel. */
+      for (unsigned chan = 0; chan < 4; chan++) {
+         outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
+                                         texel_bld.zero, outdata[chan]);
+      }
+   } else if (params->img_op == LP_IMG_STORE) {
+      lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
+                              params->indata);
+   } else {
+      lp_build_do_atomic_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
+                             params->img_op, params->op, params->indata, params->indata2, outdata);
+   }
+}
+
+/*
+ * These functions are for indirect texture access support.
+ *
+ * Indirect textures are implemented using a switch statement, that
+ * takes the texture index and jumps to the sampler functions for
+ * that texture unit.
+ */
+
+/*
+ * Set up the switch block used for indexed sampler access.
+ *
+ * Fills in switch_info (with a private copy of the sampler parameters),
+ * emits the switch instruction on idx and creates the merge block together
+ * with the phi node that later collects the result of each case.
+ */
+void
+lp_build_sample_array_init_soa(struct lp_build_sample_array_switch *switch_info,
+                               struct gallivm_state *gallivm,
+                               const struct lp_sampler_params *params,
+                               LLVMValueRef idx,
+                               unsigned base, unsigned range)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+
+   switch_info->gallivm = gallivm;
+   switch_info->params = *params;
+   switch_info->base = base;
+   switch_info->range = range;
+
+   /* The generated switch functions must not see the texture index offset. */
+   switch_info->params.texture_index_offset = 0;
+
+   /* Remember where we came from: it is the phi's default predecessor. */
+   LLVMBasicBlockRef entry_block = LLVMGetInsertBlock(builder);
+
+   switch_info->merge_ref = lp_build_insert_new_block(gallivm, "texmerge");
+   switch_info->switch_ref = LLVMBuildSwitch(builder, idx,
+                                             switch_info->merge_ref,
+                                             range - base);
+
+   /* The phi carries a struct of four texel vectors. */
+   LLVMTypeRef texel_vec_type = lp_build_vec_type(gallivm, params->type);
+   LLVMTypeRef members[4] = {
+      texel_vec_type, texel_vec_type, texel_vec_type, texel_vec_type
+   };
+   LLVMTypeRef result_type = LLVMStructTypeInContext(gallivm->context, members, 4, 0);
+   LLVMValueRef undef_result = LLVMGetUndef(result_type);
+
+   LLVMPositionBuilderAtEnd(builder, switch_info->merge_ref);
+
+   /* Falling through the switch's default edge yields an undefined result. */
+   switch_info->phi = LLVMBuildPhi(builder, result_type, "");
+   LLVMAddIncoming(switch_info->phi, &undef_result, &entry_block, 1);
+}
+
+/*
+ * Add one texture unit to the indirect texture switch.
+ *
+ * Creates a new block for case value idx, emits the sample function for
+ * that unit into it, registers the result with the merge phi and branches
+ * to the merge block.
+ */
+void
+lp_build_sample_array_case_soa(struct lp_build_sample_array_switch *switch_info,
+                               int idx,
+                               const struct lp_static_texture_state *static_texture_state,
+                               const struct lp_static_sampler_state *static_sampler_state,
+                               struct lp_sampler_dynamic_state *dynamic_texture_state)
+{
+   struct gallivm_state *gallivm = switch_info->gallivm;
+   LLVMBasicBlockRef case_block = lp_build_insert_new_block(gallivm, "texblock");
+   LLVMValueRef case_value =
+      LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), idx, 0);
+   LLVMValueRef sampled;
+
+   /* Route switch value idx to the new block and start emitting there. */
+   LLVMAddCase(switch_info->switch_ref, case_value, case_block);
+   LLVMPositionBuilderAtEnd(gallivm->builder, case_block);
+
+   lp_build_sample_soa_func(gallivm,
+                            static_texture_state,
+                            static_sampler_state,
+                            dynamic_texture_state,
+                            &switch_info->params,
+                            idx, idx,
+                            &sampled);
+
+   /* Hand the result to the merge block. */
+   LLVMAddIncoming(switch_info->phi, &sampled, &case_block, 1);
+   LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
+}
+
+/*
+ * Finish a switch statement.
+ *
+ * Positions the builder at the end of the merge block and extracts the four
+ * result values from the phi into switch_info->params.texel.
+ */
+void lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch *switch_info)
+{
+   LLVMBuilderRef builder = switch_info->gallivm->builder;
+
+   LLVMPositionBuilderAtEnd(builder, switch_info->merge_ref);
+
+   unsigned chan = 0;
+   while (chan < 4) {
+      switch_info->params.texel[chan] =
+         LLVMBuildExtractValue(builder, switch_info->phi, chan, "");
+      ++chan;
+   }
+}