- LLVMValueRef result = NULL;
-
- switch (instr->intrinsic) {
- case nir_intrinsic_ballot:
- result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
- if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
- result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
- break;
- case nir_intrinsic_read_invocation:
- result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
- break;
- case nir_intrinsic_read_first_invocation:
- result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
- break;
- case nir_intrinsic_load_subgroup_invocation:
- result = ac_get_thread_id(&ctx->ac);
- break;
- case nir_intrinsic_load_work_group_id: {
- LLVMValueRef values[3];
-
- for (int i = 0; i < 3; i++) {
- values[i] = ctx->args->workgroup_ids[i].used ?
- ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i]) : ctx->ac.i32_0;
- }
-
- result = ac_build_gather_values(&ctx->ac, values, 3);
- break;
- }
- case nir_intrinsic_load_base_vertex:
- case nir_intrinsic_load_first_vertex:
- result = ctx->abi->load_base_vertex(ctx->abi);
- break;
- case nir_intrinsic_load_local_group_size:
- result = ctx->abi->load_local_group_size(ctx->abi);
- break;
- case nir_intrinsic_load_vertex_id:
- result = LLVMBuildAdd(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->vertex_id),
- ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
- break;
- case nir_intrinsic_load_vertex_id_zero_base: {
- result = ctx->abi->vertex_id;
- break;
- }
- case nir_intrinsic_load_local_invocation_id: {
- result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
- break;
- }
- case nir_intrinsic_load_base_instance:
- result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
- break;
- case nir_intrinsic_load_draw_id:
- result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
- break;
- case nir_intrinsic_load_view_index:
- result = ac_get_arg(&ctx->ac, ctx->args->view_index);
- break;
- case nir_intrinsic_load_invocation_id:
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- result = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids),
- 8, 5);
- } else {
- if (ctx->ac.chip_class >= GFX10) {
- result = LLVMBuildAnd(ctx->ac.builder,
- ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
- LLVMConstInt(ctx->ac.i32, 127, 0), "");
- } else {
- result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
- }
- }
- break;
- case nir_intrinsic_load_primitive_id:
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
- } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
- } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
- } else
- fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
- break;
- case nir_intrinsic_load_sample_id:
- result = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ancillary),
- 8, 4);
- break;
- case nir_intrinsic_load_sample_pos:
- result = load_sample_pos(ctx);
- break;
- case nir_intrinsic_load_sample_mask_in:
- result = ctx->abi->load_sample_mask_in(ctx->abi);
- break;
- case nir_intrinsic_load_frag_coord: {
- LLVMValueRef values[4] = {
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]),
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
- ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
- ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))
- };
- result = ac_to_integer(&ctx->ac,
- ac_build_gather_values(&ctx->ac, values, 4));
- break;
- }
- case nir_intrinsic_load_layer_id:
- result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
- break;
- case nir_intrinsic_load_front_face:
- result = ac_get_arg(&ctx->ac, ctx->args->front_face);
- break;
- case nir_intrinsic_load_helper_invocation:
- result = ac_build_load_helper_invocation(&ctx->ac);
- break;
- case nir_intrinsic_is_helper_invocation:
- result = ac_build_is_helper_invocation(&ctx->ac);
- break;
- case nir_intrinsic_load_color0:
- result = ctx->abi->color0;
- break;
- case nir_intrinsic_load_color1:
- result = ctx->abi->color1;
- break;
- case nir_intrinsic_load_user_data_amd:
- assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
- result = ctx->abi->user_data;
- break;
- case nir_intrinsic_load_instance_id:
- result = ctx->abi->instance_id;
- break;
- case nir_intrinsic_load_num_work_groups:
- result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
- break;
- case nir_intrinsic_load_local_invocation_index:
- result = visit_load_local_invocation_index(ctx);
- break;
- case nir_intrinsic_load_subgroup_id:
- result = visit_load_subgroup_id(ctx);
- break;
- case nir_intrinsic_load_num_subgroups:
- result = visit_load_num_subgroups(ctx);
- break;
- case nir_intrinsic_first_invocation:
- result = visit_first_invocation(ctx);
- break;
- case nir_intrinsic_load_push_constant:
- result = visit_load_push_constant(ctx, instr);
- break;
- case nir_intrinsic_vulkan_resource_index: {
- LLVMValueRef index = get_src(ctx, instr->src[0]);
- unsigned desc_set = nir_intrinsic_desc_set(instr);
- unsigned binding = nir_intrinsic_binding(instr);
-
- result = ctx->abi->load_resource(ctx->abi, index, desc_set,
- binding);
- break;
- }
- case nir_intrinsic_vulkan_resource_reindex:
- result = visit_vulkan_resource_reindex(ctx, instr);
- break;
- case nir_intrinsic_store_ssbo:
- visit_store_ssbo(ctx, instr);
- break;
- case nir_intrinsic_load_ssbo:
- result = visit_load_buffer(ctx, instr);
- break;
- case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_ssbo_atomic_umin:
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_ssbo_atomic_umax:
- case nir_intrinsic_ssbo_atomic_and:
- case nir_intrinsic_ssbo_atomic_or:
- case nir_intrinsic_ssbo_atomic_xor:
- case nir_intrinsic_ssbo_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_comp_swap:
- result = visit_atomic_ssbo(ctx, instr);
- break;
- case nir_intrinsic_load_ubo:
- result = visit_load_ubo_buffer(ctx, instr);
- break;
- case nir_intrinsic_get_buffer_size:
- result = visit_get_buffer_size(ctx, instr);
- break;
- case nir_intrinsic_load_deref:
- result = visit_load_var(ctx, instr);
- break;
- case nir_intrinsic_store_deref:
- visit_store_var(ctx, instr);
- break;
- case nir_intrinsic_load_input:
- case nir_intrinsic_load_input_vertex:
- case nir_intrinsic_load_per_vertex_input:
- result = visit_load(ctx, instr, false);
- break;
- case nir_intrinsic_load_output:
- case nir_intrinsic_load_per_vertex_output:
- result = visit_load(ctx, instr, true);
- break;
- case nir_intrinsic_store_output:
- case nir_intrinsic_store_per_vertex_output:
- visit_store_output(ctx, instr);
- break;
- case nir_intrinsic_load_shared:
- result = visit_load_shared(ctx, instr);
- break;
- case nir_intrinsic_store_shared:
- visit_store_shared(ctx, instr);
- break;
- case nir_intrinsic_bindless_image_samples:
- case nir_intrinsic_image_deref_samples:
- result = visit_image_samples(ctx, instr);
- break;
- case nir_intrinsic_bindless_image_load:
- result = visit_image_load(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_load:
- result = visit_image_load(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_store:
- visit_image_store(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_store:
- visit_image_store(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_atomic_add:
- case nir_intrinsic_bindless_image_atomic_imin:
- case nir_intrinsic_bindless_image_atomic_umin:
- case nir_intrinsic_bindless_image_atomic_imax:
- case nir_intrinsic_bindless_image_atomic_umax:
- case nir_intrinsic_bindless_image_atomic_and:
- case nir_intrinsic_bindless_image_atomic_or:
- case nir_intrinsic_bindless_image_atomic_xor:
- case nir_intrinsic_bindless_image_atomic_exchange:
- case nir_intrinsic_bindless_image_atomic_comp_swap:
- case nir_intrinsic_bindless_image_atomic_inc_wrap:
- case nir_intrinsic_bindless_image_atomic_dec_wrap:
- result = visit_image_atomic(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
- case nir_intrinsic_image_deref_atomic_inc_wrap:
- case nir_intrinsic_image_deref_atomic_dec_wrap:
- result = visit_image_atomic(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_size:
- result = visit_image_size(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_size:
- result = visit_image_size(ctx, instr, false);
- break;
- case nir_intrinsic_shader_clock:
- result = ac_build_shader_clock(&ctx->ac,
- nir_intrinsic_memory_scope(instr));
- break;
- case nir_intrinsic_discard:
- case nir_intrinsic_discard_if:
- emit_discard(ctx, instr);
- break;
- case nir_intrinsic_demote:
- case nir_intrinsic_demote_if:
- emit_demote(ctx, instr);
- break;
- case nir_intrinsic_memory_barrier:
- case nir_intrinsic_group_memory_barrier:
- case nir_intrinsic_memory_barrier_buffer:
- case nir_intrinsic_memory_barrier_image:
- case nir_intrinsic_memory_barrier_shared:
- emit_membar(&ctx->ac, instr);
- break;
- case nir_intrinsic_scoped_barrier: {
- assert(!(nir_intrinsic_memory_semantics(instr) &
- (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
-
- nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
-
- unsigned wait_flags = 0;
- if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
- wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
- if (modes & nir_var_mem_shared)
- wait_flags |= AC_WAIT_LGKM;
-
- if (wait_flags)
- ac_build_waitcnt(&ctx->ac, wait_flags);
-
- if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
- ac_emit_barrier(&ctx->ac, ctx->stage);
- break;
- }
- case nir_intrinsic_memory_barrier_tcs_patch:
- break;
- case nir_intrinsic_control_barrier:
- ac_emit_barrier(&ctx->ac, ctx->stage);
- break;
- case nir_intrinsic_shared_atomic_add:
- case nir_intrinsic_shared_atomic_imin:
- case nir_intrinsic_shared_atomic_umin:
- case nir_intrinsic_shared_atomic_imax:
- case nir_intrinsic_shared_atomic_umax:
- case nir_intrinsic_shared_atomic_and:
- case nir_intrinsic_shared_atomic_or:
- case nir_intrinsic_shared_atomic_xor:
- case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_shared_atomic_comp_swap:
- case nir_intrinsic_shared_atomic_fadd: {
- LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
- instr->src[1].ssa->bit_size);
- result = visit_var_atomic(ctx, instr, ptr, 1);
- break;
- }
- case nir_intrinsic_deref_atomic_add:
- case nir_intrinsic_deref_atomic_imin:
- case nir_intrinsic_deref_atomic_umin:
- case nir_intrinsic_deref_atomic_imax:
- case nir_intrinsic_deref_atomic_umax:
- case nir_intrinsic_deref_atomic_and:
- case nir_intrinsic_deref_atomic_or:
- case nir_intrinsic_deref_atomic_xor:
- case nir_intrinsic_deref_atomic_exchange:
- case nir_intrinsic_deref_atomic_comp_swap:
- case nir_intrinsic_deref_atomic_fadd: {
- LLVMValueRef ptr = get_src(ctx, instr->src[0]);
- result = visit_var_atomic(ctx, instr, ptr, 1);
- break;
- }
- case nir_intrinsic_load_barycentric_pixel:
- result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_centroid:
- result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_sample:
- result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_model:
- result = barycentric_model(ctx);
- break;
- case nir_intrinsic_load_barycentric_at_offset: {
- LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
- result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
- break;
- }
- case nir_intrinsic_load_barycentric_at_sample: {
- LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
- result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
- break;
- }
- case nir_intrinsic_load_interpolated_input: {
- /* We assume any indirect loads have been lowered away */
- ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
- assert(offset);
- assert(offset[0].i32 == 0);
-
- LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
- unsigned index = nir_intrinsic_base(instr);
- unsigned component = nir_intrinsic_component(instr);
- result = load_interpolated_input(ctx, interp_param, index,
- component,
- instr->dest.ssa.num_components,
- instr->dest.ssa.bit_size);
- break;
- }
- case nir_intrinsic_emit_vertex:
- ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
- break;
- case nir_intrinsic_emit_vertex_with_counter: {
- unsigned stream = nir_intrinsic_stream_id(instr);
- LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
- ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
- next_vertex,
- ctx->abi->outputs);
- break;
- }
- case nir_intrinsic_end_primitive:
- case nir_intrinsic_end_primitive_with_counter:
- ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
- break;
- case nir_intrinsic_load_tess_coord:
- result = ctx->abi->load_tess_coord(ctx->abi);
- break;
- case nir_intrinsic_load_tess_level_outer:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
- break;
- case nir_intrinsic_load_tess_level_inner:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
- break;
- case nir_intrinsic_load_tess_level_outer_default:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
- break;
- case nir_intrinsic_load_tess_level_inner_default:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
- break;
- case nir_intrinsic_load_patch_vertices_in:
- result = ctx->abi->load_patch_vertices_in(ctx->abi);
- break;
- case nir_intrinsic_vote_all: {
- LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
- result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
- break;
- }
- case nir_intrinsic_vote_any: {
- LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
- result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
- break;
- }
- case nir_intrinsic_shuffle:
- if (ctx->ac.chip_class == GFX8 ||
- ctx->ac.chip_class == GFX9 ||
- (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
- result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
- } else {
- LLVMValueRef src = get_src(ctx, instr->src[0]);
- LLVMValueRef index = get_src(ctx, instr->src[1]);
- LLVMTypeRef type = LLVMTypeOf(src);
- struct waterfall_context wctx;
- LLVMValueRef index_val;
-
- index_val = enter_waterfall(ctx, &wctx, index, true);
-
- src = LLVMBuildZExt(ctx->ac.builder, src,
- ctx->ac.i32, "");
-
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
- ctx->ac.i32,
- (LLVMValueRef []) { src, index_val }, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
-
- result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
-
- result = exit_waterfall(ctx, &wctx, result);
- }
- break;
- case nir_intrinsic_reduce:
- result = ac_build_reduce(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0],
- instr->const_index[1]);
- break;
- case nir_intrinsic_inclusive_scan:
- result = ac_build_inclusive_scan(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0]);
- break;
- case nir_intrinsic_exclusive_scan:
- result = ac_build_exclusive_scan(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0]);
- break;
- case nir_intrinsic_quad_broadcast: {
- unsigned lane = nir_src_as_uint(instr->src[1]);
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
- lane, lane, lane, lane);
- break;
- }
- case nir_intrinsic_quad_swap_horizontal:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3 ,2);
- break;
- case nir_intrinsic_quad_swap_vertical:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0 ,1);
- break;
- case nir_intrinsic_quad_swap_diagonal:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1 ,0);
- break;
- case nir_intrinsic_quad_swizzle_amd: {
- uint32_t mask = nir_intrinsic_swizzle_mask(instr);
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
- mask & 0x3, (mask >> 2) & 0x3,
- (mask >> 4) & 0x3, (mask >> 6) & 0x3);
- break;
- }
- case nir_intrinsic_masked_swizzle_amd: {
- uint32_t mask = nir_intrinsic_swizzle_mask(instr);
- result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
- break;
- }
- case nir_intrinsic_write_invocation_amd:
- result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]),
- get_src(ctx, instr->src[2]));
- break;
- case nir_intrinsic_mbcnt_amd:
- result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
- break;
- case nir_intrinsic_load_scratch: {
- LLVMValueRef offset = get_src(ctx, instr->src[0]);
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- LLVMTypeRef vec_type =
- instr->dest.ssa.num_components == 1 ? comp_type :
- LLVMVectorType(comp_type, instr->dest.ssa.num_components);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(vec_type, addr_space), "");
- result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
- break;
- }
- case nir_intrinsic_store_scratch: {
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(comp_type, addr_space), "");
- LLVMValueRef src = get_src(ctx, instr->src[0]);
- unsigned wrmask = nir_intrinsic_write_mask(instr);
- while (wrmask) {
- int start, count;
- u_bit_scan_consecutive_range(&wrmask, &start, &count);
-
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
- LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
- LLVMTypeRef vec_type =
- count == 1 ? comp_type : LLVMVectorType(comp_type, count);
- offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
- offset_ptr,
- LLVMPointerType(vec_type, addr_space),
- "");
- LLVMValueRef offset_src =
- ac_extract_components(&ctx->ac, src, start, count);
- LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
- }
- break;
- }
- case nir_intrinsic_load_constant: {
- unsigned base = nir_intrinsic_base(instr);
- unsigned range = nir_intrinsic_range(instr);
-
- LLVMValueRef offset = get_src(ctx, instr->src[0]);
- offset = LLVMBuildAdd(ctx->ac.builder, offset,
- LLVMConstInt(ctx->ac.i32, base, false), "");
-
- /* Clamp the offset to avoid out-of-bound access because global
- * instructions can't handle them.
- */
- LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
- LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
- offset, size, "");
- offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
-
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- LLVMTypeRef vec_type =
- instr->dest.ssa.num_components == 1 ? comp_type :
- LLVMVectorType(comp_type, instr->dest.ssa.num_components);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(vec_type, addr_space), "");
- result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
- break;
- }
- default:
- fprintf(stderr, "Unknown intrinsic: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- break;
- }
- if (result) {
- ctx->ssa_defs[instr->dest.ssa.index] = result;
- }
+ LLVMValueRef result = NULL;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_ballot:
+ result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+ if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
+ result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
+ break;
+ case nir_intrinsic_read_invocation:
+ result =
+ ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
+ break;
+ case nir_intrinsic_read_first_invocation:
+ result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
+ break;
+ case nir_intrinsic_load_subgroup_invocation:
+ result = ac_get_thread_id(&ctx->ac);
+ break;
+ case nir_intrinsic_load_work_group_id: {
+ LLVMValueRef values[3];
+
+ for (int i = 0; i < 3; i++) {
+ values[i] = ctx->args->workgroup_ids[i].used
+ ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
+ : ctx->ac.i32_0;
+ }
+
+ result = ac_build_gather_values(&ctx->ac, values, 3);
+ break;
+ }
+ case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_first_vertex:
+ result = ctx->abi->load_base_vertex(ctx->abi);
+ break;
+ case nir_intrinsic_load_local_group_size:
+ result = ctx->abi->load_local_group_size(ctx->abi);
+ break;
+ case nir_intrinsic_load_vertex_id:
+ result = LLVMBuildAdd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->vertex_id),
+ ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
+ break;
+ case nir_intrinsic_load_vertex_id_zero_base: {
+ result = ctx->abi->vertex_id;
+ break;
+ }
+ case nir_intrinsic_load_local_invocation_id: {
+ result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
+ break;
+ }
+ case nir_intrinsic_load_base_instance:
+ result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
+ break;
+ case nir_intrinsic_load_draw_id:
+ result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
+ break;
+ case nir_intrinsic_load_view_index:
+ result = ac_get_arg(&ctx->ac, ctx->args->view_index);
+ break;
+ case nir_intrinsic_load_invocation_id:
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
+ } else {
+ if (ctx->ac.chip_class >= GFX10) {
+ result =
+ LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
+ LLVMConstInt(ctx->ac.i32, 127, 0), "");
+ } else {
+ result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
+ }
+ }
+ break;
+ case nir_intrinsic_load_primitive_id:
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
+ } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
+ } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
+ } else
+ fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
+ break;
+ case nir_intrinsic_load_sample_id:
+ result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
+ break;
+ case nir_intrinsic_load_sample_pos:
+ result = load_sample_pos(ctx);
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ result = ctx->abi->load_sample_mask_in(ctx->abi);
+ break;
+ case nir_intrinsic_load_frag_coord: {
+ LLVMValueRef values[4] = {
+ ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
+ ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
+ ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
+ result = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
+ break;
+ }
+ case nir_intrinsic_load_layer_id:
+ result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ break;
+ case nir_intrinsic_load_front_face:
+ result = ac_get_arg(&ctx->ac, ctx->args->front_face);
+ break;
+ case nir_intrinsic_load_helper_invocation:
+ result = ac_build_load_helper_invocation(&ctx->ac);
+ break;
+ case nir_intrinsic_is_helper_invocation:
+ result = ac_build_is_helper_invocation(&ctx->ac);
+ break;
+ case nir_intrinsic_load_color0:
+ result = ctx->abi->color0;
+ break;
+ case nir_intrinsic_load_color1:
+ result = ctx->abi->color1;
+ break;
+ case nir_intrinsic_load_user_data_amd:
+ assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
+ result = ctx->abi->user_data;
+ break;
+ case nir_intrinsic_load_instance_id:
+ result = ctx->abi->instance_id;
+ break;
+ case nir_intrinsic_load_num_work_groups:
+ result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
+ break;
+ case nir_intrinsic_load_local_invocation_index:
+ result = visit_load_local_invocation_index(ctx);
+ break;
+ case nir_intrinsic_load_subgroup_id:
+ result = visit_load_subgroup_id(ctx);
+ break;
+ case nir_intrinsic_load_num_subgroups:
+ result = visit_load_num_subgroups(ctx);
+ break;
+ case nir_intrinsic_first_invocation:
+ result = visit_first_invocation(ctx);
+ break;
+ case nir_intrinsic_load_push_constant:
+ result = visit_load_push_constant(ctx, instr);
+ break;
+ case nir_intrinsic_vulkan_resource_index: {
+ LLVMValueRef index = get_src(ctx, instr->src[0]);
+ unsigned desc_set = nir_intrinsic_desc_set(instr);
+ unsigned binding = nir_intrinsic_binding(instr);
+
+ result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
+ break;
+ }
+ case nir_intrinsic_vulkan_resource_reindex:
+ result = visit_vulkan_resource_reindex(ctx, instr);
+ break;
+ case nir_intrinsic_store_ssbo:
+ visit_store_ssbo(ctx, instr);
+ break;
+ case nir_intrinsic_load_ssbo:
+ result = visit_load_buffer(ctx, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ result = visit_atomic_ssbo(ctx, instr);
+ break;
+ case nir_intrinsic_load_ubo:
+ result = visit_load_ubo_buffer(ctx, instr);
+ break;
+ case nir_intrinsic_get_buffer_size:
+ result = visit_get_buffer_size(ctx, instr);
+ break;
+ case nir_intrinsic_load_deref:
+ result = visit_load_var(ctx, instr);
+ break;
+ case nir_intrinsic_store_deref:
+ visit_store_var(ctx, instr);
+ break;
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_input_vertex:
+ case nir_intrinsic_load_per_vertex_input:
+ result = visit_load(ctx, instr, false);
+ break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_per_vertex_output:
+ result = visit_load(ctx, instr, true);
+ break;
+ case nir_intrinsic_store_output:
+ case nir_intrinsic_store_per_vertex_output:
+ visit_store_output(ctx, instr);
+ break;
+ case nir_intrinsic_load_shared:
+ result = visit_load_shared(ctx, instr);
+ break;
+ case nir_intrinsic_store_shared:
+ visit_store_shared(ctx, instr);
+ break;
+ case nir_intrinsic_bindless_image_samples:
+ case nir_intrinsic_image_deref_samples:
+ result = visit_image_samples(ctx, instr);
+ break;
+ case nir_intrinsic_bindless_image_load:
+ result = visit_image_load(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_load:
+ result = visit_image_load(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_store:
+ visit_image_store(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_store:
+ visit_image_store(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_atomic_add:
+ case nir_intrinsic_bindless_image_atomic_imin:
+ case nir_intrinsic_bindless_image_atomic_umin:
+ case nir_intrinsic_bindless_image_atomic_imax:
+ case nir_intrinsic_bindless_image_atomic_umax:
+ case nir_intrinsic_bindless_image_atomic_and:
+ case nir_intrinsic_bindless_image_atomic_or:
+ case nir_intrinsic_bindless_image_atomic_xor:
+ case nir_intrinsic_bindless_image_atomic_exchange:
+ case nir_intrinsic_bindless_image_atomic_comp_swap:
+ case nir_intrinsic_bindless_image_atomic_inc_wrap:
+ case nir_intrinsic_bindless_image_atomic_dec_wrap:
+ result = visit_image_atomic(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_imin:
+ case nir_intrinsic_image_deref_atomic_umin:
+ case nir_intrinsic_image_deref_atomic_imax:
+ case nir_intrinsic_image_deref_atomic_umax:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_atomic_inc_wrap:
+ case nir_intrinsic_image_deref_atomic_dec_wrap:
+ result = visit_image_atomic(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_size:
+ result = visit_image_size(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_size:
+ result = visit_image_size(ctx, instr, false);
+ break;
+ case nir_intrinsic_shader_clock:
+ result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
+ break;
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if:
+ emit_discard(ctx, instr);
+ break;
+ case nir_intrinsic_demote:
+ case nir_intrinsic_demote_if:
+ emit_demote(ctx, instr);
+ break;
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_shared:
+ emit_membar(&ctx->ac, instr);
+ break;
+ case nir_intrinsic_scoped_barrier: {
+ assert(!(nir_intrinsic_memory_semantics(instr) &
+ (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
+
+ nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
+
+ unsigned wait_flags = 0;
+ if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
+ wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+ if (modes & nir_var_mem_shared)
+ wait_flags |= AC_WAIT_LGKM;
+
+ if (wait_flags)
+ ac_build_waitcnt(&ctx->ac, wait_flags);
+
+ if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
+ ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ }
+ case nir_intrinsic_memory_barrier_tcs_patch:
+ break;
+ case nir_intrinsic_control_barrier:
+ ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap:
+ case nir_intrinsic_shared_atomic_fadd: {
+ LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size);
+ result = visit_var_atomic(ctx, instr, ptr, 1);
+ break;
+ }
+ case nir_intrinsic_deref_atomic_add:
+ case nir_intrinsic_deref_atomic_imin:
+ case nir_intrinsic_deref_atomic_umin:
+ case nir_intrinsic_deref_atomic_imax:
+ case nir_intrinsic_deref_atomic_umax:
+ case nir_intrinsic_deref_atomic_and:
+ case nir_intrinsic_deref_atomic_or:
+ case nir_intrinsic_deref_atomic_xor:
+ case nir_intrinsic_deref_atomic_exchange:
+ case nir_intrinsic_deref_atomic_comp_swap:
+ case nir_intrinsic_deref_atomic_fadd: {
+ LLVMValueRef ptr = get_src(ctx, instr->src[0]);
+ result = visit_var_atomic(ctx, instr, ptr, 1);
+ break;
+ }
+ case nir_intrinsic_load_barycentric_pixel: /* Delegates to barycentric_center() with the intrinsic's interpolation mode. */
+ result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_centroid: /* Same pattern, via barycentric_centroid(). */
+ result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_sample: /* Same pattern, via barycentric_sample(). */
+ result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_model: /* barycentric_model() is called without an interpolation mode. */
+ result = barycentric_model(ctx);
+ break;
+ case nir_intrinsic_load_barycentric_at_offset:
+ /* src[0] goes through ac_to_float() before being passed as the offset. */
+ result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr),
+ ac_to_float(&ctx->ac, get_src(ctx, instr->src[0])));
+ break;
+ case nir_intrinsic_load_barycentric_at_sample:
+ /* src[0] is forwarded unchanged as the sample id. */
+ result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr),
+ get_src(ctx, instr->src[0]));
+ break;
+ case nir_intrinsic_load_interpolated_input: {
+ /* Indirect offsets are expected to be lowered before this point, so
+ * src[1] must be a constant and that constant must be zero.
+ */
+ ASSERTED nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+ assert(const_offset);
+ assert(const_offset[0].i32 == 0);
+
+ /* src[0] supplies the interpolation parameter; base and component come
+ * from the intrinsic's indices, the result shape from its destination.
+ */
+ LLVMValueRef interp = get_src(ctx, instr->src[0]);
+
+ result = load_interpolated_input(ctx, interp, nir_intrinsic_base(instr),
+ nir_intrinsic_component(instr), instr->dest.ssa.num_components,
+ instr->dest.ssa.bit_size);
+ break;
+ }
+ case nir_intrinsic_emit_vertex: /* Forward to the ABI hook with the stream id and ctx->abi->outputs; result stays NULL. */
+ ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
+ break;
+ case nir_intrinsic_emit_vertex_with_counter: { /* Like emit_vertex, but additionally passes src[0] as next_vertex. */
+ unsigned stream = nir_intrinsic_stream_id(instr);
+ LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
+ ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
+ break;
+ }
+ case nir_intrinsic_end_primitive:
+ case nir_intrinsic_end_primitive_with_counter: /* Both variants use the same ABI hook; only the stream id is passed. */
+ ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
+ break;
+ case nir_intrinsic_load_tess_coord: /* The tessellation loads in this group are all delegated to ctx->abi callbacks. */
+ result = ctx->abi->load_tess_coord(ctx->abi);
+ break;
+ case nir_intrinsic_load_tess_level_outer: /* Outer slot; last argument is false here and true for the *_default variant. */
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
+ break;
+ case nir_intrinsic_load_tess_level_inner: /* Inner slot; last argument false. */
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
+ break;
+ case nir_intrinsic_load_tess_level_outer_default: /* Outer slot; last argument true. */
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
+ break;
+ case nir_intrinsic_load_tess_level_inner_default: /* Inner slot; last argument true. */
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
+ break;
+ case nir_intrinsic_load_patch_vertices_in:
+ result = ctx->abi->load_patch_vertices_in(ctx->abi);
+ break;
+ case nir_intrinsic_vote_all: {
+ /* Sign-extend the vote to the integer width NIR declares for the
+ * destination instead of a hard-coded i32, so the value stored as the SSA
+ * def always has the type its users expect (e.g. 1-bit booleans). For a
+ * 32-bit destination this is identical to the previous behaviour.
+ */
+ LLVMTypeRef dest_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
+ result = LLVMBuildSExt(ctx->ac.builder, tmp, dest_type, "");
+ break;
+ }
+ case nir_intrinsic_vote_any: {
+ /* Same width handling as vote_all: extend to the destination's bit size. */
+ LLVMTypeRef dest_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
+ result = LLVMBuildSExt(ctx->ac.builder, tmp, dest_type, "");
+ break;
+ }
+ case nir_intrinsic_shuffle: /* src[0] is the value, src[1] the lane index; two code paths depending on chip and wave size. */
+ if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
+ (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) { /* GFX8, GFX9, or GFX10+ with wave size 32: ac_build_shuffle() handles it directly. */
+ result =
+ ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
+ } else { /* All other configurations: llvm.amdgcn.readlane wrapped in a waterfall over the index. */
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ LLVMValueRef index = get_src(ctx, instr->src[1]);
+ LLVMTypeRef type = LLVMTypeOf(src); /* Remembered so the result can be converted back after the i32 readlane. */
+ struct waterfall_context wctx;
+ LLVMValueRef index_val;
+
+ index_val = enter_waterfall(ctx, &wctx, index, true); /* NOTE(review): presumably yields a lane-uniform index suitable for readlane - confirm in enter_waterfall(). */
+
+ src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, ""); /* NOTE(review): ZExt/Trunc assume an integer source of at most 32 bits - confirm wider or float sources never reach this path. */
+
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
+ (LLVMValueRef[]){src, index_val}, 2,
+ AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+
+ result = LLVMBuildTrunc(ctx->ac.builder, result, type, ""); /* Back to the source's original type. */
+
+ result = exit_waterfall(ctx, &wctx, result); /* Closes the waterfall opened above and yields the final value. */
+ }
+ break;
+ case nir_intrinsic_reduce: /* const_index[0] and const_index[1] are forwarded as-is - presumably the reduction op and cluster size; confirm against ac_build_reduce(). */
+ result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
+ instr->const_index[1]);
+ break;
+ case nir_intrinsic_inclusive_scan: /* Scans take only const_index[0] in addition to the source. */
+ result =
+ ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
+ break;
+ case nir_intrinsic_exclusive_scan: /* Same operands as the inclusive scan, via ac_build_exclusive_scan(). */
+ result =
+ ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
+ break;
+ case nir_intrinsic_quad_broadcast: {
+ /* The same lane index, read from src[1], fills all four swizzle slots. */
+ const unsigned sel = nir_src_as_uint(instr->src[1]);
+ LLVMValueRef value = get_src(ctx, instr->src[0]);
+
+ result = ac_build_quad_swizzle(&ctx->ac, value, sel, sel, sel, sel);
+ break;
+ }
+ case nir_intrinsic_quad_swap_horizontal: {
+ /* Fixed lane pattern 1, 0, 3, 2. */
+ LLVMValueRef value = get_src(ctx, instr->src[0]);
+
+ result = ac_build_quad_swizzle(&ctx->ac, value, 1, 0, 3, 2);
+ break;
+ }
+ case nir_intrinsic_quad_swap_vertical: {
+ /* Fixed lane pattern 2, 3, 0, 1. */
+ LLVMValueRef value = get_src(ctx, instr->src[0]);
+
+ result = ac_build_quad_swizzle(&ctx->ac, value, 2, 3, 0, 1);
+ break;
+ }
+ case nir_intrinsic_quad_swap_diagonal: {
+ /* Fixed lane pattern 3, 2, 1, 0. */
+ LLVMValueRef value = get_src(ctx, instr->src[0]);
+
+ result = ac_build_quad_swizzle(&ctx->ac, value, 3, 2, 1, 0);
+ break;
+ }
+ case nir_intrinsic_quad_swizzle_amd: {
+ /* The swizzle mask packs four 2-bit lane selectors, lowest bits first. */
+ const uint32_t packed = nir_intrinsic_swizzle_mask(instr);
+ unsigned sel[4];
+
+ for (unsigned i = 0; i < 4; i++) {
+ sel[i] = (packed >> (2 * i)) & 0x3;
+ }
+
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), sel[0], sel[1],
+ sel[2], sel[3]);
+ break;
+ }
+ case nir_intrinsic_masked_swizzle_amd: { /* The intrinsic's swizzle mask is forwarded unchanged to ac_build_ds_swizzle(). */
+ uint32_t mask = nir_intrinsic_swizzle_mask(instr);
+ result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
+ break;
+ }
+ case nir_intrinsic_write_invocation_amd: /* All three sources are passed to ac_build_writelane() in source order. */
+ result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
+ get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
+ break;
+ case nir_intrinsic_mbcnt_amd: /* Single-source wrapper around ac_build_mbcnt(). */
+ result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
+ break;
+ case nir_intrinsic_load_scratch: {
+ /* Address ctx->scratch at the offset in src[0], then reinterpret that
+ * address as a pointer to the destination's type (a scalar integer of the
+ * destination bit size, or a vector of them) and load through it.
+ */
+ const unsigned num_comps = instr->dest.ssa.num_components;
+ LLVMValueRef addr = ac_build_gep0(&ctx->ac, ctx->scratch, get_src(ctx, instr->src[0]));
+ LLVMTypeRef load_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+
+ if (num_comps != 1) {
+ load_type = LLVMVectorType(load_type, num_comps);
+ }
+
+ /* Keep the address space of the original pointer when retyping it. */
+ addr = LLVMBuildBitCast(ctx->ac.builder, addr,
+ LLVMPointerType(load_type, LLVMGetPointerAddressSpace(LLVMTypeOf(addr))), "");
+ result = LLVMBuildLoad(ctx->ac.builder, addr, "");
+ break;
+ }
+ case nir_intrinsic_store_scratch: { /* Store the write-masked components of src[0] into ctx->scratch at the offset in src[1]. */
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
+ LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), ""); /* Pointer to a single component, so the GEP below indexes in component units. */
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ unsigned wrmask = nir_intrinsic_write_mask(instr);
+ while (wrmask) { /* One store per range reported by u_bit_scan_consecutive_range(). */
+ int start, count;
+ u_bit_scan_consecutive_range(&wrmask, &start, &count); /* NOTE(review): presumably also clears the returned range from wrmask, which is what terminates the loop - confirm. */
+
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false); /* NOTE(review): shadows the outer 'offset'; harmless here but trips -Wshadow. */
+ LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
+ LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count); /* Scalar for a single component, vector for a longer run. */
+ offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr,
+ LLVMPointerType(vec_type, addr_space), "");
+ LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count); /* The matching slice of the value being stored. */
+ LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
+ }
+ break;
+ }
+ case nir_intrinsic_load_constant: { /* Load from ctx->constant_data at src[0] + base, with the offset clamped against base + range. */
+ unsigned base = nir_intrinsic_base(instr);
+ unsigned range = nir_intrinsic_range(instr);
+
+ LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), ""); /* Effective offset = src[0] + base. */
+
+ /* Clamp the offset to avoid out-of-bound access because global
+ * instructions can't handle them.
+ */
+ LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false); /* Upper bound of the accessible window. */
+ LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, ""); /* Unsigned compare: offset < base + range. */
+ offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, ""); /* NOTE(review): out-of-range offsets are redirected to base + range itself, not to the last valid element - confirm constant_data keeps that address readable. */
+
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset);
+ LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
+ ? comp_type
+ : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), ""); /* Retype the pointer to the destination's scalar or vector type, keeping its address space. */
+ result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
+ default: /* Unhandled intrinsic: print it to stderr and carry on without assigning result. */
+ fprintf(stderr, "Unknown intrinsic: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ break;
+ }
+ if (result) { /* Arms that produced a value record it under the destination's SSA index; the others skip this. */
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+ }