LOCAL_SRC_FILES := \
$(AMD_COMMON_FILES) \
- $(AMD_COMPILER_FILES) \
- $(AMD_DEBUG_FILES) \
- $(AMD_NIR_FILES)
+ $(AMD_COMMON_LLVM_FILES) \
+ $(AMD_DEBUG_FILES)
LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions
$(MESA_TOP)/include \
$(MESA_TOP)/src \
$(MESA_TOP)/src/amd/common \
+ $(MESA_TOP)/src/amd/llvm \
$(MESA_TOP)/src/compiler \
$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \
$(MESA_TOP)/src/gallium/include \
addrlib/src/r800/siaddrlib.cpp \
addrlib/src/r800/siaddrlib.h
-AMD_COMPILER_FILES = \
+AMD_COMMON_FILES = \
common/ac_binary.c \
common/ac_binary.h \
common/ac_exp_param.h \
- common/ac_llvm_build.c \
- common/ac_llvm_build.h \
- common/ac_llvm_cull.c \
- common/ac_llvm_cull.h \
- common/ac_llvm_helper.cpp \
- common/ac_llvm_util.c \
- common/ac_llvm_util.h \
+ common/ac_gpu_info.c \
+ common/ac_gpu_info.h \
+ common/ac_surface.c \
+ common/ac_surface.h \
common/ac_rtld.c \
common/ac_rtld.h \
- common/ac_shader_abi.h \
common/ac_shader_util.c \
common/ac_shader_util.h
-
-AMD_NIR_FILES = \
- common/ac_nir_to_llvm.c \
- common/ac_nir_to_llvm.h
-
-AMD_COMMON_FILES = \
- common/ac_gpu_info.c \
- common/ac_gpu_info.h \
- common/ac_surface.c \
- common/ac_surface.h
+AMD_COMMON_LLVM_FILES = \
+ llvm/ac_llvm_build.c \
+ llvm/ac_llvm_build.h \
+ llvm/ac_llvm_cull.c \
+ llvm/ac_llvm_cull.h \
+ llvm/ac_llvm_helper.cpp \
+ llvm/ac_llvm_util.c \
+ llvm/ac_llvm_util.h \
+ llvm/ac_shader_abi.h \
+ llvm/ac_nir_to_llvm.c \
+ llvm/ac_nir_to_llvm.h
AMD_DEBUG_FILES = \
common/ac_debug.c \
+++ /dev/null
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
-#include "ac_llvm_build.h"
-
-#include <llvm-c/Core.h>
-#include <llvm/Config/llvm-config.h>
-
-#include "c11/threads.h"
-
-#include <assert.h>
-#include <stdio.h>
-
-#include "ac_llvm_util.h"
-#include "ac_shader_util.h"
-#include "ac_exp_param.h"
-#include "util/bitscan.h"
-#include "util/macros.h"
-#include "util/u_atomic.h"
-#include "util/u_math.h"
-#include "sid.h"
-
-#include "shader_enums.h"
-
-#define AC_LLVM_INITIAL_CF_DEPTH 4
-
-/* Data for if/else/endif and bgnloop/endloop control flow structures.
- *
- * One record holds the basic blocks associated with a single control flow
- * construct.
- */
-struct ac_llvm_flow {
- /* Loop exit or next part of if/else/endif. */
- LLVMBasicBlockRef next_block;
- /* NOTE(review): by its name, the block a loop starts at; its users are
-  * outside this excerpt - confirm. */
- LLVMBasicBlockRef loop_entry_block;
-};
-
-/* Initialize an ac_llvm_context.
- *
- * Creates the LLVM context, the module and the builder, then caches the
- * frequently used types, constants and metadata kinds in \p ctx.
- * ctx::module and ctx::builder are created here (from \p compiler and
- * \p float_mode); the caller does not have to set them up.
- *
- * \param ctx               context to fill in
- * \param compiler          supplies the target machine: tm_wave32 is used when
- *                          wave_size == 32, tm otherwise
- * \param chip_class        stored in ctx->chip_class
- * \param family            stored in ctx->family
- * \param float_mode        forwarded to ac_create_builder()
- * \param wave_size         stored in ctx->wave_size; also the bit width of
- *                          ctx->iN_wavemask
- * \param ballot_mask_bits  stored in ctx->ballot_mask_bits; also the bit width
- *                          of ctx->iN_ballotmask
- */
-void
-ac_llvm_context_init(struct ac_llvm_context *ctx,
- struct ac_llvm_compiler *compiler,
- enum chip_class chip_class, enum radeon_family family,
- enum ac_float_mode float_mode, unsigned wave_size,
- unsigned ballot_mask_bits)
-{
- LLVMValueRef args[1];
-
- ctx->context = LLVMContextCreate();
-
- ctx->chip_class = chip_class;
- ctx->family = family;
- ctx->wave_size = wave_size;
- ctx->ballot_mask_bits = ballot_mask_bits;
- ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32
- : compiler->tm,
- ctx->context);
- ctx->builder = ac_create_builder(ctx->context, float_mode);
-
- /* Cached scalar and vector types. */
- ctx->voidt = LLVMVoidTypeInContext(ctx->context);
- ctx->i1 = LLVMInt1TypeInContext(ctx->context);
- ctx->i8 = LLVMInt8TypeInContext(ctx->context);
- ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
- ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
- ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
- ctx->intptr = ctx->i32;
- ctx->f16 = LLVMHalfTypeInContext(ctx->context);
- ctx->f32 = LLVMFloatTypeInContext(ctx->context);
- ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
- ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
- ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
- ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
- ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
- ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
- ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
- ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
- ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
- ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
- ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
-
- /* Cached 0 and 1 constants for each scalar type. */
- ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
- ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
- ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
- ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
- ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
- ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
- ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
- ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
- ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
- ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
- ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
- ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
- ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
- ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
-
- ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
- ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
-
- /* Metadata kind IDs; the last argument is the length of the name. */
- ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
- "range", 5);
-
- ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
- "invariant.load", 14);
-
- ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
-
- /* Metadata node holding the f32 constant 2.5, used with fpmath_md_kind. */
- args[0] = LLVMConstReal(ctx->f32, 2.5);
- ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
-
- ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
- "amdgpu.uniform", 14);
-
- ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
- /* Zero-initialized control flow state, released by
-  * ac_llvm_context_dispose(). The calloc result is not checked here. */
- ctx->flow = calloc(1, sizeof(*ctx->flow));
-}
-
-/* Release the control flow state allocated by ac_llvm_context_init().
- *
- * Only ctx->flow and its stack are freed here; the LLVM context, module and
- * builder are not touched by this function.
- *
- * Safe to call when ctx->flow is NULL, i.e. after a previous dispose or when
- * the allocation in ac_llvm_context_init() failed.
- */
-void
-ac_llvm_context_dispose(struct ac_llvm_context *ctx)
-{
-	/* Nothing to release; avoids dereferencing a NULL flow pointer. */
-	if (!ctx->flow)
-		return;
-
-	free(ctx->flow->stack);
-	free(ctx->flow);
-	ctx->flow = NULL;
-}
-
-/* Return the number of components of \p value: the vector length for a
- * vector-typed value, 1 for anything else.
- */
-int
-ac_get_llvm_num_components(LLVMValueRef value)
-{
-	LLVMTypeRef value_type = LLVMTypeOf(value);
-
-	if (LLVMGetTypeKind(value_type) != LLVMVectorTypeKind)
-		return 1;
-
-	return LLVMGetVectorSize(value_type);
-}
-
-/* Extract component \p index from \p value.
- *
- * A non-vector value is returned unchanged; in that case \p index must be 0.
- */
-LLVMValueRef
-ac_llvm_extract_elem(struct ac_llvm_context *ac,
-		     LLVMValueRef value,
-		     int index)
-{
-	bool is_vector =
-		LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind;
-
-	if (is_vector) {
-		LLVMValueRef lane = LLVMConstInt(ac->i32, index, false);
-
-		return LLVMBuildExtractElement(ac->builder, value, lane, "");
-	}
-
-	assert(index == 0);
-	return value;
-}
-
-/* Return the bit width of \p type, or of its element type when \p type is a
- * vector. Handles integer types and the context's f16/f32/f64 types; any
- * other type is treated as unreachable.
- */
-int
-ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
-{
-	LLVMTypeRef scalar = type;
-
-	if (LLVMGetTypeKind(scalar) == LLVMVectorTypeKind)
-		scalar = LLVMGetElementType(scalar);
-
-	if (LLVMGetTypeKind(scalar) == LLVMIntegerTypeKind)
-		return LLVMGetIntTypeWidth(scalar);
-
-	if (scalar == ctx->f64)
-		return 64;
-	if (scalar == ctx->f32)
-		return 32;
-	if (scalar == ctx->f16)
-		return 16;
-
-	unreachable("Unhandled type kind in get_elem_bits");
-}
-
-/* Return the size of \p type in bytes.
- *
- * Vectors and arrays are sized recursively as element count times element
- * size. Unhandled type kinds assert and return 0.
- */
-unsigned
-ac_get_type_size(LLVMTypeRef type)
-{
- LLVMTypeKind kind = LLVMGetTypeKind(type);
-
- switch (kind) {
- case LLVMIntegerTypeKind:
- /* Integer division: widths below 8 bits yield 0. */
- return LLVMGetIntTypeWidth(type) / 8;
- case LLVMHalfTypeKind:
- return 2;
- case LLVMFloatTypeKind:
- return 4;
- case LLVMDoubleTypeKind:
- return 8;
- case LLVMPointerTypeKind:
- /* Pointers are 4 bytes in the 32-bit constant address space and
-  * 8 bytes in every other address space. */
- if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
- return 4;
- return 8;
- case LLVMVectorTypeKind:
- return LLVMGetVectorSize(type) *
- ac_get_type_size(LLVMGetElementType(type));
- case LLVMArrayTypeKind:
- return LLVMGetArrayLength(type) *
- ac_get_type_size(LLVMGetElementType(type));
- default:
- assert(0);
- return 0;
- }
-}
-
-/* Map a scalar type to the integer type of the same width: i8 -> i8,
- * f16/i16 -> i16, f32/i32 -> i32, f64/i64 -> i64. Anything else is
- * treated as unreachable.
- */
-static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
-	if (t == ctx->i8)
-		return ctx->i8;
-
-	if (t == ctx->i16 || t == ctx->f16)
-		return ctx->i16;
-
-	if (t == ctx->i32 || t == ctx->f32)
-		return ctx->i32;
-
-	if (t == ctx->i64 || t == ctx->f64)
-		return ctx->i64;
-
-	unreachable("Unhandled integer size");
-}
-
-/* Return the integer type corresponding to \p t.
- *
- * - vector:  vector of the same length with integer elements of the same width
- * - pointer: i64 for AC_ADDR_SPACE_GLOBAL, i32 for AC_ADDR_SPACE_LDS; any
- *            other address space is treated as unreachable
- * - scalar:  see to_integer_type_scalar()
- */
-LLVMTypeRef
-ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
- if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
- LLVMTypeRef elem_type = LLVMGetElementType(t);
- return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
- LLVMGetVectorSize(t));
- }
- if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
- switch (LLVMGetPointerAddressSpace(t)) {
- case AC_ADDR_SPACE_GLOBAL:
- return ctx->i64;
- case AC_ADDR_SPACE_LDS:
- return ctx->i32;
- default:
- unreachable("unhandled address space");
- }
- }
- return to_integer_type_scalar(ctx, t);
-}
-
-/* Convert \p v to the integer type given by ac_to_integer_type(): pointers
- * are converted with ptrtoint, everything else with a bitcast.
- */
-LLVMValueRef
-ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
-	LLVMTypeRef src_type = LLVMTypeOf(v);
-	LLVMTypeRef int_type = ac_to_integer_type(ctx, src_type);
-
-	if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
-		return LLVMBuildPtrToInt(ctx->builder, v, int_type, "");
-
-	return LLVMBuildBitCast(ctx->builder, v, int_type, "");
-}
-
-/* Like ac_to_integer(), except that pointer values are returned unchanged. */
-LLVMValueRef
-ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
-	bool is_pointer =
-		LLVMGetTypeKind(LLVMTypeOf(v)) == LLVMPointerTypeKind;
-
-	return is_pointer ? v : ac_to_integer(ctx, v);
-}
-
-/* Map a scalar type to the float type of the same width: i16/f16 -> f16,
- * i32/f32 -> f32, i64/f64 -> f64. Anything else is treated as unreachable.
- */
-static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
- /* i8 is the exception: it is returned unchanged as an integer type. */
- if (t == ctx->i8)
- return ctx->i8;
- else if (t == ctx->i16 || t == ctx->f16)
- return ctx->f16;
- else if (t == ctx->i32 || t == ctx->f32)
- return ctx->f32;
- else if (t == ctx->i64 || t == ctx->f64)
- return ctx->f64;
- else
- unreachable("Unhandled float size");
-}
-
-/* Return the float type corresponding to \p t; for a vector, a vector of the
- * same length whose element type is mapped by to_float_type_scalar().
- */
-LLVMTypeRef
-ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
-{
-	if (LLVMGetTypeKind(t) != LLVMVectorTypeKind)
-		return to_float_type_scalar(ctx, t);
-
-	LLVMTypeRef scalar = to_float_type_scalar(ctx, LLVMGetElementType(t));
-
-	return LLVMVectorType(scalar, LLVMGetVectorSize(t));
-}
-
-/* Bitcast \p v to the float type given by ac_to_float_type(). */
-LLVMValueRef
-ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
-{
-	LLVMTypeRef float_type = ac_to_float_type(ctx, LLVMTypeOf(v));
-
-	return LLVMBuildBitCast(ctx->builder, v, float_type, "");
-}
-
-
-/* Build a call to the function \p name, declaring it in the module first if
- * it does not exist yet.
- *
- * \param name         function (intrinsic) name looked up in ctx->module
- * \param return_type  return type used when the function has to be declared
- * \param params       call arguments; their types define the parameter types
- *                     of a new declaration
- * \param param_count  number of entries in \p params, at most 32
- * \param attrib_mask  AC_FUNC_ATTR_* bits. With AC_FUNC_ATTR_LEGACY set, the
- *                     attributes are added to a newly created declaration;
- *                     otherwise they are added to the call site.
- * \return the call instruction
- */
-LLVMValueRef
-ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
- LLVMTypeRef return_type, LLVMValueRef *params,
- unsigned param_count, unsigned attrib_mask)
-{
- LLVMValueRef function, call;
- bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
-
- function = LLVMGetNamedFunction(ctx->module, name);
- if (!function) {
- /* First use: declare the function from the argument types. */
- LLVMTypeRef param_types[32], function_type;
- unsigned i;
-
- assert(param_count <= 32);
-
- for (i = 0; i < param_count; ++i) {
- assert(params[i]);
- param_types[i] = LLVMTypeOf(params[i]);
- }
- function_type =
- LLVMFunctionType(return_type, param_types, param_count, 0);
- function = LLVMAddFunction(ctx->module, name, function_type);
-
- LLVMSetFunctionCallConv(function, LLVMCCallConv);
- LLVMSetLinkage(function, LLVMExternalLinkage);
-
- if (!set_callsite_attrs)
- ac_add_func_attributes(ctx->context, function, attrib_mask);
- }
-
- call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
- if (set_callsite_attrs)
- ac_add_func_attributes(ctx->context, call, attrib_mask);
- return call;
-}
-
-/**
- * Given a scalar or vector \p type, generate the textual name (e.g. for use
- * with intrinsic names): an optional "v<N>" vector prefix followed by
- * "i<bits>", "f16", "f32" or "f64".
- *
- * \param type     integer, half, float or double type, or a vector of one;
- *                 for any other element type nothing is written after the
- *                 optional vector prefix
- * \param buf      output buffer
- * \param bufsize  size of \p buf in bytes, at least 8
- *
- * If the vector prefix cannot be formatted or does not fit in \p buf, an
- * error is printed to stderr and the function returns without appending the
- * element type.
- */
-void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
-{
-	LLVMTypeRef elem_type = type;
-
-	assert(bufsize >= 8);
-
-	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
-		int ret = snprintf(buf, bufsize, "v%u",
-				   LLVMGetVectorSize(type));
-		/* snprintf returns the untruncated length: ret >= bufsize
-		 * means the prefix was cut off, and advancing buf by ret
-		 * would step past the end of the buffer and wrap bufsize.
-		 */
-		if (ret < 0 || (unsigned)ret >= bufsize) {
-			char *type_name = LLVMPrintTypeToString(type);
-			fprintf(stderr, "Error building type name for: %s\n",
-				type_name);
-			LLVMDisposeMessage(type_name);
-			return;
-		}
-		elem_type = LLVMGetElementType(type);
-		buf += ret;
-		bufsize -= ret;
-	}
-	switch (LLVMGetTypeKind(elem_type)) {
-	default: break;
-	case LLVMIntegerTypeKind:
-		snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
-		break;
-	case LLVMHalfTypeKind:
-		snprintf(buf, bufsize, "f16");
-		break;
-	case LLVMFloatTypeKind:
-		snprintf(buf, bufsize, "f32");
-		break;
-	case LLVMDoubleTypeKind:
-		snprintf(buf, bufsize, "f64");
-		break;
-	}
-}
-
-/**
- * Create a PHI node of the given \p type at the builder's current position
- * and attach \p count_incoming (value, block) pairs to it in one step.
- */
-LLVMValueRef
-ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
-	     unsigned count_incoming, LLVMValueRef *values,
-	     LLVMBasicBlockRef *blocks)
-{
-	LLVMValueRef node = LLVMBuildPhi(ctx->builder, type, "");
-
-	LLVMAddIncoming(node, values, blocks, count_incoming);
-
-	return node;
-}
-
-/* Emit a call to llvm.amdgcn.s.barrier (no arguments, void result), marked
- * convergent.
- */
-void ac_build_s_barrier(struct ac_llvm_context *ctx)
-{
-	LLVMValueRef *no_args = NULL;
-
-	ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt,
-			   no_args, 0, AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Prevent optimizations (at least of memory accesses) across the current
- * point in the program by emitting empty inline assembly that is marked as
- * having side effects.
- *
- * Optionally, a value can be passed through the inline assembly to prevent
- * LLVM from hoisting calls to ReadNone functions.
- *
- * \param pvgpr  NULL, or a pointer to a value whose size is a multiple of
- *               4 bytes. On return *pvgpr is replaced by an equivalent value
- *               whose first dword has been routed through the inline asm.
- */
-void
-ac_build_optimization_barrier(struct ac_llvm_context *ctx,
- LLVMValueRef *pvgpr)
-{
- static int counter = 0;
-
- LLVMBuilderRef builder = ctx->builder;
- char code[16];
-
- /* The asm text is just a comment carrying a counter value, so each
-  * barrier gets a distinct string. */
- snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
-
- if (!pvgpr) {
- LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
- LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
- LLVMBuildCall(builder, inlineasm, NULL, 0, "");
- } else {
- /* i32 -> i32 asm; constraint "=v,0" ties the input to the output. */
- LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
- LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
- LLVMValueRef vgpr = *pvgpr;
- LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
- unsigned vgpr_size = ac_get_type_size(vgpr_type);
- LLVMValueRef vgpr0;
-
- assert(vgpr_size % 4 == 0);
-
- /* View the value as a vector of i32, pass only element 0 through
-  * the asm, then restore the original type. */
- vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
- vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
- vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
- vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
- vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
-
- *pvgpr = vgpr;
- }
-}
-
-/* Read a 64-bit counter and return it bitcast to v2i32.
- *
- * llvm.amdgcn.s.memrealtime is used with LLVM >= 9 on GFX8 and newer,
- * llvm.readcyclecounter otherwise.
- */
-LLVMValueRef
-ac_build_shader_clock(struct ac_llvm_context *ctx)
-{
-	const char *intr;
-
-	if (LLVM_VERSION_MAJOR >= 9 && ctx->chip_class >= GFX8)
-		intr = "llvm.amdgcn.s.memrealtime";
-	else
-		intr = "llvm.readcyclecounter";
-
-	LLVMValueRef counter =
-		ac_build_intrinsic(ctx, intr, ctx->i64, NULL, 0, 0);
-
-	return LLVMBuildBitCast(ctx->builder, counter, ctx->v2i32, "");
-}
-
-/* Build a ballot of \p value: an llvm.amdgcn.icmp call testing value != 0,
- * with a result of type ctx->iN_wavemask (presumably one bit per lane -
- * confirm against the intrinsic's documentation).
- *
- * The intrinsic variant is chosen from the LLVM version and, for LLVM >= 9,
- * from ctx->wave_size.
- */
-LLVMValueRef
-ac_build_ballot(struct ac_llvm_context *ctx,
- LLVMValueRef value)
-{
- const char *name;
-
- if (LLVM_VERSION_MAJOR >= 9) {
- if (ctx->wave_size == 64)
- name = "llvm.amdgcn.icmp.i64.i32";
- else
- name = "llvm.amdgcn.icmp.i32.i32";
- } else {
- name = "llvm.amdgcn.icmp.i32";
- }
- /* icmp(value, 0, NE) */
- LLVMValueRef args[3] = {
- value,
- ctx->i32_0,
- LLVMConstInt(ctx->i32, LLVMIntNE, 0)
- };
-
- /* We currently have no other way to prevent LLVM from lifting the icmp
- * calls to a dominating basic block.
- */
- ac_build_optimization_barrier(ctx, &args[0]);
-
- args[0] = ac_to_integer(ctx, args[0]);
-
- return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
- AC_FUNC_ATTR_NOUNWIND |
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Build an llvm.amdgcn.icmp call testing the i1 \p value != false. The
- * result type is always i64, independent of ctx->wave_size.
- */
-LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
-				 LLVMValueRef value)
-{
-	const unsigned attrs = AC_FUNC_ATTR_NOUNWIND |
-			       AC_FUNC_ATTR_READNONE |
-			       AC_FUNC_ATTR_CONVERGENT;
-	const char *name;
-	LLVMValueRef args[3];
-
-	if (LLVM_VERSION_MAJOR >= 9)
-		name = "llvm.amdgcn.icmp.i64.i1";
-	else
-		name = "llvm.amdgcn.icmp.i1";
-
-	args[0] = value;
-	args[1] = ctx->i1false;
-	args[2] = LLVMConstInt(ctx->i32, LLVMIntNE, 0);
-
-	return ac_build_intrinsic(ctx, name, ctx->i64, args, 3, attrs);
-}
-
-/* Return an i1 that is true when the ballot of \p value equals the ballot of
- * the constant 1.
- */
-LLVMValueRef
-ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-	LLVMValueRef active = ac_build_ballot(ctx, ctx->i32_1);
-	LLVMValueRef votes = ac_build_ballot(ctx, value);
-
-	return LLVMBuildICmp(ctx->builder, LLVMIntEQ, votes, active, "");
-}
-
-/* Return an i1 that is true when the ballot of \p value is non-zero. */
-LLVMValueRef
-ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-	LLVMValueRef votes = ac_build_ballot(ctx, value);
-	LLVMValueRef zero = LLVMConstInt(ctx->iN_wavemask, 0, 0);
-
-	return LLVMBuildICmp(ctx->builder, LLVMIntNE, votes, zero, "");
-}
-
-/* Return an i1 that is true when the ballot of \p value is either equal to
- * the ballot of the constant 1 or equal to zero.
- */
-LLVMValueRef
-ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-	LLVMBuilderRef builder = ctx->builder;
-	LLVMValueRef active = ac_build_ballot(ctx, ctx->i32_1);
-	LLVMValueRef votes = ac_build_ballot(ctx, value);
-	LLVMValueRef zero = LLVMConstInt(ctx->iN_wavemask, 0, 0);
-
-	LLVMValueRef is_all = LLVMBuildICmp(builder, LLVMIntEQ, votes, active, "");
-	LLVMValueRef is_none = LLVMBuildICmp(builder, LLVMIntEQ, votes, zero, "");
-
-	return LLVMBuildOr(builder, is_all, is_none, "");
-}
-
-/* Gather values[component] .. values[component + value_count - 1] into a
- * vector of \p value_count elements.
- *
- * With value_count == 1 the single value values[component] is returned
- * without building a vector. value_count == 0 is treated as unreachable.
- */
-LLVMValueRef
-ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
- unsigned value_count, unsigned component)
-{
- LLVMValueRef vec = NULL;
-
- if (value_count == 1) {
- return values[component];
- } else if (!value_count)
- unreachable("value_count is 0");
-
- for (unsigned i = component; i < value_count + component; i++) {
- LLVMValueRef value = values[i];
-
- /* The vector's element type is taken from the first value. */
- if (i == component)
- vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
- LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
- vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
- }
- return vec;
-}
-
-/* Gather \p value_count values into a vector.
- *
- * \param values         source array; element i is read from
- *                       values[i * value_stride]
- * \param value_count    number of elements; 0 is treated as unreachable
- * \param value_stride   distance between consecutive source entries
- * \param load           if true, each entry is a pointer that is loaded first
- * \param always_vector  if false and value_count == 1, the single (optionally
- *                       loaded) value is returned instead of a 1-element vector
- */
-LLVMValueRef
-ac_build_gather_values_extended(struct ac_llvm_context *ctx,
- LLVMValueRef *values,
- unsigned value_count,
- unsigned value_stride,
- bool load,
- bool always_vector)
-{
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef vec = NULL;
- unsigned i;
-
- if (value_count == 1 && !always_vector) {
- if (load)
- return LLVMBuildLoad(builder, values[0], "");
- return values[0];
- } else if (!value_count)
- unreachable("value_count is 0");
-
- for (i = 0; i < value_count; i++) {
- LLVMValueRef value = values[i * value_stride];
- if (load)
- value = LLVMBuildLoad(builder, value, "");
-
- /* The vector's element type is taken from the first value. */
- if (!i)
- vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
- LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
- vec = LLVMBuildInsertElement(builder, vec, value, index, "");
- }
- return vec;
-}
-
-/* Gather \p value_count consecutive values into a vector: stride 1, no
- * loads, and a single value is returned as a scalar.
- */
-LLVMValueRef
-ac_build_gather_values(struct ac_llvm_context *ctx,
-		       LLVMValueRef *values,
-		       unsigned value_count)
-{
-	const unsigned stride = 1;
-	const bool load = false;
-	const bool always_vector = false;
-
-	return ac_build_gather_values_extended(ctx, values, value_count,
-					       stride, load, always_vector);
-}
-
-/* Expand a scalar or vector to <dst_channels x type> by filling the remaining
- * channels with undef. Extract at most src_channels components from the input.
- *
- * \param value         scalar or vector input
- * \param src_channels  maximum number of components taken from \p value; it is
- *                      clamped to the input vector's size and to dst_channels.
- *                      For a scalar input it must be 0 or 1.
- * \param dst_channels  number of channels of the result
- * \return \p value itself when it is already a dst_channels-wide vector and
- *         src_channels == dst_channels, otherwise the gathered result
- */
-static LLVMValueRef
-ac_build_expand(struct ac_llvm_context *ctx,
-		LLVMValueRef value,
-		unsigned src_channels,
-		unsigned dst_channels)
-{
-	LLVMTypeRef elemtype;
-	LLVMValueRef chan[dst_channels];
-
-	if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
-		unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
-
-		if (src_channels == dst_channels && vec_size == dst_channels)
-			return value;
-
-		src_channels = MIN2(src_channels, vec_size);
-		/* chan[] only has dst_channels slots; never copy more than
-		 * that from a wider input vector.
-		 */
-		src_channels = MIN2(src_channels, dst_channels);
-
-		for (unsigned i = 0; i < src_channels; i++)
-			chan[i] = ac_llvm_extract_elem(ctx, value, i);
-
-		elemtype = LLVMGetElementType(LLVMTypeOf(value));
-	} else {
-		if (src_channels) {
-			assert(src_channels == 1);
-			chan[0] = value;
-		}
-		elemtype = LLVMTypeOf(value);
-	}
-
-	/* Pad the channels that were not taken from the input. */
-	for (unsigned i = src_channels; i < dst_channels; i++)
-		chan[i] = LLVMGetUndef(elemtype);
-
-	return ac_build_gather_values(ctx, chan, dst_channels);
-}
-
-/* Return components [start, start + channels) of \p value, gathered with
- * ac_build_gather_values().
- */
-LLVMValueRef
-ac_extract_components(struct ac_llvm_context *ctx,
-		      LLVMValueRef value,
-		      unsigned start,
-		      unsigned channels)
-{
-	LLVMValueRef picked[channels];
-	unsigned n = 0;
-
-	while (n < channels) {
-		picked[n] = ac_llvm_extract_elem(ctx, value, start + n);
-		n++;
-	}
-
-	return ac_build_gather_values(ctx, picked, channels);
-}
-
-/* Widen a scalar or vector to four channels. At most num_channels components
- * are taken from the input; the rest of the result is undef.
- */
-LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
-				     LLVMValueRef value,
-				     unsigned num_channels)
-{
-	const unsigned vec4_channels = 4;
-
-	return ac_build_expand(ctx, value, num_channels, vec4_channels);
-}
-
-/* Round \p value with llvm.rint. The intrinsic variant is chosen from the
- * byte size of the value's type: 2 -> f16, 4 -> f32, anything else -> f64.
- */
-LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
-	LLVMTypeRef value_type = LLVMTypeOf(value);
-	const char *name;
-
-	switch (ac_get_type_size(value_type)) {
-	case 2:
-		name = "llvm.rint.f16";
-		break;
-	case 4:
-		name = "llvm.rint.f32";
-		break;
-	default:
-		name = "llvm.rint.f64";
-		break;
-	}
-
-	return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1,
-				  AC_FUNC_ATTR_READNONE);
-}
-
-/* Build num / den as num * (1 / den).
- *
- * Unless the result folds to a constant, the fpmath metadata node built from
- * the constant 2.5 (ctx->fpmath_md_2p5_ulp) is attached to the multiply.
- */
-LLVMValueRef
-ac_build_fdiv(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef den)
-{
- /* If we do (num / den), LLVM >= 7.0 does:
- * return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
- *
- * If we do (num * (1 / den)), LLVM does:
- * return num * v_rcp_f32(den);
- */
- LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
- LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
- LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");
-
- /* Use v_rcp_f32 instead of precise division. */
- if (!LLVMIsConstant(ret))
- LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
- return ret;
-}
-
-/* See fast_idiv_by_const.h. */
-/* Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */
-/* Emits, with the multiply-add done in 64 bits:
- *
- *   (((num >> pre_shift) * multiplier + increment) >> 32) >> post_shift
- *
- * The value is truncated to i32 before the final shift.
- */
-LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef pre_shift,
- LLVMValueRef post_shift,
- LLVMValueRef increment)
-{
- LLVMBuilderRef builder = ctx->builder;
-
- num = LLVMBuildLShr(builder, num, pre_shift, "");
- num = LLVMBuildMul(builder,
- LLVMBuildZExt(builder, num, ctx->i64, ""),
- LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
- num = LLVMBuildAdd(builder, num,
- LLVMBuildZExt(builder, increment, ctx->i64, ""), "");
- /* Keep the high 32 bits of the 64-bit result. */
- num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
- num = LLVMBuildTrunc(builder, num, ctx->i32, "");
- return LLVMBuildLShr(builder, num, post_shift, "");
-}
-
-/* See fast_idiv_by_const.h. */
-/* If num != UINT_MAX, this more efficient version can be used. */
-/* Set: increment = util_fast_udiv_info::increment; */
-/* Emits, with the multiply done in 64 bits:
- *
- *   ((((num >> pre_shift) + increment) * multiplier) >> 32) >> post_shift
- *
- * The increment is added with a no-unsigned-wrap add before the multiply.
- */
-LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef pre_shift,
- LLVMValueRef post_shift,
- LLVMValueRef increment)
-{
- LLVMBuilderRef builder = ctx->builder;
-
- num = LLVMBuildLShr(builder, num, pre_shift, "");
- num = LLVMBuildNUWAdd(builder, num, increment, "");
- num = LLVMBuildMul(builder,
- LLVMBuildZExt(builder, num, ctx->i64, ""),
- LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
- /* Keep the high 32 bits of the 64-bit product. */
- num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
- num = LLVMBuildTrunc(builder, num, ctx->i32, "");
- return LLVMBuildLShr(builder, num, post_shift, "");
-}
-
-/* See fast_idiv_by_const.h. */
-/* Both operands must fit in 31 bits and the divisor must not be 1. */
-/* Emits, with the multiply done in 64 bits:
- *
- *   ((num * multiplier) >> 32) >> post_shift
- */
-LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef post_shift)
-{
- LLVMBuilderRef builder = ctx->builder;
-
- num = LLVMBuildMul(builder,
- LLVMBuildZExt(builder, num, ctx->i64, ""),
- LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
- /* Keep the high 32 bits of the 64-bit product. */
- num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
- num = LLVMBuildTrunc(builder, num, ctx->i32, "");
- return LLVMBuildLShr(builder, num, post_shift, "");
-}
-
-/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
- * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
- * already multiplied by two. id is the cube face number.
- */
-struct cube_selection_coords {
- /* stc[0] is sc, stc[1] is tc. */
- LLVMValueRef stc[2];
- LLVMValueRef ma;
- LLVMValueRef id;
-};
-
-/* Fill \p out by calling the llvm.amdgcn.cube{tc,sc,ma,id} intrinsics on the
- * three input coordinates \p in. All four results are f32.
- */
-static void
-build_cube_intrinsic(struct ac_llvm_context *ctx,
- LLVMValueRef in[3],
- struct cube_selection_coords *out)
-{
- LLVMTypeRef f32 = ctx->f32;
-
- out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
- out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
- f32, in, 3, AC_FUNC_ATTR_READNONE);
-}
-
-/**
- * Build a manual selection sequence for cube face sc/tc coordinates and
- * major axis vector (multiplied by 2 for consistency) for the given
- * vec3 \p coords, for the face implied by \p selcoords.
- *
- * For the major axis, we always adjust the sign to be in the direction of
- * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
- * the selcoords major axis.
- *
- * \param selcoords  face selection; only ma and id are read
- * \param coords     three input components (x, y, z)
- * \param out_st     receives the selected sc (out_st[0]) and tc (out_st[1])
- * \param out_ma     receives the selected major axis value
- */
-static void build_cube_select(struct ac_llvm_context *ctx,
- const struct cube_selection_coords *selcoords,
- const LLVMValueRef *coords,
- LLVMValueRef *out_st,
- LLVMValueRef *out_ma)
-{
- LLVMBuilderRef builder = ctx->builder;
- LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
- LLVMValueRef is_ma_positive;
- LLVMValueRef sgn_ma;
- LLVMValueRef is_ma_z, is_not_ma_z;
- LLVMValueRef is_ma_y;
- LLVMValueRef is_ma_x;
- LLVMValueRef sgn;
- LLVMValueRef tmp;
-
- /* sgn_ma is +1.0 when ma >= 0 (unordered compare), -1.0 otherwise. */
- is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
- selcoords->ma, LLVMConstReal(f32, 0.0), "");
- sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
- LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
-
- /* Major axis from the face id: id >= 4 is Z, 2 <= id < 4 is Y,
-  * anything below 2 is X. */
- is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
- is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
- is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
- LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
- is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
-
- /* Select sc */
- tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
- sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
- LLVMBuildSelect(builder, is_ma_z, sgn_ma,
- LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
- out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
-
- /* Select tc */
- tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
- sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma,
- LLVMConstReal(f32, -1.0), "");
- out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
-
- /* Select ma */
- tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
- LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
- tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32",
- ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
- *out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
-}
-
-/* Convert cube map coordinates (and optionally derivatives) in place into the
- * face-relative form built below.
- *
- * \param is_deriv    convert \p derivs_arg as well (only if it is non-NULL)
- * \param is_array    coords_arg[3] holds the array layer
- * \param is_lod      when false and is_array is set, the layer is rounded
- *                    (and clamped to >= 0 on GFX8 and older) first
- * \param coords_arg  in: x, y, z and, for arrays, the layer in [3].
- *                    out: [0], [1] = face coordinates, [2] = face id, or
- *                    layer * 8 + face id for arrays. [3] is only rewritten by
- *                    the rounding step.
- * \param derivs_arg  in: two groups of three derivatives (elements 0..5).
- *                    out: four 2D derivatives (elements 0..3).
- */
-void
-ac_prepare_cube_coords(struct ac_llvm_context *ctx,
- bool is_deriv, bool is_array, bool is_lod,
- LLVMValueRef *coords_arg,
- LLVMValueRef *derivs_arg)
-{
-
- LLVMBuilderRef builder = ctx->builder;
- struct cube_selection_coords selcoords;
- LLVMValueRef coords[3];
- LLVMValueRef invma;
-
- if (is_array && !is_lod) {
- LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
-
- /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
- *
- * "For Array forms, the array layer used will be
- *
- * max(0, min(d−1, floor(layer+0.5)))
- *
- * where d is the depth of the texture array and layer
- * comes from the component indicated in the tables below."
- *
- * Workaround for an issue where the layer is taken from a
- * helper invocation which happens to fall on a different
- * layer due to extrapolation.
- *
- * GFX8 and earlier attempt to implement this in hardware by
- * clamping the value of coords[2] = (8 * layer) + face.
- * Unfortunately, this means that we end up with the wrong
- * face when clamping occurs.
- *
- * Clamp the layer earlier to work around the issue.
- */
- if (ctx->chip_class <= GFX8) {
- LLVMValueRef ge0;
- ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
- tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
- }
-
- coords_arg[3] = tmp;
- }
-
- build_cube_intrinsic(ctx, coords_arg, &selcoords);
-
- /* invma = 1 / |ma| */
- invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
- ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
- invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
-
- for (int i = 0; i < 2; ++i)
- coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
-
- coords[2] = selcoords.id;
-
- if (is_deriv && derivs_arg) {
- LLVMValueRef derivs[4];
- int axis;
-
- /* Convert cube derivatives to 2D derivatives. */
- for (axis = 0; axis < 2; axis++) {
- LLVMValueRef deriv_st[2];
- LLVMValueRef deriv_ma;
-
- /* Transform the derivative alongside the texture
- * coordinate. Mathematically, the correct formula is
- * as follows. Assume we're projecting onto the +Z face
- * and denote by dx/dh the derivative of the (original)
- * X texture coordinate with respect to horizontal
- * window coordinates. The projection onto the +Z face
- * plane is:
- *
- * f(x,z) = x/z
- *
- * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
- * = 1/z * dx/dh - x/z * 1/z * dz/dh.
- *
- * This motivates the implementation below.
- *
- * Whether this actually gives the expected results for
- * apps that might feed in derivatives obtained via
- * finite differences is anyone's guess. The OpenGL spec
- * seems awfully quiet about how textureGrad for cube
- * maps should be handled.
- */
- build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3],
- deriv_st, &deriv_ma);
-
- deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
-
- for (int i = 0; i < 2; ++i)
- derivs[axis * 2 + i] =
- LLVMBuildFSub(builder,
- LLVMBuildFMul(builder, deriv_st[i], invma, ""),
- LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
- }
-
- /* Only the first four entries of derivs_arg are overwritten. */
- memcpy(derivs_arg, derivs, sizeof(derivs));
- }
-
- /* Shift the texture coordinate. This must be applied after the
- * derivative calculation.
- */
- for (int i = 0; i < 2; ++i)
- coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
-
- if (is_array) {
- /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
- /* coords_arg.w component - array_index for cube arrays */
- coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
- }
-
- /* Only the first three entries of coords_arg are overwritten. */
- memcpy(coords_arg, coords, sizeof(coords));
-}
-
-
-/* Build a two-step f32 interpolation: llvm.amdgcn.interp.p1 with \p i,
- * followed by llvm.amdgcn.interp.p2 with the p1 result and \p j.
- *
- * \p llvm_chan, \p attr_number and \p params are passed unchanged to both
- * intrinsics. Returns the result of interp.p2.
- */
-LLVMValueRef
-ac_build_fs_interp(struct ac_llvm_context *ctx,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params,
- LLVMValueRef i,
- LLVMValueRef j)
-{
- LLVMValueRef args[5];
- LLVMValueRef p1;
-
- args[0] = i;
- args[1] = llvm_chan;
- args[2] = attr_number;
- args[3] = params;
-
- p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
- ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
-
- args[0] = p1;
- args[1] = j;
- args[2] = llvm_chan;
- args[3] = attr_number;
- args[4] = params;
-
- return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
- ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
-}
-
-/* 16-bit variant of ac_build_fs_interp(): llvm.amdgcn.interp.p1.f16 (f32
- * result) followed by llvm.amdgcn.interp.p2.f16 (f16 result).
- *
- * Both intrinsics take an extra i1 argument before \p params, which is always
- * false here. NOTE(review): presumably it selects the high half of the
- * attribute - confirm against the intrinsic's documentation.
- */
-LLVMValueRef
-ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params,
- LLVMValueRef i,
- LLVMValueRef j)
-{
- LLVMValueRef args[6];
- LLVMValueRef p1;
-
- args[0] = i;
- args[1] = llvm_chan;
- args[2] = attr_number;
- args[3] = ctx->i1false;
- args[4] = params;
-
- p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
- ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
-
- args[0] = p1;
- args[1] = j;
- args[2] = llvm_chan;
- args[3] = attr_number;
- args[4] = ctx->i1false;
- args[5] = params;
-
- return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
- ctx->f16, args, 6, AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef
-ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
- LLVMValueRef parameter,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params)
-{
- LLVMValueRef args[4];
-
- args[0] = parameter;
- args[1] = llvm_chan;
- args[2] = attr_number;
- args[3] = params;
-
- return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
- ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef
-ac_build_gep_ptr(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr,
- LLVMValueRef index)
-{
- return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
-}
-
-LLVMValueRef
-ac_build_gep0(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr,
- LLVMValueRef index)
-{
- LLVMValueRef indices[2] = {
- ctx->i32_0,
- index,
- };
- return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
-}
-
-LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMValueRef index)
-{
- return LLVMBuildPointerCast(ctx->builder,
- LLVMBuildGEP(ctx->builder, ptr, &index, 1, ""),
- LLVMTypeOf(ptr), "");
-}
-
-void
-ac_build_indexed_store(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index,
- LLVMValueRef value)
-{
- LLVMBuildStore(ctx->builder, value,
- ac_build_gep0(ctx, base_ptr, index));
-}
-
-/**
- * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
- * It's equivalent to doing a load from &base_ptr[index].
- *
- * \param base_ptr Where the array starts.
- * \param index The element index into the array.
- * \param uniform Whether the base_ptr and index can be assumed to be
- * dynamically uniform (i.e. load to an SGPR)
- * \param invariant Whether the load is invariant (no other opcodes affect it)
- * \param no_unsigned_wraparound
- * For all possible re-associations and re-distributions of an expression
- * "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
- * without inbounds in base_ptr), this parameter is true if "addr + offset"
- * does not result in an unsigned integer wraparound. This is used for
- * optimal code generation of 32-bit pointer arithmetic.
- *
- * For example, a 32-bit immediate offset that causes a 32-bit unsigned
- * integer wraparound can't be an imm offset in s_load_dword, because
- * the instruction performs "addr + offset" in 64 bits.
- *
- * Expected usage for bindless textures by chaining GEPs:
- * // possible unsigned wraparound, don't use InBounds:
- * ptr1 = LLVMBuildGEP(base_ptr, index);
- * image = load(ptr1); // becomes "s_load ptr1, 0"
- *
- * ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
- * sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
- */
-static LLVMValueRef
-ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
- LLVMValueRef index, bool uniform, bool invariant,
- bool no_unsigned_wraparound)
-{
- LLVMValueRef pointer, result;
-
- if (no_unsigned_wraparound &&
- LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT)
- pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, "");
- else
- pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
-
- if (uniform)
- LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
- result = LLVMBuildLoad(ctx->builder, pointer, "");
- if (invariant)
- LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
- return result;
-}
-
-LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
- LLVMValueRef index)
-{
- return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
-}
-
-LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index)
-{
- return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
-}
-
-/* This assumes that there is no unsigned integer wraparound during the address
- * computation, excluding all GEPs within base_ptr. */
-LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index)
-{
- return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
-}
-
-/* See ac_build_load_custom() documentation. */
-LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index)
-{
- return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
-}
-
-static unsigned get_load_cache_policy(struct ac_llvm_context *ctx,
- unsigned cache_policy)
-{
- return cache_policy |
- (ctx->chip_class >= GFX10 && cache_policy & ac_glc ? ac_dlc : 0);
-}
-
-static void
-ac_build_buffer_store_common(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef data,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned num_channels,
- LLVMTypeRef return_channel_type,
- unsigned cache_policy,
- bool use_format,
- bool structurized)
-{
- LLVMValueRef args[6];
- int idx = 0;
- args[idx++] = data;
- args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
- if (structurized)
- args[idx++] = vindex ? vindex : ctx->i32_0;
- args[idx++] = voffset ? voffset : ctx->i32_0;
- args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
- unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
- const char *indexing_kind = structurized ? "struct" : "raw";
- char name[256], type_name[8];
-
- LLVMTypeRef type = func > 1 ? LLVMVectorType(return_channel_type, func) : return_channel_type;
- ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
- if (use_format) {
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
- indexing_kind, type_name);
- } else {
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
- indexing_kind, type_name);
- }
-
- ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
- AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
-}
-
-void
-ac_build_buffer_store_format(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef data,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned num_channels,
- unsigned cache_policy)
-{
- ac_build_buffer_store_common(ctx, rsrc, data, vindex,
- voffset, NULL, num_channels,
- ctx->f32, cache_policy,
- true, true);
-}
-
-/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
- * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
- * or v4i32 (num_channels=3,4).
- */
-void
-ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned cache_policy,
- bool swizzle_enable_hint)
-{
- /* Split 3 channel stores, because only LLVM 9+ support 3-channel
- * intrinsics. */
- if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
- LLVMValueRef v[3], v01;
-
- for (int i = 0; i < 3; i++) {
- v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
- LLVMConstInt(ctx->i32, i, 0), "");
- }
- v01 = ac_build_gather_values(ctx, v, 2);
-
- ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
- soffset, inst_offset, cache_policy,
- swizzle_enable_hint);
- ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
- soffset, inst_offset + 8,
- cache_policy,
- swizzle_enable_hint);
- return;
- }
-
- /* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
- * (voffset is swizzled, but soffset isn't swizzled).
- * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
- */
- if (!swizzle_enable_hint) {
- LLVMValueRef offset = soffset;
-
- if (inst_offset)
- offset = LLVMBuildAdd(ctx->builder, offset,
- LLVMConstInt(ctx->i32, inst_offset, 0), "");
-
- ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata),
- ctx->i32_0, voffset, offset,
- num_channels, ctx->f32,
- cache_policy, false, false);
- return;
- }
-
- static const unsigned dfmts[] = {
- V_008F0C_BUF_DATA_FORMAT_32,
- V_008F0C_BUF_DATA_FORMAT_32_32,
- V_008F0C_BUF_DATA_FORMAT_32_32_32,
- V_008F0C_BUF_DATA_FORMAT_32_32_32_32
- };
- unsigned dfmt = dfmts[num_channels - 1];
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
- LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
-
- ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, cache_policy);
-}
-
-static LLVMValueRef
-ac_build_buffer_load_common(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned num_channels,
- LLVMTypeRef channel_type,
- unsigned cache_policy,
- bool can_speculate,
- bool use_format,
- bool structurized)
-{
- LLVMValueRef args[5];
- int idx = 0;
- args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
- if (structurized)
- args[idx++] = vindex ? vindex : ctx->i32_0;
- args[idx++] = voffset ? voffset : ctx->i32_0;
- args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
- unsigned func = !ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3 ? 4 : num_channels;
- const char *indexing_kind = structurized ? "struct" : "raw";
- char name[256], type_name[8];
-
- LLVMTypeRef type = func > 1 ? LLVMVectorType(channel_type, func) : channel_type;
- ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
- if (use_format) {
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s",
- indexing_kind, type_name);
- } else {
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s",
- indexing_kind, type_name);
- }
-
- return ac_build_intrinsic(ctx, name, type, args, idx,
- ac_get_load_intr_attribs(can_speculate));
-}
-
-LLVMValueRef
-ac_build_buffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- int num_channels,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned cache_policy,
- bool can_speculate,
- bool allow_smem)
-{
- LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
- if (voffset)
- offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
- if (soffset)
- offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
-
- if (allow_smem && !(cache_policy & ac_slc) &&
- (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8)) {
- assert(vindex == NULL);
-
- LLVMValueRef result[8];
-
- for (int i = 0; i < num_channels; i++) {
- if (i) {
- offset = LLVMBuildAdd(ctx->builder, offset,
- LLVMConstInt(ctx->i32, 4, 0), "");
- }
- LLVMValueRef args[3] = {
- rsrc,
- offset,
- LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0),
- };
- result[i] = ac_build_intrinsic(ctx,
- "llvm.amdgcn.s.buffer.load.f32",
- ctx->f32, args, 3,
- AC_FUNC_ATTR_READNONE);
- }
- if (num_channels == 1)
- return result[0];
-
- if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false))
- result[num_channels++] = LLVMGetUndef(ctx->f32);
- return ac_build_gather_values(ctx, result, num_channels);
- }
-
- return ac_build_buffer_load_common(ctx, rsrc, vindex,
- offset, ctx->i32_0,
- num_channels, ctx->f32,
- cache_policy,
- can_speculate, false, false);
-}
-
-LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned num_channels,
- unsigned cache_policy,
- bool can_speculate)
-{
- return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
- ctx->i32_0, num_channels, ctx->f32,
- cache_policy, can_speculate,
- true, true);
-}
-
-static LLVMValueRef
-ac_build_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate,
- bool structurized)
-{
- voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
-
- LLVMValueRef args[6];
- int idx = 0;
- args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
- if (structurized)
- args[idx++] = vindex ? vindex : ctx->i32_0;
- args[idx++] = voffset ? voffset : ctx->i32_0;
- args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
- args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
- unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
- const char *indexing_kind = structurized ? "struct" : "raw";
- char name[256], type_name[8];
-
- LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
- ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
- indexing_kind, type_name);
-
- return ac_build_intrinsic(ctx, name, type, args, idx,
- ac_get_load_intr_attribs(can_speculate));
-}
-
-LLVMValueRef
-ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate)
-{
- return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt,
- cache_policy, can_speculate, true);
-}
-
-LLVMValueRef
-ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate)
-{
- return ac_build_tbuffer_load(ctx, rsrc, NULL, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt,
- cache_policy, can_speculate, false);
-}
-
-LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned cache_policy)
-{
- LLVMValueRef res;
-
- if (LLVM_VERSION_MAJOR >= 9) {
- voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
-
- /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
- res = ac_build_buffer_load_common(ctx, rsrc, NULL,
- voffset, soffset,
- 1, ctx->i16, cache_policy,
- false, false, false);
- } else {
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
- res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
- immoffset, 1, dfmt, nfmt, cache_policy,
- false);
-
- res = LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
- }
-
- return res;
-}
-
-LLVMValueRef
-ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned cache_policy)
-{
- LLVMValueRef res;
-
- if (LLVM_VERSION_MAJOR >= 9) {
- voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
-
- /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
- res = ac_build_buffer_load_common(ctx, rsrc, NULL,
- voffset, soffset,
- 1, ctx->i8, cache_policy,
- false, false, false);
- } else {
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
- res = ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
- immoffset, 1, dfmt, nfmt, cache_policy,
- false);
-
- res = LLVMBuildTrunc(ctx->builder, res, ctx->i8, "");
- }
-
- return res;
-}
-
-/**
- * Convert an 11- or 10-bit unsigned floating point number to an f32.
- *
- * The input exponent is expected to be biased analogous to IEEE-754, i.e. by
- * 2^(exp_bits-1) - 1 (as defined in OpenGL and other graphics APIs).
- */
-static LLVMValueRef
-ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned exp_bits, unsigned mant_bits)
-{
- assert(LLVMTypeOf(src) == ctx->i32);
-
- LLVMValueRef tmp;
- LLVMValueRef mantissa;
- mantissa = LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), "");
-
- /* Converting normal numbers is just a shift + correcting the exponent bias */
- unsigned normal_shift = 23 - mant_bits;
- unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1);
- LLVMValueRef shifted, normal;
-
- shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), "");
- normal = LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, bias_shift << 23, false), "");
-
- /* Converting nan/inf numbers is the same, but with a different exponent update */
- LLVMValueRef naninf;
- naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), "");
-
- /* Converting denormals is the complex case: determine the leading zeros of the
- * mantissa to obtain the correct shift for the mantissa and exponent correction.
- */
- LLVMValueRef denormal;
- LLVMValueRef params[2] = {
- mantissa,
- ctx->i1true, /* result can be undef when arg is 0 */
- };
- LLVMValueRef ctlz = ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32,
- params, 2, AC_FUNC_ATTR_READNONE);
-
- /* Shift such that the leading 1 ends up as the LSB of the exponent field. */
- tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), "");
- denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, "");
-
- unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1;
- tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, "");
- tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), "");
- denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, "");
-
- /* Select the final result. */
- LLVMValueRef result;
-
- tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
- LLVMConstInt(ctx->i32, ((1 << exp_bits) - 1) << mant_bits, false), "");
- result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, "");
-
- tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
- LLVMConstInt(ctx->i32, 1 << mant_bits, false), "");
- result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, "");
-
- tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, "");
- result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, "");
-
- return ac_to_float(ctx, result);
-}
-
-/**
- * Generate a fully general open coded buffer format fetch with all required
- * fixups suitable for vertex fetch, using non-format buffer loads.
- *
- * Some combinations of argument values have special interpretations:
- * - size = 8 bytes, format = fixed indicates PIPE_FORMAT_R11G11B10_FLOAT
- * - size = 8 bytes, format != {float,fixed} indicates a 2_10_10_10 data format
- *
- * \param log_size log(size of channel in bytes)
- * \param num_channels number of channels (1 to 4)
- * \param format AC_FETCH_FORMAT_xxx value
- * \param reverse whether XYZ channels are reversed
- * \param known_aligned whether the source is known to be aligned to hardware's
- * effective element size for loading the given format
- * (note: this means dword alignment for 8_8_8_8, 16_16, etc.)
- * \param rsrc buffer resource descriptor
- * \return the resulting vector of floats or integers bitcast to <4 x i32>
- */
-LLVMValueRef
-ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
- unsigned log_size,
- unsigned num_channels,
- unsigned format,
- bool reverse,
- bool known_aligned,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy,
- bool can_speculate)
-{
- LLVMValueRef tmp;
- unsigned load_log_size = log_size;
- unsigned load_num_channels = num_channels;
- if (log_size == 3) {
- load_log_size = 2;
- if (format == AC_FETCH_FORMAT_FLOAT) {
- load_num_channels = 2 * num_channels;
- } else {
- load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */
- }
- }
-
- int log_recombine = 0;
- if (ctx->chip_class == GFX6 && !known_aligned) {
- /* Avoid alignment restrictions by loading one byte at a time. */
- load_num_channels <<= load_log_size;
- log_recombine = load_log_size;
- load_log_size = 0;
- } else if (load_num_channels == 2 || load_num_channels == 4) {
- log_recombine = -util_logbase2(load_num_channels);
- load_num_channels = 1;
- load_log_size += -log_recombine;
- }
-
- assert(load_log_size >= 2 || LLVM_VERSION_MAJOR >= 9);
-
- LLVMValueRef loads[32]; /* up to 32 bytes */
- for (unsigned i = 0; i < load_num_channels; ++i) {
- tmp = LLVMBuildAdd(ctx->builder, soffset,
- LLVMConstInt(ctx->i32, i << load_log_size, false), "");
- LLVMTypeRef channel_type = load_log_size == 0 ? ctx->i8 :
- load_log_size == 1 ? ctx->i16 : ctx->i32;
- unsigned num_channels = 1 << (MAX2(load_log_size, 2) - 2);
- loads[i] = ac_build_buffer_load_common(
- ctx, rsrc, vindex, voffset, tmp,
- num_channels, channel_type, cache_policy,
- can_speculate, false, true);
- if (load_log_size >= 2)
- loads[i] = ac_to_integer(ctx, loads[i]);
- }
-
- if (log_recombine > 0) {
- /* Recombine bytes if necessary (GFX6 only) */
- LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16;
-
- for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) {
- LLVMValueRef accum = NULL;
- for (unsigned i = 0; i < (1 << log_recombine); ++i, ++src) {
- tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, "");
- if (i == 0) {
- accum = tmp;
- } else {
- tmp = LLVMBuildShl(ctx->builder, tmp,
- LLVMConstInt(dst_type, 8 * i, false), "");
- accum = LLVMBuildOr(ctx->builder, accum, tmp, "");
- }
- }
- loads[dst] = accum;
- }
- } else if (log_recombine < 0) {
- /* Split vectors of dwords */
- if (load_log_size > 2) {
- assert(load_num_channels == 1);
- LLVMValueRef loaded = loads[0];
- unsigned log_split = load_log_size - 2;
- log_recombine += log_split;
- load_num_channels = 1 << log_split;
- load_log_size = 2;
- for (unsigned i = 0; i < load_num_channels; ++i) {
- tmp = LLVMConstInt(ctx->i32, i, false);
- loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, "");
- }
- }
-
- /* Further split dwords and shorts if required */
- if (log_recombine < 0) {
- for (unsigned src = load_num_channels,
- dst = load_num_channels << -log_recombine;
- src > 0; --src) {
- unsigned dst_bits = 1 << (3 + load_log_size + log_recombine);
- LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits);
- LLVMValueRef loaded = loads[src - 1];
- LLVMTypeRef loaded_type = LLVMTypeOf(loaded);
- for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) {
- tmp = LLVMConstInt(loaded_type, dst_bits * (i - 1), false);
- tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, "");
- loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, "");
- }
- }
- }
- }
-
- if (log_size == 3) {
- if (format == AC_FETCH_FORMAT_FLOAT) {
- for (unsigned i = 0; i < num_channels; ++i) {
- tmp = ac_build_gather_values(ctx, &loads[2 * i], 2);
- loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, "");
- }
- } else if (format == AC_FETCH_FORMAT_FIXED) {
- /* 10_11_11_FLOAT */
- LLVMValueRef data = loads[0];
- LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false);
- LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, "");
- tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), "");
- LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, "");
- LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), "");
-
- loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6));
- loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6));
- loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5));
-
- num_channels = 3;
- log_size = 2;
- format = AC_FETCH_FORMAT_FLOAT;
- } else {
- /* 2_10_10_10 data formats */
- LLVMValueRef data = loads[0];
- LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10);
- LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2);
- loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, "");
- tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), "");
- loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
- tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), "");
- loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
- tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), "");
- loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, "");
-
- num_channels = 4;
- }
- }
-
- if (format == AC_FETCH_FORMAT_FLOAT) {
- if (log_size != 2) {
- for (unsigned chan = 0; chan < num_channels; ++chan) {
- tmp = ac_to_float(ctx, loads[chan]);
- if (log_size == 3)
- tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, "");
- else if (log_size == 1)
- tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, "");
- loads[chan] = ac_to_integer(ctx, tmp);
- }
- }
- } else if (format == AC_FETCH_FORMAT_UINT) {
- if (log_size != 2) {
- for (unsigned chan = 0; chan < num_channels; ++chan)
- loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, "");
- }
- } else if (format == AC_FETCH_FORMAT_SINT) {
- if (log_size != 2) {
- for (unsigned chan = 0; chan < num_channels; ++chan)
- loads[chan] = LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, "");
- }
- } else {
- bool unsign = format == AC_FETCH_FORMAT_UNORM ||
- format == AC_FETCH_FORMAT_USCALED ||
- format == AC_FETCH_FORMAT_UINT;
-
- for (unsigned chan = 0; chan < num_channels; ++chan) {
- if (unsign) {
- tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, "");
- } else {
- tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, "");
- }
-
- LLVMValueRef scale = NULL;
- if (format == AC_FETCH_FORMAT_FIXED) {
- assert(log_size == 2);
- scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000);
- } else if (format == AC_FETCH_FORMAT_UNORM) {
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
- scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1));
- } else if (format == AC_FETCH_FORMAT_SNORM) {
- unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
- scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1));
- }
- if (scale)
- tmp = LLVMBuildFMul(ctx->builder, tmp, scale, "");
-
- if (format == AC_FETCH_FORMAT_SNORM) {
- /* Clamp to [-1, 1] */
- LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
- LLVMValueRef clamp =
- LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, "");
- tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, "");
- }
-
- loads[chan] = ac_to_integer(ctx, tmp);
- }
- }
-
- while (num_channels < 4) {
- if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) {
- loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0;
- } else {
- loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? ctx->f32_1 : ctx->f32_0);
- }
- num_channels++;
- }
-
- if (reverse) {
- tmp = loads[0];
- loads[0] = loads[2];
- loads[2] = tmp;
- }
-
- return ac_build_gather_values(ctx, loads, 4);
-}
-
-static void
-ac_build_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool structurized)
-{
- voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
- immoffset, "");
-
- LLVMValueRef args[7];
- int idx = 0;
- args[idx++] = vdata;
- args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
- if (structurized)
- args[idx++] = vindex ? vindex : ctx->i32_0;
- args[idx++] = voffset ? voffset : ctx->i32_0;
- args[idx++] = soffset ? soffset : ctx->i32_0;
- args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
- args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
- unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
- const char *indexing_kind = structurized ? "struct" : "raw";
- char name[256], type_name[8];
-
- LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
- ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
-
- snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
- indexing_kind, type_name);
-
- ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
- AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
-}
-
-void
-ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy)
-{
- ac_build_tbuffer_store(ctx, rsrc, vdata, vindex, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, cache_policy,
- true);
-}
-
-void
-ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy)
-{
- ac_build_tbuffer_store(ctx, rsrc, vdata, NULL, voffset, soffset,
- immoffset, num_channels, dfmt, nfmt, cache_policy,
- false);
-}
-
-void
-ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy)
-{
- vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
-
- if (LLVM_VERSION_MAJOR >= 9) {
- /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
- ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
- voffset, soffset, 1,
- ctx->i16, cache_policy,
- false, false);
- } else {
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
- vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
-
- ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- ctx->i32_0, 1, dfmt, nfmt, cache_policy);
- }
-}
-
-void
-ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy)
-{
- vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
-
- if (LLVM_VERSION_MAJOR >= 9) {
- /* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
- ac_build_buffer_store_common(ctx, rsrc, vdata, NULL,
- voffset, soffset, 1,
- ctx->i8, cache_policy,
- false, false);
- } else {
- unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_8;
- unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
-
- vdata = LLVMBuildZExt(ctx->builder, vdata, ctx->i32, "");
-
- ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
- ctx->i32_0, 1, dfmt, nfmt, cache_policy);
- }
-}
-/**
- * Set range metadata on an instruction. This can only be used on load and
- * call instructions. If you know an instruction can only produce the values
- * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
- * \p lo is the minimum value inclusive.
- * \p hi is the maximum value exclusive.
- */
-static void set_range_metadata(struct ac_llvm_context *ctx,
- LLVMValueRef value, unsigned lo, unsigned hi)
-{
- LLVMValueRef range_md, md_args[2];
- LLVMTypeRef type = LLVMTypeOf(value);
- LLVMContextRef context = LLVMGetTypeContext(type);
-
- md_args[0] = LLVMConstInt(type, lo, false);
- md_args[1] = LLVMConstInt(type, hi, false);
- range_md = LLVMMDNodeInContext(context, md_args, 2);
- LLVMSetMetadata(value, ctx->range_md_kind, range_md);
-}
-
-LLVMValueRef
-ac_get_thread_id(struct ac_llvm_context *ctx)
-{
- LLVMValueRef tid;
-
- LLVMValueRef tid_args[2];
- tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
- tid_args[1] = ctx->i32_0;
- tid_args[1] = ac_build_intrinsic(ctx,
- "llvm.amdgcn.mbcnt.lo", ctx->i32,
- tid_args, 2, AC_FUNC_ATTR_READNONE);
-
- if (ctx->wave_size == 32) {
- tid = tid_args[1];
- } else {
- tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
- ctx->i32, tid_args,
- 2, AC_FUNC_ATTR_READNONE);
- }
- set_range_metadata(ctx, tid, 0, ctx->wave_size);
- return tid;
-}
-
-/*
- * AMD GCN implements derivatives using the local data store (LDS)
- * All writes to the LDS happen in all executing threads at
- * the same time. TID is the Thread ID for the current
- * thread and is a value between 0 and 63, representing
- * the thread's position in the wavefront.
- *
- * For the pixel shader threads are grouped into quads of four pixels.
- * The TIDs of the pixels of a quad are:
- *
- * +------+------+
- * |4n + 0|4n + 1|
- * +------+------+
- * |4n + 2|4n + 3|
- * +------+------+
- *
- * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
- * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
- * the current pixel's column, and masking with 0xfffffffe yields the TID
- * of the left pixel of the current pixel's row.
- *
- * Adding 1 yields the TID of the pixel to the right of the left pixel, and
- * adding 2 yields the TID of the pixel below the top pixel.
- */
-LLVMValueRef
-ac_build_ddxy(struct ac_llvm_context *ctx,
- uint32_t mask,
- int idx,
- LLVMValueRef val)
-{
- unsigned tl_lanes[4], trbl_lanes[4];
- char name[32], type[8];
- LLVMValueRef tl, trbl;
- LLVMTypeRef result_type;
- LLVMValueRef result;
-
- result_type = ac_to_float_type(ctx, LLVMTypeOf(val));
-
- if (result_type == ctx->f16)
- val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
-
- for (unsigned i = 0; i < 4; ++i) {
- tl_lanes[i] = i & mask;
- trbl_lanes[i] = (i & mask) + idx;
- }
-
- tl = ac_build_quad_swizzle(ctx, val,
- tl_lanes[0], tl_lanes[1],
- tl_lanes[2], tl_lanes[3]);
- trbl = ac_build_quad_swizzle(ctx, val,
- trbl_lanes[0], trbl_lanes[1],
- trbl_lanes[2], trbl_lanes[3]);
-
- if (result_type == ctx->f16) {
- tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
- trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
- }
-
- tl = LLVMBuildBitCast(ctx->builder, tl, result_type, "");
- trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, "");
- result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
-
- ac_build_type_name_for_intr(result_type, type, sizeof(type));
- snprintf(name, sizeof(name), "llvm.amdgcn.wqm.%s", type);
-
- return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0);
-}
-
-void
-ac_build_sendmsg(struct ac_llvm_context *ctx,
- uint32_t msg,
- LLVMValueRef wave_id)
-{
- LLVMValueRef args[2];
- args[0] = LLVMConstInt(ctx->i32, msg, false);
- args[1] = wave_id;
- ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0);
-}
-
-LLVMValueRef
-ac_build_imsb(struct ac_llvm_context *ctx,
- LLVMValueRef arg,
- LLVMTypeRef dst_type)
-{
- LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32",
- dst_type, &arg, 1,
- AC_FUNC_ATTR_READNONE);
-
- /* The HW returns the last bit index from MSB, but NIR/TGSI wants
- * the index from LSB. Invert it by doing "31 - msb". */
- msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
- msb, "");
-
- LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
- LLVMValueRef cond = LLVMBuildOr(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- arg, ctx->i32_0, ""),
- LLVMBuildICmp(ctx->builder, LLVMIntEQ,
- arg, all_ones, ""), "");
-
- return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
-}
-
-LLVMValueRef
-ac_build_umsb(struct ac_llvm_context *ctx,
- LLVMValueRef arg,
- LLVMTypeRef dst_type)
-{
- const char *intrin_name;
- LLVMTypeRef type;
- LLVMValueRef highest_bit;
- LLVMValueRef zero;
- unsigned bitsize;
-
- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
- switch (bitsize) {
- case 64:
- intrin_name = "llvm.ctlz.i64";
- type = ctx->i64;
- highest_bit = LLVMConstInt(ctx->i64, 63, false);
- zero = ctx->i64_0;
- break;
- case 32:
- intrin_name = "llvm.ctlz.i32";
- type = ctx->i32;
- highest_bit = LLVMConstInt(ctx->i32, 31, false);
- zero = ctx->i32_0;
- break;
- case 16:
- intrin_name = "llvm.ctlz.i16";
- type = ctx->i16;
- highest_bit = LLVMConstInt(ctx->i16, 15, false);
- zero = ctx->i16_0;
- break;
- case 8:
- intrin_name = "llvm.ctlz.i8";
- type = ctx->i8;
- highest_bit = LLVMConstInt(ctx->i8, 7, false);
- zero = ctx->i8_0;
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
-
- LLVMValueRef params[2] = {
- arg,
- ctx->i1true,
- };
-
- LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type,
- params, 2,
- AC_FUNC_ATTR_READNONE);
-
- /* The HW returns the last bit index from MSB, but TGSI/NIR wants
- * the index from LSB. Invert it by doing "31 - msb". */
- msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
-
- if (bitsize == 64) {
- msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
- } else if (bitsize < 32) {
- msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
- }
-
- /* check for zero */
- return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
- LLVMConstInt(ctx->i32, -1, true), msb, "");
-}
-
-LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
-{
- char name[64];
- snprintf(name, sizeof(name), "llvm.minnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
- LLVMValueRef args[2] = {a, b};
- return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
- AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
-{
- char name[64];
- snprintf(name, sizeof(name), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a)));
- LLVMValueRef args[2] = {a, b};
- return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2,
- AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
-{
- LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
- return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
-}
-
-LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
-{
- LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
- return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
-}
-
-LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
-{
- LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
- return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
-}
-
-LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b)
-{
- LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
- return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
-}
-
-LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
-{
- LLVMTypeRef t = LLVMTypeOf(value);
- return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)),
- LLVMConstReal(t, 1.0));
-}
-
-void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
-{
- LLVMValueRef args[9];
-
- args[0] = LLVMConstInt(ctx->i32, a->target, 0);
- args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
-
- if (a->compr) {
- LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context);
- LLVMTypeRef v2i16 = LLVMVectorType(i16, 2);
-
- args[2] = LLVMBuildBitCast(ctx->builder, a->out[0],
- v2i16, "");
- args[3] = LLVMBuildBitCast(ctx->builder, a->out[1],
- v2i16, "");
- args[4] = LLVMConstInt(ctx->i1, a->done, 0);
- args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
-
- ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16",
- ctx->voidt, args, 6, 0);
- } else {
- args[2] = a->out[0];
- args[3] = a->out[1];
- args[4] = a->out[2];
- args[5] = a->out[3];
- args[6] = LLVMConstInt(ctx->i1, a->done, 0);
- args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
-
- ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32",
- ctx->voidt, args, 8, 0);
- }
-}
-
-void ac_build_export_null(struct ac_llvm_context *ctx)
-{
- struct ac_export_args args;
-
- args.enabled_channels = 0x0; /* enabled channels */
- args.valid_mask = 1; /* whether the EXEC mask is valid */
- args.done = 1; /* DONE bit */
- args.target = V_008DFC_SQ_EXP_NULL;
- args.compr = 0; /* COMPR flag (0 = 32-bit export) */
- args.out[0] = LLVMGetUndef(ctx->f32); /* R */
- args.out[1] = LLVMGetUndef(ctx->f32); /* G */
- args.out[2] = LLVMGetUndef(ctx->f32); /* B */
- args.out[3] = LLVMGetUndef(ctx->f32); /* A */
-
- ac_build_export(ctx, &args);
-}
-
-static unsigned ac_num_coords(enum ac_image_dim dim)
-{
- switch (dim) {
- case ac_image_1d:
- return 1;
- case ac_image_2d:
- case ac_image_1darray:
- return 2;
- case ac_image_3d:
- case ac_image_cube:
- case ac_image_2darray:
- case ac_image_2dmsaa:
- return 3;
- case ac_image_2darraymsaa:
- return 4;
- default:
- unreachable("ac_num_coords: bad dim");
- }
-}
-
-static unsigned ac_num_derivs(enum ac_image_dim dim)
-{
- switch (dim) {
- case ac_image_1d:
- case ac_image_1darray:
- return 2;
- case ac_image_2d:
- case ac_image_2darray:
- case ac_image_cube:
- return 4;
- case ac_image_3d:
- return 6;
- case ac_image_2dmsaa:
- case ac_image_2darraymsaa:
- default:
- unreachable("derivatives not supported");
- }
-}
-
-static const char *get_atomic_name(enum ac_atomic_op op)
-{
- switch (op) {
- case ac_atomic_swap: return "swap";
- case ac_atomic_add: return "add";
- case ac_atomic_sub: return "sub";
- case ac_atomic_smin: return "smin";
- case ac_atomic_umin: return "umin";
- case ac_atomic_smax: return "smax";
- case ac_atomic_umax: return "umax";
- case ac_atomic_and: return "and";
- case ac_atomic_or: return "or";
- case ac_atomic_xor: return "xor";
- case ac_atomic_inc_wrap: return "inc";
- case ac_atomic_dec_wrap: return "dec";
- }
- unreachable("bad atomic op");
-}
-
-LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
- struct ac_image_args *a)
-{
- const char *overload[3] = { "", "", "" };
- unsigned num_overloads = 0;
- LLVMValueRef args[18];
- unsigned num_args = 0;
- enum ac_image_dim dim = a->dim;
-
- assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
- !a->level_zero);
- assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
- a->opcode != ac_image_store_mip) ||
- a->lod);
- assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
- (!a->compare && !a->offset));
- assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
- a->opcode == ac_image_get_lod) ||
- !a->bias);
- assert((a->bias ? 1 : 0) +
- (a->lod ? 1 : 0) +
- (a->level_zero ? 1 : 0) +
- (a->derivs[0] ? 1 : 0) <= 1);
-
- if (a->opcode == ac_image_get_lod) {
- switch (dim) {
- case ac_image_1darray:
- dim = ac_image_1d;
- break;
- case ac_image_2darray:
- case ac_image_cube:
- dim = ac_image_2d;
- break;
- default:
- break;
- }
- }
-
- bool sample = a->opcode == ac_image_sample ||
- a->opcode == ac_image_gather4 ||
- a->opcode == ac_image_get_lod;
- bool atomic = a->opcode == ac_image_atomic ||
- a->opcode == ac_image_atomic_cmpswap;
- bool load = a->opcode == ac_image_sample ||
- a->opcode == ac_image_gather4 ||
- a->opcode == ac_image_load ||
- a->opcode == ac_image_load_mip;
- LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
-
- if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
- args[num_args++] = a->data[0];
- if (a->opcode == ac_image_atomic_cmpswap)
- args[num_args++] = a->data[1];
- }
-
- if (!atomic)
- args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false);
-
- if (a->offset)
- args[num_args++] = ac_to_integer(ctx, a->offset);
- if (a->bias) {
- args[num_args++] = ac_to_float(ctx, a->bias);
- overload[num_overloads++] = ".f32";
- }
- if (a->compare)
- args[num_args++] = ac_to_float(ctx, a->compare);
- if (a->derivs[0]) {
- unsigned count = ac_num_derivs(dim);
- for (unsigned i = 0; i < count; ++i)
- args[num_args++] = ac_to_float(ctx, a->derivs[i]);
- overload[num_overloads++] = ".f32";
- }
- unsigned num_coords =
- a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
- for (unsigned i = 0; i < num_coords; ++i)
- args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
- if (a->lod)
- args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
- overload[num_overloads++] = sample ? ".f32" : ".i32";
-
- args[num_args++] = a->resource;
- if (sample) {
- args[num_args++] = a->sampler;
- args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
- }
-
- args[num_args++] = ctx->i32_0; /* texfailctrl */
- args[num_args++] = LLVMConstInt(ctx->i32,
- load ? get_load_cache_policy(ctx, a->cache_policy) :
- a->cache_policy, false);
-
- const char *name;
- const char *atomic_subop = "";
- switch (a->opcode) {
- case ac_image_sample: name = "sample"; break;
- case ac_image_gather4: name = "gather4"; break;
- case ac_image_load: name = "load"; break;
- case ac_image_load_mip: name = "load.mip"; break;
- case ac_image_store: name = "store"; break;
- case ac_image_store_mip: name = "store.mip"; break;
- case ac_image_atomic:
- name = "atomic.";
- atomic_subop = get_atomic_name(a->atomic);
- break;
- case ac_image_atomic_cmpswap:
- name = "atomic.";
- atomic_subop = "cmpswap";
- break;
- case ac_image_get_lod: name = "getlod"; break;
- case ac_image_get_resinfo: name = "getresinfo"; break;
- default: unreachable("invalid image opcode");
- }
-
- const char *dimname;
- switch (dim) {
- case ac_image_1d: dimname = "1d"; break;
- case ac_image_2d: dimname = "2d"; break;
- case ac_image_3d: dimname = "3d"; break;
- case ac_image_cube: dimname = "cube"; break;
- case ac_image_1darray: dimname = "1darray"; break;
- case ac_image_2darray: dimname = "2darray"; break;
- case ac_image_2dmsaa: dimname = "2dmsaa"; break;
- case ac_image_2darraymsaa: dimname = "2darraymsaa"; break;
- default: unreachable("invalid dim");
- }
-
- bool lod_suffix =
- a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
- char intr_name[96];
- snprintf(intr_name, sizeof(intr_name),
- "llvm.amdgcn.image.%s%s" /* base name */
- "%s%s%s" /* sample/gather modifiers */
- ".%s.%s%s%s%s", /* dimension and type overloads */
- name, atomic_subop,
- a->compare ? ".c" : "",
- a->bias ? ".b" :
- lod_suffix ? ".l" :
- a->derivs[0] ? ".d" :
- a->level_zero ? ".lz" : "",
- a->offset ? ".o" : "",
- dimname,
- atomic ? "i32" : "v4f32",
- overload[0], overload[1], overload[2]);
-
- LLVMTypeRef retty;
- if (atomic)
- retty = ctx->i32;
- else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
- retty = ctx->voidt;
- else
- retty = ctx->v4f32;
-
- LLVMValueRef result =
- ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
- a->attributes);
- if (!sample && retty == ctx->v4f32) {
- result = LLVMBuildBitCast(ctx->builder, result,
- ctx->v4i32, "");
- }
- return result;
-}
-
-LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc)
-{
- LLVMValueRef samples;
-
- /* Read the samples from the descriptor directly.
- * Hardware doesn't have any instruction for this.
- */
- samples = LLVMBuildExtractElement(ctx->builder, rsrc,
- LLVMConstInt(ctx->i32, 3, 0), "");
- samples = LLVMBuildLShr(ctx->builder, samples,
- LLVMConstInt(ctx->i32, 16, 0), "");
- samples = LLVMBuildAnd(ctx->builder, samples,
- LLVMConstInt(ctx->i32, 0xf, 0), "");
- samples = LLVMBuildShl(ctx->builder, ctx->i32_1,
- samples, "");
- return samples;
-}
-
-LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2])
-{
- LLVMTypeRef v2f16 =
- LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2);
-
- return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", v2f16,
- args, 2, AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2])
-{
- LLVMValueRef res =
- ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
- ctx->v2i16, args, 2,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-}
-
-LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2])
-{
- LLVMValueRef res =
- ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
- ctx->v2i16, args, 2,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-}
-
-/* The 8-bit and 10-bit clamping is for HW workarounds. */
-LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2], unsigned bits, bool hi)
-{
- assert(bits == 8 || bits == 10 || bits == 16);
-
- LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
- bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
- LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
- bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
- LLVMValueRef max_alpha =
- bits != 10 ? max_rgb : ctx->i32_1;
- LLVMValueRef min_alpha =
- bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
-
- /* Clamp. */
- if (bits != 16) {
- for (int i = 0; i < 2; i++) {
- bool alpha = hi && i == 1;
- args[i] = ac_build_imin(ctx, args[i],
- alpha ? max_alpha : max_rgb);
- args[i] = ac_build_imax(ctx, args[i],
- alpha ? min_alpha : min_rgb);
- }
- }
-
- LLVMValueRef res =
- ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
- ctx->v2i16, args, 2,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-}
-
-/* The 8-bit and 10-bit clamping is for HW workarounds. */
-LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2], unsigned bits, bool hi)
-{
- assert(bits == 8 || bits == 10 || bits == 16);
-
- LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
- bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
- LLVMValueRef max_alpha =
- bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
-
- /* Clamp. */
- if (bits != 16) {
- for (int i = 0; i < 2; i++) {
- bool alpha = hi && i == 1;
- args[i] = ac_build_umin(ctx, args[i],
- alpha ? max_alpha : max_rgb);
- }
- }
-
- LLVMValueRef res =
- ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
- ctx->v2i16, args, 2,
- AC_FUNC_ATTR_READNONE);
- return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-}
-
-LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
-{
- return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
- &i1, 1, AC_FUNC_ATTR_READNONE);
-}
-
-void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
-{
- ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
- &i1, 1, 0);
-}
-
-LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
- LLVMValueRef offset, LLVMValueRef width,
- bool is_signed)
-{
- LLVMValueRef args[] = {
- input,
- offset,
- width,
- };
-
- return ac_build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" :
- "llvm.amdgcn.ubfe.i32",
- ctx->i32, args, 3, AC_FUNC_ATTR_READNONE);
-
-}
-
-LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
- LLVMValueRef s1, LLVMValueRef s2)
-{
- return LLVMBuildAdd(ctx->builder,
- LLVMBuildMul(ctx->builder, s0, s1, ""), s2, "");
-}
-
-LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
- LLVMValueRef s1, LLVMValueRef s2)
-{
- /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
- if (ctx->chip_class >= GFX10) {
- return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32,
- (LLVMValueRef []) {s0, s1, s2}, 3,
- AC_FUNC_ATTR_READNONE);
- }
-
- return LLVMBuildFAdd(ctx->builder,
- LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
-}
-
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
-{
- if (!wait_flags)
- return;
-
- unsigned lgkmcnt = 63;
- unsigned vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
- unsigned vscnt = 63;
-
- if (wait_flags & AC_WAIT_LGKM)
- lgkmcnt = 0;
- if (wait_flags & AC_WAIT_VLOAD)
- vmcnt = 0;
-
- if (wait_flags & AC_WAIT_VSTORE) {
- if (ctx->chip_class >= GFX10)
- vscnt = 0;
- else
- vmcnt = 0;
- }
-
- /* There is no intrinsic for vscnt(0), so use a fence. */
- if ((wait_flags & AC_WAIT_LGKM &&
- wait_flags & AC_WAIT_VLOAD &&
- wait_flags & AC_WAIT_VSTORE) ||
- vscnt == 0) {
- LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
- return;
- }
-
- unsigned simm16 = (lgkmcnt << 8) |
- (7 << 4) | /* expcnt */
- (vmcnt & 0xf) |
- ((vmcnt >> 4) << 14);
-
- LLVMValueRef args[1] = {
- LLVMConstInt(ctx->i32, simm16, false),
- };
- ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
- ctx->voidt, args, 1, 0);
-}
-
-LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
- LLVMValueRef src1, LLVMValueRef src2,
- unsigned bitsize)
-{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.amdgcn.fmed3.f16";
- type = ctx->f16;
- } else if (bitsize == 32) {
- intr = "llvm.amdgcn.fmed3.f32";
- type = ctx->f32;
- } else {
- intr = "llvm.amdgcn.fmed3.f64";
- type = ctx->f64;
- }
-
- LLVMValueRef params[] = {
- src0,
- src1,
- src2,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 3,
- AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.amdgcn.fract.f16";
- type = ctx->f16;
- } else if (bitsize == 32) {
- intr = "llvm.amdgcn.fract.f32";
- type = ctx->f32;
- } else {
- intr = "llvm.amdgcn.fract.f64";
- type = ctx->f64;
- }
-
- LLVMValueRef params[] = {
- src0,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 1,
- AC_FUNC_ATTR_READNONE);
-}
-
-LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, bitsize);
- LLVMValueRef zero = LLVMConstInt(type, 0, false);
- LLVMValueRef one = LLVMConstInt(type, 1, false);
-
- LLVMValueRef cmp, val;
- cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
- val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
- cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, "");
- val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), "");
- return val;
-}
-
-LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMValueRef cmp, val, zero, one;
- LLVMTypeRef type;
-
- if (bitsize == 16) {
- type = ctx->f16;
- zero = ctx->f16_0;
- one = ctx->f16_1;
- } else if (bitsize == 32) {
- type = ctx->f32;
- zero = ctx->f32_0;
- one = ctx->f32_1;
- } else {
- type = ctx->f64;
- zero = ctx->f64_0;
- one = ctx->f64_1;
- }
-
- cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
- val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
- cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, "");
- val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), "");
- return val;
-}
-
-LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
-{
- LLVMValueRef result;
- unsigned bitsize;
-
- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-
- switch (bitsize) {
- case 64:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
- break;
- case 32:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- break;
- case 16:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
- break;
- case 8:
- result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
-
- return result;
-}
-
-LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
-{
- LLVMValueRef result;
- unsigned bitsize;
-
- bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-
- switch (bitsize) {
- case 64:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
- break;
- case 32:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
- break;
- case 16:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
- break;
- case 8:
- result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8,
- (LLVMValueRef []) { src0 }, 1,
- AC_FUNC_ATTR_READNONE);
-
- result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
- break;
- default:
- unreachable(!"invalid bitsize");
- break;
- }
-
- return result;
-}
-
-#define AC_EXP_TARGET 0
-#define AC_EXP_ENABLED_CHANNELS 1
-#define AC_EXP_OUT0 2
-
-enum ac_ir_type {
- AC_IR_UNDEF,
- AC_IR_CONST,
- AC_IR_VALUE,
-};
-
-struct ac_vs_exp_chan
-{
- LLVMValueRef value;
- float const_float;
- enum ac_ir_type type;
-};
-
-struct ac_vs_exp_inst {
- unsigned offset;
- LLVMValueRef inst;
- struct ac_vs_exp_chan chan[4];
-};
-
-struct ac_vs_exports {
- unsigned num;
- struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
-};
-
-/* Return true if the PARAM export has been eliminated. */
-static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
- uint32_t num_outputs,
- struct ac_vs_exp_inst *exp)
-{
- unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
- bool is_zero[4] = {}, is_one[4] = {};
-
- for (i = 0; i < 4; i++) {
- /* It's a constant expression. Undef outputs are eliminated too. */
- if (exp->chan[i].type == AC_IR_UNDEF) {
- is_zero[i] = true;
- is_one[i] = true;
- } else if (exp->chan[i].type == AC_IR_CONST) {
- if (exp->chan[i].const_float == 0)
- is_zero[i] = true;
- else if (exp->chan[i].const_float == 1)
- is_one[i] = true;
- else
- return false; /* other constant */
- } else
- return false;
- }
-
- /* Only certain combinations of 0 and 1 can be eliminated. */
- if (is_zero[0] && is_zero[1] && is_zero[2])
- default_val = is_zero[3] ? 0 : 1;
- else if (is_one[0] && is_one[1] && is_one[2])
- default_val = is_zero[3] ? 2 : 3;
- else
- return false;
-
- /* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
- LLVMInstructionEraseFromParent(exp->inst);
-
- /* Change OFFSET to DEFAULT_VAL. */
- for (i = 0; i < num_outputs; i++) {
- if (vs_output_param_offset[i] == exp->offset) {
- vs_output_param_offset[i] =
- AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
- break;
- }
- }
- return true;
-}
-
-static bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx,
- uint8_t *vs_output_param_offset,
- uint32_t num_outputs,
- struct ac_vs_exports *processed,
- struct ac_vs_exp_inst *exp)
-{
- unsigned p, copy_back_channels = 0;
-
- /* See if the output is already in the list of processed outputs.
- * The LLVMValueRef comparison relies on SSA.
- */
- for (p = 0; p < processed->num; p++) {
- bool different = false;
-
- for (unsigned j = 0; j < 4; j++) {
- struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
- struct ac_vs_exp_chan *c2 = &exp->chan[j];
-
- /* Treat undef as a match. */
- if (c2->type == AC_IR_UNDEF)
- continue;
-
- /* If c1 is undef but c2 isn't, we can copy c2 to c1
- * and consider the instruction duplicated.
- */
- if (c1->type == AC_IR_UNDEF) {
- copy_back_channels |= 1 << j;
- continue;
- }
-
- /* Test whether the channels are not equal. */
- if (c1->type != c2->type ||
- (c1->type == AC_IR_CONST &&
- c1->const_float != c2->const_float) ||
- (c1->type == AC_IR_VALUE &&
- c1->value != c2->value)) {
- different = true;
- break;
- }
- }
- if (!different)
- break;
-
- copy_back_channels = 0;
- }
- if (p == processed->num)
- return false;
-
- /* If a match was found, but the matching export has undef where the new
- * one has a normal value, copy the normal value to the undef channel.
- */
- struct ac_vs_exp_inst *match = &processed->exp[p];
-
- /* Get current enabled channels mask. */
- LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS);
- unsigned enabled_channels = LLVMConstIntGetZExtValue(arg);
-
- while (copy_back_channels) {
- unsigned chan = u_bit_scan(&copy_back_channels);
-
- assert(match->chan[chan].type == AC_IR_UNDEF);
- LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
- exp->chan[chan].value);
- match->chan[chan] = exp->chan[chan];
-
- /* Update number of enabled channels because the original mask
- * is not always 0xf.
- */
- enabled_channels |= (1 << chan);
- LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS,
- LLVMConstInt(ctx->i32, enabled_channels, 0));
- }
-
- /* The PARAM export is duplicated. Kill it. */
- LLVMInstructionEraseFromParent(exp->inst);
-
- /* Change OFFSET to the matching export. */
- for (unsigned i = 0; i < num_outputs; i++) {
- if (vs_output_param_offset[i] == exp->offset) {
- vs_output_param_offset[i] = match->offset;
- break;
- }
- }
- return true;
-}
-
-void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
- LLVMValueRef main_fn,
- uint8_t *vs_output_param_offset,
- uint32_t num_outputs,
- uint8_t *num_param_exports)
-{
- LLVMBasicBlockRef bb;
- bool removed_any = false;
- struct ac_vs_exports exports;
-
- exports.num = 0;
-
- /* Process all LLVM instructions. */
- bb = LLVMGetFirstBasicBlock(main_fn);
- while (bb) {
- LLVMValueRef inst = LLVMGetFirstInstruction(bb);
-
- while (inst) {
- LLVMValueRef cur = inst;
- inst = LLVMGetNextInstruction(inst);
- struct ac_vs_exp_inst exp;
-
- if (LLVMGetInstructionOpcode(cur) != LLVMCall)
- continue;
-
- LLVMValueRef callee = ac_llvm_get_called_value(cur);
-
- if (!ac_llvm_is_function(callee))
- continue;
-
- const char *name = LLVMGetValueName(callee);
- unsigned num_args = LLVMCountParams(callee);
-
- /* Check if this is an export instruction. */
- if ((num_args != 9 && num_args != 8) ||
- (strcmp(name, "llvm.SI.export") &&
- strcmp(name, "llvm.amdgcn.exp.f32")))
- continue;
-
- LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
- unsigned target = LLVMConstIntGetZExtValue(arg);
-
- if (target < V_008DFC_SQ_EXP_PARAM)
- continue;
-
- target -= V_008DFC_SQ_EXP_PARAM;
-
- /* Parse the instruction. */
- memset(&exp, 0, sizeof(exp));
- exp.offset = target;
- exp.inst = cur;
-
- for (unsigned i = 0; i < 4; i++) {
- LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
-
- exp.chan[i].value = v;
-
- if (LLVMIsUndef(v)) {
- exp.chan[i].type = AC_IR_UNDEF;
- } else if (LLVMIsAConstantFP(v)) {
- LLVMBool loses_info;
- exp.chan[i].type = AC_IR_CONST;
- exp.chan[i].const_float =
- LLVMConstRealGetDouble(v, &loses_info);
- } else {
- exp.chan[i].type = AC_IR_VALUE;
- }
- }
-
- /* Eliminate constant and duplicated PARAM exports. */
- if (ac_eliminate_const_output(vs_output_param_offset,
- num_outputs, &exp) ||
- ac_eliminate_duplicated_output(ctx,
- vs_output_param_offset,
- num_outputs, &exports,
- &exp)) {
- removed_any = true;
- } else {
- exports.exp[exports.num++] = exp;
- }
- }
- bb = LLVMGetNextBasicBlock(bb);
- }
-
- /* Remove holes in export memory due to removed PARAM exports.
- * This is done by renumbering all PARAM exports.
- */
- if (removed_any) {
- uint8_t old_offset[VARYING_SLOT_MAX];
- unsigned out, i;
-
- /* Make a copy of the offsets. We need the old version while
- * we are modifying some of them. */
- memcpy(old_offset, vs_output_param_offset,
- sizeof(old_offset));
-
- for (i = 0; i < exports.num; i++) {
- unsigned offset = exports.exp[i].offset;
-
- /* Update vs_output_param_offset. Multiple outputs can
- * have the same offset.
- */
- for (out = 0; out < num_outputs; out++) {
- if (old_offset[out] == offset)
- vs_output_param_offset[out] = i;
- }
-
- /* Change the PARAM offset in the instruction. */
- LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
- LLVMConstInt(ctx->i32,
- V_008DFC_SQ_EXP_PARAM + i, 0));
- }
- *num_param_exports = exports.num;
- }
-}
-
-void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
-{
- LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
- ac_build_intrinsic(ctx,
- "llvm.amdgcn.init.exec", ctx->voidt,
- &full_mask, 1, AC_FUNC_ATTR_CONVERGENT);
-}
-
-void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
-{
- unsigned lds_size = ctx->chip_class >= GFX7 ? 65536 : 32768;
- ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
- LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS),
- "lds");
-}
-
-LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
- LLVMValueRef dw_addr)
-{
- return LLVMBuildLoad(ctx->builder, ac_build_gep0(ctx, ctx->lds, dw_addr), "");
-}
-
-/* Convert value with ac_to_integer() and store it via
- * ac_build_indexed_store() using ctx->lds as base and dw_addr as index.
- */
-void ac_lds_store(struct ac_llvm_context *ctx,
-		  LLVMValueRef dw_addr,
-		  LLVMValueRef value)
-{
-	LLVMValueRef int_value = ac_to_integer(ctx, value);
-
-	ac_build_indexed_store(ctx, ctx->lds, dw_addr, int_value);
-}
-
-/* Build the index of the least significant set bit of src0 as an i32.
- *
- * src0 must have 8-, 16-, 32- or 64-bit elements (as reported by
- * ac_get_elem_bits); the matching llvm.cttz.* intrinsic is used and its
- * result is truncated or sign-extended to i32. A select then yields -1
- * when src0 equals zero. dst_type is accepted but not consulted here.
- */
-LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
-			 LLVMTypeRef dst_type,
-			 LLVMValueRef src0)
-{
-	unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
-	const char *intrin_name;
-	LLVMTypeRef type;
-	LLVMValueRef zero;
-
-	switch (src0_bitsize) {
-	case 64:
-		intrin_name = "llvm.cttz.i64";
-		type = ctx->i64;
-		zero = ctx->i64_0;
-		break;
-	case 32:
-		intrin_name = "llvm.cttz.i32";
-		type = ctx->i32;
-		zero = ctx->i32_0;
-		break;
-	case 16:
-		intrin_name = "llvm.cttz.i16";
-		type = ctx->i16;
-		zero = ctx->i16_0;
-		break;
-	case 8:
-		intrin_name = "llvm.cttz.i8";
-		type = ctx->i8;
-		zero = ctx->i8_0;
-		break;
-	default:
-		/* Bare message string, like the other unreachable() uses in
-		 * this file. The former !"..." spelling negated the message
-		 * before the macro received it.
-		 */
-		unreachable("invalid bitsize");
-	}
-
-	LLVMValueRef params[2] = {
-		src0,
-
-		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
-		 * add special code to check for x=0. The reason is that
-		 * the LLVM behavior for x=0 is different from what we
-		 * need here. However, LLVM also assumes that ffs(x) is
-		 * in [0, 31], but GLSL expects that ffs(0) = -1, so
-		 * a conditional assignment to handle 0 is still required.
-		 *
-		 * The hardware already implements the correct behavior.
-		 */
-		ctx->i1true,
-	};
-
-	LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type,
-					      params, 2,
-					      AC_FUNC_ATTR_READNONE);
-
-	/* Bring the intrinsic result to i32 regardless of the source width. */
-	if (src0_bitsize == 64) {
-		lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
-	} else if (src0_bitsize < 32) {
-		lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, "");
-	}
-
-	/* TODO: We need an intrinsic to skip this conditional. */
-	/* Check for zero: */
-	return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
-							   LLVMIntEQ, src0,
-							   zero, ""),
-			       LLVMConstInt(ctx->i32, -1, 0), lsb, "");
-}
-
-/* Return the pointer-to-elem_type type in AC_ADDR_SPACE_CONST. */
-LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
-{
-	LLVMTypeRef ptr_type;
-
-	ptr_type = LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST);
-	return ptr_type;
-}
-
-/* Return the pointer-to-elem_type type in AC_ADDR_SPACE_CONST_32BIT. */
-LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
-{
-	LLVMTypeRef ptr_type;
-
-	ptr_type = LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT);
-	return ptr_type;
-}
-
-/* Return the top entry of the control-flow stack, or NULL when the stack
- * is empty.
- */
-static struct ac_llvm_flow *
-get_current_flow(struct ac_llvm_context *ctx)
-{
-	if (ctx->flow->depth <= 0)
-		return NULL;
-
-	return &ctx->flow->stack[ctx->flow->depth - 1];
-}
-
-/* Walk the control-flow stack from the top down and return the first entry
- * that has a loop_entry_block, or NULL if no entry has one.
- */
-static struct ac_llvm_flow *
-get_innermost_loop(struct ac_llvm_context *ctx)
-{
-	unsigned level = ctx->flow->depth;
-
-	while (level > 0) {
-		struct ac_llvm_flow *entry = &ctx->flow->stack[level - 1];
-
-		if (entry->loop_entry_block)
-			return entry;
-		level--;
-	}
-	return NULL;
-}
-
-/* Push a fresh entry onto the control-flow stack and return it with both
- * block pointers cleared. When the stack is full its capacity is grown to
- * the larger of twice the current depth and AC_LLVM_INITIAL_CF_DEPTH.
- */
-static struct ac_llvm_flow *
-push_flow(struct ac_llvm_context *ctx)
-{
-	if (ctx->flow->depth >= ctx->flow->depth_max) {
-		unsigned grown = MAX2(ctx->flow->depth << 1,
-				      AC_LLVM_INITIAL_CF_DEPTH);
-
-		ctx->flow->stack = realloc(ctx->flow->stack,
-					   grown * sizeof(*ctx->flow->stack));
-		ctx->flow->depth_max = grown;
-	}
-
-	struct ac_llvm_flow *top = &ctx->flow->stack[ctx->flow->depth];
-
-	ctx->flow->depth++;
-	top->loop_entry_block = NULL;
-	top->next_block = NULL;
-	return top;
-}
-
-/* Name basic block bb "<base><label_id>"; the name is formatted into a
- * 32-byte buffer.
- */
-static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base,
-				int label_id)
-{
-	char label[32];
-	LLVMValueRef bb_value = LLVMBasicBlockAsValue(bb);
-
-	snprintf(label, sizeof(label), "%s%d", base, label_id);
-	LLVMSetValueName(bb_value, label);
-}
-
-/* Create a basic block at the level of the parent flow: when the flow
- * stack holds at least two entries the block is inserted before the
- * parent's next_block, otherwise it is appended to the function that
- * contains the current insert block.
- */
-static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
-					    const char *name)
-{
-	assert(ctx->flow->depth >= 1);
-
-	if (ctx->flow->depth < 2) {
-		LLVMBasicBlockRef cur_bb = LLVMGetInsertBlock(ctx->builder);
-		LLVMValueRef fn = LLVMGetBasicBlockParent(cur_bb);
-
-		return LLVMAppendBasicBlockInContext(ctx->context, fn, name);
-	}
-
-	struct ac_llvm_flow *parent = &ctx->flow->stack[ctx->flow->depth - 2];
-
-	return LLVMInsertBasicBlockInContext(ctx->context,
-					     parent->next_block, name);
-}
-
-/* Branch to target from the builder's current block, unless that block
- * already ends in a terminator (e.g. a branch emitted by a break or
- * continue).
- */
-static void emit_default_branch(LLVMBuilderRef builder,
-				LLVMBasicBlockRef target)
-{
-	LLVMBasicBlockRef cur_bb = LLVMGetInsertBlock(builder);
-
-	if (LLVMGetBasicBlockTerminator(cur_bb))
-		return;
-
-	LLVMBuildBr(builder, target);
-}
-
-/* Open a loop: push a flow entry, create the loop entry and exit blocks,
- * branch into the entry block and continue emitting there.
- */
-void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
-{
-	struct ac_llvm_flow *loop = push_flow(ctx);
-	LLVMBasicBlockRef entry_bb = append_basic_block(ctx, "LOOP");
-	LLVMBasicBlockRef exit_bb = append_basic_block(ctx, "ENDLOOP");
-
-	loop->loop_entry_block = entry_bb;
-	loop->next_block = exit_bb;
-
-	set_basicblock_name(entry_bb, "loop", label_id);
-	LLVMBuildBr(ctx->builder, entry_bb);
-	LLVMPositionBuilderAtEnd(ctx->builder, entry_bb);
-}
-
-/* Branch to the next_block of the innermost loop on the flow stack. */
-void ac_build_break(struct ac_llvm_context *ctx)
-{
-	struct ac_llvm_flow *loop = get_innermost_loop(ctx);
-	LLVMBasicBlockRef exit_bb = loop->next_block;
-
-	LLVMBuildBr(ctx->builder, exit_bb);
-}
-
-/* Branch to the loop_entry_block of the innermost loop on the flow stack. */
-void ac_build_continue(struct ac_llvm_context *ctx)
-{
-	struct ac_llvm_flow *loop = get_innermost_loop(ctx);
-	LLVMBasicBlockRef entry_bb = loop->loop_entry_block;
-
-	LLVMBuildBr(ctx->builder, entry_bb);
-}
-
-/* Switch from the "then" side to the "else" side of the current if: a new
- * ENDIF block becomes the default branch target of the current block and
- * the flow's new next_block, while emission continues in the former
- * next_block, which is renamed "else<label_id>".
- */
-void ac_build_else(struct ac_llvm_context *ctx, int label_id)
-{
-	struct ac_llvm_flow *branch = get_current_flow(ctx);
-
-	assert(!branch->loop_entry_block);
-
-	LLVMBasicBlockRef else_bb = branch->next_block;
-	LLVMBasicBlockRef endif_bb = append_basic_block(ctx, "ENDIF");
-
-	emit_default_branch(ctx->builder, endif_bb);
-
-	LLVMPositionBuilderAtEnd(ctx->builder, else_bb);
-	set_basicblock_name(else_bb, "else", label_id);
-
-	branch->next_block = endif_bb;
-}
-
-/* Close the current if: fall through to its next_block, continue emitting
- * there, rename it "endif<label_id>" and pop the flow stack.
- */
-void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
-{
-	struct ac_llvm_flow *branch = get_current_flow(ctx);
-
-	assert(!branch->loop_entry_block);
-
-	LLVMBasicBlockRef join_bb = branch->next_block;
-
-	emit_default_branch(ctx->builder, join_bb);
-	LLVMPositionBuilderAtEnd(ctx->builder, join_bb);
-	set_basicblock_name(join_bb, "endif", label_id);
-
-	ctx->flow->depth--;
-}
-
-/* Close the current loop: branch back to its entry block by default,
- * continue emitting in its next_block (renamed "endloop<label_id>") and
- * pop the flow stack.
- */
-void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
-{
-	struct ac_llvm_flow *loop = get_current_flow(ctx);
-
-	assert(loop->loop_entry_block);
-
-	LLVMBasicBlockRef exit_bb = loop->next_block;
-
-	emit_default_branch(ctx->builder, loop->loop_entry_block);
-
-	LLVMPositionBuilderAtEnd(ctx->builder, exit_bb);
-	set_basicblock_name(exit_bb, "endloop", label_id);
-	ctx->flow->depth--;
-}
-
-/* Open an if on the i1 condition cond: push a flow entry, create the IF
- * and ELSE blocks, emit the conditional branch and continue emitting in
- * the IF block (renamed "if<label_id>").
- */
-void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
-{
-	struct ac_llvm_flow *branch = push_flow(ctx);
-	LLVMBasicBlockRef then_bb = append_basic_block(ctx, "IF");
-	LLVMBasicBlockRef else_bb = append_basic_block(ctx, "ELSE");
-
-	branch->next_block = else_bb;
-	set_basicblock_name(then_bb, "if", label_id);
-	LLVMBuildCondBr(ctx->builder, cond, then_bb, else_bb);
-	LLVMPositionBuilderAtEnd(ctx->builder, then_bb);
-}
-
-/* Open an if whose condition is "value != 0.0" (unordered-or-not-equal
- * float compare against ctx->f32_0).
- */
-void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
-		 int label_id)
-{
-	LLVMValueRef is_nonzero;
-
-	is_nonzero = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
-				   value, ctx->f32_0, "");
-	ac_build_ifcc(ctx, is_nonzero, label_id);
-}
-
-/* Open an if whose condition is "ac_to_integer(value) != 0" (integer
- * compare against ctx->i32_0).
- */
-void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
-		  int label_id)
-{
-	LLVMValueRef as_int = ac_to_integer(ctx, value);
-	LLVMValueRef is_nonzero;
-
-	is_nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE,
-				   as_int, ctx->i32_0, "");
-	ac_build_ifcc(ctx, is_nonzero, label_id);
-}
-
-/* Emit an alloca of the given type at the start of the entry block of the
- * function currently being built, using a temporary builder so the main
- * builder's position is left alone. The slot is not initialized.
- */
-LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
-				   const char *name)
-{
-	LLVMBasicBlockRef insert_bb = LLVMGetInsertBlock(ac->builder);
-	LLVMValueRef fn = LLVMGetBasicBlockParent(insert_bb);
-	LLVMBasicBlockRef entry_bb = LLVMGetEntryBasicBlock(fn);
-	LLVMValueRef head = LLVMGetFirstInstruction(entry_bb);
-	LLVMBuilderRef entry_builder = LLVMCreateBuilderInContext(ac->context);
-	LLVMValueRef slot;
-
-	/* An empty entry block has no instruction to insert before. */
-	if (!head)
-		LLVMPositionBuilderAtEnd(entry_builder, entry_bb);
-	else
-		LLVMPositionBuilderBefore(entry_builder, head);
-
-	slot = LLVMBuildAlloca(entry_builder, type, name);
-	LLVMDisposeBuilder(entry_builder);
-	return slot;
-}
-
-/* Like ac_build_alloca_undef(), followed by a store of the type's null
- * constant at the current builder position.
- */
-LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac,
-			     LLVMTypeRef type, const char *name)
-{
-	LLVMValueRef slot = ac_build_alloca_undef(ac, type, name);
-	LLVMValueRef zero_init = LLVMConstNull(type);
-
-	LLVMBuildStore(ac->builder, zero_init, slot);
-	return slot;
-}
-
-/* Bitcast ptr to a pointer to `type`, keeping ptr's address space. */
-LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
-			 LLVMTypeRef type)
-{
-	int space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
-	LLVMTypeRef new_ptr_type = LLVMPointerType(type, space);
-
-	return LLVMBuildBitCast(ctx->builder, ptr, new_ptr_type, "");
-}
-
-/* Return the first `count` components of value. The value itself is
- * returned when it already has `count` components; a single component is
- * produced with an extractelement, anything else with a shufflevector
- * using the index mask 0..count-1.
- */
-LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
-			    unsigned count)
-{
-	if (ac_get_llvm_num_components(value) == count)
-		return value;
-
-	/* At least two slots so the two fixed entries always fit. */
-	LLVMValueRef indices[MAX2(count, 2)];
-	unsigned chan;
-
-	indices[0] = ctx->i32_0;
-	indices[1] = ctx->i32_1;
-	chan = 2;
-	while (chan < count) {
-		indices[chan] = LLVMConstInt(ctx->i32, chan, false);
-		chan++;
-	}
-
-	if (count == 1) {
-		return LLVMBuildExtractElement(ctx->builder, value,
-					       indices[0], "");
-	}
-
-	LLVMValueRef shuffle_mask = LLVMConstVector(indices, count);
-
-	return LLVMBuildShuffleVector(ctx->builder, value, value,
-				      shuffle_mask, "");
-}
-
-/* Extract the `bitwidth`-bit field starting at bit `rshift` of param.
- *
- * The logical shift right is only emitted when rshift is non-zero, and the
- * AND mask only when the field ends below bit 32 (rshift + bitwidth < 32),
- * since otherwise the shift alone already isolates the field. Constants
- * are built as i32.
- */
-LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
-			     unsigned rshift, unsigned bitwidth)
-{
-	LLVMValueRef value = param;
-	if (rshift)
-		value = LLVMBuildLShr(ctx->builder, value,
-				      LLVMConstInt(ctx->i32, rshift, false), "");
-
-	if (rshift + bitwidth < 32) {
-		/* Unsigned literal: bitwidth may be 31 here (rshift == 0),
-		 * and shifting a signed 1 into the sign bit is undefined.
-		 */
-		unsigned mask = (1u << bitwidth) - 1;
-		value = LLVMBuildAnd(ctx->builder, value,
-				     LLVMConstInt(ctx->i32, mask, false), "");
-	}
-	return value;
-}
-
-/* Remap the MSAA sample index in addr[] through FMASK.
- *
- * FMASK holds one nibble per sample, each naming the physical sample to
- * fetch. For uncompressed MSAA surfaces it should read 0x76543210, i.e.
- * the identity mapping.
- *
- * Example: 0x11111100 means only 2 samples are stored and the second one
- * covers 3/4 of the pixel. Reading samples 0 and 1 returns physical
- * sample 0 (the two low nibbles are 0), everything else returns physical
- * sample 1.
- *
- * The adjustment applied is:
- *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
- *
- * The sample index lives in addr[3] for array textures and addr[2]
- * otherwise; it is updated in place.
- */
-void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
-			      LLVMValueRef *addr, bool is_array_tex)
-{
-	LLVMBuilderRef builder = ac->builder;
-	const unsigned sample_chan = is_array_tex ? 3 : 2;
-	struct ac_image_args load = {};
-
-	load.opcode = ac_image_load;
-	load.resource = fmask;
-	load.dmask = 0xf;
-	if (is_array_tex)
-		load.dim = ac_image_2darray;
-	else
-		load.dim = ac_image_2d;
-	load.attributes = AC_FUNC_ATTR_READNONE;
-
-	load.coords[0] = addr[0];
-	load.coords[1] = addr[1];
-	if (is_array_tex)
-		load.coords[2] = addr[2];
-
-	/* Only the first channel of the loaded FMASK texel is used. */
-	LLVMValueRef texel = ac_build_image_opcode(ac, &load);
-	LLVMValueRef fmask_bits = LLVMBuildExtractElement(builder, texel,
-							  ac->i32_0, "");
-
-	/* Apply the formula. */
-	LLVMValueRef four = LLVMConstInt(ac->i32, 4, 0);
-	LLVMValueRef shift = LLVMBuildMul(builder, addr[sample_chan], four, "");
-	LLVMValueRef shifted = LLVMBuildLShr(builder, fmask_bits, shift, "");
-
-	/* Mask the sample index by 0x7, because 0x8 means an unknown value
-	 * with EQAA, so those will map to 0. */
-	LLVMValueRef seven = LLVMConstInt(ac->i32, 0x7, 0);
-	LLVMValueRef remapped = LLVMBuildAnd(builder, shifted, seven, "");
-
-	/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
-	 * resource descriptor is 0 (invalid).
-	 */
-	LLVMValueRef desc = LLVMBuildBitCast(builder, fmask, ac->v8i32, "");
-	LLVMValueRef word1 = LLVMBuildExtractElement(builder, desc, ac->i32_1, "");
-	LLVMValueRef is_valid = LLVMBuildICmp(builder, LLVMIntNE, word1,
-					      ac->i32_0, "");
-
-	/* Replace the MSAA sample index. */
-	addr[sample_chan] = LLVMBuildSelect(builder, is_valid, remapped,
-					    addr[sample_chan], "");
-}
-
-/* Emit llvm.amdgcn.readlane(src, lane), or llvm.amdgcn.readfirstlane(src)
- * when lane is NULL, after passing src through an optimization barrier.
- */
-static LLVMValueRef
-_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
-{
-	LLVMValueRef operands[2];
-	const char *intrin;
-	unsigned num_operands;
-
-	ac_build_optimization_barrier(ctx, &src);
-
-	operands[0] = src;
-	operands[1] = lane;
-
-	if (lane == NULL) {
-		intrin = "llvm.amdgcn.readfirstlane";
-		num_operands = 1;
-	} else {
-		intrin = "llvm.amdgcn.readlane";
-		num_operands = 2;
-	}
-
-	return ac_build_intrinsic(ctx, intrin, LLVMTypeOf(src),
-				  operands, num_operands,
-				  AC_FUNC_ATTR_READNONE |
-				  AC_FUNC_ATTR_CONVERGENT);
-}
-
-/**
- * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
- *
- * The source is converted to an integer first; values wider than 32 bits
- * are processed one 32-bit component at a time and reassembled. The result
- * is converted back to the type of \p src (inttoptr for pointers, bitcast
- * otherwise).
- *
- * @param ctx
- * @param src
- * @param lane - id of the lane or NULL for the first active lane
- * @return value of the lane
- */
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
-{
-	LLVMTypeRef orig_type = LLVMTypeOf(src);
-	LLVMValueRef as_int = ac_to_integer(ctx, src);
-	unsigned width = LLVMGetIntTypeWidth(LLVMTypeOf(as_int));
-	LLVMValueRef gathered;
-
-	if (width != 32) {
-		assert(width % 32 == 0);
-		unsigned num_dwords = width / 32;
-		LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
-		LLVMValueRef dwords =
-			LLVMBuildBitCast(ctx->builder, as_int, dwords_type, "");
-
-		gathered = LLVMGetUndef(dwords_type);
-		for (unsigned c = 0; c < num_dwords; c++) {
-			LLVMValueRef idx = LLVMConstInt(ctx->i32, c, 0);
-			LLVMValueRef dword =
-				LLVMBuildExtractElement(ctx->builder, dwords, idx, "");
-			LLVMValueRef moved = _ac_build_readlane(ctx, dword, lane);
-
-			gathered = LLVMBuildInsertElement(ctx->builder, gathered,
-							  moved, idx, "");
-		}
-	} else {
-		gathered = _ac_build_readlane(ctx, as_int, lane);
-	}
-
-	if (LLVMGetTypeKind(orig_type) != LLVMPointerTypeKind)
-		return LLVMBuildBitCast(ctx->builder, gathered, orig_type, "");
-	return LLVMBuildIntToPtr(ctx->builder, gathered, orig_type, "");
-}
-
-/* Emit llvm.amdgcn.writelane with operands (value, lane, src), returning
- * an i32.
- */
-LLVMValueRef
-ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane)
-{
-	LLVMValueRef operands[3] = { value, lane, src };
-
-	return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32,
-				  operands, 3,
-				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* Emit the mbcnt sequence for mask. With a wave size of 32 a single
- * llvm.amdgcn.mbcnt.lo(mask, 0) is used; otherwise mask is split into two
- * i32 halves and mbcnt.lo on the low half feeds mbcnt.hi on the high half.
- */
-LLVMValueRef
-ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
-{
-	LLVMValueRef lo_args[2], hi_args[2];
-
-	if (ctx->wave_size == 32) {
-		lo_args[0] = mask;
-		lo_args[1] = ctx->i32_0;
-		return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
-					  lo_args, 2, AC_FUNC_ATTR_READNONE);
-	}
-
-	LLVMTypeRef v2i32 = LLVMVectorType(ctx->i32, 2);
-	LLVMValueRef halves = LLVMBuildBitCast(ctx->builder, mask, v2i32, "");
-	LLVMValueRef low = LLVMBuildExtractElement(ctx->builder, halves,
-						   ctx->i32_0, "");
-	LLVMValueRef high = LLVMBuildExtractElement(ctx->builder, halves,
-						    ctx->i32_1, "");
-
-	lo_args[0] = low;
-	lo_args[1] = ctx->i32_0;
-	LLVMValueRef count = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo",
-						ctx->i32, lo_args, 2,
-						AC_FUNC_ATTR_READNONE);
-
-	hi_args[0] = high;
-	hi_args[1] = count;
-	return ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32,
-				  hi_args, 2, AC_FUNC_ATTR_READNONE);
-}
-
-enum dpp_ctrl { /* Control codes passed as the dpp_ctrl operand of
-   * llvm.amdgcn.update.dpp.i32 by _ac_build_dpp(). The underscore-prefixed
-   * entries are base values that the dpp_*() helper functions OR a lane
-   * selector or shift amount into; the remaining entries are used as-is.
-   */
- _dpp_quad_perm = 0x000, /* base for dpp_quad_perm(): four 2-bit lane selectors */
- _dpp_row_sl = 0x100, /* base for dpp_row_sl(): amount 1..15 in the low bits */
- _dpp_row_sr = 0x110, /* base for dpp_row_sr(): amount 1..15 in the low bits */
- _dpp_row_rr = 0x120, /* NOTE(review): no helper for this base in the visible code */
- dpp_wf_sl1 = 0x130,
- dpp_wf_rl1 = 0x134,
- dpp_wf_sr1 = 0x138,
- dpp_wf_rr1 = 0x13C,
- dpp_row_mirror = 0x140,
- dpp_row_half_mirror = 0x141,
- dpp_row_bcast15 = 0x142,
- dpp_row_bcast31 = 0x143
-};
-
-/* Pack four lane selectors (each 0..3) into a quad-permute control code:
- * lane0 in bits 0-1, lane1 in bits 2-3, lane2 in bits 4-5, lane3 in
- * bits 6-7, ORed onto _dpp_quad_perm.
- */
-static inline enum dpp_ctrl
-dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
-{
-	unsigned ctrl = _dpp_quad_perm;
-
-	assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
-
-	ctrl |= lane0;
-	ctrl |= lane1 << 2;
-	ctrl |= lane2 << 4;
-	ctrl |= lane3 << 6;
-	return ctrl;
-}
-
-/* Build the control code _dpp_row_sl | amount; amount must be 1..15. */
-static inline enum dpp_ctrl
-dpp_row_sl(unsigned amount)
-{
-	unsigned ctrl = _dpp_row_sl;
-
-	assert(amount > 0 && amount < 16);
-	ctrl |= amount;
-	return ctrl;
-}
-
-/* Build the control code _dpp_row_sr | amount; amount must be 1..15. */
-static inline enum dpp_ctrl
-dpp_row_sr(unsigned amount)
-{
-	unsigned ctrl = _dpp_row_sr;
-
-	assert(amount > 0 && amount < 16);
-	ctrl |= amount;
-	return ctrl;
-}
-
-/* Emit one llvm.amdgcn.update.dpp.i32 call with operands
- * (old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl); the result has
- * the type of old.
- */
-static LLVMValueRef
-_ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
-	      enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
-	      bool bound_ctrl)
-{
-	LLVMValueRef operands[6];
-
-	operands[0] = old;
-	operands[1] = src;
-	operands[2] = LLVMConstInt(ctx->i32, dpp_ctrl, 0);
-	operands[3] = LLVMConstInt(ctx->i32, row_mask, 0);
-	operands[4] = LLVMConstInt(ctx->i32, bank_mask, 0);
-	operands[5] = LLVMConstInt(ctx->i1, bound_ctrl, 0);
-
-	return ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32",
-				  LLVMTypeOf(old), operands, 6,
-				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* DPP update for values of any multiple-of-32-bit width. src and old are
- * converted to integers; a 32-bit value takes one _ac_build_dpp() call,
- * wider values are split into i32 components that are processed one by one
- * and reassembled. The result is bitcast back to the type of src.
- */
-static LLVMValueRef
-ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
-	     enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
-	     bool bound_ctrl)
-{
-	LLVMTypeRef orig_type = LLVMTypeOf(src);
-	LLVMValueRef src_int = ac_to_integer(ctx, src);
-	LLVMValueRef old_int = ac_to_integer(ctx, old);
-	unsigned width = LLVMGetIntTypeWidth(LLVMTypeOf(src_int));
-	LLVMValueRef gathered;
-
-	if (width != 32) {
-		assert(width % 32 == 0);
-		unsigned num_dwords = width / 32;
-		LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
-		LLVMValueRef src_dwords =
-			LLVMBuildBitCast(ctx->builder, src_int, dwords_type, "");
-		LLVMValueRef old_dwords =
-			LLVMBuildBitCast(ctx->builder, old_int, dwords_type, "");
-
-		gathered = LLVMGetUndef(dwords_type);
-		for (unsigned c = 0; c < num_dwords; c++) {
-			LLVMValueRef idx = LLVMConstInt(ctx->i32, c, 0);
-			LLVMValueRef src_dword =
-				LLVMBuildExtractElement(ctx->builder, src_dwords, idx, "");
-			LLVMValueRef old_dword =
-				LLVMBuildExtractElement(ctx->builder, old_dwords, idx, "");
-			LLVMValueRef moved =
-				_ac_build_dpp(ctx, old_dword, src_dword, dpp_ctrl,
-					      row_mask, bank_mask, bound_ctrl);
-
-			gathered = LLVMBuildInsertElement(ctx->builder, gathered,
-							  moved, idx, "");
-		}
-	} else {
-		gathered = _ac_build_dpp(ctx, old_int, src_int, dpp_ctrl,
-					 row_mask, bank_mask, bound_ctrl);
-	}
-
-	return LLVMBuildBitCast(ctx->builder, gathered, orig_type, "");
-}
-
-/* Emit llvm.amdgcn.permlanex16 (exchange_rows) or llvm.amdgcn.permlane16
- * on an i32 value. src is passed for both of the first two operands, sel
- * is split into its low and high 32 bits, the "fi" operand is always true.
- */
-static LLVMValueRef
-_ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
-		     bool exchange_rows, bool bound_ctrl)
-{
-	const char *intrin;
-	LLVMValueRef operands[6];
-
-	operands[0] = src;
-	operands[1] = src;
-	operands[2] = LLVMConstInt(ctx->i32, sel, false);
-	operands[3] = LLVMConstInt(ctx->i32, sel >> 32, false);
-	operands[4] = ctx->i1true; /* fi */
-	if (bound_ctrl)
-		operands[5] = ctx->i1true;
-	else
-		operands[5] = ctx->i1false;
-
-	if (exchange_rows)
-		intrin = "llvm.amdgcn.permlanex16";
-	else
-		intrin = "llvm.amdgcn.permlane16";
-
-	return ac_build_intrinsic(ctx, intrin, ctx->i32, operands, 6,
-				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* permlane16/permlanex16 for values of any multiple-of-32-bit width: the
- * source is converted to an integer, processed per i32 component when it
- * is wider than 32 bits, and bitcast back to its original type.
- */
-static LLVMValueRef
-ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
-		    bool exchange_rows, bool bound_ctrl)
-{
-	LLVMTypeRef orig_type = LLVMTypeOf(src);
-	LLVMValueRef as_int = ac_to_integer(ctx, src);
-	unsigned width = LLVMGetIntTypeWidth(LLVMTypeOf(as_int));
-	LLVMValueRef gathered;
-
-	if (width != 32) {
-		assert(width % 32 == 0);
-		unsigned num_dwords = width / 32;
-		LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
-		LLVMValueRef dwords =
-			LLVMBuildBitCast(ctx->builder, as_int, dwords_type, "");
-
-		gathered = LLVMGetUndef(dwords_type);
-		for (unsigned c = 0; c < num_dwords; c++) {
-			LLVMValueRef idx = LLVMConstInt(ctx->i32, c, 0);
-			LLVMValueRef dword =
-				LLVMBuildExtractElement(ctx->builder, dwords, idx, "");
-			LLVMValueRef moved =
-				_ac_build_permlane16(ctx, dword, sel,
-						     exchange_rows, bound_ctrl);
-
-			gathered = LLVMBuildInsertElement(ctx->builder, gathered,
-							  moved, idx, "");
-		}
-	} else {
-		gathered = _ac_build_permlane16(ctx, as_int, sel, exchange_rows,
-						bound_ctrl);
-	}
-
-	return LLVMBuildBitCast(ctx->builder, gathered, orig_type, "");
-}
-
-/* Pack three 5-bit masks into one swizzle pattern: and_mask in bits 0-4,
- * or_mask in bits 5-9, xor_mask in bits 10-14. Each mask must be below 32.
- */
-static inline unsigned
-ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
-{
-	unsigned pattern;
-
-	assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
-
-	pattern = and_mask;
-	pattern |= or_mask << 5;
-	pattern |= xor_mask << 10;
-	return pattern;
-}
-
-/* Emit llvm.amdgcn.ds.swizzle(src, mask); the result has the type of src. */
-static LLVMValueRef
-_ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
-{
-	LLVMValueRef operands[2];
-
-	operands[0] = src;
-	operands[1] = LLVMConstInt(ctx->i32, mask, 0);
-
-	return ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle",
-				  LLVMTypeOf(src), operands, 2,
-				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
-}
-
-/* ds_swizzle for values of any multiple-of-32-bit width: the source is
- * converted to an integer, swizzled per i32 component when it is wider
- * than 32 bits, and bitcast back to its original type.
- */
-LLVMValueRef
-ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
-{
-	LLVMTypeRef orig_type = LLVMTypeOf(src);
-	LLVMValueRef as_int = ac_to_integer(ctx, src);
-	unsigned width = LLVMGetIntTypeWidth(LLVMTypeOf(as_int));
-	LLVMValueRef gathered;
-
-	if (width != 32) {
-		assert(width % 32 == 0);
-		unsigned num_dwords = width / 32;
-		LLVMTypeRef dwords_type = LLVMVectorType(ctx->i32, num_dwords);
-		LLVMValueRef dwords =
-			LLVMBuildBitCast(ctx->builder, as_int, dwords_type, "");
-
-		gathered = LLVMGetUndef(dwords_type);
-		for (unsigned c = 0; c < num_dwords; c++) {
-			LLVMValueRef idx = LLVMConstInt(ctx->i32, c, 0);
-			LLVMValueRef dword =
-				LLVMBuildExtractElement(ctx->builder, dwords, idx, "");
-			LLVMValueRef moved = _ac_build_ds_swizzle(ctx, dword, mask);
-
-			gathered = LLVMBuildInsertElement(ctx->builder, gathered,
-							  moved, idx, "");
-		}
-	} else {
-		gathered = _ac_build_ds_swizzle(ctx, as_int, mask);
-	}
-
-	return LLVMBuildBitCast(ctx->builder, gathered, orig_type, "");
-}
-
-/* Emit llvm.amdgcn.wwm.<type> on src, where <type> is the suffix that
- * ac_build_type_name_for_intr() produces for the type of src.
- */
-static LLVMValueRef
-ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
-{
-	LLVMTypeRef src_type = LLVMTypeOf(src);
-	char type_suffix[8];
-	char intrin[32];
-
-	ac_build_type_name_for_intr(src_type, type_suffix, sizeof(type_suffix));
-	snprintf(intrin, sizeof(intrin), "llvm.amdgcn.wwm.%s", type_suffix);
-
-	return ac_build_intrinsic(ctx, intrin, src_type, &src, 1,
-				  AC_FUNC_ATTR_READNONE);
-}
-
-/* Emit llvm.amdgcn.set.inactive.<type>(src, inactive) on the integer forms
- * of both operands and bitcast the result back to the original type of src.
- */
-static LLVMValueRef
-ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
-		      LLVMValueRef inactive)
-{
-	LLVMTypeRef orig_type = LLVMTypeOf(src);
-	LLVMValueRef operands[2];
-	char type_suffix[8];
-	char intrin[33];
-
-	operands[0] = ac_to_integer(ctx, src);
-	operands[1] = ac_to_integer(ctx, inactive);
-
-	LLVMTypeRef int_type = LLVMTypeOf(operands[0]);
-
-	ac_build_type_name_for_intr(int_type, type_suffix, sizeof(type_suffix));
-	snprintf(intrin, sizeof(intrin), "llvm.amdgcn.set.inactive.%s", type_suffix);
-
-	LLVMValueRef masked =
-		ac_build_intrinsic(ctx, intrin, int_type, operands, 2,
-				   AC_FUNC_ATTR_READNONE |
-				   AC_FUNC_ATTR_CONVERGENT);
-
-	return LLVMBuildBitCast(ctx->builder, masked, orig_type, "");
-}
-
-static LLVMValueRef
-get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, unsigned type_size)
-{ /* Return the constant used as the identity (neutral) element of the
-   * reduction operation `op`. type_size == 4 selects the 32-bit constants;
-   * every other type_size takes the 64-bit table below. Operations that
-   * are not listed reach unreachable().
-   */
- if (type_size == 4) {
- switch (op) {
- case nir_op_iadd: return ctx->i32_0;
- case nir_op_fadd: return ctx->f32_0;
- case nir_op_imul: return ctx->i32_1;
- case nir_op_fmul: return ctx->f32_1;
- case nir_op_imin: return LLVMConstInt(ctx->i32, INT32_MAX, 0);
- case nir_op_umin: return LLVMConstInt(ctx->i32, UINT32_MAX, 0);
- case nir_op_fmin: return LLVMConstReal(ctx->f32, INFINITY);
- case nir_op_imax: return LLVMConstInt(ctx->i32, INT32_MIN, 0);
- case nir_op_umax: return ctx->i32_0;
- case nir_op_fmax: return LLVMConstReal(ctx->f32, -INFINITY);
- case nir_op_iand: return LLVMConstInt(ctx->i32, -1, 0);
- case nir_op_ior: return ctx->i32_0;
- case nir_op_ixor: return ctx->i32_0;
- default:
- unreachable("bad reduction intrinsic");
- }
- } else { /* type_size == 64bit */
- switch (op) {
- case nir_op_iadd: return ctx->i64_0;
- case nir_op_fadd: return ctx->f64_0;
- case nir_op_imul: return ctx->i64_1;
- case nir_op_fmul: return ctx->f64_1;
- case nir_op_imin: return LLVMConstInt(ctx->i64, INT64_MAX, 0);
- case nir_op_umin: return LLVMConstInt(ctx->i64, UINT64_MAX, 0);
- case nir_op_fmin: return LLVMConstReal(ctx->f64, INFINITY);
- case nir_op_imax: return LLVMConstInt(ctx->i64, INT64_MIN, 0);
- case nir_op_umax: return ctx->i64_0;
- case nir_op_fmax: return LLVMConstReal(ctx->f64, -INFINITY);
- case nir_op_iand: return LLVMConstInt(ctx->i64, -1, 0);
- case nir_op_ior: return ctx->i64_0;
- case nir_op_ixor: return ctx->i64_0;
- default:
- unreachable("bad reduction intrinsic");
- }
- }
-}
-
-static LLVMValueRef
-ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, nir_op op)
-{ /* Emit the two-operand operation selected by `op` on lhs and rhs.
-   * Integer min/max are built as an icmp followed by a select; fmin/fmax
-   * call llvm.minnum/llvm.maxnum, using the f64 variant when
-   * ac_get_type_size() of lhs's type is 8 and the f32 variant otherwise.
-   * Operations that are not listed reach unreachable().
-   */
- bool _64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8;
- switch (op) {
- case nir_op_iadd: return LLVMBuildAdd(ctx->builder, lhs, rhs, "");
- case nir_op_fadd: return LLVMBuildFAdd(ctx->builder, lhs, rhs, "");
- case nir_op_imul: return LLVMBuildMul(ctx->builder, lhs, rhs, "");
- case nir_op_fmul: return LLVMBuildFMul(ctx->builder, lhs, rhs, "");
- case nir_op_imin: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_umin: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_fmin: return ac_build_intrinsic(ctx,
- _64bit ? "llvm.minnum.f64" : "llvm.minnum.f32",
- _64bit ? ctx->f64 : ctx->f32,
- (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE);
- case nir_op_imax: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_umax: return LLVMBuildSelect(ctx->builder,
- LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""),
- lhs, rhs, "");
- case nir_op_fmax: return ac_build_intrinsic(ctx,
- _64bit ? "llvm.maxnum.f64" : "llvm.maxnum.f32",
- _64bit ? ctx->f64 : ctx->f32,
- (LLVMValueRef[]){lhs, rhs}, 2, AC_FUNC_ATTR_READNONE);
- case nir_op_iand: return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
- case nir_op_ior: return LLVMBuildOr(ctx->builder, lhs, rhs, "");
- case nir_op_ixor: return LLVMBuildXor(ctx->builder, lhs, rhs, "");
- default:
- unreachable("bad reduction intrinsic");
- }
-}
-
-/**
- * Build a prefix scan of \p src across lanes using the binary operation
- * \p op, combining progressively larger DPP steps.
- *
- * \param identity passed as the "old" operand of every DPP step; on GFX10+
- * it is also the starting value of an exclusive scan
- * \param maxprefix specifies that the result only needs to be correct for a
- * prefix of this many threads
- * \param inclusive when false, chips before GFX10 first run src through a
- * dpp_wf_sr1 step, while GFX10+ starts accumulating from identity
- *
- * TODO: add inclusive and exclusive scan functions for GFX6.
- */
-static LLVMValueRef
-ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity,
- unsigned maxprefix, bool inclusive)
-{
- LLVMValueRef result, tmp;
-
- if (ctx->chip_class >= GFX10) {
- result = inclusive ? src : identity;
- } else {
- if (!inclusive)
- src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
- result = src;
- }
- if (maxprefix <= 1)
- return result;
- tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false); /* steps 1-3 read src */
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 2)
- return result;
- tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 3)
- return result;
- tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 4)
- return result;
- tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false); /* from here on the steps read the running result */
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 8)
- return result;
- tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 16)
- return result;
-
- if (ctx->chip_class >= GFX10) {
- /* dpp_row_bcast{15,31} are not supported on gfx10. */
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef tid = ac_get_thread_id(ctx);
- LLVMValueRef cc;
- /* TODO-GFX10: Can we get better code-gen by putting this into
- * a branch so that LLVM generates EXEC mask manipulations? */
- if (inclusive)
- tmp = result;
- else
- tmp = ac_build_alu_op(ctx, result, src, op);
- tmp = ac_build_permlane16(ctx, tmp, ~(uint64_t)0, true, false);
- tmp = ac_build_alu_op(ctx, result, tmp, op);
- cc = LLVMBuildAnd(builder, tid, LLVMConstInt(ctx->i32, 16, false), ""); /* select tmp only where bit 4 of the thread id is set */
- cc = LLVMBuildICmp(builder, LLVMIntNE, cc, ctx->i32_0, "");
- result = LLVMBuildSelect(builder, cc, tmp, result, "");
- if (maxprefix <= 32)
- return result;
-
- if (inclusive)
- tmp = result;
- else
- tmp = ac_build_alu_op(ctx, result, src, op);
- tmp = ac_build_readlane(ctx, tmp, LLVMConstInt(ctx->i32, 31, false));
- tmp = ac_build_alu_op(ctx, result, tmp, op);
- cc = LLVMBuildICmp(builder, LLVMIntUGE, tid,
- LLVMConstInt(ctx->i32, 32, false), ""); /* select tmp only for thread ids >= 32 */
- result = LLVMBuildSelect(builder, cc, tmp, result, "");
- return result;
- }
-
- tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- if (maxprefix <= 32)
- return result;
- tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
- result = ac_build_alu_op(ctx, result, tmp, op);
- return result;
-}
-
-/* Build an inclusive scan of src with operation op over ctx->wave_size
- * lanes.
- *
- * An i1 source combined with nir_op_iadd takes a shortcut: the value is
- * zero-extended to i32 and the result is mbcnt(ballot(src)) + src.
- * Everything else goes through set_inactive with the operation's identity,
- * ac_build_scan() and a final wwm wrapper.
- */
-LLVMValueRef
-ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
-{
-	if (op == nir_op_iadd && LLVMTypeOf(src) == ctx->i1) {
-		LLVMValueRef bit = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-		LLVMValueRef ballot = ac_build_ballot(ctx, bit);
-		LLVMValueRef below = ac_build_mbcnt(ctx, ballot);
-
-		return LLVMBuildAdd(ctx->builder, below, bit, "");
-	}
-
-	ac_build_optimization_barrier(ctx, &src);
-
-	unsigned type_size = ac_get_type_size(LLVMTypeOf(src));
-	LLVMValueRef identity = get_reduction_identity(ctx, op, type_size);
-	LLVMValueRef masked = ac_build_set_inactive(ctx, src, identity);
-	LLVMValueRef scan_in = LLVMBuildBitCast(ctx->builder, masked,
-						LLVMTypeOf(identity), "");
-	LLVMValueRef scanned = ac_build_scan(ctx, op, scan_in, identity,
-					     ctx->wave_size, true);
-
-	return ac_build_wwm(ctx, scanned);
-}
-
-/* Build an exclusive scan of src with operation op over ctx->wave_size
- * lanes.
- *
- * An i1 source combined with nir_op_iadd takes a shortcut: the value is
- * zero-extended to i32 and the result is mbcnt(ballot(src)). Everything
- * else goes through set_inactive with the operation's identity,
- * ac_build_scan() and a final wwm wrapper.
- */
-LLVMValueRef
-ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
-{
-	if (op == nir_op_iadd && LLVMTypeOf(src) == ctx->i1) {
-		LLVMValueRef bit = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
-		LLVMValueRef ballot = ac_build_ballot(ctx, bit);
-
-		return ac_build_mbcnt(ctx, ballot);
-	}
-
-	ac_build_optimization_barrier(ctx, &src);
-
-	unsigned type_size = ac_get_type_size(LLVMTypeOf(src));
-	LLVMValueRef identity = get_reduction_identity(ctx, op, type_size);
-	LLVMValueRef masked = ac_build_set_inactive(ctx, src, identity);
-	LLVMValueRef scan_in = LLVMBuildBitCast(ctx->builder, masked,
-						LLVMTypeOf(identity), "");
-	LLVMValueRef scanned = ac_build_scan(ctx, op, scan_in, identity,
-					     ctx->wave_size, false);
-
-	return ac_build_wwm(ctx, scanned);
-}
-
-LLVMValueRef
-ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size)
-{ /* Reduce src with operation op across clusters of cluster_size lanes.
-   * Each stage combines the running result with a swizzled copy of itself
-   * and returns, wrapped in ac_build_wwm(), once the requested cluster
-   * size is reached. cluster_size == 1 returns src untouched. Any value
-   * other than 1, 2, 4, 8, 16 or 32 falls through to the last stage.
-   * NOTE(review): presumably callers only pass powers of two up to the
-   * wave size -- confirm.
-   */
- if (cluster_size == 1) return src;
- ac_build_optimization_barrier(ctx, &src);
- LLVMValueRef result, swap;
- LLVMValueRef identity = get_reduction_identity(ctx, op,
- ac_get_type_size(LLVMTypeOf(src)));
- result = LLVMBuildBitCast(ctx->builder,
- ac_build_set_inactive(ctx, src, identity),
- LLVMTypeOf(identity), "");
- swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 2) return ac_build_wwm(ctx, result);
-
- swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 4) return ac_build_wwm(ctx, result);
-
- if (ctx->chip_class >= GFX8) /* DPP on GFX8+, ds_swizzle on older chips */
- swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
- else
- swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 8) return ac_build_wwm(ctx, result);
-
- if (ctx->chip_class >= GFX8)
- swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
- else
- swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 16) return ac_build_wwm(ctx, result);
-
- if (ctx->chip_class >= GFX10)
- swap = ac_build_permlane16(ctx, result, 0, true, false);
- else if (ctx->chip_class >= GFX8 && cluster_size != 32)
- swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
- else
- swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
- result = ac_build_alu_op(ctx, result, swap, op);
- if (cluster_size == 32) return ac_build_wwm(ctx, result);
-
- if (ctx->chip_class >= GFX8) {
- if (ctx->chip_class >= GFX10)
- swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
- else
- swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
- result = ac_build_alu_op(ctx, result, swap, op);
- result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0)); /* final value is read from lane 63 */
- return ac_build_wwm(ctx, result);
- } else {
- swap = ac_build_readlane(ctx, result, ctx->i32_0); /* combine the values read from lanes 0 and 32 */
- result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
- result = ac_build_alu_op(ctx, result, swap, op);
- return ac_build_wwm(ctx, result);
- }
-}
-
-/**
- * "Top half" of a scan that reduces per-wave values across an entire
- * workgroup.
- *
- * The lane with id wave_size - 1 stores ws->src into ws->scratch at index
- * ws->waveidx. Nothing is emitted when ws->maxwaves <= 1.
- *
- * The source value must be present in the highest lane of the wave, and the
- * highest lane must be live.
- */
-void
-ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
-	if (ws->maxwaves <= 1)
-		return;
-
-	LLVMBuilderRef builder = ctx->builder;
-	const LLVMValueRef top_lane =
-		LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
-	LLVMValueRef lane_id = ac_get_thread_id(ctx);
-	LLVMValueRef is_top = LLVMBuildICmp(builder, LLVMIntEQ, lane_id,
-					    top_lane, "");
-
-	ac_build_ifcc(ctx, is_top, 1000);
-	{
-		LLVMValueRef slot = LLVMBuildGEP(builder, ws->scratch,
-						 &ws->waveidx, 1, "");
-
-		LLVMBuildStore(builder, ws->src, slot);
-	}
-	ac_build_endif(ctx, 1000);
-}
-
-/**
- * "Bottom half" of a scan that reduces per-wave values across an entire
- * workgroup.
- *
- * Lanes selected by the enable flags load one value each from ws->scratch
- * (indexed by thread id) and run an inclusive ac_build_scan() over at most
- * ws->maxwaves lanes. The requested results are then read back with
- * ac_build_readlane() into ws->result_reduce, ws->result_inclusive and
- * ws->result_exclusive.
- *
- * When ws->maxwaves <= 1 no code is emitted and all three result fields
- * are filled in directly, regardless of the enable flags.
- *
- * The caller must place a barrier between the top and bottom halves.
- */
-void
-ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
- const LLVMTypeRef type = LLVMTypeOf(ws->src);
- const LLVMValueRef identity =
- get_reduction_identity(ctx, ws->op, ac_get_type_size(type));
-
- if (ws->maxwaves <= 1) {
- ws->result_reduce = ws->src;
- ws->result_inclusive = ws->src;
- ws->result_exclusive = identity;
- return;
- }
- assert(ws->maxwaves <= 32);
-
- LLVMBuilderRef builder = ctx->builder;
- LLVMValueRef tid = ac_get_thread_id(ctx);
- LLVMBasicBlockRef bbs[2];
- LLVMValueRef phivalues_scan[2];
- LLVMValueRef tmp, tmp2;
-
- bbs[0] = LLVMGetInsertBlock(builder); /* phi input for lanes that skip the if: undef */
- phivalues_scan[0] = LLVMGetUndef(type);
-
- if (ws->enable_reduce) /* pick how many lanes take part in the scan */
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, "");
- else if (ws->enable_inclusive)
- tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, "");
- else
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, "");
- ac_build_ifcc(ctx, tmp, 1001);
- {
- tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), "");
-
- ac_build_optimization_barrier(ctx, &tmp);
-
- bbs[1] = LLVMGetInsertBlock(builder); /* captured before the scan is emitted */
- phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);
- }
- ac_build_endif(ctx, 1001);
-
- const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs);
-
- if (ws->enable_reduce) {
- tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, ""); /* lane numwaves - 1 */
- ws->result_reduce = ac_build_readlane(ctx, scan, tmp);
- }
- if (ws->enable_inclusive)
- ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx);
- if (ws->enable_exclusive) {
- tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, "");
- tmp = ac_build_readlane(ctx, scan, tmp);
- tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, ""); /* wave 0 gets identity as its exclusive result */
- ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, "");
- }
-}
-
-/**
- * Inclusive scan of a per-wave value across an entire workgroup.
- *
- * This implies an s_barrier instruction.
- *
- * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads
- * of the workgroup are live. (This requirement cannot easily be relaxed in a
- * useful manner because of the barrier in the algorithm.)
- */
-void
-ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
- ac_build_wg_wavescan_top(ctx, ws);
- ac_build_s_barrier(ctx);
- ac_build_wg_wavescan_bottom(ctx, ws);
-}
-
-/**
- * "Top half" of a scan that reduces per-thread values across an entire
- * workgroup.
- *
- * All lanes must be active when this code runs.
- */
-void
-ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
- if (ws->enable_exclusive) {
- ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op);
- if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd)
- ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, "");
- ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op);
- } else {
- ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op);
- }
-
- bool enable_inclusive = ws->enable_inclusive;
- bool enable_exclusive = ws->enable_exclusive;
- ws->enable_inclusive = false;
- ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
- ac_build_wg_wavescan_top(ctx, ws);
- ws->enable_inclusive = enable_inclusive;
- ws->enable_exclusive = enable_exclusive;
-}
-
-/**
- * "Bottom half" of a scan that reduces per-thread values across an entire
- * workgroup.
- *
- * The caller must place a barrier between the top and bottom halves.
- */
-void
-ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
- bool enable_inclusive = ws->enable_inclusive;
- bool enable_exclusive = ws->enable_exclusive;
- ws->enable_inclusive = false;
- ws->enable_exclusive = ws->enable_exclusive || enable_inclusive;
- ac_build_wg_wavescan_bottom(ctx, ws);
- ws->enable_inclusive = enable_inclusive;
- ws->enable_exclusive = enable_exclusive;
-
- /* ws->result_reduce is already the correct value */
- if (ws->enable_inclusive)
- ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op);
- if (ws->enable_exclusive)
- ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op);
-}
-
-/**
- * A scan that reduces per-thread values across an entire workgroup.
- *
- * The caller must ensure that all lanes are active when this code runs
- * (WWM is insufficient!), because there is an implied barrier.
- */
-void
-ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
-{
- ac_build_wg_scan_top(ctx, ws);
- ac_build_s_barrier(ctx);
- ac_build_wg_scan_bottom(ctx, ws);
-}
-
-LLVMValueRef
-ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
- unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
-{
- unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
- if (ctx->chip_class >= GFX8) {
- return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
- } else {
- return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
- }
-}
-
-LLVMValueRef
-ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
-{
- index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
- return ac_build_intrinsic(ctx,
- "llvm.amdgcn.ds.bpermute", ctx->i32,
- (LLVMValueRef []) {index, src}, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
-}
-
-LLVMValueRef
-ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.amdgcn.frexp.exp.i16.f16";
- type = ctx->i16;
- } else if (bitsize == 32) {
- intr = "llvm.amdgcn.frexp.exp.i32.f32";
- type = ctx->i32;
- } else {
- intr = "llvm.amdgcn.frexp.exp.i32.f64";
- type = ctx->i32;
- }
-
- LLVMValueRef params[] = {
- src0,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 1,
- AC_FUNC_ATTR_READNONE);
-}
-LLVMValueRef
-ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize)
-{
- LLVMTypeRef type;
- char *intr;
-
- if (bitsize == 16) {
- intr = "llvm.amdgcn.frexp.mant.f16";
- type = ctx->f16;
- } else if (bitsize == 32) {
- intr = "llvm.amdgcn.frexp.mant.f32";
- type = ctx->f32;
- } else {
- intr = "llvm.amdgcn.frexp.mant.f64";
- type = ctx->f64;
- }
-
- LLVMValueRef params[] = {
- src0,
- };
- return ac_build_intrinsic(ctx, intr, type, params, 1,
- AC_FUNC_ATTR_READNONE);
-}
-
-/*
- * this takes an I,J coordinate pair,
- * and works out the X and Y derivatives.
- * it returns DDX(I), DDX(J), DDY(I), DDY(J).
- */
-LLVMValueRef
-ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij)
-{
- LLVMValueRef result[4], a;
- unsigned i;
-
- for (i = 0; i < 2; i++) {
- a = LLVMBuildExtractElement(ctx->builder, interp_ij,
- LLVMConstInt(ctx->i32, i, false), "");
- result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a);
- result[2+i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a);
- }
- return ac_build_gather_values(ctx, result, 4);
-}
-
-LLVMValueRef
-ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
-{
- LLVMValueRef result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
- ctx->i1, NULL, 0,
- AC_FUNC_ATTR_READNONE);
- result = LLVMBuildNot(ctx->builder, result, "");
- return LLVMBuildSExt(ctx->builder, result, ctx->i32, "");
-}
-
-LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
- LLVMValueRef *args, unsigned num_args)
-{
- LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, "");
- LLVMSetInstructionCallConv(ret, LLVMGetFunctionCallConv(func));
- return ret;
-}
-
-void
-ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
- LLVMValueRef stencil, LLVMValueRef samplemask,
- struct ac_export_args *args)
-{
- unsigned mask = 0;
- unsigned format = ac_get_spi_shader_z_format(depth != NULL,
- stencil != NULL,
- samplemask != NULL);
-
- assert(depth || stencil || samplemask);
-
- memset(args, 0, sizeof(*args));
-
- args->valid_mask = 1; /* whether the EXEC mask is valid */
- args->done = 1; /* DONE bit */
-
- /* Specify the target we are exporting */
- args->target = V_008DFC_SQ_EXP_MRTZ;
-
- args->compr = 0; /* COMP flag */
- args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
- args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
- args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
- args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
-
- if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
- assert(!depth);
- args->compr = 1; /* COMPR flag */
-
- if (stencil) {
- /* Stencil should be in X[23:16]. */
- stencil = ac_to_integer(ctx, stencil);
- stencil = LLVMBuildShl(ctx->builder, stencil,
- LLVMConstInt(ctx->i32, 16, 0), "");
- args->out[0] = ac_to_float(ctx, stencil);
- mask |= 0x3;
- }
- if (samplemask) {
- /* SampleMask should be in Y[15:0]. */
- args->out[1] = samplemask;
- mask |= 0xc;
- }
- } else {
- if (depth) {
- args->out[0] = depth;
- mask |= 0x1;
- }
- if (stencil) {
- args->out[1] = stencil;
- mask |= 0x2;
- }
- if (samplemask) {
- args->out[2] = samplemask;
- mask |= 0x4;
- }
- }
-
- /* GFX6 (except OLAND and HAINAN) has a bug that it only looks
- * at the X writemask component. */
- if (ctx->chip_class == GFX6 &&
- ctx->family != CHIP_OLAND &&
- ctx->family != CHIP_HAINAN)
- mask |= 0x1;
-
- /* Specify which components to enable */
- args->enabled_channels = mask;
-}
-
+++ /dev/null
-/*
- * Copyright 2016 Bas Nieuwenhuizen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-#ifndef AC_LLVM_BUILD_H
-#define AC_LLVM_BUILD_H
-
-#include <stdbool.h>
-#include <llvm-c/Core.h>
-#include "compiler/nir/nir.h"
-#include "amd_family.h"
-#include "ac_shader_util.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum {
- AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
- AC_ADDR_SPACE_GLOBAL = 1,
- AC_ADDR_SPACE_GDS = 2,
- AC_ADDR_SPACE_LDS = 3,
- AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
- AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
-};
-
-#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
-#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */
-#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
-
-struct ac_llvm_flow;
-struct ac_llvm_compiler;
-enum ac_float_mode;
-
-struct ac_llvm_flow_state {
- struct ac_llvm_flow *stack;
- unsigned depth_max;
- unsigned depth;
-};
-
-struct ac_llvm_context {
- LLVMContextRef context;
- LLVMModuleRef module;
- LLVMBuilderRef builder;
-
- LLVMTypeRef voidt;
- LLVMTypeRef i1;
- LLVMTypeRef i8;
- LLVMTypeRef i16;
- LLVMTypeRef i32;
- LLVMTypeRef i64;
- LLVMTypeRef intptr;
- LLVMTypeRef f16;
- LLVMTypeRef f32;
- LLVMTypeRef f64;
- LLVMTypeRef v2i16;
- LLVMTypeRef v2i32;
- LLVMTypeRef v3i32;
- LLVMTypeRef v4i32;
- LLVMTypeRef v2f32;
- LLVMTypeRef v3f32;
- LLVMTypeRef v4f32;
- LLVMTypeRef v8i32;
- LLVMTypeRef iN_wavemask;
- LLVMTypeRef iN_ballotmask;
-
- LLVMValueRef i8_0;
- LLVMValueRef i8_1;
- LLVMValueRef i16_0;
- LLVMValueRef i16_1;
- LLVMValueRef i32_0;
- LLVMValueRef i32_1;
- LLVMValueRef i64_0;
- LLVMValueRef i64_1;
- LLVMValueRef f16_0;
- LLVMValueRef f16_1;
- LLVMValueRef f32_0;
- LLVMValueRef f32_1;
- LLVMValueRef f64_0;
- LLVMValueRef f64_1;
- LLVMValueRef i1true;
- LLVMValueRef i1false;
-
- /* Since ac_nir_translate makes a local copy of ac_llvm_context, there
- * are two ac_llvm_contexts. Declare a pointer here, so that the control
- * flow stack is shared by both ac_llvm_contexts.
- */
- struct ac_llvm_flow_state *flow;
-
- unsigned range_md_kind;
- unsigned invariant_load_md_kind;
- unsigned uniform_md_kind;
- unsigned fpmath_md_kind;
- LLVMValueRef fpmath_md_2p5_ulp;
- LLVMValueRef empty_md;
-
- enum chip_class chip_class;
- enum radeon_family family;
-
- unsigned wave_size;
- unsigned ballot_mask_bits;
-
- LLVMValueRef lds;
-};
-
-void
-ac_llvm_context_init(struct ac_llvm_context *ctx,
- struct ac_llvm_compiler *compiler,
- enum chip_class chip_class, enum radeon_family family,
- enum ac_float_mode float_mode, unsigned wave_size,
- unsigned ballot_mask_bits);
-
-void
-ac_llvm_context_dispose(struct ac_llvm_context *ctx);
-
-int
-ac_get_llvm_num_components(LLVMValueRef value);
-
-int
-ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
-
-LLVMValueRef
-ac_llvm_extract_elem(struct ac_llvm_context *ac,
- LLVMValueRef value,
- int index);
-
-unsigned ac_get_type_size(LLVMTypeRef type);
-
-LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
-LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
-LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
-LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
-LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
-
-LLVMValueRef
-ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
- LLVMTypeRef return_type, LLVMValueRef *params,
- unsigned param_count, unsigned attrib_mask);
-
-void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize);
-
-LLVMValueRef
-ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
- unsigned count_incoming, LLVMValueRef *values,
- LLVMBasicBlockRef *blocks);
-
-void ac_build_s_barrier(struct ac_llvm_context *ctx);
-void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
- LLVMValueRef *pvgpr);
-
-LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx);
-
-LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
-LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
- LLVMValueRef value);
-
-LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-LLVMValueRef
-ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
- unsigned value_count, unsigned component);
-
-LLVMValueRef
-ac_build_gather_values_extended(struct ac_llvm_context *ctx,
- LLVMValueRef *values,
- unsigned value_count,
- unsigned value_stride,
- bool load,
- bool always_vector);
-LLVMValueRef
-ac_build_gather_values(struct ac_llvm_context *ctx,
- LLVMValueRef *values,
- unsigned value_count);
-
-LLVMValueRef
-ac_extract_components(struct ac_llvm_context *ctx,
- LLVMValueRef value,
- unsigned start,
- unsigned channels);
-
-LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
- LLVMValueRef value,
- unsigned num_channels);
-LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-LLVMValueRef
-ac_build_fdiv(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef den);
-
-LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef pre_shift,
- LLVMValueRef post_shift,
- LLVMValueRef increment);
-LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef pre_shift,
- LLVMValueRef post_shift,
- LLVMValueRef increment);
-LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
- LLVMValueRef num,
- LLVMValueRef multiplier,
- LLVMValueRef post_shift);
-
-void
-ac_prepare_cube_coords(struct ac_llvm_context *ctx,
- bool is_deriv, bool is_array, bool is_lod,
- LLVMValueRef *coords_arg,
- LLVMValueRef *derivs_arg);
-
-
-LLVMValueRef
-ac_build_fs_interp(struct ac_llvm_context *ctx,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params,
- LLVMValueRef i,
- LLVMValueRef j);
-
-LLVMValueRef
-ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params,
- LLVMValueRef i,
- LLVMValueRef j);
-
-LLVMValueRef
-ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
- LLVMValueRef parameter,
- LLVMValueRef llvm_chan,
- LLVMValueRef attr_number,
- LLVMValueRef params);
-
-LLVMValueRef
-ac_build_gep_ptr(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr,
- LLVMValueRef index);
-
-LLVMValueRef
-ac_build_gep0(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr,
- LLVMValueRef index);
-LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMValueRef index);
-
-void
-ac_build_indexed_store(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index,
- LLVMValueRef value);
-
-LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
- LLVMValueRef index);
-LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index);
-LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index);
-LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
- LLVMValueRef base_ptr, LLVMValueRef index);
-
-void
-ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- unsigned num_channels,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned cache_policy,
- bool swizzle_enable_hint);
-
-void
-ac_build_buffer_store_format(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef data,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned num_channels,
- unsigned cache_policy);
-
-LLVMValueRef
-ac_build_buffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- int num_channels,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned inst_offset,
- unsigned cache_policy,
- bool can_speculate,
- bool allow_smem);
-
-LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned num_channels,
- unsigned cache_policy,
- bool can_speculate);
-
-LLVMValueRef
-ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned cache_policy);
-
-LLVMValueRef
-ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned cache_policy);
-
-LLVMValueRef
-ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate);
-
-LLVMValueRef
-ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy,
- bool can_speculate);
-
-/* For ac_build_fetch_format.
- *
- * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi).
- */
-enum {
- AC_FETCH_FORMAT_FLOAT = 0,
- AC_FETCH_FORMAT_FIXED,
- AC_FETCH_FORMAT_UNORM,
- AC_FETCH_FORMAT_SNORM,
- AC_FETCH_FORMAT_USCALED,
- AC_FETCH_FORMAT_SSCALED,
- AC_FETCH_FORMAT_UINT,
- AC_FETCH_FORMAT_SINT,
-};
-
-LLVMValueRef
-ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
- unsigned log_size,
- unsigned num_channels,
- unsigned format,
- bool reverse,
- bool known_aligned,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy,
- bool can_speculate);
-
-void
-ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy);
-
-void
-ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- unsigned cache_policy);
-
-void
-ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy);
-
-void
-ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vdata,
- LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- unsigned num_channels,
- unsigned dfmt,
- unsigned nfmt,
- unsigned cache_policy);
-
-LLVMValueRef
-ac_get_thread_id(struct ac_llvm_context *ctx);
-
-#define AC_TID_MASK_TOP_LEFT 0xfffffffc
-#define AC_TID_MASK_TOP 0xfffffffd
-#define AC_TID_MASK_LEFT 0xfffffffe
-
-LLVMValueRef
-ac_build_ddxy(struct ac_llvm_context *ctx,
- uint32_t mask,
- int idx,
- LLVMValueRef val);
-
-#define AC_SENDMSG_GS 2
-#define AC_SENDMSG_GS_DONE 3
-#define AC_SENDMSG_GS_ALLOC_REQ 9
-
-#define AC_SENDMSG_GS_OP_NOP (0 << 4)
-#define AC_SENDMSG_GS_OP_CUT (1 << 4)
-#define AC_SENDMSG_GS_OP_EMIT (2 << 4)
-#define AC_SENDMSG_GS_OP_EMIT_CUT (3 << 4)
-
-void ac_build_sendmsg(struct ac_llvm_context *ctx,
- uint32_t msg,
- LLVMValueRef wave_id);
-
-LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx,
- LLVMValueRef arg,
- LLVMTypeRef dst_type);
-
-LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
- LLVMValueRef arg,
- LLVMTypeRef dst_type);
-LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b);
-LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b);
-LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b);
-LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
- LLVMValueRef b);
-LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
-LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
-LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
-
-struct ac_export_args {
- LLVMValueRef out[4];
- unsigned target;
- unsigned enabled_channels;
- bool compr;
- bool done;
- bool valid_mask;
-};
-
-void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a);
-
-void ac_build_export_null(struct ac_llvm_context *ctx);
-
-enum ac_image_opcode {
- ac_image_sample,
- ac_image_gather4,
- ac_image_load,
- ac_image_load_mip,
- ac_image_store,
- ac_image_store_mip,
- ac_image_get_lod,
- ac_image_get_resinfo,
- ac_image_atomic,
- ac_image_atomic_cmpswap,
-};
-
-enum ac_atomic_op {
- ac_atomic_swap,
- ac_atomic_add,
- ac_atomic_sub,
- ac_atomic_smin,
- ac_atomic_umin,
- ac_atomic_smax,
- ac_atomic_umax,
- ac_atomic_and,
- ac_atomic_or,
- ac_atomic_xor,
- ac_atomic_inc_wrap,
- ac_atomic_dec_wrap,
-};
-
-/* These cache policy bits match the definitions used by the LLVM intrinsics. */
-enum ac_image_cache_policy {
- ac_glc = 1 << 0, /* per-CU cache control */
- ac_slc = 1 << 1, /* global L2 cache control */
- ac_dlc = 1 << 2, /* per-shader-array cache control */
-};
-
-struct ac_image_args {
- enum ac_image_opcode opcode : 4;
- enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
- enum ac_image_dim dim : 3;
- unsigned dmask : 4;
- unsigned cache_policy : 3;
- bool unorm : 1;
- bool level_zero : 1;
- unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
-
- LLVMValueRef resource;
- LLVMValueRef sampler;
- LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
- LLVMValueRef offset;
- LLVMValueRef bias;
- LLVMValueRef compare;
- LLVMValueRef derivs[6];
- LLVMValueRef coords[4];
- LLVMValueRef lod; // also used by ac_image_get_resinfo
-};
-
-LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
- struct ac_image_args *a);
-LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc);
-LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2]);
-LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2], unsigned bits, bool hi);
-LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
- LLVMValueRef args[2], unsigned bits, bool hi);
-LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
-void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
-LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
- LLVMValueRef offset, LLVMValueRef width,
- bool is_signed);
-LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
- LLVMValueRef s1, LLVMValueRef s2);
-LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
- LLVMValueRef s1, LLVMValueRef s2);
-
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
-
-LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
-
-LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
- LLVMValueRef src1, LLVMValueRef src2,
- unsigned bitsize);
-
-LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
-
-LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
-
-LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
-
-LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
- LLVMValueRef src0);
-
-void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
- LLVMValueRef main_fn,
- uint8_t *vs_output_param_offset,
- uint32_t num_outputs,
- uint8_t *num_param_exports);
-void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
-
-void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
-LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
- LLVMValueRef dw_addr);
-void ac_lds_store(struct ac_llvm_context *ctx,
- LLVMValueRef dw_addr, LLVMValueRef value);
-
-LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
- LLVMTypeRef dst_type,
- LLVMValueRef src0);
-
-LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
-LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
-
-void ac_build_bgnloop(struct ac_llvm_context *ctx, int lable_id);
-void ac_build_break(struct ac_llvm_context *ctx);
-void ac_build_continue(struct ac_llvm_context *ctx);
-void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
-void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
-void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
-void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
-void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
- int lable_id);
-void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
- int lable_id);
-
-LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
- const char *name);
-LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
- const char *name);
-
-LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMTypeRef type);
-
-LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
- unsigned count);
-
-LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
- unsigned rshift, unsigned bitwidth);
-
-void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
- LLVMValueRef *addr, bool is_array_tex);
-
-LLVMValueRef
-ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
-
-LLVMValueRef
-ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
-
-LLVMValueRef
-ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
-
-LLVMValueRef
-ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
-
-LLVMValueRef
-ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
-
-LLVMValueRef
-ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
-
-LLVMValueRef
-ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
-
-/**
- * Common arguments for a scan/reduce operation that accumulates per-wave
- * values across an entire workgroup, while respecting the order of waves.
- */
-struct ac_wg_scan {
- bool enable_reduce;
- bool enable_exclusive;
- bool enable_inclusive;
- nir_op op;
- LLVMValueRef src; /* clobbered! */
- LLVMValueRef result_reduce;
- LLVMValueRef result_exclusive;
- LLVMValueRef result_inclusive;
- LLVMValueRef extra;
- LLVMValueRef waveidx;
- LLVMValueRef numwaves; /* only needed for "reduce" operations */
-
- /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
- LLVMValueRef scratch;
- unsigned maxwaves;
-};
-
-void
-ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-
-void
-ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-void
-ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
-
-LLVMValueRef
-ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
- unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
-
-LLVMValueRef
-ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
-
-LLVMValueRef
-ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
-
-LLVMValueRef
-ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
- unsigned bitsize);
-
-LLVMValueRef
-ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
-
-LLVMValueRef
-ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
-
-LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
- LLVMValueRef *args, unsigned num_args);
-
-LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
- LLVMValueRef ptr, LLVMValueRef val,
- const char *sync_scope);
-
-LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
- LLVMValueRef cmp, LLVMValueRef val,
- const char *sync_scope);
-
-void
-ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
- LLVMValueRef stencil, LLVMValueRef samplemask,
- struct ac_export_args *args);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+++ /dev/null
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-
-#include "ac_llvm_cull.h"
-#include <llvm-c/Core.h>
-
-/* Classification of the W components of a triangle's three vertices,
- * filled in by ac_analyze_position_w(). Every member is an i1 value.
- */
-struct ac_position_w_info {
- /* If a primitive intersects the W=0 plane, it causes a reflection
- * of the determinant used for face culling. Every vertex behind
- * the W=0 plane negates the determinant, so having 2 vertices behind
- * the plane has no effect. This is i1 true if the determinant should be
- * negated.
- */
- LLVMValueRef w_reflection;
-
- /* If we simplify the "-w <= p <= w" view culling equation, we get
- * "-w <= w", which can't be satisfied when w is negative.
- * In perspective projection, a negative W means that the primitive
- * is behind the viewer, but the equation is independent of the type
- * of projection.
- *
- * w_accepted is false when all W are negative and therefore
- * the primitive is invisible.
- */
- LLVMValueRef w_accepted;
-
- /* all_w_positive is computed as NOT any_w_negative, so it is true when
- * no vertex compares as W < 0 (an ordered compare; W == 0 and NaN
- * therefore do not count as negative).
- */
- LLVMValueRef all_w_positive;
- /* True if at least one of the three vertices has W < 0. */
- LLVMValueRef any_w_negative;
-};
-
-/* Inspect the W component (pos[i][3]) of all three vertices and fill in *w.
- *
- * For each vertex an ordered "W < 0" compare is emitted and folded into three
- * running i1 values: the XOR of the compares (w_reflection), their OR
- * (any_w_negative) and their AND (used to derive w_accepted).
- */
-static void ac_analyze_position_w(struct ac_llvm_context *ctx,
-				  LLVMValueRef pos[3][4],
-				  struct ac_position_w_info *w)
-{
-	LLVMBuilderRef b = ctx->builder;
-	LLVMValueRef reflect = ctx->i1false;
-	LLVMValueRef any_neg = ctx->i1false;
-	LLVMValueRef all_neg = ctx->i1true;
-
-	for (unsigned vtx = 0; vtx < 3; vtx++) {
-		LLVMValueRef w_is_neg =
-			LLVMBuildFCmp(b, LLVMRealOLT, pos[vtx][3], ctx->f32_0, "");
-
-		/* Each negative W flips the reflection state once. */
-		reflect = LLVMBuildXor(b, reflect, w_is_neg, "");
-		any_neg = LLVMBuildOr(b, any_neg, w_is_neg, "");
-		all_neg = LLVMBuildAnd(b, all_neg, w_is_neg, "");
-	}
-
-	w->w_reflection = reflect;
-	w->any_w_negative = any_neg;
-	w->all_w_positive = LLVMBuildNot(b, any_neg, "");
-	w->w_accepted = LLVMBuildNot(b, all_neg, "");
-}
-
-/* Front/back face and zero-area culling.
- *
- * Returns an i1 that is true if the primitive is accepted. With both
- * cull_front and cull_back set nothing can be accepted (constant false);
- * with no option set everything is accepted (constant true) and no
- * instructions are emitted.
- */
-static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
-				 LLVMValueRef pos[3][4],
-				 struct ac_position_w_info *w,
-				 bool cull_front,
-				 bool cull_back,
-				 bool cull_zero_area)
-{
-	LLVMBuilderRef b = ctx->builder;
-
-	if (cull_front && cull_back)
-		return ctx->i1false;
-
-	if (!(cull_front || cull_back || cull_zero_area))
-		return ctx->i1true;
-
-	/* 2D determinant of the triangle from the X/Y components. Its sign
-	 * encodes the facing; a value of 0 means the triangle area is 0.
-	 */
-	LLVMValueRef dx20 = LLVMBuildFSub(b, pos[2][0], pos[0][0], "");
-	LLVMValueRef dy10 = LLVMBuildFSub(b, pos[1][1], pos[0][1], "");
-	LLVMValueRef dx01 = LLVMBuildFSub(b, pos[0][0], pos[1][0], "");
-	LLVMValueRef dy02 = LLVMBuildFSub(b, pos[0][1], pos[2][1], "");
-	LLVMValueRef prod0 = LLVMBuildFMul(b, dx20, dy10, "");
-	LLVMValueRef prod1 = LLVMBuildFMul(b, dx01, dy02, "");
-	LLVMValueRef det = LLVMBuildFSub(b, prod0, prod1, "");
-
-	/* Negative W negates the determinant. */
-	LLVMValueRef neg_det = LLVMBuildFNeg(b, det, "");
-	det = LLVMBuildSelect(b, w->w_reflection, neg_det, det, "");
-
-	/* Pick the comparison against 0. Strict predicates additionally
-	 * reject det == 0 when zero-area culling is requested.
-	 */
-	LLVMRealPredicate pred;
-	if (cull_front)
-		pred = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
-	else if (cull_back)
-		pred = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
-	else
-		pred = LLVMRealONE; /* only cull_zero_area can be set here */
-
-	return LLVMBuildFCmp(b, pred, det, ctx->f32_0, "");
-}
-
-/* Perform view culling and small primitive elimination.
- *
- * Returns an i1 that is true only if initially_accepted is true and the
- * primitive passes every enabled bounding-box test. The tests are skipped
- * at run time (and initially_accepted is returned) when the primitive has
- * already been rejected or when any W is negative.
- *
- * \param pos                  Vertex positions 3x vec4.
- * \param initially_accepted   i1 verdict of the preceding culling stages.
- * \param w                    Result of ac_analyze_position_w().
- * \param vp_scale             Viewport scale XY (small primitive test only).
- * \param vp_translate         Viewport translation XY (small primitive test only).
- * \param small_prim_precision Amount by which the screen-space bounding box
- *                             is grown before rounding.
- * \param cull_view_xy         Reject if the bbox is outside [-1, 1] in X or Y.
- * \param cull_view_near_z     Reject if the bbox max Z is below the near limit
- *                             (0 with use_halfz_clip_space, otherwise -1).
- * \param cull_view_far_z      Reject if the bbox min Z is above 1.
- * \param cull_small_prims     Reject if the bbox does not cover a sample point.
- */
-static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
-			      LLVMValueRef pos[3][4],
-			      LLVMValueRef initially_accepted,
-			      struct ac_position_w_info *w,
-			      LLVMValueRef vp_scale[2],
-			      LLVMValueRef vp_translate[2],
-			      LLVMValueRef small_prim_precision,
-			      bool cull_view_xy,
-			      bool cull_view_near_z,
-			      bool cull_view_far_z,
-			      bool cull_small_prims,
-			      bool use_halfz_clip_space)
-{
-	LLVMBuilderRef builder = ctx->builder;
-
-	/* Nothing to test: pass the incoming verdict through unchanged.
-	 * Returning constant true here would discard the W and face culling
-	 * results that the caller has folded into initially_accepted.
-	 */
-	if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
-		return initially_accepted;
-
-	/* Skip the culling if the primitive has already been rejected or
-	 * if any W is negative. The bounding box culling doesn't work when
-	 * W is negative.
-	 */
-	LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
-					 w->all_w_positive, "");
-	/* The result lives in an alloca so that the skipped path keeps
-	 * initially_accepted and the tested path can overwrite it.
-	 */
-	LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
-	LLVMBuildStore(builder, initially_accepted, accepted_var);
-
-	ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
-	{
-		LLVMValueRef bbox_min[3], bbox_max[3];
-		LLVMValueRef accepted = initially_accepted;
-
-		/* Compute the primitive bounding box for easy culling. */
-		for (unsigned chan = 0; chan < 3; chan++) {
-			bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
-			bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
-
-			bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
-			bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
-		}
-
-		/* View culling. */
-		if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
-			for (unsigned chan = 0; chan < 3; chan++) {
-				LLVMValueRef visible;
-
-				/* Lower bound: X/Y use -1, Z uses 0 or -1. */
-				if ((cull_view_xy && chan <= 1) ||
-				    (cull_view_near_z && chan == 2)) {
-					float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
-					visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
-								LLVMConstReal(ctx->f32, t), "");
-					accepted = LLVMBuildAnd(builder, accepted, visible, "");
-				}
-
-				/* Upper bound: always 1. */
-				if ((cull_view_xy && chan <= 1) ||
-				    (cull_view_far_z && chan == 2)) {
-					visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
-								ctx->f32_1, "");
-					accepted = LLVMBuildAnd(builder, accepted, visible, "");
-				}
-			}
-		}
-
-		/* Small primitive elimination. */
-		if (cull_small_prims) {
-			/* Assuming a sample position at (0.5, 0.5), if we round
-			 * the bounding box min/max extents and the results of
-			 * the rounding are equal in either the X or Y direction,
-			 * the bounding box does not intersect the sample.
-			 *
-			 * See these GDC slides for pictures:
-			 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
-			 */
-			LLVMValueRef min, max, not_equal[2], visible;
-
-			for (unsigned chan = 0; chan < 2; chan++) {
-				/* Convert the position to screen-space coordinates. */
-				min = ac_build_fmad(ctx, bbox_min[chan],
-						    vp_scale[chan], vp_translate[chan]);
-				max = ac_build_fmad(ctx, bbox_max[chan],
-						    vp_scale[chan], vp_translate[chan]);
-				/* Scale the bounding box according to the precision of
-				 * the rasterizer and the number of MSAA samples. */
-				min = LLVMBuildFSub(builder, min, small_prim_precision, "");
-				max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
-
-				/* Determine if the bbox intersects the sample point.
-				 * It also works for MSAA, but vp_scale, vp_translate,
-				 * and small_prim_precision are computed differently.
-				 */
-				min = ac_build_round(ctx, min);
-				max = ac_build_round(ctx, max);
-				not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
-			}
-			visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
-			accepted = LLVMBuildAnd(builder, accepted, visible, "");
-		}
-
-		LLVMBuildStore(builder, accepted, accepted_var);
-	}
-	ac_build_endif(ctx, 10000000);
-
-	return LLVMBuildLoad(builder, accepted_var, "");
-}
-
-/**
- * Emit the culling code for one triangle and return an i1 that is true
- * if the primitive is accepted (not culled).
- *
- * The stages run in this order: W culling, face culling, then view (bounding
- * box) culling together with small primitive elimination.
- *
- * \param pos                   Vertex positions 3x vec4
- * \param initially_accepted    AND'ed with the result. Some computations can be
- *                              skipped if this is false.
- * \param vp_scale              Viewport scale XY.
- *                              For MSAA, multiply them by the number of samples.
- * \param vp_translate          Viewport translation XY.
- *                              For MSAA, multiply them by the number of samples.
- * \param small_prim_precision  Precision of small primitive culling. This should
- *                              be the same as or greater than the precision of
- *                              the rasterizer. Set to num_samples / 2^subpixel_bits.
- *                              subpixel_bits are defined by the quantization mode.
- * \param options               See ac_cull_options.
- */
-LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
-			      LLVMValueRef pos[3][4],
-			      LLVMValueRef initially_accepted,
-			      LLVMValueRef vp_scale[2],
-			      LLVMValueRef vp_translate[2],
-			      LLVMValueRef small_prim_precision,
-			      struct ac_cull_options *options)
-{
-	struct ac_position_w_info w_info;
-	LLVMValueRef result;
-
-	ac_analyze_position_w(ctx, pos, &w_info);
-
-	/* Stage 1: W culling (optional), combined with the caller's verdict. */
-	if (options->cull_w)
-		result = w_info.w_accepted;
-	else
-		result = ctx->i1true;
-	result = LLVMBuildAnd(ctx->builder, result, initially_accepted, "");
-
-	/* Stage 2: face culling. */
-	LLVMValueRef face_ok = ac_cull_face(ctx, pos, &w_info,
-					    options->cull_front,
-					    options->cull_back,
-					    options->cull_zero_area);
-	result = LLVMBuildAnd(ctx->builder, result, face_ok, "");
-
-	/* Stage 3: view culling and small primitive elimination. */
-	return cull_bbox(ctx, pos, result, &w_info, vp_scale, vp_translate,
-			 small_prim_precision,
-			 options->cull_view_xy,
-			 options->cull_view_near_z,
-			 options->cull_view_far_z,
-			 options->cull_small_prims,
-			 options->use_halfz_clip_space);
-}
+++ /dev/null
-/*
- * Copyright 2019 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-
-#ifndef AC_LLVM_CULL_H
-#define AC_LLVM_CULL_H
-
-#include "ac_llvm_build.h"
-
-/* Compile-time switches selecting which tests ac_cull_triangle() emits. */
-struct ac_cull_options {
- /* In general, I recommend setting all to true except view Z culling,
- * which isn't so effective because W culling is cheaper and partially
- * replaces near Z culling, and you don't need to set Position.z
- * if Z culling is disabled.
- *
- * If something doesn't work, turn some of these off to find out what.
- */
- bool cull_front; /* accept only a positive (or zero) determinant */
- bool cull_back; /* accept only a negative (or zero) determinant */
- bool cull_view_xy; /* cull if the bounding box is outside [-1, 1] in X or Y */
- bool cull_view_near_z; /* cull if the bounding box is in front of the near Z limit */
- bool cull_view_far_z; /* cull if the bounding box min Z is greater than 1 */
- bool cull_small_prims; /* cull if the bounding box covers no sample point */
- bool cull_zero_area; /* cull if the determinant is 0 */
- bool cull_w; /* cull primitives with all W < 0 */
-
- /* Near Z limit for cull_view_near_z: 0 if true, -1 if false. */
- bool use_halfz_clip_space;
-};
-
-/* Emit culling code for one triangle and return an i1 that is true if the
- * primitive is accepted (not culled). The parameters are documented at the
- * definition in ac_llvm_cull.c.
- */
-LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
- LLVMValueRef pos[3][4],
- LLVMValueRef initially_accepted,
- LLVMValueRef vp_scale[2],
- LLVMValueRef vp_translate[2],
- LLVMValueRef small_prim_precision,
- struct ac_cull_options *options);
-
-#endif
+++ /dev/null
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-
-#include <cstring>
-#include <new>
-
-#include "ac_binary.h"
-#include "ac_llvm_util.h"
-#include "ac_llvm_build.h"
-
-#include "util/macros.h"
-
-#include <llvm-c/Core.h>
-#include <llvm/Target/TargetMachine.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
-#include <llvm/Transforms/IPO.h>
-
-#include <llvm/IR/LegacyPassManager.h>
-
-/* Attach a "dereferenceable(bytes)" attribute to the function argument
- * \p val. \p val must wrap an llvm::Argument.
- */
-void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
-{
-	llvm::Argument *arg = llvm::unwrap<llvm::Argument>(val);
-	llvm::Attribute deref =
-		llvm::Attribute::getWithDereferenceableBytes(arg->getContext(), bytes);
-
-	arg->addAttr(deref);
-}
-
-/* Return true if the function argument \p arg carries the "inreg"
- * attribute in its parent function's attribute list.
- */
-bool ac_is_sgpr_param(LLVMValueRef arg)
-{
-	llvm::Argument *param = llvm::unwrap<llvm::Argument>(arg);
-	/* The attribute list is queried at the argument number plus one. */
-	const unsigned attr_index = param->getArgNo() + 1;
-	llvm::AttributeList attrs = param->getParent()->getAttributes();
-
-	return attrs.hasAttribute(attr_index, llvm::Attribute::InReg);
-}
-
-/* Thin wrapper: forwards \p call to LLVMGetCalledValue() and returns its
- * result unchanged.
- */
-LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
-{
- return LLVMGetCalledValue(call);
-}
-
-/* Return true if the value kind of \p v is LLVMFunctionValueKind. */
-bool ac_llvm_is_function(LLVMValueRef v)
-{
- return LLVMGetValueKind(v) == LLVMFunctionValueKind;
-}
-
-/* Create an empty module named "mesa-shader" in \p ctx and give it the
- * target triple and data layout of the target machine \p tm.
- */
-LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
-{
-	llvm::TargetMachine *machine = reinterpret_cast<llvm::TargetMachine*>(tm);
-	LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
-	llvm::Module *mod = llvm::unwrap(module);
-
-	mod->setTargetTriple(machine->getTargetTriple().getTriple());
-	mod->setDataLayout(machine->createDataLayout());
-	return module;
-}
-
-/* Create an IR builder in \p ctx and configure its fast-math flags from
- * \p float_mode:
- *   AC_FLOAT_MODE_DEFAULT                  - flags are left untouched
- *   AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH  - only "no signed zeros" is set
- *   AC_FLOAT_MODE_UNSAFE_FP_MATH           - all fast-math flags are set
- */
-LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
-				 enum ac_float_mode float_mode)
-{
-	LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
-	llvm::FastMathFlags fmf;
-	bool override_flags = false;
-
-	switch (float_mode) {
-	case AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH:
-		fmf.setNoSignedZeros();
-		override_flags = true;
-		break;
-	case AC_FLOAT_MODE_UNSAFE_FP_MATH:
-		fmf.setFast();
-		override_flags = true;
-		break;
-	case AC_FLOAT_MODE_DEFAULT:
-		break;
-	}
-
-	if (override_flags)
-		llvm::unwrap(builder)->setFastMathFlags(fmf);
-
-	return builder;
-}
-
-/* Allocate an llvm::TargetLibraryInfoImpl for \p triple and return it as an
- * opaque C handle. Release it with ac_dispose_target_library_info().
- */
-LLVMTargetLibraryInfoRef
-ac_create_target_library_info(const char *triple)
-{
-	llvm::TargetLibraryInfoImpl *impl =
-		new llvm::TargetLibraryInfoImpl(llvm::Triple(triple));
-
-	return reinterpret_cast<LLVMTargetLibraryInfoRef>(impl);
-}
-
-/* Destroy a handle obtained from ac_create_target_library_info(). */
-void
-ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
-{
-	llvm::TargetLibraryInfoImpl *impl =
-		reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
-
-	delete impl;
-}
-
-/* A raw_pwrite_stream that accumulates its output in a realloc()ed buffer,
- * so that the result can be handed to C code and released with free().
- *
- * The stream is unbuffered: every write goes straight to write_impl().
- */
-struct raw_memory_ostream : public llvm::raw_pwrite_stream {
-	char *buffer;   /* malloc-family storage, NULL until the first write */
-	size_t written; /* number of valid bytes in buffer */
-	size_t bufsize; /* allocated size of buffer */
-
-	raw_memory_ostream()
-		: buffer(NULL), written(0), bufsize(0)
-	{
-		SetUnbuffered();
-	}
-
-	~raw_memory_ostream()
-	{
-		free(buffer);
-	}
-
-	/* Forget the written data but keep the allocation for reuse. */
-	void clear()
-	{
-		written = 0;
-	}
-
-	/* Transfer ownership of the buffer to the caller and reset the
-	 * stream to its empty state.
-	 */
-	void take(char *&out_buffer, size_t &out_size)
-	{
-		out_buffer = buffer;
-		out_size = written;
-
-		buffer = NULL;
-		bufsize = 0;
-		written = 0;
-	}
-
-	void flush() = delete;
-
-	/* Append size bytes, growing the buffer if necessary. Aborts on size
-	 * overflow or allocation failure.
-	 */
-	void write_impl(const char *ptr, size_t size) override
-	{
-		const size_t needed = written + size;
-
-		if (unlikely(needed < written))
-			abort();
-
-		if (needed > bufsize) {
-			/* Grow to at least 1024 bytes, the required size, or
-			 * 4/3 of the current size, whichever is largest.
-			 */
-			bufsize = MAX3(1024, needed, bufsize / 3 * 4);
-			buffer = (char *)realloc(buffer, bufsize);
-			if (!buffer) {
-				fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
-				abort();
-			}
-		}
-
-		memcpy(buffer + written, ptr, size);
-		written = needed;
-	}
-
-	/* Overwrite already-written bytes; the range must lie entirely within
-	 * the data written so far.
-	 */
-	void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
-	{
-		assert(offset == (size_t)offset &&
-		       offset + size >= offset && offset + size <= written);
-		memcpy(buffer + offset, ptr, size);
-	}
-
-	uint64_t current_pos() const override
-	{
-		return written;
-	}
-};
-
-/* The LLVM compiler is represented as a pass manager containing passes for
- * optimizations, instruction selection, and code generation.
- *
- * ac_create_llvm_passes() hands "ostream" to the passes added to "passmgr".
- * C++ destroys members in reverse declaration order, so "passmgr" is
- * destroyed before the stream it was given.
- */
-struct ac_compiler_passes {
- raw_memory_ostream ostream; /* ELF shader binary stream */
- llvm::legacy::PassManager passmgr; /* list of passes */
-};
-
-/* Create the code generation pipeline for the target machine \p tm.
- *
- * The returned object owns a pass manager set up to emit an object file
- * into its internal memory stream. Returns NULL if the allocation fails or
- * if the target machine cannot emit object files. Release the result with
- * ac_destroy_llvm_passes().
- */
-struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
-{
-	/* Plain "new" never returns NULL (it throws std::bad_alloc), which
-	 * made the check below dead code. The nothrow form gives the NULL
-	 * return that the check expects.
-	 */
-	struct ac_compiler_passes *p = new (std::nothrow) ac_compiler_passes();
-	if (!p)
-		return NULL;
-
-	llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
-
-	/* addPassesToEmitFile() returns true on failure. */
-	if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
-				    nullptr,
-				    llvm::TargetMachine::CGFT_ObjectFile)) {
-		fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
-		delete p;
-		return NULL;
-	}
-	return p;
-}
-
-/* Destroy an object created by ac_create_llvm_passes(). Passing NULL is
- * allowed because the body is a plain "delete".
- */
-void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
-{
- delete p;
-}
-
-/* Run the passes in \p p on \p module and hand the emitted ELF image to the
- * caller through \p pelf_buffer and \p pelf_size. Ownership of the buffer
- * moves to the caller; it is allocated with realloc() by the stream.
- *
- * The return value is meant to be false on failure.
- * NOTE(review): the current body always returns true; the result of
- * passmgr.run() is ignored and no failure is reported from here.
- */
-bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size)
-{
- p->passmgr.run(*llvm::unwrap(module));
- p->ostream.take(*pelf_buffer, *pelf_size);
- return true;
-}
-
-/* Append the pass returned by llvm::createBarrierNoopPass() to \p passmgr. */
-void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
-{
- llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
-}
-
-/* Turn on global instruction selection for the target machine \p tm.
- * \p tm is dereferenced unconditionally, so it must not be NULL.
- */
-void ac_enable_global_isel(LLVMTargetMachineRef tm)
-{
- reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
-}
-
-/* Emit an atomic read-modify-write instruction with a named sync scope.
- *
- * \param op          Operation, translated to the matching C++ BinOp.
- * \param ptr         Address operand.
- * \param val         Value operand.
- * \param sync_scope  Sync scope name; it is registered in ctx->context if
- *                    it is not known yet.
- *
- * The ordering is always sequentially consistent.
- */
-LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
-				 LLVMValueRef ptr, LLVMValueRef val,
-				 const char *sync_scope)
-{
-	llvm::AtomicRMWInst::BinOp binop;
-
-	switch (op) {
-	case LLVMAtomicRMWBinOpXchg: binop = llvm::AtomicRMWInst::Xchg; break;
-	case LLVMAtomicRMWBinOpAdd:  binop = llvm::AtomicRMWInst::Add;  break;
-	case LLVMAtomicRMWBinOpSub:  binop = llvm::AtomicRMWInst::Sub;  break;
-	case LLVMAtomicRMWBinOpAnd:  binop = llvm::AtomicRMWInst::And;  break;
-	case LLVMAtomicRMWBinOpNand: binop = llvm::AtomicRMWInst::Nand; break;
-	case LLVMAtomicRMWBinOpOr:   binop = llvm::AtomicRMWInst::Or;   break;
-	case LLVMAtomicRMWBinOpXor:  binop = llvm::AtomicRMWInst::Xor;  break;
-	case LLVMAtomicRMWBinOpMax:  binop = llvm::AtomicRMWInst::Max;  break;
-	case LLVMAtomicRMWBinOpMin:  binop = llvm::AtomicRMWInst::Min;  break;
-	case LLVMAtomicRMWBinOpUMax: binop = llvm::AtomicRMWInst::UMax; break;
-	case LLVMAtomicRMWBinOpUMin: binop = llvm::AtomicRMWInst::UMin; break;
-	default:
-		unreachable(!"invalid LLVMAtomicRMWBinOp");
-		break;
-	}
-
-	unsigned scope_id =
-		llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
-	llvm::Value *rmw = llvm::unwrap(ctx->builder)->CreateAtomicRMW(
-		binop, llvm::unwrap(ptr), llvm::unwrap(val),
-		llvm::AtomicOrdering::SequentiallyConsistent, scope_id);
-
-	return llvm::wrap(rmw);
-}
-
-/* Emit an atomic compare-and-exchange with a named sync scope.
- *
- * \p cmp is the expected value and \p val the replacement. Both the success
- * and the failure ordering are sequentially consistent. \p sync_scope is
- * registered in ctx->context if it is not known yet.
- */
-LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
-				      LLVMValueRef cmp, LLVMValueRef val,
-				      const char *sync_scope)
-{
-	const llvm::AtomicOrdering seq_cst =
-		llvm::AtomicOrdering::SequentiallyConsistent;
-	unsigned scope_id =
-		llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
-	llvm::Value *xchg = llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
-		llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
-		seq_cst, seq_cst, scope_id);
-
-	return llvm::wrap(xchg);
-}
+++ /dev/null
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
-#include "ac_llvm_util.h"
-#include "ac_llvm_build.h"
-#include "util/bitscan.h"
-#include <llvm-c/Core.h>
-#include <llvm-c/Support.h>
-#include <llvm-c/Transforms/IPO.h>
-#include <llvm-c/Transforms/Scalar.h>
-#include <llvm-c/Transforms/Utils.h>
-#include "c11/threads.h"
-#include "gallivm/lp_bld_misc.h"
-#include "util/u_math.h"
-
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-/* One-time process-wide LLVM setup: register the AMDGPU backend components
- * and pass a fixed set of options to LLVM's command-line parser.
- * Meant to be run through ac_init_llvm_once().
- */
-static void ac_init_llvm_target()
-{
-	/* "mesa" is the prefix for error messages.
-	 *
-	 * -simplifycfg-sink-common=false is a workaround for a bug in
-	 * llvm 4.0 that causes image intrinsics to disappear.
-	 * https://reviews.llvm.org/D26348
-	 *
-	 * -global-isel-abort=2 is a no-op unless global isel has been enabled.
-	 * This option tells the backend to fall-back to SelectionDAG and print
-	 * a diagnostic message if global isel fails.
-	 */
-	const char *llvm_args[] = {
-		"mesa",
-		"-simplifycfg-sink-common=false",
-		"-global-isel-abort=2",
-#if LLVM_VERSION_MAJOR >= 10
-		/* Atomic optimizations require LLVM 10.0 for gfx10 support. */
-		"-amdgpu-atomic-optimizations=true",
-#endif
-	};
-
-	LLVMInitializeAMDGPUTargetInfo();
-	LLVMInitializeAMDGPUTarget();
-	LLVMInitializeAMDGPUTargetMC();
-	LLVMInitializeAMDGPUAsmPrinter();
-
-	/* For inline assembly. */
-	LLVMInitializeAMDGPUAsmParser();
-
-	/* For ACO disassembly. */
-	LLVMInitializeAMDGPUDisassembler();
-
-	LLVMParseCommandLineOptions(ARRAY_SIZE(llvm_args), llvm_args, NULL);
-}
-
-/* Guards ac_init_llvm_target() so that it runs at most once per process. */
-static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
-
-/* Initialize the AMDGPU LLVM backend through call_once(); later calls do
- * not run the initialization again.
- */
-void ac_init_llvm_once(void)
-{
- call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
-}
-
-/* Look up the LLVM target for \p triple.
- *
- * Returns the target, or NULL after printing a diagnostic to stderr if
- * LLVM does not know the triple.
- */
-static LLVMTargetRef ac_get_llvm_target(const char *triple)
-{
-	LLVMTargetRef target = NULL;
-	char *err_message = NULL;
-
-	/* LLVMGetTargetFromTriple() returns non-zero on failure. */
-	if (!LLVMGetTargetFromTriple(triple, &target, &err_message))
-		return target;
-
-	fprintf(stderr, "Cannot find target for triple %s ", triple);
-	if (err_message)
-		fprintf(stderr, "%s\n", err_message);
-	LLVMDisposeMessage(err_message);
-
-	return NULL;
-}
-
-/* Map a radeon_family value to the processor name string that is passed to
- * LLVM. Families that are not listed yield an empty string.
- */
-const char *ac_get_llvm_processor_name(enum radeon_family family)
-{
-	switch (family) {
-	case CHIP_TAHITI:    return "tahiti";
-	case CHIP_PITCAIRN:  return "pitcairn";
-	case CHIP_VERDE:     return "verde";
-	case CHIP_OLAND:     return "oland";
-	case CHIP_HAINAN:    return "hainan";
-	case CHIP_BONAIRE:   return "bonaire";
-	case CHIP_KABINI:    return "kabini";
-	case CHIP_KAVERI:    return "kaveri";
-	case CHIP_HAWAII:    return "hawaii";
-	case CHIP_TONGA:     return "tonga";
-	case CHIP_ICELAND:   return "iceland";
-	case CHIP_CARRIZO:   return "carrizo";
-	case CHIP_FIJI:      return "fiji";
-	case CHIP_STONEY:    return "stoney";
-	case CHIP_POLARIS10: return "polaris10";
-	/* These three families share one processor name. */
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS12:
-	case CHIP_VEGAM:     return "polaris11";
-	case CHIP_VEGA10:    return "gfx900";
-	case CHIP_RAVEN:     return "gfx902";
-	case CHIP_VEGA12:    return "gfx904";
-	case CHIP_VEGA20:    return "gfx906";
-	case CHIP_RAVEN2:
-	case CHIP_RENOIR:    return "gfx909";
-	case CHIP_ARCTURUS:  return "gfx908";
-	case CHIP_NAVI10:    return "gfx1010";
-	case CHIP_NAVI12:    return "gfx1011";
-	case CHIP_NAVI14:    return "gfx1012";
-	default:             return "";
-	}
-}
-
-/* Create an LLVM target machine for \p family.
- *
- * \param family      GPU family; must be CHIP_TAHITI or newer.
- * \param tm_options  Bitmask of ac_target_machine_options that selects the
- *                    triple and the feature string.
- * \param level       Code generation optimization level.
- * \param out_triple  If non-NULL, receives the triple string that was chosen
- *                    (a string literal; the caller must not free it).
- *
- * Returns NULL if the target lookup or the target machine creation fails.
- */
-static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
-						     enum ac_target_machine_options tm_options,
-						     LLVMCodeGenOptLevel level,
-						     const char **out_triple)
-{
-	assert(family >= CHIP_TAHITI);
-	char features[256];
-	const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
-	LLVMTargetRef target = ac_get_llvm_target(triple);
-
-	if (out_triple)
-		*out_triple = triple;
-
-	/* ac_get_llvm_target() has already printed a diagnostic. Passing a
-	 * NULL target to LLVMCreateTargetMachine() would crash.
-	 */
-	if (!target)
-		return NULL;
-
-	/* On Navi10 and newer, wave64 is requested explicitly unless the
-	 * caller asks for wave32.
-	 */
-	snprintf(features, sizeof(features),
-		 "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s",
-		 family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
-			 ",+wavefrontsize64,-wavefrontsize32" : "",
-		 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
-		 tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
-		 tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
-		 tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "",
-		 tm_options & AC_TM_NO_LOAD_STORE_OPT ? ",-load-store-opt" : "");
-
-	LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
-				     target,
-				     triple,
-				     ac_get_llvm_processor_name(family),
-				     features,
-				     level,
-				     LLVMRelocDefault,
-				     LLVMCodeModelDefault);
-
-	/* ac_enable_global_isel() dereferences the machine, so skip it when
-	 * creation failed.
-	 */
-	if (tm && (tm_options & AC_TM_ENABLE_GLOBAL_ISEL))
-		ac_enable_global_isel(tm);
-	return tm;
-}
-
-/* Build the pass manager holding the IR-level optimization pipeline.
- *
- * \param target_library_info  Optional; added to the pass manager if non-NULL.
- * \param check_ir             If true, a verifier pass runs first.
- *
- * Returns NULL if the pass manager cannot be created.
- */
-static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
-					    bool check_ir)
-{
-	LLVMPassManagerRef pm = LLVMCreatePassManager();
-
-	if (pm == NULL)
-		return NULL;
-
-	if (target_library_info != NULL) {
-		LLVMAddTargetLibraryInfo(target_library_info, pm);
-	}
-
-	if (check_ir) {
-		LLVMAddVerifierPass(pm);
-	}
-
-	LLVMAddAlwaysInlinerPass(pm);
-
-	/* Normally, the pass manager runs all passes on one function before
-	 * moving onto another. Adding a barrier no-op pass forces the pass
-	 * manager to run the inliner on all functions first, which makes sure
-	 * that the following passes are only run on the remaining non-inline
-	 * function, so it removes useless work done on dead inline functions.
-	 */
-	ac_llvm_add_barrier_noop_pass(pm);
-
-	/* This pass should eliminate all the load and store instructions. */
-	LLVMAddPromoteMemoryToRegisterPass(pm);
-	LLVMAddScalarReplAggregatesPass(pm);
-	LLVMAddLICMPass(pm);
-	LLVMAddAggressiveDCEPass(pm);
-	LLVMAddCFGSimplificationPass(pm);
-
-	/* This is recommended by the instruction combining pass. */
-	LLVMAddEarlyCSEMemSSAPass(pm);
-	LLVMAddInstructionCombiningPass(pm);
-
-	return pm;
-}
-
-/* Translate one ac_func_attr bit into the LLVM attribute name.
- * For an unknown value a message is printed to stderr and NULL is returned.
- */
-static const char *attr_to_str(enum ac_func_attr attr)
-{
-	const char *name;
-
-	switch (attr) {
-	case AC_FUNC_ATTR_ALWAYSINLINE:
-		name = "alwaysinline";
-		break;
-	case AC_FUNC_ATTR_INREG:
-		name = "inreg";
-		break;
-	case AC_FUNC_ATTR_NOALIAS:
-		name = "noalias";
-		break;
-	case AC_FUNC_ATTR_NOUNWIND:
-		name = "nounwind";
-		break;
-	case AC_FUNC_ATTR_READNONE:
-		name = "readnone";
-		break;
-	case AC_FUNC_ATTR_READONLY:
-		name = "readonly";
-		break;
-	case AC_FUNC_ATTR_WRITEONLY:
-		name = "writeonly";
-		break;
-	case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY:
-		name = "inaccessiblememonly";
-		break;
-	case AC_FUNC_ATTR_CONVERGENT:
-		name = "convergent";
-		break;
-	default:
-		fprintf(stderr, "Unhandled function attribute: %x\n", attr);
-		name = 0;
-		break;
-	}
-
-	return name;
-}
-
-/* Add the enum attribute \p attr at index \p attr_idx.
- *
- * \param ctx       LLVM context used to create the attribute.
- * \param function  Either a function or a call site; the matching LLVM-C
- *                  entry point is chosen with LLVMIsAFunction().
- * \param attr_idx  Attribute index passed through to LLVM unchanged.
- * \param attr      Exactly one ac_func_attr bit.
- *
- * An unknown \p attr is reported by attr_to_str() and then ignored.
- */
-void
-ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
-		     int attr_idx, enum ac_func_attr attr)
-{
-	const char *attr_name = attr_to_str(attr);
-
-	/* attr_to_str() returns NULL for values it does not know; calling
-	 * strlen() on that would dereference a NULL pointer.
-	 */
-	if (!attr_name)
-		return;
-
-	unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
-							   strlen(attr_name));
-	LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
-
-	if (LLVMIsAFunction(function))
-		LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
-	else
-		LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
-}
-
-/* Add every attribute set in \p attrib_mask to \p function at attribute
- * index -1. AC_FUNC_ATTR_NOUNWIND is always added, and the
- * AC_FUNC_ATTR_LEGACY marker bit is dropped because it is not an attribute.
- */
-void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
-			    unsigned attrib_mask)
-{
-	unsigned remaining = (attrib_mask | AC_FUNC_ATTR_NOUNWIND) & ~AC_FUNC_ATTR_LEGACY;
-
-	while (remaining != 0) {
-		/* u_bit_scan() is handed the mask by address and yields a bit
-		 * index for this iteration.
-		 */
-		int bit = u_bit_scan(&remaining);
-		enum ac_func_attr one_attr = 1u << bit;
-
-		ac_add_function_attr(ctx, function, -1, one_attr);
-	}
-}
-
-/* Print the textual IR of \p module to stderr. */
-void
-ac_dump_module(LLVMModuleRef module)
-{
-	char *ir_text = LLVMPrintModuleToString(module);
-
-	fputs(ir_text, stderr);
-	LLVMDisposeMessage(ir_text);
-}
-
-/* Set the target-dependent string attribute \p name on function \p F.
- * \p value is formatted as lowercase hexadecimal with a "0x" prefix.
- */
-void
-ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
-				     const char *name, unsigned value)
-{
-	char hex_value[16];
-
-	snprintf(hex_value, sizeof(hex_value), "0x%x", value);
-	LLVMAddTargetDependentFunctionAttr(F, name, hex_value);
-}
-
-/* Set "amdgpu-flat-work-group-size" on \p F to "size,size".
- * A size of 0 leaves the function untouched.
- */
-void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
-{
-	if (size != 0) {
-		char range[32];
-
-		snprintf(range, sizeof(range), "%u,%u", size, size);
-		LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", range);
-	}
-}
-
-/* Walk every instruction of \p function and sum up the sizes of all alloca
- * instructions. Each size is ac_get_type_size() of the allocated type
- * divided by 4, rounded up to the alloca's alignment.
- */
-unsigned
-ac_count_scratch_private_memory(LLVMValueRef function)
-{
-	unsigned total = 0;
-
-	for (LLVMBasicBlockRef block = LLVMGetFirstBasicBlock(function);
-	     block; block = LLVMGetNextBasicBlock(block)) {
-		for (LLVMValueRef insn = LLVMGetFirstInstruction(block);
-		     insn; insn = LLVMGetNextInstruction(insn)) {
-			if (LLVMGetInstructionOpcode(insn) == LLVMAlloca) {
-				LLVMTypeRef elem_type = LLVMGetElementType(LLVMTypeOf(insn));
-				/* No idea why LLVM aligns allocas to 4 elements. */
-				unsigned alignment = LLVMGetAlignment(insn);
-
-				total += align(ac_get_type_size(elem_type) / 4, alignment);
-			}
-		}
-	}
-
-	return total;
-}
-
-/* Initialize \p compiler for \p family.
- *
- * The structure is zeroed first. The default target machine is always
- * created; a low-optimization one is added for AC_TM_CREATE_LOW_OPT and a
- * wave32 one for CHIP_NAVI10 and newer. The target library info and the
- * pass manager are created last.
- *
- * Returns false on failure. If anything after the first target machine
- * fails, everything created so far is destroyed again.
- */
-bool
-ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
-		      enum radeon_family family,
-		      enum ac_target_machine_options tm_options)
-{
-	const bool want_low_opt = (tm_options & AC_TM_CREATE_LOW_OPT) != 0;
-	const bool check_ir = tm_options & AC_TM_CHECK_IR;
-	const char *main_triple;
-
-	memset(compiler, 0, sizeof(*compiler));
-
-	compiler->tm = ac_create_target_machine(family, tm_options,
-						LLVMCodeGenLevelDefault,
-						&main_triple);
-	if (compiler->tm == NULL)
-		return false;
-
-	if (want_low_opt) {
-		compiler->low_opt_tm =
-			ac_create_target_machine(family, tm_options,
-						 LLVMCodeGenLevelLess, NULL);
-		if (compiler->low_opt_tm == NULL)
-			goto destroy;
-	}
-
-	if (family >= CHIP_NAVI10) {
-		/* The low-opt machine must not be requested on these chips. */
-		assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
-		compiler->tm_wave32 =
-			ac_create_target_machine(family,
-						 tm_options | AC_TM_WAVE32,
-						 LLVMCodeGenLevelDefault,
-						 NULL);
-		if (compiler->tm_wave32 == NULL)
-			goto destroy;
-	}
-
-	compiler->target_library_info = ac_create_target_library_info(main_triple);
-	if (compiler->target_library_info == NULL)
-		goto destroy;
-
-	compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
-					      check_ir);
-	if (compiler->passmgr == NULL)
-		goto destroy;
-
-	return true;
-
-destroy:
-	ac_destroy_llvm_compiler(compiler);
-	return false;
-}
-
-/* Release everything owned by \p compiler. Members that are NULL are
- * skipped, so this also works on a partially initialized structure.
- */
-void
-ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
-{
-	struct ac_compiler_passes *pass_sets[] = {
-		compiler->passes,
-		compiler->passes_wave32,
-		compiler->low_opt_passes,
-	};
-	LLVMTargetMachineRef machines[] = {
-		compiler->low_opt_tm,
-		compiler->tm,
-		compiler->tm_wave32,
-	};
-
-	for (unsigned i = 0; i < ARRAY_SIZE(pass_sets); i++)
-		ac_destroy_llvm_passes(pass_sets[i]);
-
-	if (compiler->passmgr)
-		LLVMDisposePassManager(compiler->passmgr);
-	if (compiler->target_library_info)
-		ac_dispose_target_library_info(compiler->target_library_info);
-
-	for (unsigned i = 0; i < ARRAY_SIZE(machines); i++) {
-		if (machines[i])
-			LLVMDisposeTargetMachine(machines[i]);
-	}
-}
+++ /dev/null
-/*
- * Copyright 2016 Bas Nieuwenhuizen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-
-#ifndef AC_LLVM_UTIL_H
-#define AC_LLVM_UTIL_H
-
-#include <stdbool.h>
-#include <llvm-c/TargetMachine.h>
-#include <llvm/Config/llvm-config.h>
-
-#include "amd_family.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct ac_compiler_passes;
-
-/* Function attribute flags. Every enumerator is a distinct bit; bit 1 is not
- * assigned to any enumerator here. ac_add_func_attributes() is declared in
- * this header with an "unsigned attrib_mask" parameter - presumably an OR of
- * these values (confirm against its definition).
- */
-enum ac_func_attr {
- AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
- AC_FUNC_ATTR_INREG = (1 << 2),
- AC_FUNC_ATTR_NOALIAS = (1 << 3),
- AC_FUNC_ATTR_NOUNWIND = (1 << 4),
- AC_FUNC_ATTR_READNONE = (1 << 5),
- AC_FUNC_ATTR_READONLY = (1 << 6),
- AC_FUNC_ATTR_WRITEONLY = (1 << 7),
- AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
- AC_FUNC_ATTR_CONVERGENT = (1 << 9),
-
- /* Legacy intrinsic that needs attributes on function declarations
- * and they must match the internal LLVM definition exactly, otherwise
- * intrinsic selection fails.
- */
- AC_FUNC_ATTR_LEGACY = (1u << 31),
-};
-
-/* Bit flags passed to ac_init_llvm_compiler() as "tm_options". Each
- * enumerator is a distinct bit.
- */
-enum ac_target_machine_options {
- AC_TM_SUPPORTS_SPILL = (1 << 0),
- AC_TM_SISCHED = (1 << 1),
- AC_TM_FORCE_ENABLE_XNACK = (1 << 2),
- AC_TM_FORCE_DISABLE_XNACK = (1 << 3),
- AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
- /* Tested by ac_init_llvm_compiler() and forwarded to ac_create_passmgr(). */
- AC_TM_CHECK_IR = (1 << 5),
- AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
- /* ac_init_llvm_compiler() asserts this is not set for family >= CHIP_NAVI10. */
- AC_TM_CREATE_LOW_OPT = (1 << 7),
- AC_TM_NO_LOAD_STORE_OPT = (1 << 8),
- /* OR'ed into the options by ac_init_llvm_compiler() when it creates tm_wave32. */
- AC_TM_WAVE32 = (1 << 9),
-};
-
-/* Float mode selector; ac_create_builder() is declared in this header with a
- * parameter of this type.
- */
-enum ac_float_mode {
- AC_FLOAT_MODE_DEFAULT,
- AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
- AC_FLOAT_MODE_UNSAFE_FP_MATH,
-};
-
-/* Per-thread persistent LLVM objects.
- *
- * ac_init_llvm_compiler() fills in target_library_info, passmgr and (for
- * family >= CHIP_NAVI10) tm_wave32. ac_destroy_llvm_compiler() disposes
- * passmgr, target_library_info and the three target machines when they are
- * non-NULL, and passes all three pass sets to ac_destroy_llvm_passes().
- */
-struct ac_llvm_compiler {
- LLVMTargetLibraryInfoRef target_library_info;
- /* Created by ac_create_passmgr() from target_library_info. */
- LLVMPassManagerRef passmgr;
-
- /* Default compiler. */
- LLVMTargetMachineRef tm;
- struct ac_compiler_passes *passes;
-
- /* Wave32 compiler for GFX10. */
- LLVMTargetMachineRef tm_wave32;
- struct ac_compiler_passes *passes_wave32;
-
- /* Optional compiler for faster compilation with fewer optimizations.
- * LLVM modules can be created with "tm" too. There is no difference.
- */
- LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
- struct ac_compiler_passes *low_opt_passes;
-};
-
-const char *ac_get_llvm_processor_name(enum radeon_family family);
-void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
-bool ac_is_sgpr_param(LLVMValueRef param);
-void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
- int attr_idx, enum ac_func_attr attr);
-void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
- unsigned attrib_mask);
-void ac_dump_module(LLVMModuleRef module);
-
-LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
-bool ac_llvm_is_function(LLVMValueRef v);
-LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
-
-LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
- enum ac_float_mode float_mode);
-
-void
-ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
- const char *name, unsigned value);
-void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
-
-/* Pick the memory attribute for a load intrinsic: AC_FUNC_ATTR_READNONE when
- * the load may be speculated, AC_FUNC_ATTR_READONLY otherwise.
- */
-static inline unsigned
-ac_get_load_intr_attribs(bool can_speculate)
-{
-	/* READNONE means writes can't affect it, while READONLY means that
-	 * writes can affect it.
-	 */
-	if (can_speculate)
-		return AC_FUNC_ATTR_READNONE;
-
-	return AC_FUNC_ATTR_READONLY;
-}
-
-unsigned
-ac_count_scratch_private_memory(LLVMValueRef function);
-
-LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple);
-void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
-void ac_init_llvm_once(void);
-
-
-bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
- enum radeon_family family,
- enum ac_target_machine_options tm_options);
-void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
-
-struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
-void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
-bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size);
-void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
-void ac_enable_global_isel(LLVMTargetMachineRef tm);
-
-/* Report whether vec3 is supported for the given chip class.
- * Returns false for GFX6 without format; otherwise the answer depends only
- * on LLVM_VERSION_MAJOR being at least 9.
- */
-static inline bool
-ac_has_vec3_support(enum chip_class chip, bool use_format)
-{
-	/* GFX6 only supports vec3 with load/store format. */
-	const bool gfx6_without_format = chip == GFX6 && !use_format;
-
-	return !gfx6_without_format && LLVM_VERSION_MAJOR >= 9;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* AC_LLVM_UTIL_H */
+++ /dev/null
-/*
- * Copyright © 2016 Bas Nieuwenhuizen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include <llvm/Config/llvm-config.h>
-
-#include "ac_nir_to_llvm.h"
-#include "ac_llvm_build.h"
-#include "ac_llvm_util.h"
-#include "ac_binary.h"
-#include "sid.h"
-#include "nir/nir.h"
-#include "nir/nir_deref.h"
-#include "util/bitscan.h"
-#include "util/u_math.h"
-#include "ac_shader_abi.h"
-#include "ac_shader_util.h"
-
-/* State carried by the helpers in this file while emitting LLVM IR for NIR. */
-struct ac_nir_context {
- /* LLVM build context; the helpers reach the builder and the cached
- * types/constants through it (ctx->ac.builder, ctx->ac.i32, ...).
- */
- struct ac_llvm_context ac;
- struct ac_shader_abi *abi;
-
- gl_shader_stage stage;
- shader_info *info;
-
- /* LLVM value per SSA def, indexed by the def's index (see get_src()). */
- LLVMValueRef *ssa_defs;
-
- LLVMValueRef scratch;
- LLVMValueRef constant_data;
-
- /* get_block() looks up a nir_block here and casts the entry's data to
- * LLVMBasicBlockRef.
- */
- struct hash_table *defs;
- struct hash_table *phis;
- struct hash_table *vars;
-
- LLVMValueRef main_function;
- LLVMBasicBlockRef continue_block;
- LLVMBasicBlockRef break_block;
-
- /* NOTE(review): presumably the number of entries in "locals" - confirm. */
- int num_locals;
- LLVMValueRef *locals;
-};
-
-static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- enum ac_descriptor_type desc_type,
- const nir_instr *instr,
- bool image, bool write);
-
-/* For each i in [0, value_count): extract element i of "vec" and store it
- * through the pointer values[i * value_stride].
- */
-static void
-build_store_values_extended(struct ac_llvm_context *ac,
-			     LLVMValueRef *values,
-			     unsigned value_count,
-			     unsigned value_stride,
-			     LLVMValueRef vec)
-{
-	for (unsigned chan = 0; chan < value_count; chan++) {
-		LLVMValueRef dst_ptr = values[chan * value_stride];
-		LLVMValueRef chan_index = LLVMConstInt(ac->i32, chan, false);
-		LLVMValueRef elem = LLVMBuildExtractElement(ac->builder, vec,
-							    chan_index, "");
-
-		LLVMBuildStore(ac->builder, elem, dst_ptr);
-	}
-}
-
-/* Integer LLVM type for an SSA def: an integer of def->bit_size bits, wrapped
- * in a vector of def->num_components elements when there is more than one
- * component.
- */
-static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
-				const nir_ssa_def *def)
-{
-	LLVMTypeRef elem_type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
-
-	return def->num_components > 1 ?
-		LLVMVectorType(elem_type, def->num_components) : elem_type;
-}
-
-/* Return the LLVM value recorded for an SSA source; the source must be SSA. */
-static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
-{
-	LLVMValueRef *defs = nir->ssa_defs;
-
-	assert(src.is_ssa);
-	return defs[src.ssa->index];
-}
-
-/* Index ctx->ac.lds with the value of "src" (single-index GEP) and return the
- * resulting pointer bitcast to an i32 pointer in the same address space.
- */
-static LLVMValueRef
-get_memory_ptr(struct ac_nir_context *ctx, nir_src src)
-{
-	LLVMBuilderRef builder = ctx->ac.builder;
-	LLVMValueRef offset = get_src(ctx, src);
-	LLVMValueRef lds_ptr = LLVMBuildGEP(builder, ctx->ac.lds, &offset, 1, "");
-	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(lds_ptr));
-	LLVMTypeRef i32_ptr_type = LLVMPointerType(ctx->ac.i32, addr_space);
-
-	return LLVMBuildBitCast(builder, lds_ptr, i32_ptr_type, "");
-}
-
-/* Return the LLVM basic block registered in nir->defs for NIR block "b".
- *
- * The block must already have an entry in the table: the lookup result is
- * dereferenced, so a missing entry is a caller bug. The assert makes that
- * failure show up at the lookup in debug builds instead of as an anonymous
- * NULL dereference.
- */
-static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
-				   const struct nir_block *b)
-{
-	struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
-
-	assert(entry);
-	return (LLVMBasicBlockRef)entry->data;
-}
-
-/* Fetch an ALU source and shape it to "num_components" components.
- *
- * When the swizzle is the identity and the component counts already match,
- * the value is returned as is. Otherwise:
- *   - vector source, one component wanted: extract element swizzle[0];
- *   - scalar source, several components wanted: replicate the scalar;
- *   - anything else: shuffle the source with the swizzle as the mask.
- * Source modifiers (negate/abs) are not supported and are asserted off.
- */
-static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
-				nir_alu_src src,
-				unsigned num_components)
-{
-	LLVMValueRef value = get_src(ctx, src.src);
-
-	assert(value);
-	const unsigned src_components = ac_get_llvm_num_components(value);
-	bool identity = num_components == src_components;
-
-	for (unsigned i = 0; i < num_components; ++i) {
-		assert(src.swizzle[i] < src_components);
-		if (src.swizzle[i] != i)
-			identity = false;
-	}
-
-	if (!identity) {
-		LLVMValueRef masks[4];
-
-		for (unsigned k = 0; k < 4; ++k)
-			masks[k] = LLVMConstInt(ctx->ac.i32, src.swizzle[k], false);
-
-		const bool want_scalar = num_components == 1;
-		const bool have_scalar = src_components == 1;
-
-		if (!have_scalar && want_scalar) {
-			value = LLVMBuildExtractElement(ctx->ac.builder, value,
-							masks[0], "");
-		} else if (have_scalar && num_components > 1) {
-			LLVMValueRef splat[] = {value, value, value, value};
-			value = ac_build_gather_values(&ctx->ac, splat, num_components);
-		} else {
-			LLVMValueRef shuffle_mask = LLVMConstVector(masks, num_components);
-			value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
-						       shuffle_mask, "");
-		}
-	}
-
-	assert(!src.negate);
-	assert(!src.abs);
-	return value;
-}
-
-/* Integer compare producing an i32: 0xFFFFFFFF when "pred" holds, 0 otherwise. */
-static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
-				 LLVMIntPredicate pred, LLVMValueRef src0,
-				 LLVMValueRef src1)
-{
-	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, 0xFFFFFFFF, false);
-	LLVMValueRef holds = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
-
-	return LLVMBuildSelect(ctx->builder, holds, all_ones, ctx->i32_0, "");
-}
-
-/* Float compare producing an i32: both operands go through ac_to_float()
- * first; the result is 0xFFFFFFFF when "pred" holds, 0 otherwise.
- */
-static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
-				   LLVMRealPredicate pred, LLVMValueRef src0,
-				   LLVMValueRef src1)
-{
-	LLVMValueRef lhs = ac_to_float(ctx, src0);
-	LLVMValueRef rhs = ac_to_float(ctx, src1);
-	LLVMValueRef holds = LLVMBuildFCmp(ctx->builder, pred, lhs, rhs, "");
-	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, 0xFFFFFFFF, false);
-
-	return LLVMBuildSelect(ctx->builder, holds, all_ones, ctx->i32_0, "");
-}
-
-/* Call the one-operand intrinsic "<intrin>.f<bits>", where <bits> is the
- * element width of result_type. The operand goes through ac_to_float() and
- * the call is marked READNONE.
- */
-static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
-					 const char *intrin,
-					 LLVMTypeRef result_type,
-					 LLVMValueRef src0)
-{
-	char name[64];
-	LLVMValueRef arg = ac_to_float(ctx, src0);
-	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-					     ac_get_elem_bits(ctx, result_type));
-
-	/* The full intrinsic name must fit in the buffer. */
-	assert(length < sizeof(name));
-	return ac_build_intrinsic(ctx, name, result_type, &arg, 1,
-				  AC_FUNC_ATTR_READNONE);
-}
-
-/* Call the two-operand intrinsic "<intrin>.f<bits>", where <bits> is the
- * element width of result_type. Both operands go through ac_to_float() and
- * the call is marked READNONE.
- */
-static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
-					 const char *intrin,
-					 LLVMTypeRef result_type,
-					 LLVMValueRef src0, LLVMValueRef src1)
-{
-	char name[64];
-	LLVMValueRef args[2];
-
-	args[0] = ac_to_float(ctx, src0);
-	args[1] = ac_to_float(ctx, src1);
-
-	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-					     ac_get_elem_bits(ctx, result_type));
-	/* The full intrinsic name must fit in the buffer. */
-	assert(length < sizeof(name));
-	return ac_build_intrinsic(ctx, name, result_type, args, 2,
-				  AC_FUNC_ATTR_READNONE);
-}
-
-/* Call the three-operand intrinsic "<intrin>.f<bits>", where <bits> is the
- * element width of result_type. All operands go through ac_to_float() and
- * the call is marked READNONE.
- */
-static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
-					 const char *intrin,
-					 LLVMTypeRef result_type,
-					 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
-{
-	char name[64];
-	LLVMValueRef args[3];
-
-	args[0] = ac_to_float(ctx, src0);
-	args[1] = ac_to_float(ctx, src1);
-	args[2] = ac_to_float(ctx, src2);
-
-	ASSERTED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
-					     ac_get_elem_bits(ctx, result_type));
-	/* The full intrinsic name must fit in the buffer. */
-	assert(length < sizeof(name));
-	return ac_build_intrinsic(ctx, name, result_type, args, 3,
-				  AC_FUNC_ATTR_READNONE);
-}
-
-/* Select src1 when src0 != 0, else src2. The condition must not be a vector;
- * both candidates go through ac_to_integer_or_pointer() first.
- */
-static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
-			       LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
-{
-	assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
-
-	LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
-					  ctx->i32_0, "");
-	LLVMValueRef if_set = ac_to_integer_or_pointer(ctx, src1);
-	LLVMValueRef if_clear = ac_to_integer_or_pointer(ctx, src2);
-
-	return LLVMBuildSelect(ctx->builder, cond, if_set, if_clear, "");
-}
-
-/* Integer absolute value, built as imax(src0, -src0). */
-static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
-			      LLVMValueRef src0)
-{
-	LLVMValueRef negated = LLVMBuildNeg(ctx->builder, src0, "");
-
-	return ac_build_imax(ctx, src0, negated);
-}
-
-/* Call the two-operand intrinsic "intrin", declared here as returning a
- * packed { i32, i1 } struct, and return member 1 zero-extended to i32.
- */
-static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
-				    const char *intrin,
-				    LLVMValueRef src0, LLVMValueRef src1)
-{
-	LLVMTypeRef members[] = { ctx->i32, ctx->i1 };
-	LLVMValueRef args[] = { src0, src1 };
-	LLVMTypeRef pair_type = LLVMStructTypeInContext(ctx->context, members,
-							2, true);
-	LLVMValueRef pair = ac_build_intrinsic(ctx, intrin, pair_type,
-					       args, 2, AC_FUNC_ATTR_READNONE);
-	LLVMValueRef flag = LLVMBuildExtractValue(ctx->builder, pair, 1, "");
-
-	return LLVMBuildZExt(ctx->builder, flag, ctx->i32, "");
-}
-
-/* Boolean to float: AND src0 with the bit pattern of 1.0f, reinterpret the
- * result as f32, then truncate to f16 or extend to f64 when "bitsize" asks
- * for it. Only 16, 32 and 64 are supported.
- */
-static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
-			     LLVMValueRef src0,
-			     unsigned bitsize)
-{
-	LLVMValueRef one_bits = LLVMBuildBitCast(ctx->builder,
-						 LLVMConstReal(ctx->f32, 1.0),
-						 ctx->i32, "");
-	LLVMValueRef masked = LLVMBuildAnd(ctx->builder, src0, one_bits, "");
-	LLVMValueRef as_f32 = LLVMBuildBitCast(ctx->builder, masked, ctx->f32, "");
-
-	if (bitsize == 32)
-		return as_f32;
-	if (bitsize == 16)
-		return LLVMBuildFPTrunc(ctx->builder, as_f32, ctx->f16, "");
-	if (bitsize == 64)
-		return LLVMBuildFPExt(ctx->builder, as_f32, ctx->f64, "");
-
-	unreachable("Unsupported bit size.");
-}
-
-/* Float to boolean: compare src0 (as float) against zero with UNE and
- * sign-extend the i1 result to i32.
- */
-static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
-			     LLVMValueRef src0)
-{
-	LLVMValueRef value = ac_to_float(ctx, src0);
-	LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(value));
-	LLVMValueRef nonzero = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
-					     value, zero, "");
-
-	return LLVMBuildSExt(ctx->builder, nonzero, ctx->i32, "");
-}
-
-/* Boolean to integer: AND src0 with 1, then truncate or zero-extend the i32
- * result to "bitsize" bits. Only 8, 16, 32 and 64 are supported.
- */
-static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
-			     LLVMValueRef src0,
-			     unsigned bitsize)
-{
-	LLVMValueRef low_bit = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
-
-	if (bitsize == 32)
-		return low_bit;
-	if (bitsize == 8)
-		return LLVMBuildTrunc(ctx->builder, low_bit, ctx->i8, "");
-	if (bitsize == 16)
-		return LLVMBuildTrunc(ctx->builder, low_bit, ctx->i16, "");
-	if (bitsize == 64)
-		return LLVMBuildZExt(ctx->builder, low_bit, ctx->i64, "");
-
-	unreachable("Unsupported bit size.");
-}
-
-/* Integer to boolean: compare src0 against zero with NE and sign-extend the
- * i1 result to i32.
- */
-static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
-			     LLVMValueRef src0)
-{
-	LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
-	LLVMValueRef nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE,
-					     src0, zero, "");
-
-	return LLVMBuildSExt(ctx->builder, nonzero, ctx->i32, "");
-}
-
-/* Round src0 through f16 and back to f32, flushing results that are f16
- * denormals to zero. visit_alu() uses this for nir_op_fquantize2f16.
- *
- * GFX8 and newer test the f16 value with llvm.amdgcn.class.f16; older chips
- * compare the magnitude of the re-extended f32 value against 2^-14 instead.
- */
-static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
- LLVMValueRef src0)
-{
- LLVMValueRef result;
- LLVMValueRef cond = NULL;
-
- src0 = ac_to_float(ctx, src0);
- result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
-
- if (ctx->chip_class >= GFX8) {
- LLVMValueRef args[2];
- /* Check if the result is a denormal - and flush to 0 if so. */
- args[0] = result;
- args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
- /* The class test has to see the f16 value, before it is extended below. */
- cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
- }
-
- /* need to convert back up to f32 */
- result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
-
- if (ctx->chip_class >= GFX8)
- result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
- else {
- /* for GFX6-GFX7 */
- /* 0x38800000 is the smallest normal half float value (2^-14) in
- * 32-bit float, so compare the result and flush to 0 if it's smaller.
- */
- LLVMValueRef temp, cond2;
- temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
- /* cond: 2^-14 > |result| (unordered compare) */
- cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
- LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
- temp, "");
- /* cond2: |result| != 0 (unordered compare) */
- cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
- temp, ctx->f32_0, "");
- cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
- result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
- }
- return result;
-}
-
-/* Upper 32 bits of an unsigned multiply: zero-extend both operands to i64,
- * multiply, shift right logically by 32 and truncate to i32.
- */
-static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
-				   LLVMValueRef src0, LLVMValueRef src1)
-{
-	LLVMBuilderRef builder = ctx->builder;
-	LLVMValueRef lhs64 = LLVMBuildZExt(builder, src0, ctx->i64, "");
-	LLVMValueRef rhs64 = LLVMBuildZExt(builder, src1, ctx->i64, "");
-	LLVMValueRef product = LLVMBuildMul(builder, lhs64, rhs64, "");
-	LLVMValueRef upper = LLVMBuildLShr(builder, product,
-					   LLVMConstInt(ctx->i64, 32, false), "");
-
-	return LLVMBuildTrunc(builder, upper, ctx->i32, "");
-}
-
-/* Upper 32 bits of a signed multiply: sign-extend both operands to i64,
- * multiply, shift right arithmetically by 32 and truncate to i32.
- */
-static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
-				   LLVMValueRef src0, LLVMValueRef src1)
-{
-	LLVMBuilderRef builder = ctx->builder;
-	LLVMValueRef lhs64 = LLVMBuildSExt(builder, src0, ctx->i64, "");
-	LLVMValueRef rhs64 = LLVMBuildSExt(builder, src1, ctx->i64, "");
-	LLVMValueRef product = LLVMBuildMul(builder, lhs64, rhs64, "");
-	LLVMValueRef upper = LLVMBuildAShr(builder, product,
-					   LLVMConstInt(ctx->i64, 32, false), "");
-
-	return LLVMBuildTrunc(builder, upper, ctx->i32, "");
-}
-
-/* Build a bitfield mask: ((1 << bits) - 1) << offset. */
-static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
-			     LLVMValueRef bits, LLVMValueRef offset)
-{
-	LLVMBuilderRef builder = ctx->builder;
-	LLVMValueRef pow2 = LLVMBuildShl(builder, ctx->i32_1, bits, "");
-	LLVMValueRef low_mask = LLVMBuildSub(builder, pow2, ctx->i32_1, "");
-
-	return LLVMBuildShl(builder, low_mask, offset, "");
-}
-
-/* Per-bit select between "insert" (where mask is 1) and "base" (where mask
- * is 0).
- *
- *   (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
- *
- * The right-hand form is emitted because the LLVM backend can convert it
- * to V_BFI.
- */
-static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
-					 LLVMValueRef mask, LLVMValueRef insert,
-					 LLVMValueRef base)
-{
-	LLVMBuilderRef builder = ctx->builder;
-	LLVMValueRef differing = LLVMBuildXor(builder, insert, base, "");
-	LLVMValueRef selected = LLVMBuildAnd(builder, mask, differing, "");
-
-	return LLVMBuildXor(builder, base, selected, "");
-}
-
-/* Pack the first two components of src0 (viewed as float) with the supplied
- * "pack" callback and return the packed value bitcast to i32.
- */
-static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx,
-				   LLVMValueRef src0,
-				   LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
-							LLVMValueRef args[2]))
-{
-	LLVMValueRef vec = ac_to_float(ctx, src0);
-	LLVMValueRef first = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_0, "");
-	LLVMValueRef second = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_1, "");
-	LLVMValueRef pair[2] = { first, second };
-	LLVMValueRef packed = pack(ctx, pair);
-
-	return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
-}
-
-/* Split a 32-bit value into two f16 halves and return them as a two-element
- * f32 vector: element 0 comes from the low 16 bits, element 1 from the high
- * 16 bits.
- */
-static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
-					  LLVMValueRef src0)
-{
-	LLVMBuilderRef builder = ctx->builder;
-	LLVMValueRef shift16 = LLVMConstInt(ctx->i32, 16, false);
-	LLVMValueRef halves[2];
-	LLVMValueRef bits;
-
-	/* Low half. */
-	bits = LLVMBuildTrunc(builder, src0, ctx->i16, "");
-	bits = LLVMBuildBitCast(builder, bits, ctx->f16, "");
-	halves[0] = LLVMBuildFPExt(builder, bits, ctx->f32, "");
-
-	/* High half: shift it down first. */
-	bits = LLVMBuildLShr(builder, src0, shift16, "");
-	bits = LLVMBuildTrunc(builder, bits, ctx->i16, "");
-	bits = LLVMBuildBitCast(builder, bits, ctx->f16, "");
-	halves[1] = LLVMBuildFPExt(builder, bits, ctx->f32, "");
-
-	return ac_build_gather_values(ctx, halves, 2);
-}
-
-/* Emit a screen-space derivative through ac_build_ddxy().
- *
- * The thread-id mask is LEFT for fddx_fine, TOP for fddy_fine and TOP_LEFT
- * for every other opcode. The index passed on is 1 for the fddx variants
- * and 2 otherwise.
- */
-static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
-			      nir_op op,
-			      LLVMValueRef src0)
-{
-	const bool is_ddx = op == nir_op_fddx_fine ||
-			    op == nir_op_fddx_coarse ||
-			    op == nir_op_fddx;
-	unsigned mask;
-
-	switch (op) {
-	case nir_op_fddx_fine:
-		mask = AC_TID_MASK_LEFT;
-		break;
-	case nir_op_fddy_fine:
-		mask = AC_TID_MASK_TOP;
-		break;
-	default:
-		mask = AC_TID_MASK_TOP_LEFT;
-		break;
-	}
-
-	/* For DDX we want the next X pixel, for DDY the next Y pixel. */
-	const int idx = is_ddx ? 1 : 2;
-
-	return ac_build_ddxy(&ctx->ac, mask, idx, src0);
-}
-
-static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
-{
- LLVMValueRef src[4], result = NULL;
- unsigned num_components = instr->dest.dest.ssa.num_components;
- unsigned src_components;
- LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
-
- assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
- switch (instr->op) {
- case nir_op_vec2:
- case nir_op_vec3:
- case nir_op_vec4:
- src_components = 1;
- break;
- case nir_op_pack_half_2x16:
- case nir_op_pack_snorm_2x16:
- case nir_op_pack_unorm_2x16:
- src_components = 2;
- break;
- case nir_op_unpack_half_2x16:
- src_components = 1;
- break;
- case nir_op_cube_face_coord:
- case nir_op_cube_face_index:
- src_components = 3;
- break;
- default:
- src_components = num_components;
- break;
- }
- for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
- src[i] = get_alu_src(ctx, instr->src[i], src_components);
-
- switch (instr->op) {
- case nir_op_mov:
- result = src[0];
- break;
- case nir_op_fneg:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
- break;
- case nir_op_ineg:
- result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
- break;
- case nir_op_inot:
- result = LLVMBuildNot(ctx->ac.builder, src[0], "");
- break;
- case nir_op_iadd:
- result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_fadd:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- src[1] = ac_to_float(&ctx->ac, src[1]);
- result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_fsub:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- src[1] = ac_to_float(&ctx->ac, src[1]);
- result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_isub:
- result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_imul:
- result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_imod:
- result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_umod:
- result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_irem:
- result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_idiv:
- result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_udiv:
- result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_fmul:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- src[1] = ac_to_float(&ctx->ac, src[1]);
- result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_frcp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
- break;
- case nir_op_iand:
- result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ior:
- result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ixor:
- result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ishl:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ishr:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ushr:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
- src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
- LLVMTypeOf(src[0]), "");
- result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
- break;
- case nir_op_ilt32:
- result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
- break;
- case nir_op_ine32:
- result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
- break;
- case nir_op_ieq32:
- result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
- break;
- case nir_op_ige32:
- result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
- break;
- case nir_op_ult32:
- result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
- break;
- case nir_op_uge32:
- result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
- break;
- case nir_op_feq32:
- result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
- break;
- case nir_op_fne32:
- result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
- break;
- case nir_op_flt32:
- result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
- break;
- case nir_op_fge32:
- result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
- break;
- case nir_op_fabs:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_iabs:
- result = emit_iabs(&ctx->ac, src[0]);
- break;
- case nir_op_imax:
- result = ac_build_imax(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_imin:
- result = ac_build_imin(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_umax:
- result = ac_build_umax(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_umin:
- result = ac_build_umin(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_isign:
- result = ac_build_isign(&ctx->ac, src[0],
- instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_fsign:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_fsign(&ctx->ac, src[0],
- instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_ffloor:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_ftrunc:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fceil:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fround_even:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
- ac_to_float_type(&ctx->ac, def_type),src[0]);
- break;
- case nir_op_ffract:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_fract(&ctx->ac, src[0],
- instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_fsin:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fcos:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fsqrt:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_fexp2:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_flog2:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- break;
- case nir_op_frsq:
- result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
- ac_to_float_type(&ctx->ac, def_type), src[0]);
- result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
- break;
- case nir_op_frexp_exp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_frexp_exp(&ctx->ac, src[0],
- ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
- result = LLVMBuildSExt(ctx->ac.builder, result,
- ctx->ac.i32, "");
- break;
- case nir_op_frexp_sig:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = ac_build_frexp_mant(&ctx->ac, src[0],
- instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_fpow:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- break;
- case nir_op_fmax:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- if (ctx->ac.chip_class < GFX9 &&
- instr->dest.dest.ssa.bit_size == 32) {
- /* Only pre-GFX9 chips do not flush denorms. */
- result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
- ac_to_float_type(&ctx->ac, def_type),
- result);
- }
- break;
- case nir_op_fmin:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- if (ctx->ac.chip_class < GFX9 &&
- instr->dest.dest.ssa.bit_size == 32) {
- /* Only pre-GFX9 chips do not flush denorms. */
- result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
- ac_to_float_type(&ctx->ac, def_type),
- result);
- }
- break;
- case nir_op_ffma:
- /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
- result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
- break;
- case nir_op_ldexp:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
- else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
- else
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
- break;
- case nir_op_bfm:
- result = emit_bfm(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_bitfield_select:
- result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
- break;
- case nir_op_ubfe:
- result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
- break;
- case nir_op_ibfe:
- result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
- break;
- case nir_op_bitfield_reverse:
- result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
- break;
- case nir_op_bit_count:
- result = ac_build_bit_count(&ctx->ac, src[0]);
- break;
- case nir_op_vec2:
- case nir_op_vec3:
- case nir_op_vec4:
- for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
- src[i] = ac_to_integer(&ctx->ac, src[i]);
- result = ac_build_gather_values(&ctx->ac, src, num_components);
- break;
- case nir_op_f2i8:
- case nir_op_f2i16:
- case nir_op_f2i32:
- case nir_op_f2i64:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
- break;
- case nir_op_f2u8:
- case nir_op_f2u16:
- case nir_op_f2u32:
- case nir_op_f2u64:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
- break;
- case nir_op_i2f16:
- case nir_op_i2f32:
- case nir_op_i2f64:
- result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- break;
- case nir_op_u2f16:
- case nir_op_u2f32:
- case nir_op_u2f64:
- result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- break;
- case nir_op_f2f16_rtz:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- if (LLVMTypeOf(src[0]) == ctx->ac.f64)
- src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
- LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
- result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
- break;
- case nir_op_f2f16_rtne:
- case nir_op_f2f16:
- case nir_op_f2f32:
- case nir_op_f2f64:
- src[0] = ac_to_float(&ctx->ac, src[0]);
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- else
- result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
- break;
- case nir_op_u2u8:
- case nir_op_u2u16:
- case nir_op_u2u32:
- case nir_op_u2u64:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
- else
- result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
- break;
- case nir_op_i2i8:
- case nir_op_i2i16:
- case nir_op_i2i32:
- case nir_op_i2i64:
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
- result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
- else
- result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
- break;
- case nir_op_b32csel:
- result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
- break;
- case nir_op_find_lsb:
- result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
- break;
- case nir_op_ufind_msb:
- result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
- break;
- case nir_op_ifind_msb:
- result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
- break;
- case nir_op_uadd_carry:
- result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
- break;
- case nir_op_usub_borrow:
- result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
- break;
- case nir_op_b2f16:
- case nir_op_b2f32:
- case nir_op_b2f64:
- result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_f2b32:
- result = emit_f2b(&ctx->ac, src[0]);
- break;
- case nir_op_b2i8:
- case nir_op_b2i16:
- case nir_op_b2i32:
- case nir_op_b2i64:
- result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
- break;
- case nir_op_i2b32:
- result = emit_i2b(&ctx->ac, src[0]);
- break;
- case nir_op_fquantize2f16:
- result = emit_f2f16(&ctx->ac, src[0]);
- break;
- case nir_op_umul_high:
- result = emit_umul_high(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_imul_high:
- result = emit_imul_high(&ctx->ac, src[0], src[1]);
- break;
- case nir_op_pack_half_2x16:
- result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
- break;
- case nir_op_pack_snorm_2x16:
- result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
- break;
- case nir_op_pack_unorm_2x16:
- result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
- break;
- case nir_op_unpack_half_2x16:
- result = emit_unpack_half_2x16(&ctx->ac, src[0]);
- break;
- case nir_op_fddx:
- case nir_op_fddy:
- case nir_op_fddx_fine:
- case nir_op_fddy_fine:
- case nir_op_fddx_coarse:
- case nir_op_fddy_coarse:
- result = emit_ddxy(ctx, instr->op, src[0]);
- break;
-
- case nir_op_unpack_64_2x32_split_x: {
- assert(ac_get_llvm_num_components(src[0]) == 1);
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
- ctx->ac.v2i32,
- "");
- result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
- ctx->ac.i32_0, "");
- break;
- }
-
- case nir_op_unpack_64_2x32_split_y: {
- assert(ac_get_llvm_num_components(src[0]) == 1);
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
- ctx->ac.v2i32,
- "");
- result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
- ctx->ac.i32_1, "");
- break;
- }
-
- case nir_op_pack_64_2x32_split: {
- LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
- result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
- break;
- }
-
- case nir_op_pack_32_2x16_split: {
- LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
- result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
- break;
- }
-
- case nir_op_unpack_32_2x16_split_x: {
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
- ctx->ac.v2i16,
- "");
- result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
- ctx->ac.i32_0, "");
- break;
- }
-
- case nir_op_unpack_32_2x16_split_y: {
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
- ctx->ac.v2i16,
- "");
- result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
- ctx->ac.i32_1, "");
- break;
- }
-
- case nir_op_cube_face_coord: {
- src[0] = ac_to_float(&ctx->ac, src[0]);
- LLVMValueRef results[2];
- LLVMValueRef in[3];
- for (unsigned chan = 0; chan < 3; chan++)
- in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
- results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
- ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
- results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
- ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
- LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema",
- ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
- results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
- results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
- LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
- results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
- results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
- result = ac_build_gather_values(&ctx->ac, results, 2);
- break;
- }
-
- case nir_op_cube_face_index: {
- src[0] = ac_to_float(&ctx->ac, src[0]);
- LLVMValueRef in[3];
- for (unsigned chan = 0; chan < 3; chan++)
- in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
- result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid",
- ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
- break;
- }
-
- case nir_op_fmin3:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
- ac_to_float_type(&ctx->ac, def_type), result, src[2]);
- break;
- case nir_op_umin3:
- result = ac_build_umin(&ctx->ac, src[0], src[1]);
- result = ac_build_umin(&ctx->ac, result, src[2]);
- break;
- case nir_op_imin3:
- result = ac_build_imin(&ctx->ac, src[0], src[1]);
- result = ac_build_imin(&ctx->ac, result, src[2]);
- break;
- case nir_op_fmax3:
- result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
- ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
- result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
- ac_to_float_type(&ctx->ac, def_type), result, src[2]);
- break;
- case nir_op_umax3:
- result = ac_build_umax(&ctx->ac, src[0], src[1]);
- result = ac_build_umax(&ctx->ac, result, src[2]);
- break;
- case nir_op_imax3:
- result = ac_build_imax(&ctx->ac, src[0], src[1]);
- result = ac_build_imax(&ctx->ac, result, src[2]);
- break;
- case nir_op_fmed3: {
- src[0] = ac_to_float(&ctx->ac, src[0]);
- src[1] = ac_to_float(&ctx->ac, src[1]);
- src[2] = ac_to_float(&ctx->ac, src[2]);
- result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2],
- instr->dest.dest.ssa.bit_size);
- break;
- }
- case nir_op_imed3: {
- LLVMValueRef tmp1 = ac_build_imin(&ctx->ac, src[0], src[1]);
- LLVMValueRef tmp2 = ac_build_imax(&ctx->ac, src[0], src[1]);
- tmp2 = ac_build_imin(&ctx->ac, tmp2, src[2]);
- result = ac_build_imax(&ctx->ac, tmp1, tmp2);
- break;
- }
- case nir_op_umed3: {
- LLVMValueRef tmp1 = ac_build_umin(&ctx->ac, src[0], src[1]);
- LLVMValueRef tmp2 = ac_build_umax(&ctx->ac, src[0], src[1]);
- tmp2 = ac_build_umin(&ctx->ac, tmp2, src[2]);
- result = ac_build_umax(&ctx->ac, tmp1, tmp2);
- break;
- }
-
- default:
- fprintf(stderr, "Unknown NIR alu instr: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- abort();
- }
-
- if (result) {
- assert(instr->dest.dest.is_ssa);
- result = ac_to_integer_or_pointer(&ctx->ac, result);
- ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
- }
-}
-
-static void visit_load_const(struct ac_nir_context *ctx,
- const nir_load_const_instr *instr)
-{
- LLVMValueRef values[4], value = NULL;
- LLVMTypeRef element_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
-
- for (unsigned i = 0; i < instr->def.num_components; ++i) {
- switch (instr->def.bit_size) {
- case 8:
- values[i] = LLVMConstInt(element_type,
- instr->value[i].u8, false);
- break;
- case 16:
- values[i] = LLVMConstInt(element_type,
- instr->value[i].u16, false);
- break;
- case 32:
- values[i] = LLVMConstInt(element_type,
- instr->value[i].u32, false);
- break;
- case 64:
- values[i] = LLVMConstInt(element_type,
- instr->value[i].u64, false);
- break;
- default:
- fprintf(stderr,
- "unsupported nir load_const bit_size: %d\n",
- instr->def.bit_size);
- abort();
- }
- }
- if (instr->def.num_components > 1) {
- value = LLVMConstVector(values, instr->def.num_components);
- } else
- value = values[0];
-
- ctx->ssa_defs[instr->def.index] = value;
-}
-
-static LLVMValueRef
-get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
-{
- LLVMValueRef size =
- LLVMBuildExtractElement(ctx->ac.builder, descriptor,
- LLVMConstInt(ctx->ac.i32, 2, false), "");
-
- /* GFX8 only */
- if (ctx->ac.chip_class == GFX8 && in_elements) {
- /* On GFX8, the descriptor contains the size in bytes,
- * but TXQ must return the size in elements.
- * The stride is always non-zero for resources using TXQ.
- */
- LLVMValueRef stride =
- LLVMBuildExtractElement(ctx->ac.builder, descriptor,
- ctx->ac.i32_1, "");
- stride = LLVMBuildLShr(ctx->ac.builder, stride,
- LLVMConstInt(ctx->ac.i32, 16, false), "");
- stride = LLVMBuildAnd(ctx->ac.builder, stride,
- LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
-
- size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
- }
- return size;
-}
-
-/* Gather4 should follow the same rules as bilinear filtering, but the hardware
- * incorrectly forces nearest filtering if the texture format is integer.
- * The only effect it has on Gather4, which always returns 4 texels for
- * bilinear filtering, is that the final coordinates are off by 0.5 of
- * the texel size.
- *
- * The workaround is to subtract 0.5 from the unnormalized coordinates,
- * or (0.5 / size) from the normalized coordinates.
- *
- * However, cube textures with 8_8_8_8 data formats require a different
- * workaround of overriding the num format to USCALED/SSCALED. This would lose
- * precision in 32-bit data formats, so it needs to be applied dynamically at
- * runtime. In this case, return an i1 value that indicates whether the
- * descriptor was overridden (and hence a fixup of the sampler result is needed).
- */
-static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
- nir_variable *var,
- struct ac_image_args *args,
- const nir_tex_instr *instr)
-{
- const struct glsl_type *type = glsl_without_array(var->type);
- enum glsl_base_type stype = glsl_get_sampler_result_type(type);
- LLVMValueRef wa_8888 = NULL;
- LLVMValueRef half_texel[2];
- LLVMValueRef result;
-
- assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
- LLVMValueRef formats;
- LLVMValueRef data_format;
- LLVMValueRef wa_formats;
-
- formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
-
- data_format = LLVMBuildLShr(ctx->builder, formats,
- LLVMConstInt(ctx->i32, 20, false), "");
- data_format = LLVMBuildAnd(ctx->builder, data_format,
- LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
- wa_8888 = LLVMBuildICmp(
- ctx->builder, LLVMIntEQ, data_format,
- LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
- "");
-
- uint32_t wa_num_format =
- stype == GLSL_TYPE_UINT ?
- S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) :
- S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
- wa_formats = LLVMBuildAnd(ctx->builder, formats,
- LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false),
- "");
- wa_formats = LLVMBuildOr(ctx->builder, wa_formats,
- LLVMConstInt(ctx->i32, wa_num_format, false), "");
-
- formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
- args->resource = LLVMBuildInsertElement(
- ctx->builder, args->resource, formats, ctx->i32_1, "");
- }
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
- assert(!wa_8888);
- half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
- } else {
- struct ac_image_args resinfo = {};
- LLVMBasicBlockRef bbs[2];
-
- LLVMValueRef unnorm = NULL;
- LLVMValueRef default_offset = ctx->f32_0;
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D &&
- !instr->is_array) {
- /* In vulkan, whether the sampler uses unnormalized
- * coordinates or not is a dynamic property of the
- * sampler. Hence, to figure out whether or not we
- * need to divide by the texture size, we need to test
- * the sampler at runtime. This tests the bit set by
- * radv_init_sampler().
- */
- LLVMValueRef sampler0 =
- LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
- sampler0 = LLVMBuildLShr(ctx->builder, sampler0,
- LLVMConstInt(ctx->i32, 15, false), "");
- sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
- unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
- default_offset = LLVMConstReal(ctx->f32, -0.5);
- }
-
- bbs[0] = LLVMGetInsertBlock(ctx->builder);
- if (wa_8888 || unnorm) {
- assert(!(wa_8888 && unnorm));
- LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
- /* Skip the texture size query entirely if we don't need it. */
- ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
- bbs[1] = LLVMGetInsertBlock(ctx->builder);
- }
-
- /* Query the texture size. */
- resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
- resinfo.opcode = ac_image_get_resinfo;
- resinfo.dmask = 0xf;
- resinfo.lod = ctx->i32_0;
- resinfo.resource = args->resource;
- resinfo.attributes = AC_FUNC_ATTR_READNONE;
- LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
-
- /* Compute -0.5 / size. */
- for (unsigned c = 0; c < 2; c++) {
- half_texel[c] =
- LLVMBuildExtractElement(ctx->builder, size,
- LLVMConstInt(ctx->i32, c, 0), "");
- half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
- half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
- half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
- LLVMConstReal(ctx->f32, -0.5), "");
- }
-
- if (wa_8888 || unnorm) {
- ac_build_endif(ctx, 2000);
-
- for (unsigned c = 0; c < 2; c++) {
- LLVMValueRef values[2] = { default_offset, half_texel[c] };
- half_texel[c] = ac_build_phi(ctx, ctx->f32, 2,
- values, bbs);
- }
- }
- }
-
- for (unsigned c = 0; c < 2; c++) {
- LLVMValueRef tmp;
- tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
- args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
- }
-
- args->attributes = AC_FUNC_ATTR_READNONE;
- result = ac_build_image_opcode(ctx, args);
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
- LLVMValueRef tmp, tmp2;
-
- /* if the cube workaround is in place, f2i the result. */
- for (unsigned c = 0; c < 4; c++) {
- tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
- if (stype == GLSL_TYPE_UINT)
- tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
- else
- tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
- tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
- tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
- tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
- tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
- result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
- }
- }
- return result;
-}
-
-static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
-{
- nir_deref_instr *texture_deref_instr = NULL;
-
- for (unsigned i = 0; i < instr->num_srcs; i++) {
- switch (instr->src[i].src_type) {
- case nir_tex_src_texture_deref:
- texture_deref_instr = nir_src_as_deref(instr->src[i].src);
- break;
- default:
- break;
- }
- }
- return texture_deref_instr;
-}
-
-static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
- const nir_tex_instr *instr,
- struct ac_image_args *args)
-{
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
- unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
-
- return ac_build_buffer_load_format(&ctx->ac,
- args->resource,
- args->coords[0],
- ctx->ac.i32_0,
- util_last_bit(mask),
- 0, true);
- }
-
- args->opcode = ac_image_sample;
-
- switch (instr->op) {
- case nir_texop_txf:
- case nir_texop_txf_ms:
- case nir_texop_samples_identical:
- args->opcode = args->level_zero ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
- ac_image_load : ac_image_load_mip;
- args->level_zero = false;
- break;
- case nir_texop_txs:
- case nir_texop_query_levels:
- args->opcode = ac_image_get_resinfo;
- if (!args->lod)
- args->lod = ctx->ac.i32_0;
- args->level_zero = false;
- break;
- case nir_texop_tex:
- if (ctx->stage != MESA_SHADER_FRAGMENT) {
- assert(!args->lod);
- args->level_zero = true;
- }
- break;
- case nir_texop_tg4:
- args->opcode = ac_image_gather4;
- args->level_zero = true;
- break;
- case nir_texop_lod:
- args->opcode = ac_image_get_lod;
- break;
- default:
- break;
- }
-
- if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
- nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
- nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
- const struct glsl_type *type = glsl_without_array(var->type);
- enum glsl_base_type stype = glsl_get_sampler_result_type(type);
- if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
- return lower_gather4_integer(&ctx->ac, var, args, instr);
- }
- }
-
- /* Fixup for GFX9 which allocates 1D textures as 2D. */
- if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
- if ((args->dim == ac_image_2darray ||
- args->dim == ac_image_2d) && !args->coords[1]) {
- args->coords[1] = ctx->ac.i32_0;
- }
- }
-
- args->attributes = AC_FUNC_ATTR_READNONE;
- bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE &&
- ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
- if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
- /* Prevent texture instructions with implicit derivatives from being
- * sinked into branches. */
- switch (instr->op) {
- case nir_texop_tex:
- case nir_texop_txb:
- case nir_texop_lod:
- args->attributes |= AC_FUNC_ATTR_CONVERGENT;
- break;
- default:
- break;
- }
- }
-
- return ac_build_image_opcode(&ctx->ac, args);
-}
-
-static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
-{
- LLVMValueRef ptr = get_src(ctx, instr->src[0]);
- LLVMValueRef index = get_src(ctx, instr->src[1]);
-
- LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
- LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
- return result;
-}
-
-static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
-{
- LLVMValueRef ptr, addr;
- LLVMValueRef src0 = get_src(ctx, instr->src[0]);
- unsigned index = nir_intrinsic_base(instr);
-
- addr = LLVMConstInt(ctx->ac.i32, index, 0);
- addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
-
- /* Load constant values from user SGPRS when possible, otherwise
- * fallback to the default path that loads directly from memory.
- */
- if (LLVMIsConstant(src0) &&
- instr->dest.ssa.bit_size == 32) {
- unsigned count = instr->dest.ssa.num_components;
- unsigned offset = index;
-
- offset += LLVMConstIntGetZExtValue(src0);
- offset /= 4;
-
- offset -= ctx->abi->base_inline_push_consts;
-
- if (offset + count <= ctx->abi->num_inline_push_consts) {
- return ac_build_gather_values(&ctx->ac,
- ctx->abi->inline_push_consts + offset,
- count);
- }
- }
-
- ptr = LLVMBuildGEP(ctx->ac.builder, ctx->abi->push_constants, &addr, 1, "");
-
- if (instr->dest.ssa.bit_size == 8) {
- unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
- LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
- ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
- LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
-
- LLVMValueRef params[3];
- if (load_dwords > 1) {
- LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
- params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
- params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
- } else {
- res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
- params[0] = ctx->ac.i32_0;
- params[1] = res;
- }
- params[2] = addr;
- res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
-
- res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
- if (instr->dest.ssa.num_components > 1)
- res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
- return res;
- } else if (instr->dest.ssa.bit_size == 16) {
- unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
- LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
- ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
- LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
- res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
- LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
- cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
- LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
- LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
- LLVMConstInt(ctx->ac.i32, 4, false)};
- LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
- LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
- LLVMValueRef shuffle_aligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
- LLVMValueRef shuffle_unaligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
- res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
- return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
- }
-
- ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));
-
- return LLVMBuildLoad(ctx->ac.builder, ptr, "");
-}
-
-static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
-{
- LLVMValueRef index = get_src(ctx, instr->src[0]);
-
- return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
-}
-
-static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
-{
- uint32_t new_mask = 0;
- for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
- if (mask & (1u << i))
- new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
- return new_mask;
-}
-
-static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
- unsigned start, unsigned count)
-{
- LLVMValueRef mask[] = {
- ctx->i32_0, ctx->i32_1,
- LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) };
-
- unsigned src_elements = ac_get_llvm_num_components(src);
-
- if (count == src_elements) {
- assert(start == 0);
- return src;
- } else if (count == 1) {
- assert(start < src_elements);
- return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
- } else {
- assert(start + count <= src_elements);
- assert(count <= 4);
- LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
- return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
- }
-}
-
-static unsigned get_cache_policy(struct ac_nir_context *ctx,
- enum gl_access_qualifier access,
- bool may_store_unaligned,
- bool writeonly_memory)
-{
- unsigned cache_policy = 0;
-
- /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
- * store opcodes not aligned to a dword are affected. The only way to
- * get unaligned stores is through shader images.
- */
- if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
- /* If this is write-only, don't keep data in L1 to prevent
- * evicting L1 cache lines that may be needed by other
- * instructions.
- */
- writeonly_memory ||
- access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
- cache_policy |= ac_glc;
- }
-
- if (access & ACCESS_STREAM_CACHE_POLICY)
- cache_policy |= ac_slc;
-
- return cache_policy;
-}
-
-static void visit_store_ssbo(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
-{
- LLVMValueRef src_data = get_src(ctx, instr->src[0]);
- int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
- unsigned writemask = nir_intrinsic_write_mask(instr);
- enum gl_access_qualifier access = nir_intrinsic_access(instr);
- bool writeonly_memory = access & ACCESS_NON_READABLE;
- unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
-
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[1]), true);
- LLVMValueRef base_data = src_data;
- base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
- LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
-
- while (writemask) {
- int start, count;
- LLVMValueRef data, offset;
- LLVMTypeRef data_type;
-
- u_bit_scan_consecutive_range(&writemask, &start, &count);
-
- /* Due to an LLVM limitation with LLVM < 9, split 3-element
- * writes into a 2-element and a 1-element write. */
- if (count == 3 &&
- (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
- writemask |= 1 << (start + 2);
- count = 2;
- }
- int num_bytes = count * elem_size_bytes; /* count in bytes */
-
- /* we can only store 4 DWords at the same time.
- * can only happen for 64 Bit vectors. */
- if (num_bytes > 16) {
- writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
- count = 2;
- num_bytes = 16;
- }
-
- /* check alignment of 16 Bit stores */
- if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
- writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
- count = 1;
- num_bytes = 2;
- }
- data = extract_vector_range(&ctx->ac, base_data, start, count);
-
- offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
- LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
-
- if (num_bytes == 1) {
- ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data,
- offset, ctx->ac.i32_0,
- cache_policy);
- } else if (num_bytes == 2) {
- ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
- offset, ctx->ac.i32_0,
- cache_policy);
- } else {
- int num_channels = num_bytes / 4;
-
- switch (num_bytes) {
- case 16: /* v4f32 */
- data_type = ctx->ac.v4f32;
- break;
- case 12: /* v3f32 */
- data_type = ctx->ac.v3f32;
- break;
- case 8: /* v2f32 */
- data_type = ctx->ac.v2f32;
- break;
- case 4: /* f32 */
- data_type = ctx->ac.f32;
- break;
- default:
- unreachable("Malformed vector store.");
- }
- data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
-
- ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
- num_channels, offset,
- ctx->ac.i32_0, 0,
- cache_policy, false);
- }
- }
-}
-
-static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
- LLVMValueRef descriptor,
- LLVMValueRef offset,
- LLVMValueRef compare,
- LLVMValueRef exchange)
-{
- LLVMBasicBlockRef start_block = NULL, then_block = NULL;
- if (ctx->abi->robust_buffer_access) {
- LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
-
- LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
- start_block = LLVMGetInsertBlock(ctx->ac.builder);
-
- ac_build_ifcc(&ctx->ac, cond, -1);
-
- then_block = LLVMGetInsertBlock(ctx->ac.builder);
- }
-
- LLVMValueRef ptr_parts[2] = {
- ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
- LLVMBuildAnd(ctx->ac.builder,
- ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
- LLVMConstInt(ctx->ac.i32, 65535, 0), "")
- };
-
- ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
- ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");
-
- offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
-
- LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
- ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
- ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL), "");
-
- LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
- result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
-
- if (ctx->abi->robust_buffer_access) {
- ac_build_endif(&ctx->ac, -1);
-
- LLVMBasicBlockRef incoming_blocks[2] = {
- start_block,
- then_block,
- };
-
- LLVMValueRef incoming_values[2] = {
- LLVMConstInt(ctx->ac.i64, 0, 0),
- result,
- };
- LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
- LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
- return ret;
- } else {
- return result;
- }
-}
-
-static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
-{
- LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
- const char *op;
- char name[64], type[8];
- LLVMValueRef params[6], descriptor;
- int arg_count = 0;
-
- switch (instr->intrinsic) {
- case nir_intrinsic_ssbo_atomic_add:
- op = "add";
- break;
- case nir_intrinsic_ssbo_atomic_imin:
- op = "smin";
- break;
- case nir_intrinsic_ssbo_atomic_umin:
- op = "umin";
- break;
- case nir_intrinsic_ssbo_atomic_imax:
- op = "smax";
- break;
- case nir_intrinsic_ssbo_atomic_umax:
- op = "umax";
- break;
- case nir_intrinsic_ssbo_atomic_and:
- op = "and";
- break;
- case nir_intrinsic_ssbo_atomic_or:
- op = "or";
- break;
- case nir_intrinsic_ssbo_atomic_xor:
- op = "xor";
- break;
- case nir_intrinsic_ssbo_atomic_exchange:
- op = "swap";
- break;
- case nir_intrinsic_ssbo_atomic_comp_swap:
- op = "cmpswap";
- break;
- default:
- abort();
- }
-
- descriptor = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[0]),
- true);
-
- if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap &&
- return_type == ctx->ac.i64) {
- return emit_ssbo_comp_swap_64(ctx, descriptor,
- get_src(ctx, instr->src[1]),
- get_src(ctx, instr->src[2]),
- get_src(ctx, instr->src[3]));
- }
- if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
- params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
- }
- params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
- params[arg_count++] = descriptor;
-
- if (LLVM_VERSION_MAJOR >= 9) {
- /* XXX: The new raw/struct atomic intrinsics are buggy with
- * LLVM 8, see r358579.
- */
- params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->ac.i32_0; /* soffset */
- params[arg_count++] = ctx->ac.i32_0; /* slc */
-
- ac_build_type_name_for_intr(return_type, type, sizeof(type));
- snprintf(name, sizeof(name),
- "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
- } else {
- params[arg_count++] = ctx->ac.i32_0; /* vindex */
- params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
- params[arg_count++] = ctx->ac.i1false; /* slc */
-
- assert(return_type == ctx->ac.i32);
- snprintf(name, sizeof(name),
- "llvm.amdgcn.buffer.atomic.%s", op);
- }
-
- return ac_build_intrinsic(&ctx->ac, name, return_type, params,
- arg_count, 0);
-}
-
-static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
-{
- int elem_size_bytes = instr->dest.ssa.bit_size / 8;
- int num_components = instr->num_components;
- enum gl_access_qualifier access = nir_intrinsic_access(instr);
- unsigned cache_policy = get_cache_policy(ctx, access, false, false);
-
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
- get_src(ctx, instr->src[0]), false);
- LLVMValueRef vindex = ctx->ac.i32_0;
-
- LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
- LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
-
- LLVMValueRef results[4];
- for (int i = 0; i < num_components;) {
- int num_elems = num_components - i;
- if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
- num_elems = 1;
- if (num_elems * elem_size_bytes > 16)
- num_elems = 16 / elem_size_bytes;
- int load_bytes = num_elems * elem_size_bytes;
-
- LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
-
- LLVMValueRef ret;
-
- if (load_bytes == 1) {
- ret = ac_build_tbuffer_load_byte(&ctx->ac,
- rsrc,
- offset,
- ctx->ac.i32_0,
- immoffset,
- cache_policy);
- } else if (load_bytes == 2) {
- ret = ac_build_tbuffer_load_short(&ctx->ac,
- rsrc,
- offset,
- ctx->ac.i32_0,
- immoffset,
- cache_policy);
- } else {
- int num_channels = util_next_power_of_two(load_bytes) / 4;
- bool can_speculate = access & ACCESS_CAN_REORDER;
-
- ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
- vindex, offset, immoffset, 0,
- cache_policy, can_speculate, false);
- }
-
- LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
- ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
- ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
-
- LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
- ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
-
- for (unsigned j = 0; j < num_elems; j++) {
- results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
- }
- i += num_elems;
- }
-
- return ac_build_gather_values(&ctx->ac, results, num_components);
-}
-
-static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
-{
- LLVMValueRef ret;
- LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- int num_components = instr->num_components;
-
- if (ctx->abi->load_ubo)
- rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
-
- if (instr->dest.ssa.bit_size == 64)
- num_components *= 2;
-
- if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
- unsigned load_bytes = instr->dest.ssa.bit_size / 8;
- LLVMValueRef results[num_components];
- for (unsigned i = 0; i < num_components; ++i) {
- LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32,
- load_bytes * i, 0);
-
- if (load_bytes == 1) {
- results[i] = ac_build_tbuffer_load_byte(&ctx->ac,
- rsrc,
- offset,
- ctx->ac.i32_0,
- immoffset,
- 0);
- } else {
- assert(load_bytes == 2);
- results[i] = ac_build_tbuffer_load_short(&ctx->ac,
- rsrc,
- offset,
- ctx->ac.i32_0,
- immoffset,
- 0);
- }
- }
- ret = ac_build_gather_values(&ctx->ac, results, num_components);
- } else {
- ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
- NULL, 0, 0, true, true);
-
- ret = ac_trim_vector(&ctx->ac, ret, num_components);
- }
-
- return LLVMBuildBitCast(ctx->ac.builder, ret,
- get_def_type(ctx, &instr->dest.ssa), "");
-}
-
-static void
-get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr,
- bool vs_in, unsigned *vertex_index_out,
- LLVMValueRef *vertex_index_ref,
- unsigned *const_out, LLVMValueRef *indir_out)
-{
- nir_variable *var = nir_deref_instr_get_variable(instr);
- nir_deref_path path;
- unsigned idx_lvl = 1;
-
- nir_deref_path_init(&path, instr, NULL);
-
- if (vertex_index_out != NULL || vertex_index_ref != NULL) {
- if (vertex_index_ref) {
- *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
- if (vertex_index_out)
- *vertex_index_out = 0;
- } else {
- *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
- }
- ++idx_lvl;
- }
-
- uint32_t const_offset = 0;
- LLVMValueRef offset = NULL;
-
- if (var->data.compact) {
- assert(instr->deref_type == nir_deref_type_array);
- const_offset = nir_src_as_uint(instr->arr.index);
- goto out;
- }
-
- for (; path.path[idx_lvl]; ++idx_lvl) {
- const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
- if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
- unsigned index = path.path[idx_lvl]->strct.index;
-
- for (unsigned i = 0; i < index; i++) {
- const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
- const_offset += glsl_count_attribute_slots(ft, vs_in);
- }
- } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
- unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
- if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
- const_offset += size *
- nir_src_as_uint(path.path[idx_lvl]->arr.index);
- } else {
- LLVMValueRef array_off = LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0),
- get_src(ctx, path.path[idx_lvl]->arr.index), "");
- if (offset)
- offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, "");
- else
- offset = array_off;
- }
- } else
- unreachable("Uhandled deref type in get_deref_instr_offset");
- }
-
-out:
- nir_deref_path_finish(&path);
-
- if (const_offset && offset)
- offset = LLVMBuildAdd(ctx->ac.builder, offset,
- LLVMConstInt(ctx->ac.i32, const_offset, 0),
- "");
-
- *const_out = const_offset;
- *indir_out = offset;
-}
-
-static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr,
- bool load_inputs)
-{
- LLVMValueRef result;
- LLVMValueRef vertex_index = NULL;
- LLVMValueRef indir_index = NULL;
- unsigned const_index = 0;
-
- nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
-
- unsigned location = var->data.location;
- unsigned driver_location = var->data.driver_location;
- const bool is_patch = var->data.patch;
- const bool is_compact = var->data.compact;
-
- get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
- false, NULL, is_patch ? NULL : &vertex_index,
- &const_index, &indir_index);
-
- LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
-
- LLVMTypeRef src_component_type;
- if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
- src_component_type = LLVMGetElementType(dest_type);
- else
- src_component_type = dest_type;
-
- result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type,
- vertex_index, indir_index,
- const_index, location, driver_location,
- var->data.location_frac,
- instr->num_components,
- is_patch, is_compact, load_inputs);
- if (instr->dest.ssa.bit_size == 16) {
- result = ac_to_integer(&ctx->ac, result);
- result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
- }
- return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
-}
-
-static unsigned
-type_scalar_size_bytes(const struct glsl_type *type)
-{
- assert(glsl_type_is_vector_or_scalar(type) ||
- glsl_type_is_matrix(type));
- return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
-}
-
-static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
-{
- nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
- nir_variable *var = nir_deref_instr_get_variable(deref);
-
- LLVMValueRef values[8];
- int idx = 0;
- int ve = instr->dest.ssa.num_components;
- unsigned comp = 0;
- LLVMValueRef indir_index;
- LLVMValueRef ret;
- unsigned const_index;
- unsigned stride = 4;
- int mode = deref->mode;
-
- if (var) {
- bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
- var->data.mode == nir_var_shader_in;
- idx = var->data.driver_location;
- comp = var->data.location_frac;
- mode = var->data.mode;
-
- get_deref_offset(ctx, deref, vs_in, NULL, NULL,
- &const_index, &indir_index);
-
- if (var->data.compact) {
- stride = 1;
- const_index += comp;
- comp = 0;
- }
- }
-
- if (instr->dest.ssa.bit_size == 64 &&
- (deref->mode == nir_var_shader_in ||
- deref->mode == nir_var_shader_out ||
- deref->mode == nir_var_function_temp))
- ve *= 2;
-
- switch (mode) {
- case nir_var_shader_in:
- if (ctx->stage == MESA_SHADER_TESS_CTRL ||
- ctx->stage == MESA_SHADER_TESS_EVAL) {
- return load_tess_varyings(ctx, instr, true);
- }
-
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- LLVMValueRef indir_index;
- unsigned const_index, vertex_index;
- get_deref_offset(ctx, deref, false, &vertex_index, NULL,
- &const_index, &indir_index);
- assert(indir_index == NULL);
-
- return ctx->abi->load_inputs(ctx->abi, var->data.location,
- var->data.driver_location,
- var->data.location_frac,
- instr->num_components, vertex_index, const_index, type);
- }
-
- for (unsigned chan = comp; chan < ve + comp; chan++) {
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type,
- ctx->stage == MESA_SHADER_VERTEX);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->abi->inputs + idx + chan, count,
- stride, false, true);
-
- values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
- tmp_vec,
- indir_index, "");
- } else
- values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
- }
- break;
- case nir_var_function_temp:
- for (unsigned chan = 0; chan < ve; chan++) {
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type, false);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->locals + idx + chan, count,
- stride, true, true);
-
- values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
- tmp_vec,
- indir_index, "");
- } else {
- values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
- }
- }
- break;
- case nir_var_mem_shared: {
- LLVMValueRef address = get_src(ctx, instr->src[0]);
- LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
- return LLVMBuildBitCast(ctx->ac.builder, val,
- get_def_type(ctx, &instr->dest.ssa),
- "");
- }
- case nir_var_shader_out:
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- return load_tess_varyings(ctx, instr, false);
- }
-
- if (ctx->stage == MESA_SHADER_FRAGMENT &&
- var->data.fb_fetch_output &&
- ctx->abi->emit_fbfetch)
- return ctx->abi->emit_fbfetch(ctx->abi);
-
- for (unsigned chan = comp; chan < ve + comp; chan++) {
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type, false);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->abi->outputs + idx + chan, count,
- stride, true, true);
-
- values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
- tmp_vec,
- indir_index, "");
- } else {
- values[chan] = LLVMBuildLoad(ctx->ac.builder,
- ctx->abi->outputs[idx + chan + const_index * stride],
- "");
- }
- }
- break;
- case nir_var_mem_global: {
- LLVMValueRef address = get_src(ctx, instr->src[0]);
- unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
- unsigned natural_stride = type_scalar_size_bytes(deref->type);
- unsigned stride = explicit_stride ? explicit_stride : natural_stride;
-
- LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
- if (stride != natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(result_type),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
-
- for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
- values[i] = LLVMBuildLoad(ctx->ac.builder,
- ac_build_gep_ptr(&ctx->ac, address, offset), "");
- }
- return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
- } else {
- LLVMTypeRef ptr_type = LLVMPointerType(result_type,
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
- LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
- return val;
- }
- }
- default:
- unreachable("unhandle variable mode");
- }
- ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
- return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
-}
-
-static void
-visit_store_var(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
-{
- nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
- nir_variable *var = nir_deref_instr_get_variable(deref);
-
- LLVMValueRef temp_ptr, value;
- int idx = 0;
- unsigned comp = 0;
- LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
- int writemask = instr->const_index[0];
- LLVMValueRef indir_index;
- unsigned const_index;
-
- if (var) {
- get_deref_offset(ctx, deref, false,
- NULL, NULL, &const_index, &indir_index);
- idx = var->data.driver_location;
- comp = var->data.location_frac;
-
- if (var->data.compact) {
- const_index += comp;
- comp = 0;
- }
- }
-
- if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&
- (deref->mode == nir_var_shader_out ||
- deref->mode == nir_var_function_temp)) {
-
- src = LLVMBuildBitCast(ctx->ac.builder, src,
- LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
- "");
-
- writemask = widen_mask(writemask, 2);
- }
-
- writemask = writemask << comp;
-
- switch (deref->mode) {
- case nir_var_shader_out:
-
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- LLVMValueRef vertex_index = NULL;
- LLVMValueRef indir_index = NULL;
- unsigned const_index = 0;
- const bool is_patch = var->data.patch;
-
- get_deref_offset(ctx, deref, false, NULL,
- is_patch ? NULL : &vertex_index,
- &const_index, &indir_index);
-
- ctx->abi->store_tcs_outputs(ctx->abi, var,
- vertex_index, indir_index,
- const_index, src, writemask);
- return;
- }
-
- for (unsigned chan = 0; chan < 8; chan++) {
- int stride = 4;
- if (!(writemask & (1 << chan)))
- continue;
-
- value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
-
- if (var->data.compact)
- stride = 1;
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type, false);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->abi->outputs + idx + chan, count,
- stride, true, true);
-
- tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
- value, indir_index, "");
- build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan,
- count, stride, tmp_vec);
-
- } else {
- temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];
-
- LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
- }
- }
- break;
- case nir_var_function_temp:
- for (unsigned chan = 0; chan < 8; chan++) {
- if (!(writemask & (1 << chan)))
- continue;
-
- value = ac_llvm_extract_elem(&ctx->ac, src, chan);
- if (indir_index) {
- unsigned count = glsl_count_attribute_slots(
- var->type, false);
- count -= chan / 4;
- LLVMValueRef tmp_vec = ac_build_gather_values_extended(
- &ctx->ac, ctx->locals + idx + chan, count,
- 4, true, true);
-
- tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
- value, indir_index, "");
- build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
- count, 4, tmp_vec);
- } else {
- temp_ptr = ctx->locals[idx + chan + const_index * 4];
-
- LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
- }
- }
- break;
-
- case nir_var_mem_global:
- case nir_var_mem_shared: {
- int writemask = instr->const_index[0];
- LLVMValueRef address = get_src(ctx, instr->src[0]);
- LLVMValueRef val = get_src(ctx, instr->src[1]);
-
- unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
- unsigned natural_stride = type_scalar_size_bytes(deref->type);
- unsigned stride = explicit_stride ? explicit_stride : natural_stride;
-
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
-
- if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
- stride == natural_stride) {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
-
- val = LLVMBuildBitCast(ctx->ac.builder, val,
- LLVMGetElementType(LLVMTypeOf(address)), "");
- LLVMBuildStore(ctx->ac.builder, val, address);
- } else {
- LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)),
- LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
- address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
- for (unsigned chan = 0; chan < 4; chan++) {
- if (!(writemask & (1 << chan)))
- continue;
-
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0);
-
- LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset);
- LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
- chan);
- src = LLVMBuildBitCast(ctx->ac.builder, src,
- LLVMGetElementType(LLVMTypeOf(ptr)), "");
- LLVMBuildStore(ctx->ac.builder, src, ptr);
- }
- }
- break;
- }
- default:
- abort();
- break;
- }
-}
-
-static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
-{
- switch (dim) {
- case GLSL_SAMPLER_DIM_BUF:
- return 1;
- case GLSL_SAMPLER_DIM_1D:
- return array ? 2 : 1;
- case GLSL_SAMPLER_DIM_2D:
- return array ? 3 : 2;
- case GLSL_SAMPLER_DIM_MS:
- return array ? 4 : 3;
- case GLSL_SAMPLER_DIM_3D:
- case GLSL_SAMPLER_DIM_CUBE:
- return 3;
- case GLSL_SAMPLER_DIM_RECT:
- case GLSL_SAMPLER_DIM_SUBPASS:
- return 2;
- case GLSL_SAMPLER_DIM_SUBPASS_MS:
- return 3;
- default:
- break;
- }
- return 0;
-}
-
-static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
- LLVMValueRef coord_x, LLVMValueRef coord_y,
- LLVMValueRef coord_z,
- LLVMValueRef sample_index,
- LLVMValueRef fmask_desc_ptr)
-{
- unsigned sample_chan = coord_z ? 3 : 2;
- LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
- addr[sample_chan] = sample_index;
-
- ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);
- return addr[sample_chan];
-}
-
-static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
-{
- assert(instr->src[0].is_ssa);
- return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
-}
-
-static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- enum ac_descriptor_type desc_type,
- bool write)
-{
- nir_deref_instr *deref_instr =
- instr->src[0].ssa->parent_instr->type == nir_instr_type_deref ?
- nir_instr_as_deref(instr->src[0].ssa->parent_instr) : NULL;
-
- return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, true, write);
-}
-
-static void get_image_coords(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- struct ac_image_args *args,
- enum glsl_sampler_dim dim,
- bool is_array)
-{
- LLVMValueRef src0 = get_src(ctx, instr->src[1]);
- LLVMValueRef masks[] = {
- LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
- LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
- };
- LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
-
- int count;
- ASSERTED bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
- dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
- bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
- dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
- bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
- assert(!add_frag_pos && "Input attachments should be lowered by this point.");
- count = image_type_to_components_count(dim, is_array);
-
- if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
- instr->intrinsic == nir_intrinsic_bindless_image_load)) {
- LLVMValueRef fmask_load_address[3];
-
- fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
- fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
- if (is_array)
- fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
- else
- fmask_load_address[2] = NULL;
-
- sample_index = adjust_sample_index_using_fmask(&ctx->ac,
- fmask_load_address[0],
- fmask_load_address[1],
- fmask_load_address[2],
- sample_index,
- get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
- AC_DESC_FMASK, &instr->instr, true, false));
- }
- if (count == 1 && !gfx9_1d) {
- if (instr->src[1].ssa->num_components)
- args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
- else
- args->coords[0] = src0;
- } else {
- int chan;
- if (is_ms)
- count--;
- for (chan = 0; chan < count; ++chan) {
- args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
- }
-
- if (gfx9_1d) {
- if (is_array) {
- args->coords[2] = args->coords[1];
- args->coords[1] = ctx->ac.i32_0;
- } else
- args->coords[1] = ctx->ac.i32_0;
- count++;
- }
- if (ctx->ac.chip_class == GFX9 &&
- dim == GLSL_SAMPLER_DIM_2D &&
- !is_array) {
- /* The hw can't bind a slice of a 3D image as a 2D
- * image, because it ignores BASE_ARRAY if the target
- * is 3D. The workaround is to read BASE_ARRAY and set
- * it as the 3rd address operand for all 2D images.
- */
- LLVMValueRef first_layer, const5, mask;
-
- const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
- mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
- first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
- first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");
-
- args->coords[count] = first_layer;
- count++;
- }
-
-
- if (is_ms) {
- args->coords[count] = sample_index;
- count++;
- }
- }
-}
-
-static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool write, bool atomic)
-{
- LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
- if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) {
- LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
- LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
- stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
-
- LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
- LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
- elem_count, stride, "");
-
- rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
- LLVMConstInt(ctx->ac.i32, 2, 0), "");
- }
- return rsrc;
-}
-
-static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
-{
- LLVMValueRef res;
-
- enum glsl_sampler_dim dim;
- enum gl_access_qualifier access;
- bool is_array;
- if (bindless) {
- dim = nir_intrinsic_image_dim(instr);
- access = nir_intrinsic_access(instr);
- is_array = nir_intrinsic_image_array(instr);
- } else {
- const nir_deref_instr *image_deref = get_image_deref(instr);
- const struct glsl_type *type = image_deref->type;
- const nir_variable *var = nir_deref_instr_get_variable(image_deref);
- dim = glsl_get_sampler_dim(type);
- access = var->data.image.access;
- is_array = glsl_sampler_type_is_array(type);
- }
-
- struct ac_image_args args = {};
-
- args.cache_policy = get_cache_policy(ctx, access, false, false);
-
- if (dim == GLSL_SAMPLER_DIM_BUF) {
- unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
- unsigned num_channels = util_last_bit(mask);
- LLVMValueRef rsrc, vindex;
-
- rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
- vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
- ctx->ac.i32_0, "");
-
- bool can_speculate = access & ACCESS_CAN_REORDER;
- res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
- ctx->ac.i32_0, num_channels,
- args.cache_policy,
- can_speculate);
- res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
-
- res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
- res = ac_to_integer(&ctx->ac, res);
- } else {
- args.opcode = ac_image_load;
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
- get_image_coords(ctx, instr, &args, dim, is_array);
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- args.dmask = 15;
- args.attributes = AC_FUNC_ATTR_READONLY;
-
- res = ac_build_image_opcode(&ctx->ac, &args);
- }
- return res;
-}
-
-static void visit_image_store(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr,
- bool bindless)
-{
-
-
- enum glsl_sampler_dim dim;
- enum gl_access_qualifier access;
- bool is_array;
- if (bindless) {
- dim = nir_intrinsic_image_dim(instr);
- access = nir_intrinsic_access(instr);
- is_array = nir_intrinsic_image_array(instr);
- } else {
- const nir_deref_instr *image_deref = get_image_deref(instr);
- const struct glsl_type *type = image_deref->type;
- const nir_variable *var = nir_deref_instr_get_variable(image_deref);
- dim = glsl_get_sampler_dim(type);
- access = var->data.image.access;
- is_array = glsl_sampler_type_is_array(type);
- }
-
- bool writeonly_memory = access & ACCESS_NON_READABLE;
- struct ac_image_args args = {};
-
- args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
-
- if (dim == GLSL_SAMPLER_DIM_BUF) {
- LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
- LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
- unsigned src_channels = ac_get_llvm_num_components(src);
- LLVMValueRef vindex;
-
- if (src_channels == 3)
- src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
-
- vindex = LLVMBuildExtractElement(ctx->ac.builder,
- get_src(ctx, instr->src[1]),
- ctx->ac.i32_0, "");
-
- ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
- ctx->ac.i32_0, src_channels,
- args.cache_policy);
- } else {
- args.opcode = ac_image_store;
- args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
- get_image_coords(ctx, instr, &args, dim, is_array);
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- args.dmask = 15;
-
- ac_build_image_opcode(&ctx->ac, &args);
- }
-
-}
-
-static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
-{
- LLVMValueRef params[7];
- int param_count = 0;
-
- bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
- instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
- const char *atomic_name;
- char intrinsic_name[64];
- enum ac_atomic_op atomic_subop;
- ASSERTED int length;
-
- enum glsl_sampler_dim dim;
- bool is_array;
- if (bindless) {
- if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_imin ||
- instr->intrinsic == nir_intrinsic_bindless_image_atomic_umin ||
- instr->intrinsic == nir_intrinsic_bindless_image_atomic_imax ||
- instr->intrinsic == nir_intrinsic_bindless_image_atomic_umax) {
- const GLenum format = nir_intrinsic_format(instr);
- assert(format == GL_R32UI || format == GL_R32I);
- }
- dim = nir_intrinsic_image_dim(instr);
- is_array = nir_intrinsic_image_array(instr);
- } else {
- const struct glsl_type *type = get_image_deref(instr)->type;
- dim = glsl_get_sampler_dim(type);
- is_array = glsl_sampler_type_is_array(type);
- }
-
- switch (instr->intrinsic) {
- case nir_intrinsic_bindless_image_atomic_add:
- case nir_intrinsic_image_deref_atomic_add:
- atomic_name = "add";
- atomic_subop = ac_atomic_add;
- break;
- case nir_intrinsic_bindless_image_atomic_imin:
- case nir_intrinsic_image_deref_atomic_imin:
- atomic_name = "smin";
- atomic_subop = ac_atomic_smin;
- break;
- case nir_intrinsic_bindless_image_atomic_umin:
- case nir_intrinsic_image_deref_atomic_umin:
- atomic_name = "umin";
- atomic_subop = ac_atomic_umin;
- break;
- case nir_intrinsic_bindless_image_atomic_imax:
- case nir_intrinsic_image_deref_atomic_imax:
- atomic_name = "smax";
- atomic_subop = ac_atomic_smax;
- break;
- case nir_intrinsic_bindless_image_atomic_umax:
- case nir_intrinsic_image_deref_atomic_umax:
- atomic_name = "umax";
- atomic_subop = ac_atomic_umax;
- break;
- case nir_intrinsic_bindless_image_atomic_and:
- case nir_intrinsic_image_deref_atomic_and:
- atomic_name = "and";
- atomic_subop = ac_atomic_and;
- break;
- case nir_intrinsic_bindless_image_atomic_or:
- case nir_intrinsic_image_deref_atomic_or:
- atomic_name = "or";
- atomic_subop = ac_atomic_or;
- break;
- case nir_intrinsic_bindless_image_atomic_xor:
- case nir_intrinsic_image_deref_atomic_xor:
- atomic_name = "xor";
- atomic_subop = ac_atomic_xor;
- break;
- case nir_intrinsic_bindless_image_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_exchange:
- atomic_name = "swap";
- atomic_subop = ac_atomic_swap;
- break;
- case nir_intrinsic_bindless_image_atomic_comp_swap:
- case nir_intrinsic_image_deref_atomic_comp_swap:
- atomic_name = "cmpswap";
- atomic_subop = 0; /* not used */
- break;
- case nir_intrinsic_bindless_image_atomic_inc_wrap:
- case nir_intrinsic_image_deref_atomic_inc_wrap: {
- atomic_name = "inc";
- atomic_subop = ac_atomic_inc_wrap;
- /* ATOMIC_INC instruction does:
- * value = (value + 1) % (data + 1)
- * but we want:
- * value = (value + 1) % data
- * So replace 'data' by 'data - 1'.
- */
- ctx->ssa_defs[instr->src[3].ssa->index] =
- LLVMBuildSub(ctx->ac.builder,
- ctx->ssa_defs[instr->src[3].ssa->index],
- ctx->ac.i32_1, "");
- break;
- }
- case nir_intrinsic_bindless_image_atomic_dec_wrap:
- case nir_intrinsic_image_deref_atomic_dec_wrap:
- atomic_name = "dec";
- atomic_subop = ac_atomic_dec_wrap;
- break;
- default:
- abort();
- }
-
- if (cmpswap)
- params[param_count++] = get_src(ctx, instr->src[4]);
- params[param_count++] = get_src(ctx, instr->src[3]);
-
- if (dim == GLSL_SAMPLER_DIM_BUF) {
- params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
- params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
- ctx->ac.i32_0, ""); /* vindex */
- params[param_count++] = ctx->ac.i32_0; /* voffset */
- if (LLVM_VERSION_MAJOR >= 9) {
- /* XXX: The new raw/struct atomic intrinsics are buggy
- * with LLVM 8, see r358579.
- */
- params[param_count++] = ctx->ac.i32_0; /* soffset */
- params[param_count++] = ctx->ac.i32_0; /* slc */
-
- length = snprintf(intrinsic_name, sizeof(intrinsic_name),
- "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name);
- } else {
- params[param_count++] = ctx->ac.i1false; /* slc */
-
- length = snprintf(intrinsic_name, sizeof(intrinsic_name),
- "llvm.amdgcn.buffer.atomic.%s", atomic_name);
- }
-
- assert(length < sizeof(intrinsic_name));
- return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
- params, param_count, 0);
- } else {
- struct ac_image_args args = {};
- args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
- args.atomic = atomic_subop;
- args.data[0] = params[0];
- if (cmpswap)
- args.data[1] = params[1];
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
- get_image_coords(ctx, instr, &args, dim, is_array);
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
-
- return ac_build_image_opcode(&ctx->ac, &args);
- }
-}
-
-static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
-{
- LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
-
- return ac_build_image_get_sample_count(&ctx->ac, rsrc);
-}
-
-static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- bool bindless)
-{
- LLVMValueRef res;
-
- enum glsl_sampler_dim dim;
- bool is_array;
- if (bindless) {
- dim = nir_intrinsic_image_dim(instr);
- is_array = nir_intrinsic_image_array(instr);
- } else {
- const struct glsl_type *type = get_image_deref(instr)->type;
- dim = glsl_get_sampler_dim(type);
- is_array = glsl_sampler_type_is_array(type);
- }
-
- if (dim == GLSL_SAMPLER_DIM_BUF)
- return get_buffer_size(ctx, get_image_descriptor(ctx, instr, AC_DESC_BUFFER, false), true);
-
- struct ac_image_args args = { 0 };
-
- args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
- args.dmask = 0xf;
- args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
- args.opcode = ac_image_get_resinfo;
- args.lod = ctx->ac.i32_0;
- args.attributes = AC_FUNC_ATTR_READNONE;
-
- res = ac_build_image_opcode(&ctx->ac, &args);
-
- LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
-
- if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
- LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
- LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
- z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
- res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
- }
- if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
- LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
- res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
- ctx->ac.i32_1, "");
-
- }
- return res;
-}
-
-static void emit_membar(struct ac_llvm_context *ac,
- const nir_intrinsic_instr *instr)
-{
- unsigned wait_flags = 0;
-
- switch (instr->intrinsic) {
- case nir_intrinsic_memory_barrier:
- case nir_intrinsic_group_memory_barrier:
- wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
- break;
- case nir_intrinsic_memory_barrier_atomic_counter:
- case nir_intrinsic_memory_barrier_buffer:
- case nir_intrinsic_memory_barrier_image:
- wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
- break;
- case nir_intrinsic_memory_barrier_shared:
- wait_flags = AC_WAIT_LGKM;
- break;
- default:
- break;
- }
-
- ac_build_waitcnt(ac, wait_flags);
-}
-
-void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
-{
- /* GFX6 only (thanks to a hw bug workaround):
- * The real barrier instruction isn’t needed, because an entire patch
- * always fits into a single wave.
- */
- if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
- ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
- return;
- }
- ac_build_s_barrier(ac);
-}
-
-static void emit_discard(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
-{
- LLVMValueRef cond;
-
- if (instr->intrinsic == nir_intrinsic_discard_if) {
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
- get_src(ctx, instr->src[0]),
- ctx->ac.i32_0, "");
- } else {
- assert(instr->intrinsic == nir_intrinsic_discard);
- cond = ctx->ac.i1false;
- }
-
- ctx->abi->emit_kill(ctx->abi, cond);
-}
-
-static LLVMValueRef
-visit_load_local_invocation_index(struct ac_nir_context *ctx)
-{
- LLVMValueRef result;
- LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
- result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
- LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
-
- return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
-}
-
-static LLVMValueRef
-visit_load_subgroup_id(struct ac_nir_context *ctx)
-{
- if (ctx->stage == MESA_SHADER_COMPUTE) {
- LLVMValueRef result;
- result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
- LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
- return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
- } else {
- return LLVMConstInt(ctx->ac.i32, 0, false);
- }
-}
-
-static LLVMValueRef
-visit_load_num_subgroups(struct ac_nir_context *ctx)
-{
- if (ctx->stage == MESA_SHADER_COMPUTE) {
- return LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
- LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
- } else {
- return LLVMConstInt(ctx->ac.i32, 1, false);
- }
-}
-
-static LLVMValueRef
-visit_first_invocation(struct ac_nir_context *ctx)
-{
- LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
- const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
-
- /* The second argument is whether cttz(0) should be defined, but we do not care. */
- LLVMValueRef args[] = {active_set, ctx->ac.i1false};
- LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr,
- ctx->ac.iN_wavemask, args, 2,
- AC_FUNC_ATTR_NOUNWIND |
- AC_FUNC_ATTR_READNONE);
-
- return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
-}
-
-static LLVMValueRef
-visit_load_shared(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
-{
- LLVMValueRef values[4], derived_ptr, index, ret;
-
- LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
-
- for (int chan = 0; chan < instr->num_components; chan++) {
- index = LLVMConstInt(ctx->ac.i32, chan, 0);
- derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
- values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
- }
-
- ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);
- return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
-}
-
-static void
-visit_store_shared(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr)
-{
- LLVMValueRef derived_ptr, data,index;
- LLVMBuilderRef builder = ctx->ac.builder;
-
- LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1]);
- LLVMValueRef src = get_src(ctx, instr->src[0]);
-
- int writemask = nir_intrinsic_write_mask(instr);
- for (int chan = 0; chan < 4; chan++) {
- if (!(writemask & (1 << chan))) {
- continue;
- }
- data = ac_llvm_extract_elem(&ctx->ac, src, chan);
- index = LLVMConstInt(ctx->ac.i32, chan, 0);
- derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
- LLVMBuildStore(builder, data, derived_ptr);
- }
-}
-
-static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
- const nir_intrinsic_instr *instr,
- LLVMValueRef ptr, int src_idx)
-{
- LLVMValueRef result;
- LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
-
- const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
-
- if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
- instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
- LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
- result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
- result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
- } else {
- LLVMAtomicRMWBinOp op;
- switch (instr->intrinsic) {
- case nir_intrinsic_shared_atomic_add:
- case nir_intrinsic_deref_atomic_add:
- op = LLVMAtomicRMWBinOpAdd;
- break;
- case nir_intrinsic_shared_atomic_umin:
- case nir_intrinsic_deref_atomic_umin:
- op = LLVMAtomicRMWBinOpUMin;
- break;
- case nir_intrinsic_shared_atomic_umax:
- case nir_intrinsic_deref_atomic_umax:
- op = LLVMAtomicRMWBinOpUMax;
- break;
- case nir_intrinsic_shared_atomic_imin:
- case nir_intrinsic_deref_atomic_imin:
- op = LLVMAtomicRMWBinOpMin;
- break;
- case nir_intrinsic_shared_atomic_imax:
- case nir_intrinsic_deref_atomic_imax:
- op = LLVMAtomicRMWBinOpMax;
- break;
- case nir_intrinsic_shared_atomic_and:
- case nir_intrinsic_deref_atomic_and:
- op = LLVMAtomicRMWBinOpAnd;
- break;
- case nir_intrinsic_shared_atomic_or:
- case nir_intrinsic_deref_atomic_or:
- op = LLVMAtomicRMWBinOpOr;
- break;
- case nir_intrinsic_shared_atomic_xor:
- case nir_intrinsic_deref_atomic_xor:
- op = LLVMAtomicRMWBinOpXor;
- break;
- case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_deref_atomic_exchange:
- op = LLVMAtomicRMWBinOpXchg;
- break;
- default:
- return NULL;
- }
-
- result = ac_build_atomic_rmw(&ctx->ac, op, ptr, ac_to_integer(&ctx->ac, src), sync_scope);
- }
- return result;
-}
-
-static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
-{
- LLVMValueRef values[2];
- LLVMValueRef pos[2];
-
- pos[0] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[0]);
- pos[1] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[1]);
-
- values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
- values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
- return ac_build_gather_values(&ctx->ac, values, 2);
-}
-
-static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx,
- enum glsl_interp_mode interp, unsigned location)
-{
- switch (interp) {
- case INTERP_MODE_FLAT:
- default:
- return NULL;
- case INTERP_MODE_SMOOTH:
- case INTERP_MODE_NONE:
- if (location == INTERP_CENTER)
- return ctx->abi->persp_center;
- else if (location == INTERP_CENTROID)
- return ctx->abi->persp_centroid;
- else if (location == INTERP_SAMPLE)
- return ctx->abi->persp_sample;
- break;
- case INTERP_MODE_NOPERSPECTIVE:
- if (location == INTERP_CENTER)
- return ctx->abi->linear_center;
- else if (location == INTERP_CENTROID)
- return ctx->abi->linear_centroid;
- else if (location == INTERP_SAMPLE)
- return ctx->abi->linear_sample;
- break;
- }
- return NULL;
-}
-
-static LLVMValueRef barycentric_center(struct ac_nir_context *ctx,
- unsigned mode)
-{
- LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
- return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
-}
-
-static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx,
- unsigned mode,
- LLVMValueRef offset)
-{
- LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
- LLVMValueRef src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));
- LLVMValueRef src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));
-
- LLVMValueRef ij_out[2];
- LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
-
- /*
- * take the I then J parameters, and the DDX/Y for it, and
- * calculate the IJ inputs for the interpolator.
- * temp1 = ddx * offset/sample.x + I;
- * interp_param.I = ddy * offset/sample.y + temp1;
- * temp1 = ddx * offset/sample.x + J;
- * interp_param.J = ddy * offset/sample.y + temp1;
- */
- for (unsigned i = 0; i < 2; i++) {
- LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
- LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
- LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
- ddxy_out, ix_ll, "");
- LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
- ddxy_out, iy_ll, "");
- LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
- interp_param, ix_ll, "");
- LLVMValueRef temp1, temp2;
-
- interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el,
- ctx->ac.f32, "");
-
- temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
- temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);
-
- ij_out[i] = LLVMBuildBitCast(ctx->ac.builder,
- temp2, ctx->ac.i32, "");
- }
- interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
- return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
-}
-
-static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx,
- unsigned mode)
-{
- LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);
- return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
-}
-
-static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx,
- unsigned mode,
- LLVMValueRef sample_id)
-{
- if (ctx->abi->interp_at_sample_force_center)
- return barycentric_center(ctx, mode);
-
- LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
-
- /* fetch sample ID */
- LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);
-
- LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, "");
- src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
- LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, "");
- src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
- LLVMValueRef coords[] = { src_c0, src_c1 };
- LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2);
-
- return barycentric_offset(ctx, mode, offset);
-}
-
-
-static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx,
- unsigned mode)
-{
- LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
- return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
-}
-
-static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx,
- LLVMValueRef interp_param,
- unsigned index, unsigned comp_start,
- unsigned num_components,
- unsigned bitsize)
-{
- LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
-
- interp_param = LLVMBuildBitCast(ctx->ac.builder,
- interp_param, ctx->ac.v2f32, "");
- LLVMValueRef i = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
- LLVMValueRef j = LLVMBuildExtractElement(
- ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
-
- LLVMValueRef values[4];
- assert(bitsize == 16 || bitsize == 32);
- for (unsigned comp = 0; comp < num_components; comp++) {
- LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);
- if (bitsize == 16) {
- values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number,
- ctx->abi->prim_mask, i, j);
- } else {
- values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
- ctx->abi->prim_mask, i, j);
- }
- }
-
- return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
-}
-
-static LLVMValueRef load_flat_input(struct ac_nir_context *ctx,
- unsigned index, unsigned comp_start,
- unsigned num_components,
- unsigned bit_size)
-{
- LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
-
- LLVMValueRef values[8];
-
- /* Each component of a 64-bit value takes up two GL-level channels. */
- unsigned channels =
- bit_size == 64 ? num_components * 2 : num_components;
-
- for (unsigned chan = 0; chan < channels; chan++) {
- if (comp_start + chan > 4)
- attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false);
- LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (comp_start + chan) % 4, false);
- values[chan] = ac_build_fs_interp_mov(&ctx->ac,
- LLVMConstInt(ctx->ac.i32, 2, false),
- llvm_chan,
- attr_number,
- ctx->abi->prim_mask);
- values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
- values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
- bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
- }
-
- LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, channels);
- if (bit_size == 64) {
- LLVMTypeRef type = num_components == 1 ? ctx->ac.i64 :
- LLVMVectorType(ctx->ac.i64, num_components);
- result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
- }
- return result;
-}
-
-static void visit_intrinsic(struct ac_nir_context *ctx,
- nir_intrinsic_instr *instr)
-{
- LLVMValueRef result = NULL;
-
- switch (instr->intrinsic) {
- case nir_intrinsic_ballot:
- result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
- if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
- result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
- break;
- case nir_intrinsic_read_invocation:
- result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
- break;
- case nir_intrinsic_read_first_invocation:
- result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
- break;
- case nir_intrinsic_load_subgroup_invocation:
- result = ac_get_thread_id(&ctx->ac);
- break;
- case nir_intrinsic_load_work_group_id: {
- LLVMValueRef values[3];
-
- for (int i = 0; i < 3; i++) {
- values[i] = ctx->abi->workgroup_ids[i] ?
- ctx->abi->workgroup_ids[i] : ctx->ac.i32_0;
- }
-
- result = ac_build_gather_values(&ctx->ac, values, 3);
- break;
- }
- case nir_intrinsic_load_base_vertex:
- case nir_intrinsic_load_first_vertex:
- result = ctx->abi->load_base_vertex(ctx->abi);
- break;
- case nir_intrinsic_load_local_group_size:
- result = ctx->abi->load_local_group_size(ctx->abi);
- break;
- case nir_intrinsic_load_vertex_id:
- result = LLVMBuildAdd(ctx->ac.builder, ctx->abi->vertex_id,
- ctx->abi->base_vertex, "");
- break;
- case nir_intrinsic_load_vertex_id_zero_base: {
- result = ctx->abi->vertex_id;
- break;
- }
- case nir_intrinsic_load_local_invocation_id: {
- result = ctx->abi->local_invocation_ids;
- break;
- }
- case nir_intrinsic_load_base_instance:
- result = ctx->abi->start_instance;
- break;
- case nir_intrinsic_load_draw_id:
- result = ctx->abi->draw_id;
- break;
- case nir_intrinsic_load_view_index:
- result = ctx->abi->view_index;
- break;
- case nir_intrinsic_load_invocation_id:
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- result = ac_unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5);
- } else {
- if (ctx->ac.chip_class >= GFX10) {
- result = LLVMBuildAnd(ctx->ac.builder,
- ctx->abi->gs_invocation_id,
- LLVMConstInt(ctx->ac.i32, 127, 0), "");
- } else {
- result = ctx->abi->gs_invocation_id;
- }
- }
- break;
- case nir_intrinsic_load_primitive_id:
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- result = ctx->abi->gs_prim_id;
- } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- result = ctx->abi->tcs_patch_id;
- } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- result = ctx->abi->tes_patch_id;
- } else
- fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
- break;
- case nir_intrinsic_load_sample_id:
- result = ac_unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
- break;
- case nir_intrinsic_load_sample_pos:
- result = load_sample_pos(ctx);
- break;
- case nir_intrinsic_load_sample_mask_in:
- result = ctx->abi->load_sample_mask_in(ctx->abi);
- break;
- case nir_intrinsic_load_frag_coord: {
- LLVMValueRef values[4] = {
- ctx->abi->frag_pos[0],
- ctx->abi->frag_pos[1],
- ctx->abi->frag_pos[2],
- ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
- };
- result = ac_to_integer(&ctx->ac,
- ac_build_gather_values(&ctx->ac, values, 4));
- break;
- }
- case nir_intrinsic_load_layer_id:
- result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
- break;
- case nir_intrinsic_load_front_face:
- result = ctx->abi->front_face;
- break;
- case nir_intrinsic_load_helper_invocation:
- result = ac_build_load_helper_invocation(&ctx->ac);
- break;
- case nir_intrinsic_load_color0:
- result = ctx->abi->color0;
- break;
- case nir_intrinsic_load_color1:
- result = ctx->abi->color1;
- break;
- case nir_intrinsic_load_user_data_amd:
- assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
- result = ctx->abi->user_data;
- break;
- case nir_intrinsic_load_instance_id:
- result = ctx->abi->instance_id;
- break;
- case nir_intrinsic_load_num_work_groups:
- result = ctx->abi->num_work_groups;
- break;
- case nir_intrinsic_load_local_invocation_index:
- result = visit_load_local_invocation_index(ctx);
- break;
- case nir_intrinsic_load_subgroup_id:
- result = visit_load_subgroup_id(ctx);
- break;
- case nir_intrinsic_load_num_subgroups:
- result = visit_load_num_subgroups(ctx);
- break;
- case nir_intrinsic_first_invocation:
- result = visit_first_invocation(ctx);
- break;
- case nir_intrinsic_load_push_constant:
- result = visit_load_push_constant(ctx, instr);
- break;
- case nir_intrinsic_vulkan_resource_index: {
- LLVMValueRef index = get_src(ctx, instr->src[0]);
- unsigned desc_set = nir_intrinsic_desc_set(instr);
- unsigned binding = nir_intrinsic_binding(instr);
-
- result = ctx->abi->load_resource(ctx->abi, index, desc_set,
- binding);
- break;
- }
- case nir_intrinsic_vulkan_resource_reindex:
- result = visit_vulkan_resource_reindex(ctx, instr);
- break;
- case nir_intrinsic_store_ssbo:
- visit_store_ssbo(ctx, instr);
- break;
- case nir_intrinsic_load_ssbo:
- result = visit_load_buffer(ctx, instr);
- break;
- case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_ssbo_atomic_umin:
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_ssbo_atomic_umax:
- case nir_intrinsic_ssbo_atomic_and:
- case nir_intrinsic_ssbo_atomic_or:
- case nir_intrinsic_ssbo_atomic_xor:
- case nir_intrinsic_ssbo_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_comp_swap:
- result = visit_atomic_ssbo(ctx, instr);
- break;
- case nir_intrinsic_load_ubo:
- result = visit_load_ubo_buffer(ctx, instr);
- break;
- case nir_intrinsic_get_buffer_size:
- result = visit_get_buffer_size(ctx, instr);
- break;
- case nir_intrinsic_load_deref:
- result = visit_load_var(ctx, instr);
- break;
- case nir_intrinsic_store_deref:
- visit_store_var(ctx, instr);
- break;
- case nir_intrinsic_load_shared:
- result = visit_load_shared(ctx, instr);
- break;
- case nir_intrinsic_store_shared:
- visit_store_shared(ctx, instr);
- break;
- case nir_intrinsic_bindless_image_samples:
- case nir_intrinsic_image_deref_samples:
- result = visit_image_samples(ctx, instr);
- break;
- case nir_intrinsic_bindless_image_load:
- result = visit_image_load(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_load:
- result = visit_image_load(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_store:
- visit_image_store(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_store:
- visit_image_store(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_atomic_add:
- case nir_intrinsic_bindless_image_atomic_imin:
- case nir_intrinsic_bindless_image_atomic_umin:
- case nir_intrinsic_bindless_image_atomic_imax:
- case nir_intrinsic_bindless_image_atomic_umax:
- case nir_intrinsic_bindless_image_atomic_and:
- case nir_intrinsic_bindless_image_atomic_or:
- case nir_intrinsic_bindless_image_atomic_xor:
- case nir_intrinsic_bindless_image_atomic_exchange:
- case nir_intrinsic_bindless_image_atomic_comp_swap:
- case nir_intrinsic_bindless_image_atomic_inc_wrap:
- case nir_intrinsic_bindless_image_atomic_dec_wrap:
- result = visit_image_atomic(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
- case nir_intrinsic_image_deref_atomic_inc_wrap:
- case nir_intrinsic_image_deref_atomic_dec_wrap:
- result = visit_image_atomic(ctx, instr, false);
- break;
- case nir_intrinsic_bindless_image_size:
- result = visit_image_size(ctx, instr, true);
- break;
- case nir_intrinsic_image_deref_size:
- result = visit_image_size(ctx, instr, false);
- break;
- case nir_intrinsic_shader_clock:
- result = ac_build_shader_clock(&ctx->ac);
- break;
- case nir_intrinsic_discard:
- case nir_intrinsic_discard_if:
- emit_discard(ctx, instr);
- break;
- case nir_intrinsic_memory_barrier:
- case nir_intrinsic_group_memory_barrier:
- case nir_intrinsic_memory_barrier_atomic_counter:
- case nir_intrinsic_memory_barrier_buffer:
- case nir_intrinsic_memory_barrier_image:
- case nir_intrinsic_memory_barrier_shared:
- emit_membar(&ctx->ac, instr);
- break;
- case nir_intrinsic_barrier:
- ac_emit_barrier(&ctx->ac, ctx->stage);
- break;
- case nir_intrinsic_shared_atomic_add:
- case nir_intrinsic_shared_atomic_imin:
- case nir_intrinsic_shared_atomic_umin:
- case nir_intrinsic_shared_atomic_imax:
- case nir_intrinsic_shared_atomic_umax:
- case nir_intrinsic_shared_atomic_and:
- case nir_intrinsic_shared_atomic_or:
- case nir_intrinsic_shared_atomic_xor:
- case nir_intrinsic_shared_atomic_exchange:
- case nir_intrinsic_shared_atomic_comp_swap: {
- LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
- result = visit_var_atomic(ctx, instr, ptr, 1);
- break;
- }
- case nir_intrinsic_deref_atomic_add:
- case nir_intrinsic_deref_atomic_imin:
- case nir_intrinsic_deref_atomic_umin:
- case nir_intrinsic_deref_atomic_imax:
- case nir_intrinsic_deref_atomic_umax:
- case nir_intrinsic_deref_atomic_and:
- case nir_intrinsic_deref_atomic_or:
- case nir_intrinsic_deref_atomic_xor:
- case nir_intrinsic_deref_atomic_exchange:
- case nir_intrinsic_deref_atomic_comp_swap: {
- LLVMValueRef ptr = get_src(ctx, instr->src[0]);
- result = visit_var_atomic(ctx, instr, ptr, 1);
- break;
- }
- case nir_intrinsic_load_barycentric_pixel:
- result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_centroid:
- result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_sample:
- result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
- break;
- case nir_intrinsic_load_barycentric_at_offset: {
- LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
- result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
- break;
- }
- case nir_intrinsic_load_barycentric_at_sample: {
- LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
- result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
- break;
- }
- case nir_intrinsic_load_interpolated_input: {
- /* We assume any indirect loads have been lowered away */
- ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
- assert(offset);
- assert(offset[0].i32 == 0);
-
- LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
- unsigned index = nir_intrinsic_base(instr);
- unsigned component = nir_intrinsic_component(instr);
- result = load_interpolated_input(ctx, interp_param, index,
- component,
- instr->dest.ssa.num_components,
- instr->dest.ssa.bit_size);
- break;
- }
- case nir_intrinsic_load_input: {
- /* We only lower inputs for fragment shaders ATM */
- ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[0]);
- assert(offset);
- assert(offset[0].i32 == 0);
-
- unsigned index = nir_intrinsic_base(instr);
- unsigned component = nir_intrinsic_component(instr);
- result = load_flat_input(ctx, index, component,
- instr->dest.ssa.num_components,
- instr->dest.ssa.bit_size);
- break;
- }
- case nir_intrinsic_emit_vertex:
- ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
- break;
- case nir_intrinsic_end_primitive:
- ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
- break;
- case nir_intrinsic_load_tess_coord:
- result = ctx->abi->load_tess_coord(ctx->abi);
- break;
- case nir_intrinsic_load_tess_level_outer:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
- break;
- case nir_intrinsic_load_tess_level_inner:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
- break;
- case nir_intrinsic_load_tess_level_outer_default:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
- break;
- case nir_intrinsic_load_tess_level_inner_default:
- result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
- break;
- case nir_intrinsic_load_patch_vertices_in:
- result = ctx->abi->load_patch_vertices_in(ctx->abi);
- break;
- case nir_intrinsic_vote_all: {
- LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
- result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
- break;
- }
- case nir_intrinsic_vote_any: {
- LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
- result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
- break;
- }
- case nir_intrinsic_shuffle:
- result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]));
- break;
- case nir_intrinsic_reduce:
- result = ac_build_reduce(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0],
- instr->const_index[1]);
- break;
- case nir_intrinsic_inclusive_scan:
- result = ac_build_inclusive_scan(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0]);
- break;
- case nir_intrinsic_exclusive_scan:
- result = ac_build_exclusive_scan(&ctx->ac,
- get_src(ctx, instr->src[0]),
- instr->const_index[0]);
- break;
- case nir_intrinsic_quad_broadcast: {
- unsigned lane = nir_src_as_uint(instr->src[1]);
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
- lane, lane, lane, lane);
- break;
- }
- case nir_intrinsic_quad_swap_horizontal:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3 ,2);
- break;
- case nir_intrinsic_quad_swap_vertical:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0 ,1);
- break;
- case nir_intrinsic_quad_swap_diagonal:
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1 ,0);
- break;
- case nir_intrinsic_quad_swizzle_amd: {
- uint32_t mask = nir_intrinsic_swizzle_mask(instr);
- result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
- mask & 0x3, (mask >> 2) & 0x3,
- (mask >> 4) & 0x3, (mask >> 6) & 0x3);
- break;
- }
- case nir_intrinsic_masked_swizzle_amd: {
- uint32_t mask = nir_intrinsic_swizzle_mask(instr);
- result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
- break;
- }
- case nir_intrinsic_write_invocation_amd:
- result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
- get_src(ctx, instr->src[1]),
- get_src(ctx, instr->src[2]));
- break;
- case nir_intrinsic_mbcnt_amd:
- result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
- break;
- case nir_intrinsic_load_scratch: {
- LLVMValueRef offset = get_src(ctx, instr->src[0]);
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- LLVMTypeRef vec_type =
- instr->dest.ssa.num_components == 1 ? comp_type :
- LLVMVectorType(comp_type, instr->dest.ssa.num_components);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(vec_type, addr_space), "");
- result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
- break;
- }
- case nir_intrinsic_store_scratch: {
- LLVMValueRef offset = get_src(ctx, instr->src[1]);
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(comp_type, addr_space), "");
- LLVMValueRef src = get_src(ctx, instr->src[0]);
- unsigned wrmask = nir_intrinsic_write_mask(instr);
- while (wrmask) {
- int start, count;
- u_bit_scan_consecutive_range(&wrmask, &start, &count);
-
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
- LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
- LLVMTypeRef vec_type =
- count == 1 ? comp_type : LLVMVectorType(comp_type, count);
- offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
- offset_ptr,
- LLVMPointerType(vec_type, addr_space),
- "");
- LLVMValueRef offset_src =
- ac_extract_components(&ctx->ac, src, start, count);
- LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
- }
- break;
- }
- case nir_intrinsic_load_constant: {
- LLVMValueRef offset = get_src(ctx, instr->src[0]);
- LLVMValueRef base = LLVMConstInt(ctx->ac.i32,
- nir_intrinsic_base(instr),
- false);
- offset = LLVMBuildAdd(ctx->ac.builder, offset, base, "");
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data,
- offset);
- LLVMTypeRef comp_type =
- LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
- LLVMTypeRef vec_type =
- instr->dest.ssa.num_components == 1 ? comp_type :
- LLVMVectorType(comp_type, instr->dest.ssa.num_components);
- unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- LLVMPointerType(vec_type, addr_space), "");
- result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
- break;
- }
- default:
- fprintf(stderr, "Unknown intrinsic: ");
- nir_print_instr(&instr->instr, stderr);
- fprintf(stderr, "\n");
- break;
- }
- if (result) {
- ctx->ssa_defs[instr->dest.ssa.index] = result;
- }
-}
-
-static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx,
- unsigned base_index,
- unsigned constant_index,
- LLVMValueRef dynamic_index)
-{
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
- LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
- LLVMConstInt(ctx->ac.i32, constant_index, 0), "");
-
- /* Bindless uniforms are 64bit so multiple index by 8 */
- index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");
- offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
-
- LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
-
- LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset,
- NULL, 0, 0, true, true);
-
- return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
-}
-
-/* Resolve the descriptor used by a texture or image instruction.
- *
- * Works out descriptor_set, base_index, constant_index and an optional
- * dynamic index for the access, then defers to abi->load_sampler_desc().
- * Without a deref the handle comes straight from an instruction source
- * (treated as bindless) or from tex_instr->sampler_index; with a deref the
- * chain is walked up to its variable, accumulating array and struct offsets.
- */
-static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
- nir_deref_instr *deref_instr,
- enum ac_descriptor_type desc_type,
- const nir_instr *instr,
- bool image, bool write)
-{
- LLVMValueRef index = NULL;
- unsigned constant_index = 0;
- unsigned descriptor_set;
- unsigned base_index;
- bool bindless = false;
-
- if (!deref_instr) {
- /* No deref: the handle (if any) is an explicit instruction source. */
- descriptor_set = 0;
- if (image) {
- nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
- base_index = 0;
- bindless = true;
- index = get_src(ctx, img_instr->src[0]);
- } else {
- nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
- int sampSrcIdx = nir_tex_instr_src_index(tex_instr,
- nir_tex_src_sampler_handle);
- if (sampSrcIdx != -1) {
- base_index = 0;
- bindless = true;
- index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
- } else {
- assert(tex_instr && !image);
- base_index = tex_instr->sampler_index;
- }
- }
- } else {
- /* Walk from the innermost deref up to the variable. Constant array
- * indices and struct member offsets accumulate in constant_index;
- * non-constant array indices accumulate in index.
- */
- while(deref_instr->deref_type != nir_deref_type_var) {
- if (deref_instr->deref_type == nir_deref_type_array) {
- /* Each array step is scaled by the array-of-arrays size of the
- * element type, with 0 treated as 1.
- */
- unsigned array_size = glsl_get_aoa_size(deref_instr->type);
- if (!array_size)
- array_size = 1;
-
- if (nir_src_is_const(deref_instr->arr.index)) {
- constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
- } else {
- LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
-
- indirect = LLVMBuildMul(ctx->ac.builder, indirect,
- LLVMConstInt(ctx->ac.i32, array_size, false), "");
-
- if (!index)
- index = indirect;
- else
- index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
- }
-
- deref_instr = nir_src_as_deref(deref_instr->parent);
- } else if (deref_instr->deref_type == nir_deref_type_struct) {
- /* The member offset is looked up on the parent (struct) type,
- * so step to the parent before querying it.
- */
- unsigned sidx = deref_instr->strct.index;
- deref_instr = nir_src_as_deref(deref_instr->parent);
- constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
- } else {
- unreachable("Unsupported deref type");
- }
- }
- descriptor_set = deref_instr->var->data.descriptor_set;
-
- if (deref_instr->var->data.bindless) {
- /* For now just assert on unhandled variable types */
- assert(deref_instr->var->data.mode == nir_var_uniform);
-
- base_index = deref_instr->var->data.driver_location;
- bindless = true;
-
- /* The handle itself is fetched from memory; the accumulated
- * indices select which handle to read.
- */
- index = index ? index : ctx->ac.i32_0;
- index = get_bindless_index_from_uniform(ctx, base_index,
- constant_index, index);
- } else
- base_index = deref_instr->var->data.binding;
- }
-
- return ctx->abi->load_sampler_desc(ctx->abi,
- descriptor_set,
- base_index,
- constant_index, index,
- desc_type, image, write, bindless);
-}
-
-/* Turn off anisotropic filtering when BASE_LEVEL == LAST_LEVEL.
- *
- * GFX6-GFX7:
- *   The shader has to do this by hand. The driver stores in img7 a mask
- *   that clears MAX_ANISO_RATIO when BASE_LEVEL == LAST_LEVEL, and the
- *   shader applies it:
- *     s_and_b32 samp0, samp0, img7
- *
- * GFX8:
- *   The ANISO_OVERRIDE sampler field enables this fix in TA, so the
- *   sampler is returned untouched.
- */
-static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
-		LLVMValueRef res, LLVMValueRef samp)
-{
-	if (ctx->ac.chip_class >= GFX8)
-		return samp;
-
-	LLVMBuilderRef b = ctx->ac.builder;
-	LLVMValueRef idx0 = LLVMConstInt(ctx->ac.i32, 0, 0);
-	LLVMValueRef idx7 = LLVMConstInt(ctx->ac.i32, 7, 0);
-
-	/* samp[0] &= res[7] */
-	LLVMValueRef mask = LLVMBuildExtractElement(b, res, idx7, "");
-	LLVMValueRef dword0 = LLVMBuildExtractElement(b, samp, idx0, "");
-	dword0 = LLVMBuildAnd(b, dword0, mask, "");
-
-	return LLVMBuildInsertElement(b, samp, dword0, idx0, "");
-}
-
-/*
- * Load the descriptors a texture instruction needs.
- *
- * *res_ptr always receives the main (image, buffer or plane) descriptor.
- * *samp_ptr, when non-NULL, receives the sampler descriptor, using the
- * texture deref if the instruction has no sampler deref. *fmask_ptr, when
- * non-NULL, is only written for txf_ms and samples_identical.
- */
-static void tex_fetch_ptrs(struct ac_nir_context *ctx,
-		nir_tex_instr *instr,
-		LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
-		LLVMValueRef *fmask_ptr)
-{
-	nir_deref_instr *tex_deref = NULL;
-	nir_deref_instr *samp_deref = NULL;
-	int plane = -1;
-
-	for (unsigned s = 0; s < instr->num_srcs; s++) {
-		if (instr->src[s].src_type == nir_tex_src_texture_deref)
-			tex_deref = nir_src_as_deref(instr->src[s].src);
-		else if (instr->src[s].src_type == nir_tex_src_sampler_deref)
-			samp_deref = nir_src_as_deref(instr->src[s].src);
-		else if (instr->src[s].src_type == nir_tex_src_plane)
-			plane = nir_src_as_int(instr->src[s].src);
-	}
-
-	if (!samp_deref)
-		samp_deref = tex_deref;
-
-	enum ac_descriptor_type main_descriptor;
-	if (plane >= 0) {
-		assert(instr->op != nir_texop_txf_ms &&
-		       instr->op != nir_texop_samples_identical);
-		assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
-
-		main_descriptor = AC_DESC_PLANE_0 + plane;
-	} else if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-		main_descriptor = AC_DESC_BUFFER;
-	} else {
-		main_descriptor = AC_DESC_IMAGE;
-	}
-
-	*res_ptr = get_sampler_desc(ctx, tex_deref, main_descriptor,
-				    &instr->instr, false, false);
-
-	if (samp_ptr) {
-		*samp_ptr = get_sampler_desc(ctx, samp_deref, AC_DESC_SAMPLER,
-					     &instr->instr, false, false);
-		if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
-			*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
-	}
-
-	const bool uses_fmask = instr->op == nir_texop_txf_ms ||
-				instr->op == nir_texop_samples_identical;
-	if (fmask_ptr && uses_fmask)
-		*fmask_ptr = get_sampler_desc(ctx, tex_deref, AC_DESC_FMASK,
-					      &instr->instr, false, false);
-}
-
-/* Round a coordinate: convert it to float, apply ac_build_round(), and
- * convert the result back to integer.
- */
-static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
-		LLVMValueRef coord)
-{
-	LLVMValueRef as_float = ac_to_float(ctx, coord);
-	LLVMValueRef rounded = ac_build_round(ctx, as_float);
-
-	return ac_to_integer(ctx, rounded);
-}
-
-/* Translate one NIR texture instruction into LLVM IR.
- *
- * Collects the instruction's sources into an ac_image_args, applies the
- * per-op and per-chip fixups below, emits the access through
- * build_tex_intrinsic() and stores the result, converted to integer, in
- * ctx->ssa_defs[] under the destination's SSA index.
- */
-static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
-{
- LLVMValueRef result = NULL;
- struct ac_image_args args = { 0 };
- LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
- LLVMValueRef ddx = NULL, ddy = NULL;
- unsigned offset_src = 0;
-
- tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr);
-
- /* Gather the operands this instruction carries; sources that need no
- * handling here are skipped.
- */
- for (unsigned i = 0; i < instr->num_srcs; i++) {
- switch (instr->src[i].src_type) {
- case nir_tex_src_coord: {
- LLVMValueRef coord = get_src(ctx, instr->src[i].src);
- for (unsigned chan = 0; chan < instr->coord_components; ++chan)
- args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
- break;
- }
- case nir_tex_src_projector:
- break;
- case nir_tex_src_comparator:
- if (instr->is_shadow) {
- args.compare = get_src(ctx, instr->src[i].src);
- args.compare = ac_to_float(&ctx->ac, args.compare);
- }
- break;
- case nir_tex_src_offset:
- args.offset = get_src(ctx, instr->src[i].src);
- offset_src = i;
- break;
- case nir_tex_src_bias:
- if (instr->op == nir_texop_txb)
- args.bias = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_lod: {
- /* A constant LOD of 0 is flagged via level_zero instead of being
- * passed as an operand.
- */
- if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
- args.level_zero = true;
- else
- args.lod = get_src(ctx, instr->src[i].src);
- break;
- }
- case nir_tex_src_ms_index:
- sample_index = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_ms_mcs:
- break;
- case nir_tex_src_ddx:
- ddx = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_ddy:
- ddy = get_src(ctx, instr->src[i].src);
- break;
- case nir_tex_src_texture_offset:
- case nir_tex_src_sampler_offset:
- case nir_tex_src_plane:
- default:
- break;
- }
- }
-
- /* Size query on a buffer: handled by get_buffer_size(), no image op. */
- if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
- result = get_buffer_size(ctx, args.resource, true);
- goto write_result;
- }
-
- /* Sample-count query: computed from dword 3 of the resource descriptor.
- * The result is 1 << ((dword3 >> 16) & 0xf) when the is_msaa test below
- * passes and 1 otherwise.
- * NOTE(review): the bit positions presumably correspond to the
- * descriptor's type and sample-count fields - confirm against the
- * hardware descriptor layout.
- */
- if (instr->op == nir_texop_texture_samples) {
- LLVMValueRef res, samples, is_msaa;
- res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
- samples = LLVMBuildExtractElement(ctx->ac.builder, res,
- LLVMConstInt(ctx->ac.i32, 3, false), "");
- is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
- LLVMConstInt(ctx->ac.i32, 28, false), "");
- is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
- LLVMConstInt(ctx->ac.i32, 0xe, false), "");
- is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
- LLVMConstInt(ctx->ac.i32, 0xe, false), "");
-
- samples = LLVMBuildLShr(ctx->ac.builder, samples,
- LLVMConstInt(ctx->ac.i32, 16, false), "");
- samples = LLVMBuildAnd(ctx->ac.builder, samples,
- LLVMConstInt(ctx->ac.i32, 0xf, false), "");
- samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
- samples, "");
- samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
- ctx->ac.i32_1, "");
- result = samples;
- goto write_result;
- }
-
- /* For everything except txf/txf_ms, pack up to three offsets into one
- * dword: each is masked to 6 bits and placed in its own byte.
- */
- if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
- LLVMValueRef offset[3], pack;
- for (unsigned chan = 0; chan < 3; ++chan)
- offset[chan] = ctx->ac.i32_0;
-
- unsigned num_components = ac_get_llvm_num_components(args.offset);
- for (unsigned chan = 0; chan < num_components; chan++) {
- offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
- offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
- LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
- if (chan)
- offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
- LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
- }
- pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
- pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
- args.offset = pack;
- }
-
- /* Section 8.23.1 (Depth Texture Comparison Mode) of the
- * OpenGL 4.5 spec says:
- *
- * "If the texture’s internal format indicates a fixed-point
- * depth texture, then D_t and D_ref are clamped to the
- * range [0, 1]; otherwise no clamping is performed."
- *
- * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
- * so the depth comparison value isn't clamped for Z16 and
- * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
- * an explicitly clamped 32-bit float format.
- */
- if (args.compare &&
- ctx->ac.chip_class >= GFX8 &&
- ctx->ac.chip_class <= GFX9 &&
- ctx->abi->clamp_shadow_reference) {
- LLVMValueRef upgraded, clamped;
-
- /* Bit 29 of sampler dword 3 selects, at run time, whether the
- * clamped or the original reference value is used.
- */
- upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
- LLVMConstInt(ctx->ac.i32, 3, false), "");
- upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
- LLVMConstInt(ctx->ac.i32, 29, false), "");
- upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
- clamped = ac_build_clamp(&ctx->ac, args.compare);
- args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped,
- args.compare, "");
- }
-
- /* pack derivatives */
- if (ddx || ddy) {
- /* derivs[] holds the ddx channels first, followed by the ddy
- * channels; destination channels beyond the source count are
- * zero-filled.
- */
- int num_src_deriv_channels, num_dest_deriv_channels;
- switch (instr->sampler_dim) {
- case GLSL_SAMPLER_DIM_3D:
- case GLSL_SAMPLER_DIM_CUBE:
- num_src_deriv_channels = 3;
- num_dest_deriv_channels = 3;
- break;
- case GLSL_SAMPLER_DIM_2D:
- default:
- num_src_deriv_channels = 2;
- num_dest_deriv_channels = 2;
- break;
- case GLSL_SAMPLER_DIM_1D:
- num_src_deriv_channels = 1;
- if (ctx->ac.chip_class == GFX9) {
- num_dest_deriv_channels = 2;
- } else {
- num_dest_deriv_channels = 1;
- }
- break;
- }
-
- for (unsigned i = 0; i < num_src_deriv_channels; i++) {
- args.derivs[i] = ac_to_float(&ctx->ac,
- ac_llvm_extract_elem(&ctx->ac, ddx, i));
- args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
- ac_llvm_extract_elem(&ctx->ac, ddy, i));
- }
- for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
- args.derivs[i] = ctx->ac.f32_0;
- args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
- }
- }
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
- for (unsigned chan = 0; chan < instr->coord_components; chan++)
- args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
- if (instr->coord_components == 3)
- args.coords[3] = LLVMGetUndef(ctx->ac.f32);
- ac_prepare_cube_coords(&ctx->ac,
- instr->op == nir_texop_txd, instr->is_array,
- instr->op == nir_texop_lod, args.coords, args.derivs);
- }
-
- /* Texture coordinates fixups */
- if (instr->coord_components > 1 &&
- instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
- instr->is_array &&
- instr->op != nir_texop_txf) {
- args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
- }
-
- if (instr->coord_components > 2 &&
- (instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
- instr->is_array &&
- instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
- args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
- }
-
- /* On GFX9, 1D accesses get an extra second coordinate (0 for txf, 0.5
- * otherwise); for arrays the layer moves from slot 1 to slot 2.
- */
- if (ctx->ac.chip_class == GFX9 &&
- instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
- instr->op != nir_texop_lod) {
- LLVMValueRef filler;
- if (instr->op == nir_texop_txf)
- filler = ctx->ac.i32_0;
- else
- filler = LLVMConstReal(ctx->ac.f32, 0.5);
-
- if (instr->is_array)
- args.coords[2] = args.coords[1];
- args.coords[1] = filler;
- }
-
- /* Pack sample index */
- if (instr->op == nir_texop_txf_ms && sample_index)
- args.coords[instr->coord_components] = sample_index;
-
- /* samples_identical: fetch from the FMASK descriptor and report whether
- * the first returned channel equals 0.
- */
- if (instr->op == nir_texop_samples_identical) {
- struct ac_image_args txf_args = { 0 };
- memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
-
- txf_args.dmask = 0xf;
- txf_args.resource = fmask_ptr;
- txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
- result = build_tex_intrinsic(ctx, instr, &txf_args);
-
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
- result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
- goto write_result;
- }
-
- /* Multisampled access (other than txs): the sample index coordinate is
- * rewritten by adjust_sample_index_using_fmask().
- */
- if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
- instr->op != nir_texop_txs) {
- unsigned sample_chan = instr->is_array ? 3 : 2;
- args.coords[sample_chan] = adjust_sample_index_using_fmask(
- &ctx->ac, args.coords[0], args.coords[1],
- instr->is_array ? args.coords[2] : NULL,
- args.coords[sample_chan], fmask_ptr);
- }
-
- /* txf/txf_ms: offsets are read as constants and added directly to the
- * integer coordinates, then the offset operand is dropped.
- */
- if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
- int num_offsets = instr->src[offset_src].src.ssa->num_components;
- num_offsets = MIN2(num_offsets, instr->coord_components);
- for (unsigned i = 0; i < num_offsets; ++i) {
- args.coords[i] = LLVMBuildAdd(
- ctx->ac.builder, args.coords[i],
- LLVMConstInt(ctx->ac.i32, nir_src_comp_as_uint(instr->src[offset_src].src, i), false), "");
- }
- args.offset = NULL;
- }
-
- /* DMASK was repurposed for GATHER4. 4 components are always
- * returned and DMASK works like a swizzle - it selects
- * the component to fetch. The only valid DMASK values are
- * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
- * (red,red,red,red) etc.) The ISA document doesn't mention
- * this.
- */
- args.dmask = 0xf;
- if (instr->op == nir_texop_tg4) {
- if (instr->is_shadow)
- args.dmask = 1;
- else
- args.dmask = 1 << instr->component;
- }
-
- if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
- args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
- args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
- }
- result = build_tex_intrinsic(ctx, instr, &args);
-
- /* Post-process the raw result into the shape the NIR destination has. */
- if (instr->op == nir_texop_query_levels)
- result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
- else if (instr->is_shadow && instr->is_new_style_shadow &&
- instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
- instr->op != nir_texop_tg4)
- result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
- else if (instr->op == nir_texop_txs &&
- instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
- instr->is_array) {
- /* Cube array size query: component 2 is divided by 6. */
- LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
- LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
- LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
- z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
- result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
- } else if (ctx->ac.chip_class == GFX9 &&
- instr->op == nir_texop_txs &&
- instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
- instr->is_array) {
- /* GFX9 1D array size query: component 2 is copied into component 1. */
- LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
- LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
- result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
- ctx->ac.i32_1, "");
- } else if (instr->dest.ssa.num_components != 4)
- result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
-
-write_result:
- if (result) {
- assert(instr->dest.is_ssa);
- result = ac_to_integer(&ctx->ac, result);
- ctx->ssa_defs[instr->dest.ssa.index] = result;
- }
-}
-
-
-/* Emit an empty LLVM phi for a NIR phi. The phi is recorded both as the
- * value of the destination SSA def and in ctx->phis, keyed by the NIR
- * instruction.
- */
-static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
-{
-	LLVMValueRef phi = LLVMBuildPhi(ctx->ac.builder,
-					get_def_type(ctx, &instr->dest.ssa), "");
-
-	ctx->ssa_defs[instr->dest.ssa.index] = phi;
-	_mesa_hash_table_insert(ctx->phis, instr, phi);
-}
-
-/* Add one incoming (value, predecessor block) pair to llvm_phi for every
- * source of the NIR phi.
- */
-static void visit_post_phi(struct ac_nir_context *ctx,
-		nir_phi_instr *instr,
-		LLVMValueRef llvm_phi)
-{
-	nir_foreach_phi_src(phi_src, instr) {
-		LLVMBasicBlockRef pred_block = get_block(ctx, phi_src->pred);
-		LLVMValueRef incoming = get_src(ctx, phi_src->src);
-
-		LLVMAddIncoming(llvm_phi, &incoming, &pred_block, 1);
-	}
-}
-
-/* Fill in the incoming values of every phi recorded in ctx->phis. */
-static void phi_post_pass(struct ac_nir_context *ctx)
-{
-	hash_table_foreach(ctx->phis, entry) {
-		nir_phi_instr *nir_phi = (nir_phi_instr*)entry->key;
-		LLVMValueRef llvm_phi = (LLVMValueRef)entry->data;
-
-		visit_post_phi(ctx, nir_phi, llvm_phi);
-	}
-}
-
-
-/* Define an undef SSA value: an LLVM undef of an integer type with the
- * def's bit size, as a scalar for one component and a vector otherwise.
- */
-static void visit_ssa_undef(struct ac_nir_context *ctx,
-		const nir_ssa_undef_instr *instr)
-{
-	const unsigned count = instr->def.num_components;
-	LLVMTypeRef elem_type = LLVMIntTypeInContext(ctx->ac.context,
-						     instr->def.bit_size);
-	LLVMTypeRef value_type = count == 1 ? elem_type
-					    : LLVMVectorType(elem_type, count);
-
-	ctx->ssa_defs[instr->def.index] = LLVMGetUndef(value_type);
-}
-
-/* Emit a break or continue. Any other jump type is printed to stderr and
- * aborts the process.
- */
-static void visit_jump(struct ac_llvm_context *ctx,
-		const nir_jump_instr *instr)
-{
-	if (instr->type == nir_jump_break) {
-		ac_build_break(ctx);
-		return;
-	}
-
-	if (instr->type == nir_jump_continue) {
-		ac_build_continue(ctx);
-		return;
-	}
-
-	fprintf(stderr, "Unknown NIR jump instr: ");
-	nir_print_instr(&instr->instr, stderr);
-	fprintf(stderr, "\n");
-	abort();
-}
-
-/* Map a GLSL base type to the LLVM scalar type of the same width; base
- * types that are not listed are treated as unreachable.
- */
-static LLVMTypeRef
-glsl_base_to_llvm_type(struct ac_llvm_context *ac,
-		enum glsl_base_type type)
-{
-	switch (type) {
-	/* 8-bit */
-	case GLSL_TYPE_INT8:
-	case GLSL_TYPE_UINT8:
-		return ac->i8;
-
-	/* 16-bit */
-	case GLSL_TYPE_INT16:
-	case GLSL_TYPE_UINT16:
-		return ac->i16;
-	case GLSL_TYPE_FLOAT16:
-		return ac->f16;
-
-	/* 32-bit */
-	case GLSL_TYPE_INT:
-	case GLSL_TYPE_UINT:
-	case GLSL_TYPE_BOOL:
-	case GLSL_TYPE_SUBROUTINE:
-		return ac->i32;
-	case GLSL_TYPE_FLOAT:
-		return ac->f32;
-
-	/* 64-bit */
-	case GLSL_TYPE_INT64:
-	case GLSL_TYPE_UINT64:
-		return ac->i64;
-	case GLSL_TYPE_DOUBLE:
-		return ac->f64;
-
-	default:
-		unreachable("unknown GLSL type");
-	}
-}
-
-/* Build the LLVM type corresponding to a GLSL type.
- *
- * Scalars map through glsl_base_to_llvm_type(), vectors become LLVM
- * vectors, matrices become arrays of their column type, arrays become
- * LLVM arrays of the translated element type, and structs/interfaces
- * become unpacked LLVM struct types with one translated member per field.
- */
-static LLVMTypeRef
-glsl_to_llvm_type(struct ac_llvm_context *ac,
-		const struct glsl_type *type)
-{
-	if (glsl_type_is_scalar(type)) {
-		return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
-	}
-
-	if (glsl_type_is_vector(type)) {
-		return LLVMVectorType(
-			glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
-			glsl_get_vector_elements(type));
-	}
-
-	if (glsl_type_is_matrix(type)) {
-		return LLVMArrayType(
-			glsl_to_llvm_type(ac, glsl_get_column_type(type)),
-			glsl_get_matrix_columns(type));
-	}
-
-	if (glsl_type_is_array(type)) {
-		return LLVMArrayType(
-			glsl_to_llvm_type(ac, glsl_get_array_element(type)),
-			glsl_get_length(type));
-	}
-
-	assert(glsl_type_is_struct_or_ifc(type));
-
-	const unsigned num_members = glsl_get_length(type);
-
-	/* A variable-length array must have a size greater than zero, so
-	 * reserve at least one slot even when the struct has no members.
-	 */
-	LLVMTypeRef member_types[num_members ? num_members : 1];
-
-	for (unsigned i = 0; i < num_members; i++) {
-		member_types[i] =
-			glsl_to_llvm_type(ac,
-					  glsl_get_struct_field(type, i));
-	}
-
-	return LLVMStructTypeInContext(ac->context, member_types,
-				       num_members, false);
-}
-
-/* Translate a deref instruction into an LLVM pointer value.
- *
- * Only derefs in the nir_var_mem_shared and nir_var_mem_global modes are
- * handled; all others return without defining anything. The resulting
- * pointer is stored in ctx->ssa_defs[] under the destination's SSA index.
- */
-static void visit_deref(struct ac_nir_context *ctx,
- nir_deref_instr *instr)
-{
- if (instr->mode != nir_var_mem_shared &&
- instr->mode != nir_var_mem_global)
- return;
-
- LLVMValueRef result = NULL;
- switch(instr->deref_type) {
- case nir_deref_type_var: {
- /* Variables are looked up in ctx->vars.
- * NOTE(review): the lookup result is dereferenced without a NULL
- * check, so the variable must already be registered - confirm.
- */
- struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
- result = entry->data;
- break;
- }
- case nir_deref_type_struct:
- if (instr->mode == nir_var_mem_global) {
- /* Global memory: advance the pointer by the field's offset as
- * reported by glsl_get_struct_field_offset().
- */
- nir_deref_instr *parent = nir_deref_instr_parent(instr);
- uint64_t offset = glsl_get_struct_field_offset(parent->type,
- instr->strct.index);
- result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
- LLVMConstInt(ctx->ac.i32, offset, 0));
- } else {
- result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
- LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
- }
- break;
- case nir_deref_type_array:
- if (instr->mode == nir_var_mem_global) {
- nir_deref_instr *parent = nir_deref_instr_parent(instr);
- unsigned stride = glsl_get_explicit_stride(parent->type);
-
- /* Row-major matrices, and vectors without an explicit stride,
- * are stepped by the scalar size instead.
- */
- if ((glsl_type_is_matrix(parent->type) &&
- glsl_matrix_type_is_row_major(parent->type)) ||
- (glsl_type_is_vector(parent->type) && stride == 0))
- stride = type_scalar_size_bytes(parent->type);
-
- assert(stride > 0);
- /* Offset = zero-extended 64-bit index * stride. */
- LLVMValueRef index = get_src(ctx, instr->arr.index);
- if (LLVMTypeOf(index) != ctx->ac.i64)
- index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
-
- LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
-
- result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
- } else {
- result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
- get_src(ctx, instr->arr.index));
- }
- break;
- case nir_deref_type_ptr_as_array:
- if (instr->mode == nir_var_mem_global) {
- unsigned stride = nir_deref_instr_ptr_as_array_stride(instr);
-
- LLVMValueRef index = get_src(ctx, instr->arr.index);
- if (LLVMTypeOf(index) != ctx->ac.i64)
- index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
-
- LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
-
- result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
- } else {
- result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
- get_src(ctx, instr->arr.index));
- }
- break;
- case nir_deref_type_cast: {
- result = get_src(ctx, instr->parent);
-
- /* We can't use the structs from LLVM because the shader
- * specifies its own offsets. */
- LLVMTypeRef pointee_type = ctx->ac.i8;
- if (instr->mode == nir_var_mem_shared)
- pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
-
- unsigned address_space;
-
- switch(instr->mode) {
- case nir_var_mem_shared:
- address_space = AC_ADDR_SPACE_LDS;
- break;
- case nir_var_mem_global:
- address_space = AC_ADDR_SPACE_GLOBAL;
- break;
- default:
- unreachable("Unhandled address space");
- }
-
- LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
-
- /* Convert the parent value to the target pointer type: vector
- * values are bit-cast, everything else goes through inttoptr.
- */
- if (LLVMTypeOf(result) != type) {
- if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
- result = LLVMBuildBitCast(ctx->ac.builder, result,
- type, "");
- } else {
- result = LLVMBuildIntToPtr(ctx->ac.builder, result,
- type, "");
- }
- }
- break;
- }
- default:
- unreachable("Unhandled deref_instr deref type");
- }
-
- ctx->ssa_defs[instr->dest.ssa.index] = result;
-}
-
-/* Forward declaration: visit_if() and visit_loop() call visit_cf_list(),
- * which is defined after them because it calls them in turn.
- */
-static void visit_cf_list(struct ac_nir_context *ctx,
- struct exec_list *list);
-
-/* Translate every instruction of a NIR block in order, then record the
- * LLVM basic block the builder ends up in as this block's entry in
- * ctx->defs. An unknown instruction type is printed to stderr and aborts.
- */
-static void visit_block(struct ac_nir_context *ctx, nir_block *block)
-{
-	nir_foreach_instr(instr, block) {
-		switch (instr->type) {
-		case nir_instr_type_load_const:
-			visit_load_const(ctx, nir_instr_as_load_const(instr));
-			break;
-		case nir_instr_type_ssa_undef:
-			visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
-			break;
-		case nir_instr_type_alu:
-			visit_alu(ctx, nir_instr_as_alu(instr));
-			break;
-		case nir_instr_type_deref:
-			visit_deref(ctx, nir_instr_as_deref(instr));
-			break;
-		case nir_instr_type_intrinsic:
-			visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
-			break;
-		case nir_instr_type_tex:
-			visit_tex(ctx, nir_instr_as_tex(instr));
-			break;
-		case nir_instr_type_phi:
-			visit_phi(ctx, nir_instr_as_phi(instr));
-			break;
-		case nir_instr_type_jump:
-			visit_jump(&ctx->ac, nir_instr_as_jump(instr));
-			break;
-		default:
-			fprintf(stderr, "Unknown NIR instr type: ");
-			nir_print_instr(instr, stderr);
-			fprintf(stderr, "\n");
-			abort();
-		}
-	}
-
-	LLVMBasicBlockRef last_llvm_block = LLVMGetInsertBlock(ctx->ac.builder);
-	_mesa_hash_table_insert(ctx->defs, block, last_llvm_block);
-}
-
-/* Translate a NIR if: open the conditional, emit the then-list, emit the
- * else-list only when it is non-empty, and close the conditional. The
- * index of the first block of each branch is passed as the label id.
- */
-static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
-{
-	LLVMValueRef cond = get_src(ctx, if_stmt->condition);
-	nir_block *then_head =
-		(nir_block *) exec_list_get_head(&if_stmt->then_list);
-
-	ac_build_uif(&ctx->ac, cond, then_head->index);
-	visit_cf_list(ctx, &if_stmt->then_list);
-
-	if (!exec_list_is_empty(&if_stmt->else_list)) {
-		nir_block *else_head =
-			(nir_block *) exec_list_get_head(&if_stmt->else_list);
-
-		ac_build_else(&ctx->ac, else_head->index);
-		visit_cf_list(ctx, &if_stmt->else_list);
-	}
-
-	ac_build_endif(&ctx->ac, then_head->index);
-}
-
-/* Translate a NIR loop: open the loop, emit its body, close the loop. The
- * index of the body's first block is the label id for both ends.
- */
-static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
-{
-	nir_block *body_head = (nir_block *) exec_list_get_head(&loop->body);
-
-	ac_build_bgnloop(&ctx->ac, body_head->index);
-	visit_cf_list(ctx, &loop->body);
-	ac_build_endloop(&ctx->ac, body_head->index);
-}
-
-/* Translate each control-flow node of a list in order, dispatching on the
- * node type; an unexpected type trips an assertion.
- */
-static void visit_cf_list(struct ac_nir_context *ctx,
-		struct exec_list *list)
-{
-	foreach_list_typed(nir_cf_node, node, node, list) {
-		if (node->type == nir_cf_node_block)
-			visit_block(ctx, nir_cf_node_as_block(node));
-		else if (node->type == nir_cf_node_if)
-			visit_if(ctx, nir_cf_node_as_if(node));
-		else if (node->type == nir_cf_node_loop)
-			visit_loop(ctx, nir_cf_node_as_loop(node));
-		else
-			assert(0);
-	}
-}
-
-/* Allocate storage for one shader output variable.
- *
- * For every attribute slot the variable occupies, four allocas (f16 for
- * 16-bit types, f32 otherwise) are created and stored in abi->outputs[].
- * Nothing is allocated for tessellation control shaders. For VS/TES/GS, a
- * variable at VARYING_SLOT_CLIP_DIST0 takes two slots when the combined
- * clip and cull distance array sizes exceed 4, and one slot otherwise.
- */
-void
-ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
-		struct ac_shader_abi *abi,
-		struct nir_shader *nir,
-		struct nir_variable *variable,
-		gl_shader_stage stage)
-{
-	unsigned output_loc = variable->data.driver_location / 4;
-	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
-
-	/* tess ctrl has its own load/store paths for outputs */
-	if (stage == MESA_SHADER_TESS_CTRL)
-		return;
-
-	switch (stage) {
-	case MESA_SHADER_VERTEX:
-	case MESA_SHADER_TESS_EVAL:
-	case MESA_SHADER_GEOMETRY: {
-		int slot = variable->data.location + variable->data.index;
-		if (slot == VARYING_SLOT_CLIP_DIST0) {
-			int total = nir->info.clip_distance_array_size +
-				    nir->info.cull_distance_array_size;
-
-			attrib_count = total > 4 ? 2 : 1;
-		}
-		break;
-	}
-	default:
-		break;
-	}
-
-	LLVMTypeRef slot_type =
-		glsl_type_is_16bit(glsl_without_array(variable->type)) ? ctx->f16
-								       : ctx->f32;
-
-	for (unsigned i = 0; i < attrib_count; ++i) {
-		for (unsigned chan = 0; chan < 4; chan++) {
-			abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
-				ac_build_alloca_undef(ctx, slot_type, "");
-		}
-	}
-}
-
-/* Assign driver locations to the function's local variables and allocate
- * their storage.
- *
- * Each variable starts at driver_location = 4 * (slots used so far) with
- * location_frac 0. ctx->locals then gets four f32 allocas named "temp" per
- * slot. If the array allocation fails the function returns with
- * ctx->locals left NULL.
- */
-static void
-setup_locals(struct ac_nir_context *ctx,
-		struct nir_function *func)
-{
-	ctx->num_locals = 0;
-	nir_foreach_variable(variable, &func->impl->locals) {
-		variable->data.driver_location = ctx->num_locals * 4;
-		variable->data.location_frac = 0;
-		ctx->num_locals += glsl_count_attribute_slots(variable->type, false);
-	}
-
-	ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
-	if (!ctx->locals)
-		return;
-
-	for (int slot = 0; slot < 4 * ctx->num_locals; slot++) {
-		ctx->locals[slot] =
-			ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
-	}
-}
-
-/* Allocate the shader's scratch area as an i8 array alloca of
- * shader->scratch_size elements; nothing is done when the size is 0.
- */
-static void
-setup_scratch(struct ac_nir_context *ctx,
-		struct nir_shader *shader)
-{
-	if (shader->scratch_size != 0) {
-		LLVMTypeRef bytes = LLVMArrayType(ctx->ac.i8, shader->scratch_size);
-
-		ctx->scratch = ac_build_alloca_undef(&ctx->ac, bytes, "scratch");
-	}
-}
-
-/* Emit the shader's constant data as a hidden, constant LLVM global named
- * "const_data" and remember it in ctx->constant_data. Does nothing when
- * the shader has no constant data.
- */
-static void
-setup_constant_data(struct ac_nir_context *ctx,
-		struct nir_shader *shader)
-{
-	if (!shader->constant_data)
-		return;
-
-	LLVMValueRef initializer =
-		LLVMConstStringInContext(ctx->ac.context,
-					 shader->constant_data,
-					 shader->constant_data_size,
-					 true);
-	LLVMTypeRef bytes_type = LLVMArrayType(ctx->ac.i8,
-					       shader->constant_data_size);
-
-	/* The CONST address space is preferred because it allows scalar
-	 * loads. LLVM versions before 10, however, place such variables in
-	 * the same section as the code, which RadeonSI cannot accept since it
-	 * has to relocate all data sections after the code sections; those
-	 * versions use the GLOBAL address space instead.
-	 * See https://reviews.llvm.org/D65813.
-	 */
-	unsigned address_space = AC_ADDR_SPACE_CONST;
-	if (LLVM_VERSION_MAJOR < 10)
-		address_space = AC_ADDR_SPACE_GLOBAL;
-
-	LLVMValueRef const_global =
-		LLVMAddGlobalInAddressSpace(ctx->ac.module, bytes_type,
-					    "const_data",
-					    address_space);
-
-	LLVMSetInitializer(const_global, initializer);
-	LLVMSetGlobalConstant(const_global, true);
-	LLVMSetVisibility(const_global, LLVMHiddenVisibility);
-	ctx->constant_data = const_global;
-}
-
-/* Create one LLVM global in the LDS address space for every shared
- * variable of the shader and register it in ctx->vars, keyed by the NIR
- * variable. Unnamed variables get an empty name.
- */
-static void
-setup_shared(struct ac_nir_context *ctx,
-		struct nir_shader *nir)
-{
-	nir_foreach_variable(variable, &nir->shared) {
-		LLVMValueRef lds_global =
-			LLVMAddGlobalInAddressSpace(
-				ctx->ac.module,
-				glsl_to_llvm_type(&ctx->ac, variable->type),
-				variable->name ? variable->name : "",
-				AC_ADDR_SPACE_LDS);
-
-		_mesa_hash_table_insert(ctx->vars, variable, lds_global);
-	}
-}
-
-/* Translate a NIR shader into LLVM IR.
- *
- * Works on a local context holding a copy of *ac: declares the outputs,
- * creates the lookup tables, sets up locals, scratch, constant data and
- * (for compute stages) shared variables, then walks the body of the first
- * function in nir->functions and fills in the phis. Non-compute stages
- * finish by calling abi->emit_outputs(). All temporary state is released
- * before returning.
- */
-void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
-		struct nir_shader *nir)
-{
-	struct ac_nir_context ctx = {};
-
-	ctx.ac = *ac;
-	ctx.abi = abi;
-	ctx.stage = nir->info.stage;
-	ctx.info = &nir->info;
-	ctx.main_function =
-		LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
-
-	nir_foreach_variable(out_var, &nir->outputs) {
-		ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, out_var,
-					     ctx.stage);
-	}
-
-	ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-					   _mesa_key_pointer_equal);
-	ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-					   _mesa_key_pointer_equal);
-	ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-					   _mesa_key_pointer_equal);
-
-	struct nir_function *entrypoint =
-		(struct nir_function *)exec_list_get_head(&nir->functions);
-
-	nir_index_ssa_defs(entrypoint->impl);
-	ctx.ssa_defs = calloc(entrypoint->impl->ssa_alloc, sizeof(LLVMValueRef));
-
-	setup_locals(&ctx, entrypoint);
-	setup_scratch(&ctx, nir);
-	setup_constant_data(&ctx, nir);
-
-	if (gl_shader_stage_is_compute(nir->info.stage))
-		setup_shared(&ctx, nir);
-
-	visit_cf_list(&ctx, &entrypoint->impl->body);
-	phi_post_pass(&ctx);
-
-	if (!gl_shader_stage_is_compute(nir->info.stage)) {
-		ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS,
-				      ctx.abi->outputs);
-	}
-
-	free(ctx.locals);
-	free(ctx.ssa_defs);
-	ralloc_free(ctx.defs);
-	ralloc_free(ctx.phis);
-	ralloc_free(ctx.vars);
-}
-
-/* Lower indirect variable indexing in a NIR shader.
- *
- * Large function-temp variables are moved to scratch first; indirect
- * derefs are then lowered for the variable modes selected below.
- */
-void
-ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
-{
-	/* Moving large variables to scratch first keeps the lowering below
-	 * from bloating the shader with huge if ladders for them. Scratch is
-	 * later lowered to alloca's, assuming LLVM won't generate VGPR
-	 * indexing.
-	 */
-	NIR_PASS_V(nir, nir_lower_vars_to_scratch,
-		   nir_var_function_temp,
-		   256,
-		   glsl_get_natural_size_align_bytes);
-
-	/* It would be nicer not to need this flag, but LLVM 9.0 has buggy
-	 * VGPR indexing on GFX9.
-	 */
-	const bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
-
-	const bool is_tess = nir->info.stage == MESA_SHADER_TESS_CTRL ||
-			     nir->info.stage == MESA_SHADER_TESS_EVAL;
-
-	/* TODO: We shouldn't need to lower function temps, however LLVM isn't
-	 * currently smart enough to handle indirects without causing excess
-	 * spilling causing the gpu to hang.
-	 *
-	 * See the following thread for more details of the problem:
-	 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
-	 */
-	nir_variable_mode indirect_mask = nir_var_function_temp;
-
-	/* TODO: Indirect indexing of GS inputs is unimplemented.
-	 *
-	 * TCS and TES load inputs directly from LDS or offchip memory, so
-	 * indirect indexing is trivial.
-	 */
-	if (nir->info.stage == MESA_SHADER_GEOMETRY ||
-	    (!is_tess && !llvm_has_working_vgpr_indexing))
-		indirect_mask |= nir_var_shader_in;
-
-	if (!llvm_has_working_vgpr_indexing &&
-	    nir->info.stage != MESA_SHADER_TESS_CTRL)
-		indirect_mask |= nir_var_shader_out;
-
-	nir_lower_indirect_derefs(nir, indirect_mask);
-}
-
-/* Return the tess-factor channels written by one intrinsic.
- *
- * Only a store_deref whose deref chain resolves to a shader-output
- * variable located at VARYING_SLOT_TESS_LEVEL_INNER or
- * VARYING_SLOT_TESS_LEVEL_OUTER contributes; anything else yields 0.
- * Bits for the outer levels are shifted left by 4 relative to the inner
- * levels.
- */
-static unsigned
-get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
-{
-	if (intrin->intrinsic != nir_intrinsic_store_deref)
-		return 0;
-
-	nir_variable *var =
-		nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));
-
-	/* nir_deref_instr_get_variable() yields NULL when the chain does not
-	 * end in a variable (for instance when it goes through a cast). Such
-	 * a store cannot be a tess factor write, and must not be dereferenced.
-	 */
-	if (!var || var->data.mode != nir_var_shader_out)
-		return 0;
-
-	unsigned writemask = 0;
-	const int location = var->data.location;
-	unsigned first_component = var->data.location_frac;
-	/* NOTE(review): a store intrinsic has no destination, so reading
-	 * dest.ssa.num_components looks suspicious - confirm whether the
-	 * stored value's component count or write mask was intended. Left
-	 * unchanged to preserve current behavior.
-	 */
-	unsigned num_comps = intrin->dest.ssa.num_components;
-
-	if (location == VARYING_SLOT_TESS_LEVEL_INNER)
-		writemask = ((1 << (num_comps + 1)) - 1) << first_component;
-	else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
-		writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4;
-
-	return writemask;
-}
-
-static void
-scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
- unsigned *cond_block_tf_writemask,
- bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
-{
- switch (cf_node->type) {
- case nir_cf_node_block: {
- nir_block *block = nir_cf_node_as_block(cf_node);
- nir_foreach_instr(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- if (intrin->intrinsic == nir_intrinsic_barrier) {
-
- /* If we find a barrier in nested control flow put this in the
- * too hard basket. In GLSL this is not possible but it is in
- * SPIR-V.
- */
- if (is_nested_cf) {
- *tessfactors_are_def_in_all_invocs = false;
- return;
- }
-
- /* The following case must be prevented:
- * gl_TessLevelInner = ...;
- * barrier();
- * if (gl_InvocationID == 1)
- * gl_TessLevelInner = ...;
- *
- * If you consider disjoint code segments separated by barriers, each
- * such segment that writes tess factor channels should write the same
- * channels in all codepaths within that segment.
- */
- if (upper_block_tf_writemask || cond_block_tf_writemask) {
- /* Accumulate the result: */
- *tessfactors_are_def_in_all_invocs &=
- !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));
-
- /* Analyze the next code segment from scratch. */
- *upper_block_tf_writemask = 0;
- *cond_block_tf_writemask = 0;
- }
- } else
- *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
- }
-
- break;
- }
- case nir_cf_node_if: {
- unsigned then_tessfactor_writemask = 0;
- unsigned else_tessfactor_writemask = 0;
-
- nir_if *if_stmt = nir_cf_node_as_if(cf_node);
- foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) {
- scan_tess_ctrl(nested_node, &then_tessfactor_writemask,
- cond_block_tf_writemask,
- tessfactors_are_def_in_all_invocs, true);
- }
-
- foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) {
- scan_tess_ctrl(nested_node, &else_tessfactor_writemask,
- cond_block_tf_writemask,
- tessfactors_are_def_in_all_invocs, true);
- }
-
- if (then_tessfactor_writemask || else_tessfactor_writemask) {
- /* If both statements write the same tess factor channels,
- * we can say that the upper block writes them too.
- */
- *upper_block_tf_writemask |= then_tessfactor_writemask &
- else_tessfactor_writemask;
- *cond_block_tf_writemask |= then_tessfactor_writemask |
- else_tessfactor_writemask;
- }
-
- break;
- }
- case nir_cf_node_loop: {
- nir_loop *loop = nir_cf_node_as_loop(cf_node);
- foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) {
- scan_tess_ctrl(nested_node, cond_block_tf_writemask,
- cond_block_tf_writemask,
- tessfactors_are_def_in_all_invocs, true);
- }
-
- break;
- }
- default:
- unreachable("unknown cf node type");
- }
-}
-
-bool
-ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
-{
- assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
-
- /* The pass works as follows:
- * If all codepaths write tess factors, we can say that all
- * invocations define tess factors.
- *
- * Each tess factor channel is tracked separately.
- */
- unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
- unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
-
- /* Initial value = true. Here the pass will accumulate results from
- * multiple segments surrounded by barriers. If tess factors aren't
- * written at all, it's a shader bug and we don't care if this will be
- * true.
- */
- bool tessfactors_are_def_in_all_invocs = true;
-
- nir_foreach_function(function, nir) {
- if (function->impl) {
- foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
- scan_tess_ctrl(node, &main_block_tf_writemask,
- &cond_block_tf_writemask,
- &tessfactors_are_def_in_all_invocs,
- false);
- }
- }
- }
-
- /* Accumulate the result for the last code segment separated by a
- * barrier.
- */
- if (main_block_tf_writemask || cond_block_tf_writemask) {
- tessfactors_are_def_in_all_invocs &=
- !(cond_block_tf_writemask & ~main_block_tf_writemask);
- }
-
- return tessfactors_are_def_in_all_invocs;
-}
+++ /dev/null
-/*
- * Copyright © 2016 Bas Nieuwenhuizen
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef AC_NIR_TO_LLVM_H
-#define AC_NIR_TO_LLVM_H
-
-#include <stdbool.h>
-#include "llvm-c/Core.h"
-#include "llvm-c/TargetMachine.h"
-#include "amd_family.h"
-#include "compiler/shader_enums.h"
-
-struct nir_shader;
-struct nir_variable;
-struct ac_llvm_context;
-struct ac_shader_abi;
-
-/* Interpolation locations */
-#define INTERP_CENTER 0
-#define INTERP_CENTROID 1
-#define INTERP_SAMPLE 2
-
-static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
-{
- return (index * 4) + chan;
-}
-
-void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
-
-bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
-
-void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
- struct nir_shader *nir);
-
-void
-ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
- struct ac_shader_abi *abi,
- struct nir_shader *nir,
- struct nir_variable *variable,
- gl_shader_stage stage);
-
-void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage);
-
-#endif /* AC_NIR_TO_LLVM_H */
+++ /dev/null
-/*
- * Copyright 2017 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef AC_SHADER_ABI_H
-#define AC_SHADER_ABI_H
-
-#include <llvm-c/Core.h>
-
-#include "compiler/shader_enums.h"
-
-struct nir_variable;
-
-#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
-
-#define AC_MAX_INLINE_PUSH_CONSTS 8
-
-enum ac_descriptor_type {
- AC_DESC_IMAGE,
- AC_DESC_FMASK,
- AC_DESC_SAMPLER,
- AC_DESC_BUFFER,
- AC_DESC_PLANE_0,
- AC_DESC_PLANE_1,
- AC_DESC_PLANE_2,
-};
-
-/* Document the shader ABI during compilation. This is what allows radeonsi and
- * radv to share a compiler backend.
- */
-struct ac_shader_abi {
- LLVMValueRef base_vertex;
- LLVMValueRef start_instance;
- LLVMValueRef draw_id;
- LLVMValueRef vertex_id;
- LLVMValueRef instance_id;
- LLVMValueRef tcs_patch_id;
- LLVMValueRef tcs_rel_ids;
- LLVMValueRef tes_patch_id;
- LLVMValueRef gs_prim_id;
- LLVMValueRef gs_invocation_id;
-
- /* PS */
- LLVMValueRef frag_pos[4];
- LLVMValueRef front_face;
- LLVMValueRef ancillary;
- LLVMValueRef sample_coverage;
- LLVMValueRef prim_mask;
- LLVMValueRef color0;
- LLVMValueRef color1;
- LLVMValueRef user_data;
- LLVMValueRef persp_sample;
- LLVMValueRef persp_center;
- LLVMValueRef persp_centroid;
- LLVMValueRef linear_sample;
- LLVMValueRef linear_center;
- LLVMValueRef linear_centroid;
-
- /* CS */
- LLVMValueRef local_invocation_ids;
- LLVMValueRef num_work_groups;
- LLVMValueRef workgroup_ids[3];
- LLVMValueRef tg_size;
-
- /* Vulkan only */
- LLVMValueRef push_constants;
- LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
- unsigned num_inline_push_consts;
- unsigned base_inline_push_consts;
- LLVMValueRef view_index;
-
- LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
-
- /* For VS and PS: pre-loaded shader inputs.
- *
- * Currently only used for NIR shaders; indexed by variables'
- * driver_location.
- */
- LLVMValueRef *inputs;
-
- /* Varying -> attribute number mapping. Also NIR-only */
- unsigned fs_input_attr_indices[MAX_VARYING];
-
- void (*emit_outputs)(struct ac_shader_abi *abi,
- unsigned max_outputs,
- LLVMValueRef *addrs);
-
- void (*emit_vertex)(struct ac_shader_abi *abi,
- unsigned stream,
- LLVMValueRef *addrs);
-
- void (*emit_primitive)(struct ac_shader_abi *abi,
- unsigned stream);
-
- void (*emit_kill)(struct ac_shader_abi *abi, LLVMValueRef visible);
-
- LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
- unsigned location,
- unsigned driver_location,
- unsigned component,
- unsigned num_components,
- unsigned vertex_index,
- unsigned const_index,
- LLVMTypeRef type);
-
- LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi,
- LLVMTypeRef type,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- unsigned const_index,
- unsigned location,
- unsigned driver_location,
- unsigned component,
- unsigned num_components,
- bool is_patch,
- bool is_compact,
- bool load_inputs);
-
- void (*store_tcs_outputs)(struct ac_shader_abi *abi,
- const struct nir_variable *var,
- LLVMValueRef vertex_index,
- LLVMValueRef param_index,
- unsigned const_index,
- LLVMValueRef src,
- unsigned writemask);
-
- LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi,
- unsigned varying_id,
- bool load_default_state);
-
-
- LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
-
- /**
- * Load the descriptor for the given buffer.
- *
- * \param buffer the buffer as presented in NIR: this is the descriptor
- * in Vulkan, and the buffer index in OpenGL/Gallium
- * \param write whether buffer contents will be written
- */
- LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
- LLVMValueRef buffer, bool write);
-
- /**
- * Load a descriptor associated to a sampler.
- *
- * \param descriptor_set the descriptor set index (only for Vulkan)
- * \param base_index the base index of the sampler variable
- * \param constant_index constant part of an array index (or 0, if the
- * sampler variable is not an array)
- * \param index non-constant part of an array index (may be NULL)
- * \param desc_type the type of descriptor to load
- * \param image whether the descriptor is loaded for an image operation
- */
- LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi,
- unsigned descriptor_set,
- unsigned base_index,
- unsigned constant_index,
- LLVMValueRef index,
- enum ac_descriptor_type desc_type,
- bool image, bool write,
- bool bindless);
-
- /**
- * Load a Vulkan-specific resource.
- *
- * \param index resource index
- * \param desc_set descriptor set
- * \param binding descriptor set binding
- */
- LLVMValueRef (*load_resource)(struct ac_shader_abi *abi,
- LLVMValueRef index,
- unsigned desc_set,
- unsigned binding);
-
- LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi,
- LLVMValueRef sample_id);
-
- LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
-
- LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
-
- /* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently
- * uses it due to promoting D16 to D32, but radv needs it off. */
- bool clamp_shadow_reference;
- bool interp_at_sample_force_center;
-
- /* Whether bounds checks are required */
- bool robust_buffer_access;
-};
-
-#endif /* AC_SHADER_ABI_H */
'ac_binary.c',
'ac_binary.h',
'ac_exp_param.h',
- 'ac_llvm_build.c',
- 'ac_llvm_build.h',
- 'ac_llvm_cull.c',
- 'ac_llvm_cull.h',
- 'ac_llvm_helper.cpp',
- 'ac_llvm_util.c',
- 'ac_llvm_util.h',
- 'ac_shader_abi.h',
'ac_shader_util.c',
'ac_shader_util.h',
- 'ac_nir_to_llvm.c',
- 'ac_nir_to_llvm.h',
'ac_gpu_info.c',
'ac_gpu_info.h',
'ac_rtld.c',
inc_common, inc_compiler, inc_mesa, inc_mapi, inc_amd,
],
dependencies : [
- dep_llvm, dep_thread, dep_elf, dep_libdrm_amdgpu, dep_valgrind,
+ dep_thread, dep_elf, dep_libdrm_amdgpu, dep_valgrind,
idep_nir_headers,
],
c_args : [c_vis_args],
'aco',
[libaco_files, aco_opcodes_c, aco_opcodes_h, aco_builder_h],
include_directories : [
- inc_common, inc_compiler, inc_mesa, inc_mapi, inc_amd, inc_amd_common,
+ inc_common, inc_compiler, inc_mesa, inc_mapi, inc_amd, inc_amd_common, inc_amd_common_llvm,
],
dependencies : [
dep_llvm, dep_thread, dep_elf, dep_libdrm_amdgpu, dep_valgrind,
--- /dev/null
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
+#include "ac_llvm_build.h"
+
+#include <llvm-c/Core.h>
+#include <llvm/Config/llvm-config.h>
+
+#include "c11/threads.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "ac_llvm_util.h"
+#include "ac_shader_util.h"
+#include "ac_exp_param.h"
+#include "util/bitscan.h"
+#include "util/macros.h"
+#include "util/u_atomic.h"
+#include "util/u_math.h"
+#include "sid.h"
+
+#include "shader_enums.h"
+
+#define AC_LLVM_INITIAL_CF_DEPTH 4
+
+/* Data for if/else/endif and bgnloop/endloop control flow structures.
+ *
+ * One instance describes one open control-flow construct; both members are
+ * LLVM basic-block handles.
+ */
+struct ac_llvm_flow {
+ /* Loop exit or next part of if/else/endif. */
+ LLVMBasicBlockRef next_block;
+ /* NOTE(review): presumably the block a loop iteration starts at; its
+  * users are outside this part of the file - confirm against them. */
+ LLVMBasicBlockRef loop_entry_block;
+};
+
+/* Initialize an ac_llvm_context.
+ *
+ * Creates a fresh LLVMContext, then a module and a builder inside it, and
+ * caches the commonly used types, constants and metadata kinds in \p ctx.
+ *
+ * \param compiler          supplies the target machine: tm_wave32 when
+ *                          \p wave_size is 32, tm otherwise
+ * \param chip_class        stored in ctx->chip_class
+ * \param family            stored in ctx->family
+ * \param float_mode        forwarded to ac_create_builder()
+ * \param wave_size         stored in ctx->wave_size; also the bit width of
+ *                          ctx->iN_wavemask
+ * \param ballot_mask_bits  stored in ctx->ballot_mask_bits; also the bit
+ *                          width of ctx->iN_ballotmask
+ *
+ * ctx->flow is allocated with calloc() and its result is not checked here.
+ */
+void
+ac_llvm_context_init(struct ac_llvm_context *ctx,
+ struct ac_llvm_compiler *compiler,
+ enum chip_class chip_class, enum radeon_family family,
+ enum ac_float_mode float_mode, unsigned wave_size,
+ unsigned ballot_mask_bits)
+{
+ LLVMValueRef args[1];
+
+ ctx->context = LLVMContextCreate();
+
+ ctx->chip_class = chip_class;
+ ctx->family = family;
+ ctx->wave_size = wave_size;
+ ctx->ballot_mask_bits = ballot_mask_bits;
+ /* The module is created against the target machine matching the wave size. */
+ ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32
+ : compiler->tm,
+ ctx->context);
+ ctx->builder = ac_create_builder(ctx->context, float_mode);
+
+ /* Scalar and vector types, all owned by ctx->context. */
+ ctx->voidt = LLVMVoidTypeInContext(ctx->context);
+ ctx->i1 = LLVMInt1TypeInContext(ctx->context);
+ ctx->i8 = LLVMInt8TypeInContext(ctx->context);
+ ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
+ ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
+ ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
+ /* intptr is an alias of the 32-bit integer type. */
+ ctx->intptr = ctx->i32;
+ ctx->f16 = LLVMHalfTypeInContext(ctx->context);
+ ctx->f32 = LLVMFloatTypeInContext(ctx->context);
+ ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
+ ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
+ ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
+ ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
+ ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
+ ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
+ ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
+ ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
+ ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+ /* Integer types whose widths come from the wave size / ballot mask size. */
+ ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
+ ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
+
+ /* Frequently used 0 and 1 constants of each scalar type. */
+ ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
+ ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
+ ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
+ ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
+ ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
+ ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
+ ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
+ ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
+ ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
+ ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
+ ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
+ ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
+ ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
+ ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
+
+ ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
+ ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
+
+ /* Metadata kind IDs; the last argument is the length of the name string. */
+ ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
+ "range", 5);
+
+ ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
+ "invariant.load", 14);
+
+ ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
+
+ /* Metadata node holding the single f32 constant 2.5, for use with fpmath. */
+ args[0] = LLVMConstReal(ctx->f32, 2.5);
+ ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
+
+ ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
+ "amdgpu.uniform", 14);
+
+ /* Metadata node with no operands. */
+ ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
+ /* Zero-initialized control-flow state; freed by ac_llvm_context_dispose(). */
+ ctx->flow = calloc(1, sizeof(*ctx->flow));
+}
+
+/* Release the control-flow state that ac_llvm_context_init() allocated.
+ *
+ * Does nothing when ctx->flow is NULL, which happens if the allocation in
+ * ac_llvm_context_init() failed or if the context was already disposed
+ * (this function resets the pointer to NULL). Without that guard, reading
+ * ctx->flow->stack would dereference a NULL pointer.
+ */
+void
+ac_llvm_context_dispose(struct ac_llvm_context *ctx)
+{
+	if (!ctx->flow)
+		return;
+
+	free(ctx->flow->stack);
+	free(ctx->flow);
+	ctx->flow = NULL;
+}
+
+/* Return the number of elements of \p value's type: the vector length for
+ * a vector type, 1 for any other type.
+ */
+int
+ac_get_llvm_num_components(LLVMValueRef value)
+{
+	LLVMTypeRef value_type = LLVMTypeOf(value);
+
+	if (LLVMGetTypeKind(value_type) != LLVMVectorTypeKind)
+		return 1;
+
+	return LLVMGetVectorSize(value_type);
+}
+
+/* Return element \p index of \p value.
+ *
+ * For a vector this emits an extractelement with a constant i32 index.
+ * A non-vector value is returned unchanged, and \p index must then be 0.
+ */
+LLVMValueRef
+ac_llvm_extract_elem(struct ac_llvm_context *ac,
+		     LLVMValueRef value,
+		     int index)
+{
+	const bool is_vector =
+		LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind;
+
+	if (is_vector) {
+		LLVMValueRef lane = LLVMConstInt(ac->i32, index, false);
+
+		return LLVMBuildExtractElement(ac->builder, value, lane, "");
+	}
+
+	/* A scalar only has element 0. */
+	assert(index == 0);
+	return value;
+}
+
+/* Return the bit width of \p type, or of its element type when \p type is
+ * a vector. Handles integer types of any width and the context's f16, f32
+ * and f64 types; anything else is treated as unreachable.
+ */
+int
+ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
+{
+	LLVMTypeRef elem = LLVMGetTypeKind(type) == LLVMVectorTypeKind ?
+				   LLVMGetElementType(type) : type;
+
+	if (LLVMGetTypeKind(elem) == LLVMIntegerTypeKind)
+		return LLVMGetIntTypeWidth(elem);
+
+	if (elem == ctx->f16)
+		return 16;
+	else if (elem == ctx->f32)
+		return 32;
+	else if (elem == ctx->f64)
+		return 64;
+
+	unreachable("Unhandled type kind in get_elem_bits");
+}
+
+/* Return the size of \p type in bytes.
+ *
+ * Integer widths are divided by 8 with truncation. Pointers are 4 bytes in
+ * AC_ADDR_SPACE_CONST_32BIT and 8 bytes in every other address space.
+ * Vectors and arrays are element count times element size. Any other type
+ * kind asserts and yields 0.
+ */
+unsigned
+ac_get_type_size(LLVMTypeRef type)
+{
+	unsigned bytes;
+
+	switch (LLVMGetTypeKind(type)) {
+	case LLVMIntegerTypeKind:
+		bytes = LLVMGetIntTypeWidth(type) / 8;
+		break;
+	case LLVMHalfTypeKind:
+		bytes = 2;
+		break;
+	case LLVMFloatTypeKind:
+		bytes = 4;
+		break;
+	case LLVMDoubleTypeKind:
+		bytes = 8;
+		break;
+	case LLVMPointerTypeKind:
+		bytes = LLVMGetPointerAddressSpace(type) ==
+			AC_ADDR_SPACE_CONST_32BIT ? 4 : 8;
+		break;
+	case LLVMVectorTypeKind:
+		bytes = LLVMGetVectorSize(type) *
+			ac_get_type_size(LLVMGetElementType(type));
+		break;
+	case LLVMArrayTypeKind:
+		bytes = LLVMGetArrayLength(type) *
+			ac_get_type_size(LLVMGetElementType(type));
+		break;
+	default:
+		assert(0);
+		bytes = 0;
+		break;
+	}
+
+	return bytes;
+}
+
+/* Map a scalar type of the context to the integer type of the same width:
+ * i8 -> i8, f16/i16 -> i16, f32/i32 -> i32, f64/i64 -> i64.
+ * Any other type is treated as unreachable.
+ */
+static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+	if (t == ctx->i8)
+		return ctx->i8;
+	if (t == ctx->i16 || t == ctx->f16)
+		return ctx->i16;
+	if (t == ctx->i32 || t == ctx->f32)
+		return ctx->i32;
+	if (t == ctx->i64 || t == ctx->f64)
+		return ctx->i64;
+
+	unreachable("Unhandled integer size");
+}
+
+/* Return the integer type corresponding to \p t.
+ *
+ * Vectors keep their length and get an integer element type. Pointers map
+ * to i64 in AC_ADDR_SPACE_GLOBAL and to i32 in AC_ADDR_SPACE_LDS; other
+ * address spaces are treated as unreachable. Scalars go through
+ * to_integer_type_scalar().
+ */
+LLVMTypeRef
+ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+	const LLVMTypeKind kind = LLVMGetTypeKind(t);
+
+	if (kind == LLVMVectorTypeKind) {
+		LLVMTypeRef int_elem =
+			to_integer_type_scalar(ctx, LLVMGetElementType(t));
+
+		return LLVMVectorType(int_elem, LLVMGetVectorSize(t));
+	}
+
+	if (kind == LLVMPointerTypeKind) {
+		const unsigned addr_space = LLVMGetPointerAddressSpace(t);
+
+		if (addr_space == AC_ADDR_SPACE_GLOBAL)
+			return ctx->i64;
+		if (addr_space == AC_ADDR_SPACE_LDS)
+			return ctx->i32;
+
+		unreachable("unhandled address space");
+	}
+
+	return to_integer_type_scalar(ctx, t);
+}
+
+/* Convert \p v to the integer type given by ac_to_integer_type():
+ * ptrtoint for pointers, bitcast for everything else.
+ */
+LLVMValueRef
+ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
+{
+	LLVMTypeRef src_type = LLVMTypeOf(v);
+	LLVMTypeRef int_type = ac_to_integer_type(ctx, src_type);
+
+	if (LLVMGetTypeKind(src_type) == LLVMPointerTypeKind)
+		return LLVMBuildPtrToInt(ctx->builder, v, int_type, "");
+
+	return LLVMBuildBitCast(ctx->builder, v, int_type, "");
+}
+
+/* Like ac_to_integer(), except that pointer values are returned unchanged. */
+LLVMValueRef
+ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
+{
+	const bool is_pointer =
+		LLVMGetTypeKind(LLVMTypeOf(v)) == LLVMPointerTypeKind;
+
+	return is_pointer ? v : ac_to_integer(ctx, v);
+}
+
+/* Map a scalar type of the context to the float type of the same width:
+ * i16/f16 -> f16, i32/f32 -> f32, i64/f64 -> f64. i8 is returned as i8.
+ * Any other type is treated as unreachable.
+ */
+static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+	if (t == ctx->i8)
+		return ctx->i8;
+	if (t == ctx->f16 || t == ctx->i16)
+		return ctx->f16;
+	if (t == ctx->f32 || t == ctx->i32)
+		return ctx->f32;
+	if (t == ctx->f64 || t == ctx->i64)
+		return ctx->f64;
+
+	unreachable("Unhandled float size");
+}
+
+/* Return the float type corresponding to \p t; vectors keep their length
+ * and get the element type chosen by to_float_type_scalar().
+ */
+LLVMTypeRef
+ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
+{
+	if (LLVMGetTypeKind(t) != LLVMVectorTypeKind)
+		return to_float_type_scalar(ctx, t);
+
+	LLVMTypeRef float_elem =
+		to_float_type_scalar(ctx, LLVMGetElementType(t));
+
+	return LLVMVectorType(float_elem, LLVMGetVectorSize(t));
+}
+
+/* Bitcast \p v to the type given by ac_to_float_type(). */
+LLVMValueRef
+ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
+{
+	LLVMTypeRef float_type = ac_to_float_type(ctx, LLVMTypeOf(v));
+
+	return LLVMBuildBitCast(ctx->builder, v, float_type, "");
+}
+
+
+/* Emit a call to the function called \p name, declaring it on first use.
+ *
+ * The function is looked up in ctx->module by name. If it does not exist
+ * yet, it is added with a non-variadic signature built from \p return_type
+ * and the types of \p params, C calling convention and external linkage.
+ *
+ * \param params       call arguments; every entry must be non-NULL when the
+ *                     function has to be declared
+ * \param param_count  number of entries in \p params, at most 32
+ * \param attrib_mask  AC_FUNC_ATTR_* bits. With AC_FUNC_ATTR_LEGACY they
+ *                     are applied to the function when it is declared;
+ *                     without it they are applied to each call.
+ * \return the call instruction
+ */
+LLVMValueRef
+ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
+ LLVMTypeRef return_type, LLVMValueRef *params,
+ unsigned param_count, unsigned attrib_mask)
+{
+ LLVMValueRef function, call;
+ bool set_callsite_attrs = !(attrib_mask & AC_FUNC_ATTR_LEGACY);
+
+ function = LLVMGetNamedFunction(ctx->module, name);
+ if (!function) {
+ LLVMTypeRef param_types[32], function_type;
+ unsigned i;
+
+ /* param_types has room for 32 entries only. */
+ assert(param_count <= 32);
+
+ for (i = 0; i < param_count; ++i) {
+ assert(params[i]);
+ param_types[i] = LLVMTypeOf(params[i]);
+ }
+ function_type =
+ LLVMFunctionType(return_type, param_types, param_count, 0);
+ function = LLVMAddFunction(ctx->module, name, function_type);
+
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ LLVMSetLinkage(function, LLVMExternalLinkage);
+
+ /* Legacy mode: attributes go on the declaration, once. */
+ if (!set_callsite_attrs)
+ ac_add_func_attributes(ctx->context, function, attrib_mask);
+ }
+
+ call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
+ /* Default mode: attributes go on every call site. */
+ if (set_callsite_attrs)
+ ac_add_func_attributes(ctx->context, call, attrib_mask);
+ return call;
+}
+
+/**
+ * Write the textual name of \p type into \p buf (e.g. for use in intrinsic
+ * names).
+ *
+ * Vectors produce "v<N>" followed by the element name. Element names are
+ * "i<bits>" for integers and "f16", "f32", "f64" for half, float and
+ * double. For any other element kind nothing is written for the element.
+ *
+ * \param buf      destination buffer
+ * \param bufsize  size of \p buf in bytes; must be at least 8
+ *
+ * If the vector prefix cannot be formatted or does not fit in \p buf, an
+ * error is printed to stderr and the function returns early.
+ */
+void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
+{
+	LLVMTypeRef elem_type = type;
+
+	assert(bufsize >= 8);
+
+	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
+		int ret = snprintf(buf, bufsize, "v%u",
+				   LLVMGetVectorSize(type));
+		/* snprintf returns the length it wanted to write, so
+		 * ret >= bufsize means the prefix was truncated. Advancing
+		 * buf by ret would then point past the buffer and the
+		 * unsigned bufsize would wrap around.
+		 */
+		if (ret < 0 || (unsigned)ret >= bufsize) {
+			char *type_name = LLVMPrintTypeToString(type);
+			fprintf(stderr, "Error building type name for: %s\n",
+				type_name);
+			LLVMDisposeMessage(type_name);
+			return;
+		}
+		elem_type = LLVMGetElementType(type);
+		buf += ret;
+		bufsize -= ret;
+	}
+	switch (LLVMGetTypeKind(elem_type)) {
+	default: break;
+	case LLVMIntegerTypeKind:
+		snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
+		break;
+	case LLVMHalfTypeKind:
+		snprintf(buf, bufsize, "f16");
+		break;
+	case LLVMFloatTypeKind:
+		snprintf(buf, bufsize, "f32");
+		break;
+	case LLVMDoubleTypeKind:
+		snprintf(buf, bufsize, "f64");
+		break;
+	}
+}
+
+/**
+ * Build an LLVM IR PHI node of \p type and attach its \p count_incoming
+ * (value, predecessor block) pairs in one go.
+ */
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	     unsigned count_incoming, LLVMValueRef *values,
+	     LLVMBasicBlockRef *blocks)
+{
+	LLVMValueRef node = LLVMBuildPhi(ctx->builder, type, "");
+
+	LLVMAddIncoming(node, values, blocks, count_incoming);
+
+	return node;
+}
+
+/* Emit a call to llvm.amdgcn.s.barrier, marked convergent. */
+void ac_build_s_barrier(struct ac_llvm_context *ctx)
+{
+	const unsigned attrs = AC_FUNC_ATTR_CONVERGENT;
+
+	ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt,
+			   NULL, 0, attrs);
+}
+
+/* Prevent optimizations (at least of memory accesses) across the current
+ * point in the program by emitting empty inline assembly that is marked as
+ * having side effects.
+ *
+ * Optionally, a value can be passed through the inline assembly to prevent
+ * LLVM from hoisting calls to ReadNone functions.
+ *
+ * \param pvgpr  NULL to emit a barrier that takes and returns nothing.
+ *               Otherwise points to a value whose size is a multiple of
+ *               4 bytes; its first 32 bits are routed through the inline
+ *               assembly and *pvgpr is replaced by the rebuilt value of the
+ *               same type.
+ */
+void
+ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef *pvgpr)
+{
+ static int counter = 0;
+
+ LLVMBuilderRef builder = ctx->builder;
+ char code[16];
+
+ /* The asm text is only a comment carrying an atomically incremented
+  * number, so every barrier gets a different string (presumably so that
+  * identical asm blobs are not merged). */
+ snprintf(code, sizeof(code), "; %d", p_atomic_inc_return(&counter));
+
+ if (!pvgpr) {
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "", true, false);
+ LLVMBuildCall(builder, inlineasm, NULL, 0, "");
+ } else {
+ /* i32 -> i32 asm; constraint "=v,0" ties the input to the output. */
+ LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
+ LLVMValueRef inlineasm = LLVMConstInlineAsm(ftype, code, "=v,0", true, false);
+ LLVMValueRef vgpr = *pvgpr;
+ LLVMTypeRef vgpr_type = LLVMTypeOf(vgpr);
+ unsigned vgpr_size = ac_get_type_size(vgpr_type);
+ LLVMValueRef vgpr0;
+
+ assert(vgpr_size % 4 == 0);
+
+ /* View the value as a vector of i32, pass element 0 through the asm,
+  * put it back, and restore the original type. */
+ vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
+ vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
+ vgpr0 = LLVMBuildCall(builder, inlineasm, &vgpr0, 1, "");
+ vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
+ vgpr = LLVMBuildBitCast(builder, vgpr, vgpr_type, "");
+
+ *pvgpr = vgpr;
+ }
+}
+
+/* Read a 64-bit counter and return it bitcast to <2 x i32>.
+ *
+ * Uses llvm.amdgcn.s.memrealtime on LLVM >= 9 with GFX8 or newer chips,
+ * and llvm.readcyclecounter otherwise.
+ */
+LLVMValueRef
+ac_build_shader_clock(struct ac_llvm_context *ctx)
+{
+	const char *intr;
+
+	if (LLVM_VERSION_MAJOR >= 9 && ctx->chip_class >= GFX8)
+		intr = "llvm.amdgcn.s.memrealtime";
+	else
+		intr = "llvm.readcyclecounter";
+
+	LLVMValueRef ticks = ac_build_intrinsic(ctx, intr, ctx->i64, NULL, 0, 0);
+
+	return LLVMBuildBitCast(ctx->builder, ticks, ctx->v2i32, "");
+}
+
+/* Build a ballot of \p value: a call to llvm.amdgcn.icmp comparing the
+ * value against i32 0 with the "not equal" predicate.
+ *
+ * The result has type ctx->iN_wavemask. On LLVM >= 9 the intrinsic overload
+ * is picked by ctx->wave_size (i64 result for 64, i32 result otherwise);
+ * older LLVM uses the single "llvm.amdgcn.icmp.i32" name.
+ *
+ * \p value is first passed through ac_build_optimization_barrier() and then
+ * converted with ac_to_integer().
+ */
+LLVMValueRef
+ac_build_ballot(struct ac_llvm_context *ctx,
+ LLVMValueRef value)
+{
+ const char *name;
+
+ if (LLVM_VERSION_MAJOR >= 9) {
+ if (ctx->wave_size == 64)
+ name = "llvm.amdgcn.icmp.i64.i32";
+ else
+ name = "llvm.amdgcn.icmp.i32.i32";
+ } else {
+ name = "llvm.amdgcn.icmp.i32";
+ }
+ /* Operands: lhs, rhs (zero), and the comparison predicate as an i32. */
+ LLVMValueRef args[3] = {
+ value,
+ ctx->i32_0,
+ LLVMConstInt(ctx->i32, LLVMIntNE, 0)
+ };
+
+ /* We currently have no other way to prevent LLVM from lifting the icmp
+ * calls to a dominating basic block.
+ */
+ ac_build_optimization_barrier(ctx, &args[0]);
+
+ args[0] = ac_to_integer(ctx, args[0]);
+
+ return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
+ AC_FUNC_ATTR_NOUNWIND |
+ AC_FUNC_ATTR_READNONE |
+ AC_FUNC_ATTR_CONVERGENT);
+}
+
+/* Build llvm.amdgcn.icmp on the i1 \p value, comparing it against i1 false
+ * with the "not equal" predicate, and return the intrinsic's result.
+ *
+ * The overload and the result type follow ctx->wave_size in the same way
+ * as ac_build_ballot(): on LLVM >= 9 an i64 result for wave size 64 and an
+ * i32 result otherwise; older LLVM uses the single "llvm.amdgcn.icmp.i1"
+ * name. The result type is ctx->iN_wavemask, which is the 64-bit integer
+ * type for wave size 64, so that configuration is unchanged.
+ */
+LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
+				 LLVMValueRef value)
+{
+	const char *name;
+
+	if (LLVM_VERSION_MAJOR >= 9) {
+		if (ctx->wave_size == 64)
+			name = "llvm.amdgcn.icmp.i64.i1";
+		else
+			name = "llvm.amdgcn.icmp.i32.i1";
+	} else {
+		name = "llvm.amdgcn.icmp.i1";
+	}
+
+	/* Operands: lhs, rhs (false), and the comparison predicate as an i32. */
+	LLVMValueRef args[3] = {
+		value,
+		ctx->i1false,
+		LLVMConstInt(ctx->i32, LLVMIntNE, 0),
+	};
+
+	return ac_build_intrinsic(ctx, name, ctx->iN_wavemask, args, 3,
+				  AC_FUNC_ATTR_NOUNWIND |
+				  AC_FUNC_ATTR_READNONE |
+				  AC_FUNC_ATTR_CONVERGENT);
+}
+
+/* Return an i1 that is true when the ballot of \p value equals the ballot
+ * of the constant 1.
+ */
+LLVMValueRef
+ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+	/* Keep this order: each ballot emits IR at the current position. */
+	LLVMValueRef active_lanes = ac_build_ballot(ctx, ctx->i32_1);
+	LLVMValueRef voting_lanes = ac_build_ballot(ctx, value);
+
+	return LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+			     voting_lanes, active_lanes, "");
+}
+
+/* Return an i1 that is true when the ballot of \p value is non-zero. */
+LLVMValueRef
+ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+	LLVMValueRef voting_lanes = ac_build_ballot(ctx, value);
+	LLVMValueRef no_lanes = LLVMConstInt(ctx->iN_wavemask, 0, 0);
+
+	return LLVMBuildICmp(ctx->builder, LLVMIntNE,
+			     voting_lanes, no_lanes, "");
+}
+
+/* Return an i1 that is true when the ballot of \p value either equals the
+ * ballot of the constant 1 or is zero.
+ */
+LLVMValueRef
+ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+	LLVMBuilderRef builder = ctx->builder;
+
+	/* Keep this order: each ballot emits IR at the current position. */
+	LLVMValueRef active_lanes = ac_build_ballot(ctx, ctx->i32_1);
+	LLVMValueRef voting_lanes = ac_build_ballot(ctx, value);
+
+	LLVMValueRef every_lane = LLVMBuildICmp(builder, LLVMIntEQ,
+						voting_lanes, active_lanes, "");
+	LLVMValueRef no_lanes = LLVMConstInt(ctx->iN_wavemask, 0, 0);
+	LLVMValueRef zero_lanes = LLVMBuildICmp(builder, LLVMIntEQ,
+						voting_lanes, no_lanes, "");
+
+	return LLVMBuildOr(builder, every_lane, zero_lanes, "");
+}
+
+/* Pack values[component] .. values[component + value_count - 1] into a
+ * vector of \p value_count elements.
+ *
+ * With a single value, that value is returned as a scalar. A count of 0 is
+ * treated as unreachable.
+ */
+LLVMValueRef
+ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+			       unsigned value_count, unsigned component)
+{
+	if (!value_count)
+		unreachable("value_count is 0");
+	if (value_count == 1)
+		return values[component];
+
+	LLVMValueRef *first = &values[component];
+	LLVMValueRef result =
+		LLVMGetUndef(LLVMVectorType(LLVMTypeOf(first[0]), value_count));
+
+	for (unsigned lane = 0; lane < value_count; lane++) {
+		LLVMValueRef lane_index = LLVMConstInt(ctx->i32, lane, false);
+
+		result = LLVMBuildInsertElement(ctx->builder, result,
+						first[lane], lane_index, "");
+	}
+
+	return result;
+}
+
+/* Pack \p value_count entries of \p values, taken every \p value_stride
+ * entries, into a vector.
+ *
+ * \param load           when true, each entry is loaded through
+ *                       LLVMBuildLoad before it is used
+ * \param always_vector  when false, a single value is returned as a scalar
+ *                       instead of a 1-element vector
+ *
+ * A count of 0 is treated as unreachable.
+ */
+LLVMValueRef
+ac_build_gather_values_extended(struct ac_llvm_context *ctx,
+				LLVMValueRef *values,
+				unsigned value_count,
+				unsigned value_stride,
+				bool load,
+				bool always_vector)
+{
+	LLVMBuilderRef builder = ctx->builder;
+	LLVMValueRef result = NULL;
+
+	if (!value_count)
+		unreachable("value_count is 0");
+
+	if (value_count == 1 && !always_vector)
+		return load ? LLVMBuildLoad(builder, values[0], "") : values[0];
+
+	for (unsigned lane = 0; lane < value_count; lane++) {
+		LLVMValueRef elem = values[lane * value_stride];
+
+		if (load)
+			elem = LLVMBuildLoad(builder, elem, "");
+
+		/* The vector type is only known once the first element is. */
+		if (lane == 0)
+			result = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(elem),
+							     value_count));
+
+		result = LLVMBuildInsertElement(builder, result, elem,
+						LLVMConstInt(ctx->i32, lane, false),
+						"");
+	}
+
+	return result;
+}
+
+/* Pack \p value_count consecutive values into a vector, without loading
+ * them; a single value is returned as a scalar.
+ */
+LLVMValueRef
+ac_build_gather_values(struct ac_llvm_context *ctx,
+		       LLVMValueRef *values,
+		       unsigned value_count)
+{
+	const unsigned stride = 1;
+	const bool load = false;
+	const bool always_vector = false;
+
+	return ac_build_gather_values_extended(ctx, values, value_count,
+					       stride, load, always_vector);
+}
+
+/* Expand a scalar or vector to <dst_channels x type> by filling the remaining
+ * channels with undef. Extract at most src_channels components from the input.
+ *
+ * For a vector input, the number of extracted components is additionally
+ * limited to the vector's size and to dst_channels, so the temporary
+ * channel array (dst_channels entries) is never written out of bounds.
+ * A vector that already has dst_channels elements is returned unchanged
+ * when src_channels == dst_channels. For a scalar input, src_channels must
+ * be 0 or 1.
+ */
+static LLVMValueRef
+ac_build_expand(struct ac_llvm_context *ctx,
+		LLVMValueRef value,
+		unsigned src_channels,
+		unsigned dst_channels)
+{
+	LLVMTypeRef elemtype;
+	LLVMValueRef chan[dst_channels];
+
+	if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+		unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
+
+		if (src_channels == dst_channels && vec_size == dst_channels)
+			return value;
+
+		src_channels = MIN2(src_channels, vec_size);
+		/* chan[] only holds dst_channels entries. */
+		src_channels = MIN2(src_channels, dst_channels);
+
+		for (unsigned i = 0; i < src_channels; i++)
+			chan[i] = ac_llvm_extract_elem(ctx, value, i);
+
+		elemtype = LLVMGetElementType(LLVMTypeOf(value));
+	} else {
+		if (src_channels) {
+			assert(src_channels == 1);
+			chan[0] = value;
+		}
+		elemtype = LLVMTypeOf(value);
+	}
+
+	/* Everything not taken from the input is undef. */
+	for (unsigned i = src_channels; i < dst_channels; i++)
+		chan[i] = LLVMGetUndef(elemtype);
+
+	return ac_build_gather_values(ctx, chan, dst_channels);
+}
+
+/* Return components [start, start + channels) of \p value, packed with
+ * ac_build_gather_values().
+ */
+LLVMValueRef
+ac_extract_components(struct ac_llvm_context *ctx,
+		      LLVMValueRef value,
+		      unsigned start,
+		      unsigned channels)
+{
+	LLVMValueRef picked[channels];
+	unsigned n = 0;
+
+	while (n < channels) {
+		picked[n] = ac_llvm_extract_elem(ctx, value, start + n);
+		n++;
+	}
+
+	return ac_build_gather_values(ctx, picked, channels);
+}
+
+/* Widen a scalar or vector to a 4-component vector. At most num_channels
+ * components are taken from the input; the rest of the result is undef.
+ */
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
+				     LLVMValueRef value,
+				     unsigned num_channels)
+{
+	const unsigned vec4_width = 4;
+
+	return ac_build_expand(ctx, value, num_channels, vec4_width);
+}
+
+/* Round "value" with the llvm.rint intrinsic matching its size:
+ * 2 bytes -> f16, 4 bytes -> f32, anything else -> f64.
+ */
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+	LLVMTypeRef type = LLVMTypeOf(value);
+	const char *intr;
+
+	switch (ac_get_type_size(type)) {
+	case 2:
+		intr = "llvm.rint.f16";
+		break;
+	case 4:
+		intr = "llvm.rint.f32";
+		break;
+	default:
+		intr = "llvm.rint.f64";
+		break;
+	}
+
+	return ac_build_intrinsic(ctx, intr, type, &value, 1,
+				  AC_FUNC_ATTR_READNONE);
+}
+
+/* Build num / den as num * (1 / den).
+ *
+ * Rationale for the two-step form:
+ *   (num / den) makes LLVM >= 7.0 emit
+ *      num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
+ *   (num * (1 / den)) makes LLVM emit
+ *      num * v_rcp_f32(den);
+ */
+LLVMValueRef
+ac_build_fdiv(struct ac_llvm_context *ctx,
+	      LLVMValueRef num,
+	      LLVMValueRef den)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMValueRef reciprocal, quotient;
+
+	reciprocal = LLVMBuildFDiv(b, LLVMConstReal(LLVMTypeOf(num), 1.0), den, "");
+	quotient = LLVMBuildFMul(b, num, reciprocal, "");
+
+	/* Metadata is only attached when the result is not a constant. */
+	if (LLVMIsConstant(quotient))
+		return quotient;
+
+	/* Use v_rcp_f32 instead of precise division.
+	 * NOTE(review): the fpmath metadata lands on the multiply, not on the
+	 * reciprocal fdiv - confirm this is intended.
+	 */
+	LLVMSetMetadata(quotient, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+	return quotient;
+}
+
+/* Unsigned division by a precomputed constant; see fast_idiv_by_const.h.
+ *
+ * Emits: (((zext64(num >> pre_shift) * zext64(multiplier))
+ *          + zext64(increment)) >> 32) truncated to i32, >> post_shift.
+ *
+ * The caller must set:
+ *   increment = util_fast_udiv_info::increment ? multiplier : 0;
+ */
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+				LLVMValueRef num,
+				LLVMValueRef multiplier,
+				LLVMValueRef pre_shift,
+				LLVMValueRef post_shift,
+				LLVMValueRef increment)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMTypeRef i64 = ctx->i64;
+	LLVMValueRef shifted, wide_num, wide_mul, wide_inc, product, high;
+
+	shifted = LLVMBuildLShr(b, num, pre_shift, "");
+
+	/* 32x32 -> 64 bit multiply. */
+	wide_num = LLVMBuildZExt(b, shifted, i64, "");
+	wide_mul = LLVMBuildZExt(b, multiplier, i64, "");
+	product = LLVMBuildMul(b, wide_num, wide_mul, "");
+
+	wide_inc = LLVMBuildZExt(b, increment, i64, "");
+	product = LLVMBuildAdd(b, product, wide_inc, "");
+
+	/* Keep the upper 32 bits of the 64-bit intermediate. */
+	high = LLVMBuildLShr(b, product, LLVMConstInt(i64, 32, 0), "");
+	high = LLVMBuildTrunc(b, high, ctx->i32, "");
+
+	return LLVMBuildLShr(b, high, post_shift, "");
+}
+
+/* Unsigned division by a precomputed constant; see fast_idiv_by_const.h.
+ *
+ * More efficient variant that can be used if num != UINT_MAX: the increment
+ * is added to the numerator (no-unsigned-wrap add) before the multiply.
+ *
+ * The caller must set:
+ *   increment = util_fast_udiv_info::increment;
+ */
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+				    LLVMValueRef num,
+				    LLVMValueRef multiplier,
+				    LLVMValueRef pre_shift,
+				    LLVMValueRef post_shift,
+				    LLVMValueRef increment)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMTypeRef i64 = ctx->i64;
+	LLVMValueRef adjusted, wide_num, wide_mul, product, high;
+
+	adjusted = LLVMBuildLShr(b, num, pre_shift, "");
+	adjusted = LLVMBuildNUWAdd(b, adjusted, increment, "");
+
+	/* 32x32 -> 64 bit multiply. */
+	wide_num = LLVMBuildZExt(b, adjusted, i64, "");
+	wide_mul = LLVMBuildZExt(b, multiplier, i64, "");
+	product = LLVMBuildMul(b, wide_num, wide_mul, "");
+
+	/* Keep the upper 32 bits of the 64-bit product. */
+	high = LLVMBuildLShr(b, product, LLVMConstInt(i64, 32, 0), "");
+	high = LLVMBuildTrunc(b, high, ctx->i32, "");
+
+	return LLVMBuildLShr(b, high, post_shift, "");
+}
+
+/* Unsigned division by a precomputed constant; see fast_idiv_by_const.h.
+ *
+ * Requirements: both operands must fit in 31 bits and the divisor must not
+ * be 1. No pre-shift or increment is needed in that case.
+ */
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+					      LLVMValueRef num,
+					      LLVMValueRef multiplier,
+					      LLVMValueRef post_shift)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMTypeRef i64 = ctx->i64;
+	LLVMValueRef wide_num, wide_mul, product, high;
+
+	/* 32x32 -> 64 bit multiply. */
+	wide_num = LLVMBuildZExt(b, num, i64, "");
+	wide_mul = LLVMBuildZExt(b, multiplier, i64, "");
+	product = LLVMBuildMul(b, wide_num, wide_mul, "");
+
+	/* Keep the upper 32 bits of the 64-bit product. */
+	high = LLVMBuildLShr(b, product, LLVMConstInt(i64, 32, 0), "");
+	high = LLVMBuildTrunc(b, high, ctx->i32, "");
+
+	return LLVMBuildLShr(b, high, post_shift, "");
+}
+
+/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
+ * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
+ * already multiplied by two. id is the cube face number.
+ *
+ * All members are LLVM values; build_cube_intrinsic() fills them in from the
+ * llvm.amdgcn.cube{sc,tc,ma,id} intrinsics, each of which returns an f32.
+ */
+struct cube_selection_coords {
+ LLVMValueRef stc[2]; /* [0] = sc (cubesc), [1] = tc (cubetc) */
+ LLVMValueRef ma; /* major axis value (cubema) */
+ LLVMValueRef id; /* face number as a float (cubeid) */
+};
+
+/* Fill "out" with the results of the four AMDGCN cube intrinsics applied to
+ * the three coordinates in "in". The intrinsics are emitted in the order
+ * tc, sc, ma, id.
+ */
+static void
+build_cube_intrinsic(struct ac_llvm_context *ctx,
+		     LLVMValueRef in[3],
+		     struct cube_selection_coords *out)
+{
+	const struct {
+		const char *name;
+		LLVMValueRef *dst;
+	} ops[] = {
+		{ "llvm.amdgcn.cubetc", &out->stc[1] },
+		{ "llvm.amdgcn.cubesc", &out->stc[0] },
+		{ "llvm.amdgcn.cubema", &out->ma },
+		{ "llvm.amdgcn.cubeid", &out->id },
+	};
+
+	for (unsigned op = 0; op < sizeof(ops) / sizeof(ops[0]); op++) {
+		*ops[op].dst = ac_build_intrinsic(ctx, ops[op].name,
+						  ctx->f32, in, 3,
+						  AC_FUNC_ATTR_READNONE);
+	}
+}
+
+/**
+ * Manually select the cube face sc/tc coordinates and the major axis value
+ * (multiplied by 2 for consistency) of the vec3 \p coords, using the face
+ * implied by \p selcoords.
+ *
+ * The sign of the major axis is always adjusted to be in the direction of
+ * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
+ * the selcoords major axis.
+ *
+ * \param out_st  receives sc in [0] and tc in [1]
+ * \param out_ma  receives the major axis value
+ */
+static void build_cube_select(struct ac_llvm_context *ctx,
+			      const struct cube_selection_coords *selcoords,
+			      const LLVMValueRef *coords,
+			      LLVMValueRef *out_st,
+			      LLVMValueRef *out_ma)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
+	LLVMValueRef zero = LLVMConstReal(f32, 0.0);
+	LLVMValueRef one = LLVMConstReal(f32, 1.0);
+	LLVMValueRef minus_one = LLVMConstReal(f32, -1.0);
+	LLVMValueRef two = LLVMConstReal(f32, 2.0);
+	LLVMValueRef four = LLVMConstReal(f32, 4.0);
+	LLVMValueRef ma_is_positive, ma_sign;
+	LLVMValueRef face_is_z, face_not_z, face_ge_2, face_is_y, face_not_y, face_is_x;
+	LLVMValueRef picked, sign, neg_ma_sign, non_y_sign, yx_coord;
+
+	/* Sign of the selected major axis. */
+	ma_is_positive = LLVMBuildFCmp(b, LLVMRealUGE, selcoords->ma, zero, "");
+	ma_sign = LLVMBuildSelect(b, ma_is_positive, one, minus_one, "");
+
+	/* Decode the face id: >= 4 is a Z face, >= 2 (and not Z) is a Y face,
+	 * everything else is an X face.
+	 */
+	face_is_z = LLVMBuildFCmp(b, LLVMRealUGE, selcoords->id, four, "");
+	face_not_z = LLVMBuildNot(b, face_is_z, "");
+	face_ge_2 = LLVMBuildFCmp(b, LLVMRealUGE, selcoords->id, two, "");
+	face_is_y = LLVMBuildAnd(b, face_not_z, face_ge_2, "");
+	face_not_y = LLVMBuildNot(b, face_is_y, "");
+	face_is_x = LLVMBuildAnd(b, face_not_z, face_not_y, "");
+
+	/* Select sc */
+	picked = LLVMBuildSelect(b, face_is_x, coords[2], coords[0], "");
+	neg_ma_sign = LLVMBuildFNeg(b, ma_sign, "");
+	non_y_sign = LLVMBuildSelect(b, face_is_z, ma_sign, neg_ma_sign, "");
+	sign = LLVMBuildSelect(b, face_is_y, one, non_y_sign, "");
+	out_st[0] = LLVMBuildFMul(b, picked, sign, "");
+
+	/* Select tc */
+	picked = LLVMBuildSelect(b, face_is_y, coords[2], coords[1], "");
+	sign = LLVMBuildSelect(b, face_is_y, ma_sign, minus_one, "");
+	out_st[1] = LLVMBuildFMul(b, picked, sign, "");
+
+	/* Select ma: twice the absolute value of the major-axis component. */
+	yx_coord = LLVMBuildSelect(b, face_is_y, coords[1], coords[0], "");
+	picked = LLVMBuildSelect(b, face_is_z, coords[2], yx_coord, "");
+	picked = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+				    ctx->f32, &picked, 1, AC_FUNC_ATTR_READNONE);
+	*out_ma = LLVMBuildFMul(b, picked, two, "");
+}
+
+/**
+ * Convert cube map coordinates (and optionally their derivatives) in place
+ * into face-relative 2D coordinates plus a face index.
+ *
+ * \param is_deriv    also convert derivs_arg (only done if it is non-NULL)
+ * \param is_array    coords_arg[3] holds the array layer
+ * \param is_lod      if set, the array layer is not rounded/clamped here
+ * \param coords_arg  in: the cube coordinates (plus the layer in [3] for
+ *                    arrays); out: elements [0..2] are overwritten with the
+ *                    two face coordinates and the face (or layer * 8 + face)
+ * \param derivs_arg  in: two vec3 derivatives (6 values); out: elements
+ *                    [0..3] are overwritten with two 2D derivatives
+ */
+void
+ac_prepare_cube_coords(struct ac_llvm_context *ctx,
+		       bool is_deriv, bool is_array, bool is_lod,
+		       LLVMValueRef *coords_arg,
+		       LLVMValueRef *derivs_arg)
+{
+	LLVMBuilderRef b = ctx->builder;
+	struct cube_selection_coords sel;
+	LLVMValueRef face_coords[3];
+	LLVMValueRef abs_ma, inv_ma;
+
+	if (is_array && !is_lod) {
+		LLVMValueRef layer = ac_build_round(ctx, coords_arg[3]);
+
+		/* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
+		 *
+		 *    "For Array forms, the array layer used will be
+		 *
+		 *       max(0, min(d-1, floor(layer+0.5)))
+		 *
+		 *     where d is the depth of the texture array and layer
+		 *     comes from the component indicated in the tables below."
+		 *
+		 * The rounding above is also a workaround for an issue where
+		 * the layer is taken from a helper invocation which happens
+		 * to fall on a different layer due to extrapolation.
+		 *
+		 * GFX8 and earlier attempt to implement this in hardware by
+		 * clamping the value of coords[2] = (8 * layer) + face.
+		 * Unfortunately, this means that we end up with the wrong
+		 * face when clamping occurs.
+		 *
+		 * Clamp the layer earlier to work around the issue.
+		 */
+		if (ctx->chip_class <= GFX8) {
+			LLVMValueRef non_negative =
+				LLVMBuildFCmp(b, LLVMRealOGE, layer, ctx->f32_0, "");
+			layer = LLVMBuildSelect(b, non_negative, layer, ctx->f32_0, "");
+		}
+
+		coords_arg[3] = layer;
+	}
+
+	build_cube_intrinsic(ctx, coords_arg, &sel);
+
+	/* inv_ma = 1 / |ma| */
+	abs_ma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
+				    ctx->f32, &sel.ma, 1, AC_FUNC_ATTR_READNONE);
+	inv_ma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), abs_ma);
+
+	face_coords[0] = LLVMBuildFMul(b, sel.stc[0], inv_ma, "");
+	face_coords[1] = LLVMBuildFMul(b, sel.stc[1], inv_ma, "");
+	face_coords[2] = sel.id;
+
+	if (is_deriv && derivs_arg) {
+		LLVMValueRef converted[4];
+
+		/* Convert cube derivatives to 2D derivatives. */
+		for (unsigned axis = 0; axis < 2; axis++) {
+			LLVMValueRef d_st[2];
+			LLVMValueRef d_ma;
+
+			/* Transform the derivative alongside the texture
+			 * coordinate. Mathematically, the correct formula is
+			 * as follows. Assume we're projecting onto the +Z face
+			 * and denote by dx/dh the derivative of the (original)
+			 * X texture coordinate with respect to horizontal
+			 * window coordinates. The projection onto the +Z face
+			 * plane is:
+			 *
+			 *   f(x,z) = x/z
+			 *
+			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
+			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
+			 *
+			 * This motivates the implementation below.
+			 *
+			 * Whether this actually gives the expected results for
+			 * apps that might feed in derivatives obtained via
+			 * finite differences is anyone's guess. The OpenGL spec
+			 * seems awfully quiet about how textureGrad for cube
+			 * maps should be handled.
+			 */
+			build_cube_select(ctx, &sel, &derivs_arg[axis * 3],
+					  d_st, &d_ma);
+
+			d_ma = LLVMBuildFMul(b, d_ma, inv_ma, "");
+
+			for (unsigned c = 0; c < 2; c++) {
+				LLVMValueRef scaled =
+					LLVMBuildFMul(b, d_st[c], inv_ma, "");
+				LLVMValueRef correction =
+					LLVMBuildFMul(b, d_ma, face_coords[c], "");
+
+				converted[axis * 2 + c] =
+					LLVMBuildFSub(b, scaled, correction, "");
+			}
+		}
+
+		/* Only written back once both input vec3s have been read. */
+		for (unsigned k = 0; k < 4; k++)
+			derivs_arg[k] = converted[k];
+	}
+
+	/* Shift the texture coordinate. This must be applied after the
+	 * derivative calculation.
+	 */
+	for (unsigned c = 0; c < 2; c++) {
+		face_coords[c] = LLVMBuildFAdd(b, face_coords[c],
+					       LLVMConstReal(ctx->f32, 1.5), "");
+	}
+
+	if (is_array) {
+		/* For cube arrays: coord.z = array_index * 8 + face, where
+		 * array_index is the w component of coords_arg.
+		 */
+		face_coords[2] = ac_build_fmad(ctx, coords_arg[3],
+					       LLVMConstReal(ctx->f32, 8.0),
+					       face_coords[2]);
+	}
+
+	for (unsigned k = 0; k < 3; k++)
+		coords_arg[k] = face_coords[k];
+}
+
+
+/* Two-step f32 interpolation: llvm.amdgcn.interp.p1 is fed with i, and its
+ * result is combined with j by llvm.amdgcn.interp.p2. Both steps receive the
+ * same channel, attribute number and params operands.
+ */
+LLVMValueRef
+ac_build_fs_interp(struct ac_llvm_context *ctx,
+		   LLVMValueRef llvm_chan,
+		   LLVMValueRef attr_number,
+		   LLVMValueRef params,
+		   LLVMValueRef i,
+		   LLVMValueRef j)
+{
+	LLVMValueRef p1_args[4] = { i, llvm_chan, attr_number, params };
+	LLVMValueRef p1_result =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
+				   ctx->f32, p1_args, 4, AC_FUNC_ATTR_READNONE);
+
+	LLVMValueRef p2_args[5] = { p1_result, j, llvm_chan, attr_number, params };
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
+				  ctx->f32, p2_args, 5, AC_FUNC_ATTR_READNONE);
+}
+
+/* Two-step interpolation producing an f16: llvm.amdgcn.interp.p1.f16
+ * (returns f32) followed by llvm.amdgcn.interp.p2.f16 (returns f16). Both
+ * intrinsics get a constant-false i1 operand right before params.
+ */
+LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+		       LLVMValueRef llvm_chan,
+		       LLVMValueRef attr_number,
+		       LLVMValueRef params,
+		       LLVMValueRef i,
+		       LLVMValueRef j)
+{
+	LLVMValueRef p1_args[5] = {
+		i, llvm_chan, attr_number, ctx->i1false, params,
+	};
+	LLVMValueRef p1_result =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
+				   ctx->f32, p1_args, 5, AC_FUNC_ATTR_READNONE);
+
+	LLVMValueRef p2_args[6] = {
+		p1_result, j, llvm_chan, attr_number, ctx->i1false, params,
+	};
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
+				  ctx->f16, p2_args, 6, AC_FUNC_ATTR_READNONE);
+}
+
+/* Emit llvm.amdgcn.interp.mov (f32 result) with the operands
+ * (parameter, llvm_chan, attr_number, params).
+ */
+LLVMValueRef
+ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
+		       LLVMValueRef parameter,
+		       LLVMValueRef llvm_chan,
+		       LLVMValueRef attr_number,
+		       LLVMValueRef params)
+{
+	LLVMValueRef mov_args[4] = {
+		parameter,
+		llvm_chan,
+		attr_number,
+		params,
+	};
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
+				  ctx->f32, mov_args, 4, AC_FUNC_ATTR_READNONE);
+}
+
+/* Single-index GEP: the address of base_ptr[index]. */
+LLVMValueRef
+ac_build_gep_ptr(struct ac_llvm_context *ctx,
+		 LLVMValueRef base_ptr,
+		 LLVMValueRef index)
+{
+	LLVMValueRef indices[1] = { index };
+
+	return LLVMBuildGEP(ctx->builder, base_ptr, indices, 1, "");
+}
+
+/* Two-index GEP whose first index is the constant 0 and whose second index
+ * is "index".
+ */
+LLVMValueRef
+ac_build_gep0(struct ac_llvm_context *ctx,
+	      LLVMValueRef base_ptr,
+	      LLVMValueRef index)
+{
+	LLVMValueRef indices[2];
+
+	indices[0] = ctx->i32_0;
+	indices[1] = index;
+
+	return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
+}
+
+/* Advance ptr by index elements (single-index GEP) and cast the result back
+ * to the type of ptr.
+ */
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+				  LLVMValueRef index)
+{
+	LLVMValueRef advanced = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
+
+	return LLVMBuildPointerCast(ctx->builder, advanced, LLVMTypeOf(ptr), "");
+}
+
+/* Store value to the element addressed by ac_build_gep0(base_ptr, index). */
+void
+ac_build_indexed_store(struct ac_llvm_context *ctx,
+		       LLVMValueRef base_ptr, LLVMValueRef index,
+		       LLVMValueRef value)
+{
+	LLVMValueRef element_ptr = ac_build_gep0(ctx, base_ptr, index);
+
+	LLVMBuildStore(ctx->builder, value, element_ptr);
+}
+
+/**
+ * Build an indexed load using a GEP followed by a load, i.e. the equivalent
+ * of loading from &base_ptr[index].
+ *
+ * \param base_ptr  Where the array starts.
+ * \param index     The element index into the array.
+ * \param uniform   Whether the base_ptr and index can be assumed to be
+ *                  dynamically uniform (i.e. load to an SGPR)
+ * \param invariant Whether the load is invariant (no other opcodes affect it)
+ * \param no_unsigned_wraparound
+ *    For all possible re-associations and re-distributions of an expression
+ *    "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
+ *    without inbounds in base_ptr), this parameter is true if "addr + offset"
+ *    does not result in an unsigned integer wraparound. This is used for
+ *    optimal code generation of 32-bit pointer arithmetic. It only has an
+ *    effect for pointers in the AC_ADDR_SPACE_CONST_32BIT address space,
+ *    where it selects an inbounds GEP.
+ *
+ *    For example, a 32-bit immediate offset that causes a 32-bit unsigned
+ *    integer wraparound can't be an imm offset in s_load_dword, because
+ *    the instruction performs "addr + offset" in 64 bits.
+ *
+ * Expected usage for bindless textures by chaining GEPs:
+ *    // possible unsigned wraparound, don't use InBounds:
+ *    ptr1 = LLVMBuildGEP(base_ptr, index);
+ *    image = load(ptr1); // becomes "s_load ptr1, 0"
+ *
+ *    ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
+ *    sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
+ */
+static LLVMValueRef
+ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+		     LLVMValueRef index, bool uniform, bool invariant,
+		     bool no_unsigned_wraparound)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMValueRef addr, loaded;
+	const bool use_inbounds =
+		no_unsigned_wraparound &&
+		LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT;
+
+	addr = use_inbounds ? LLVMBuildInBoundsGEP(b, base_ptr, &index, 1, "")
+			    : LLVMBuildGEP(b, base_ptr, &index, 1, "");
+
+	/* The uniform marker goes on the address computation ... */
+	if (uniform)
+		LLVMSetMetadata(addr, ctx->uniform_md_kind, ctx->empty_md);
+
+	loaded = LLVMBuildLoad(b, addr, "");
+
+	/* ... while the invariant marker goes on the load itself. */
+	if (invariant)
+		LLVMSetMetadata(loaded, ctx->invariant_load_md_kind, ctx->empty_md);
+
+	return loaded;
+}
+
+/* Plain indexed load: not uniform, not invariant, and no assumption about
+ * unsigned wraparound. See ac_build_load_custom().
+ */
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+			   LLVMValueRef index)
+{
+	const bool uniform = false;
+	const bool invariant = false;
+	const bool no_unsigned_wraparound = false;
+
+	return ac_build_load_custom(ctx, base_ptr, index, uniform, invariant,
+				    no_unsigned_wraparound);
+}
+
+/* Indexed load marked invariant; not uniform, and no assumption about
+ * unsigned wraparound. See ac_build_load_custom().
+ */
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+				     LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	const bool uniform = false;
+	const bool invariant = true;
+	const bool no_unsigned_wraparound = false;
+
+	return ac_build_load_custom(ctx, base_ptr, index, uniform, invariant,
+				    no_unsigned_wraparound);
+}
+
+/* Uniform, invariant indexed load.
+ *
+ * This assumes that there is no unsigned integer wraparound during the
+ * address computation, excluding all GEPs within base_ptr.
+ */
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+				   LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	const bool uniform = true;
+	const bool invariant = true;
+	const bool no_unsigned_wraparound = true;
+
+	return ac_build_load_custom(ctx, base_ptr, index, uniform, invariant,
+				    no_unsigned_wraparound);
+}
+
+/* Uniform, invariant indexed load for which unsigned wraparound of the
+ * address computation is possible. See ac_build_load_custom() documentation.
+ */
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+						   LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	const bool uniform = true;
+	const bool invariant = true;
+	const bool no_unsigned_wraparound = false;
+
+	return ac_build_load_custom(ctx, base_ptr, index, uniform, invariant,
+				    no_unsigned_wraparound);
+}
+
+/* Return the cache policy to use for loads: on GFX10 and newer, ac_dlc is
+ * added whenever ac_glc is set; otherwise cache_policy is returned as is.
+ */
+static unsigned get_load_cache_policy(struct ac_llvm_context *ctx,
+				      unsigned cache_policy)
+{
+	if (ctx->chip_class >= GFX10 && (cache_policy & ac_glc))
+		return cache_policy | ac_dlc;
+
+	return cache_policy;
+}
+
+/* Emit a llvm.amdgcn.{raw,struct}.buffer.store[.format].<type> intrinsic.
+ *
+ * \param vindex, voffset, soffset  may be NULL, in which case 0 is used;
+ *                                  vindex is only passed when structurized
+ * \param num_channels              number of channels to store; 3 is widened
+ *                                  to 4 when ac_has_vec3_support() says vec3
+ *                                  is unsupported
+ * \param return_channel_type       per-channel type used to build the
+ *                                  intrinsic's type suffix
+ * \param use_format                select the ".format" variant
+ * \param structurized              select "struct" instead of "raw"
+ */
+static void
+ac_build_buffer_store_common(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef data,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     LLVMValueRef soffset,
+			     unsigned num_channels,
+			     LLVMTypeRef return_channel_type,
+			     unsigned cache_policy,
+			     bool use_format,
+			     bool structurized)
+{
+	const char *indexing_kind = structurized ? "struct" : "raw";
+	char name[256], type_name[8];
+	LLVMValueRef operands[6];
+	LLVMTypeRef data_type;
+	unsigned width = num_channels;
+	int count = 0;
+
+	operands[count++] = data;
+	operands[count++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	if (structurized)
+		operands[count++] = vindex ? vindex : ctx->i32_0;
+	operands[count++] = voffset ? voffset : ctx->i32_0;
+	operands[count++] = soffset ? soffset : ctx->i32_0;
+	operands[count++] = LLVMConstInt(ctx->i32, cache_policy, 0);
+
+	/* Without vec3 support, 3-channel stores use the 4-channel type. */
+	if (!ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3)
+		width = 4;
+
+	data_type = return_channel_type;
+	if (width > 1)
+		data_type = LLVMVectorType(return_channel_type, width);
+	ac_build_type_name_for_intr(data_type, type_name, sizeof(type_name));
+
+	if (use_format)
+		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.format.%s",
+			 indexing_kind, type_name);
+	else
+		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.store.%s",
+			 indexing_kind, type_name);
+
+	ac_build_intrinsic(ctx, name, ctx->voidt, operands, count,
+			   AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
+}
+
+/* Structurized format store of f32 channels with no scalar offset.
+ * See ac_build_buffer_store_common().
+ */
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef data,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     unsigned num_channels,
+			     unsigned cache_policy)
+{
+	const bool use_format = true;
+	const bool structurized = true;
+	LLVMValueRef no_soffset = NULL;
+
+	ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset,
+				     no_soffset, num_channels, ctx->f32,
+				     cache_policy, use_format, structurized);
+}
+
+/* Store 1 to 4 dwords to a buffer.
+ *
+ * TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
+ * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
+ * or v4i32 (num_channels=3,4).
+ *
+ * Three code paths exist:
+ *  - 3-channel stores are split into a 2-channel and a 1-channel store when
+ *    ac_has_vec3_support() reports that vec3 is unsupported;
+ *  - without swizzle_enable_hint, a raw buffer store is emitted with
+ *    inst_offset folded into soffset (a NULL soffset is treated as 0);
+ *  - with swizzle_enable_hint, a raw tbuffer store is emitted and soffset
+ *    and inst_offset are passed through as separate operands.
+ */
+void
+ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
+			    LLVMValueRef rsrc,
+			    LLVMValueRef vdata,
+			    unsigned num_channels,
+			    LLVMValueRef voffset,
+			    LLVMValueRef soffset,
+			    unsigned inst_offset,
+			    unsigned cache_policy,
+			    bool swizzle_enable_hint)
+{
+	/* Split 3 channel stores, because only LLVM 9+ support 3-channel
+	 * intrinsics. */
+	if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false)) {
+		LLVMValueRef v[3], v01;
+
+		for (int i = 0; i < 3; i++) {
+			v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
+					LLVMConstInt(ctx->i32, i, 0), "");
+		}
+		v01 = ac_build_gather_values(ctx, v, 2);
+
+		ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
+					    soffset, inst_offset, cache_policy,
+					    swizzle_enable_hint);
+		/* The third dword lives 8 bytes after the first two. */
+		ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
+					    soffset, inst_offset + 8,
+					    cache_policy,
+					    swizzle_enable_hint);
+		return;
+	}
+
+	/* SWIZZLE_ENABLE requires that soffset isn't folded into voffset
+	 * (voffset is swizzled, but soffset isn't swizzled).
+	 * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
+	 */
+	if (!swizzle_enable_hint) {
+		LLVMValueRef offset = soffset;
+
+		if (inst_offset) {
+			LLVMValueRef imm = LLVMConstInt(ctx->i32, inst_offset, 0);
+
+			/* A NULL soffset means 0 (the callee treats it that
+			 * way), so it must not be handed to LLVMBuildAdd.
+			 */
+			offset = soffset ? LLVMBuildAdd(ctx->builder, soffset, imm, "")
+					 : imm;
+		}
+
+		ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata),
+					     ctx->i32_0, voffset, offset,
+					     num_channels, ctx->f32,
+					     cache_policy, false, false);
+		return;
+	}
+
+	static const unsigned dfmts[] = {
+		V_008F0C_BUF_DATA_FORMAT_32,
+		V_008F0C_BUF_DATA_FORMAT_32_32,
+		V_008F0C_BUF_DATA_FORMAT_32_32_32,
+		V_008F0C_BUF_DATA_FORMAT_32_32_32_32
+	};
+
+	/* The data format table only covers 1..4 channels. */
+	assert(num_channels >= 1 && num_channels <= 4);
+
+	unsigned dfmt = dfmts[num_channels - 1];
+	unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+	LLVMValueRef immoffset = LLVMConstInt(ctx->i32, inst_offset, 0);
+
+	ac_build_raw_tbuffer_store(ctx, rsrc, vdata, voffset, soffset,
+				   immoffset, num_channels, dfmt, nfmt, cache_policy);
+}
+
+/* Emit a llvm.amdgcn.{raw,struct}.buffer.load[.format].<type> intrinsic and
+ * return its result.
+ *
+ * \param vindex, voffset, soffset  may be NULL, in which case 0 is used;
+ *                                  vindex is only passed when structurized
+ * \param num_channels              number of channels to load; 3 is widened
+ *                                  to 4 when ac_has_vec3_support() says vec3
+ *                                  is unsupported
+ * \param channel_type              per-channel type of the result
+ * \param cache_policy              adjusted by get_load_cache_policy()
+ * \param can_speculate             forwarded to ac_get_load_intr_attribs()
+ * \param use_format                select the ".format" variant
+ * \param structurized              select "struct" instead of "raw"
+ */
+static LLVMValueRef
+ac_build_buffer_load_common(struct ac_llvm_context *ctx,
+			    LLVMValueRef rsrc,
+			    LLVMValueRef vindex,
+			    LLVMValueRef voffset,
+			    LLVMValueRef soffset,
+			    unsigned num_channels,
+			    LLVMTypeRef channel_type,
+			    unsigned cache_policy,
+			    bool can_speculate,
+			    bool use_format,
+			    bool structurized)
+{
+	const char *indexing_kind = structurized ? "struct" : "raw";
+	char name[256], type_name[8];
+	LLVMValueRef operands[5];
+	LLVMTypeRef result_type;
+	unsigned width = num_channels;
+	int count = 0;
+
+	operands[count++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	if (structurized)
+		operands[count++] = vindex ? vindex : ctx->i32_0;
+	operands[count++] = voffset ? voffset : ctx->i32_0;
+	operands[count++] = soffset ? soffset : ctx->i32_0;
+	operands[count++] = LLVMConstInt(ctx->i32,
+					 get_load_cache_policy(ctx, cache_policy), 0);
+
+	/* Without vec3 support, 3-channel loads use the 4-channel type. */
+	if (!ac_has_vec3_support(ctx->chip_class, use_format) && num_channels == 3)
+		width = 4;
+
+	result_type = channel_type;
+	if (width > 1)
+		result_type = LLVMVectorType(channel_type, width);
+	ac_build_type_name_for_intr(result_type, type_name, sizeof(type_name));
+
+	if (use_format)
+		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.format.%s",
+			 indexing_kind, type_name);
+	else
+		snprintf(name, sizeof(name), "llvm.amdgcn.%s.buffer.load.%s",
+			 indexing_kind, type_name);
+
+	return ac_build_intrinsic(ctx, name, result_type, operands, count,
+				  ac_get_load_intr_attribs(can_speculate));
+}
+
+/* Load num_channels f32 values from a buffer.
+ *
+ * inst_offset, voffset and soffset (the latter two may be NULL) are summed
+ * into a single offset. When allow_smem is set and the cache policy permits
+ * it (no ac_slc, and ac_glc only on GFX8 or newer), one
+ * llvm.amdgcn.s.buffer.load.f32 is emitted per channel at consecutive 4-byte
+ * offsets; vindex must be NULL in that case. Otherwise a single raw buffer
+ * load is emitted through ac_build_buffer_load_common().
+ */
+LLVMValueRef
+ac_build_buffer_load(struct ac_llvm_context *ctx,
+		     LLVMValueRef rsrc,
+		     int num_channels,
+		     LLVMValueRef vindex,
+		     LLVMValueRef voffset,
+		     LLVMValueRef soffset,
+		     unsigned inst_offset,
+		     unsigned cache_policy,
+		     bool can_speculate,
+		     bool allow_smem)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
+	bool use_smem;
+
+	if (voffset)
+		offset = LLVMBuildAdd(b, offset, voffset, "");
+	if (soffset)
+		offset = LLVMBuildAdd(b, offset, soffset, "");
+
+	use_smem = allow_smem && !(cache_policy & ac_slc) &&
+		   (!(cache_policy & ac_glc) || ctx->chip_class >= GFX8);
+
+	if (!use_smem) {
+		return ac_build_buffer_load_common(ctx, rsrc, vindex,
+						   offset, ctx->i32_0,
+						   num_channels, ctx->f32,
+						   cache_policy,
+						   can_speculate, false, false);
+	}
+
+	assert(vindex == NULL);
+
+	LLVMValueRef dwords[8];
+
+	for (int chan = 0; chan < num_channels; chan++) {
+		/* Each following channel is 4 bytes further. */
+		if (chan > 0) {
+			offset = LLVMBuildAdd(b, offset,
+					      LLVMConstInt(ctx->i32, 4, 0), "");
+		}
+
+		LLVMValueRef load_args[3];
+		load_args[0] = rsrc;
+		load_args[1] = offset;
+		load_args[2] = LLVMConstInt(ctx->i32,
+					    get_load_cache_policy(ctx, cache_policy), 0);
+
+		dwords[chan] = ac_build_intrinsic(ctx,
+						  "llvm.amdgcn.s.buffer.load.f32",
+						  ctx->f32, load_args, 3,
+						  AC_FUNC_ATTR_READNONE);
+	}
+
+	if (num_channels == 1)
+		return dwords[0];
+
+	/* Pad vec3 to vec4 with undef when vec3 is unsupported. */
+	if (num_channels == 3 && !ac_has_vec3_support(ctx->chip_class, false))
+		dwords[num_channels++] = LLVMGetUndef(ctx->f32);
+
+	return ac_build_gather_values(ctx, dwords, num_channels);
+}
+
+/* Structurized format load of f32 channels with a scalar offset of 0.
+ * See ac_build_buffer_load_common().
+ */
+LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
+					 LLVMValueRef rsrc,
+					 LLVMValueRef vindex,
+					 LLVMValueRef voffset,
+					 unsigned num_channels,
+					 unsigned cache_policy,
+					 bool can_speculate)
+{
+	const bool use_format = true;
+	const bool structurized = true;
+	LLVMValueRef zero_soffset = ctx->i32_0;
+
+	return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
+					   zero_soffset, num_channels, ctx->f32,
+					   cache_policy, can_speculate,
+					   use_format, structurized);
+}
+
+/* Emit a llvm.amdgcn.{raw,struct}.tbuffer.load.<type> intrinsic returning
+ * i32 channels.
+ *
+ * \param vindex        only passed when structurized; NULL means 0
+ * \param voffset       vector offset; NULL means 0
+ * \param soffset       scalar offset; NULL means 0
+ * \param immoffset     added to voffset; NULL means 0
+ * \param num_channels  number of channels to load; 3 is widened to 4 when
+ *                      ac_has_vec3_support() says vec3 is unsupported
+ * \param dfmt, nfmt    combined by ac_get_tbuffer_format()
+ * \param cache_policy  adjusted by get_load_cache_policy()
+ * \param can_speculate forwarded to ac_get_load_intr_attribs()
+ * \param structurized  select "struct" instead of "raw"
+ */
+static LLVMValueRef
+ac_build_tbuffer_load(struct ac_llvm_context *ctx,
+		      LLVMValueRef rsrc,
+		      LLVMValueRef vindex,
+		      LLVMValueRef voffset,
+		      LLVMValueRef soffset,
+		      LLVMValueRef immoffset,
+		      unsigned num_channels,
+		      unsigned dfmt,
+		      unsigned nfmt,
+		      unsigned cache_policy,
+		      bool can_speculate,
+		      bool structurized)
+{
+	/* Fold immoffset into voffset. Either one may be NULL (meaning 0), so
+	 * only build the add when both are present; this keeps the NULL
+	 * fallback below reachable instead of handing NULL to LLVMBuildAdd.
+	 */
+	if (!voffset)
+		voffset = immoffset;
+	else if (immoffset)
+		voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+
+	LLVMValueRef args[6];
+	int idx = 0;
+	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	if (structurized)
+		args[idx++] = vindex ? vindex : ctx->i32_0;
+	args[idx++] = voffset ? voffset : ctx->i32_0;
+	args[idx++] = soffset ? soffset : ctx->i32_0;
+	args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
+	args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
+
+	/* Without vec3 support, 3-channel loads use the 4-channel type. */
+	unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
+	const char *indexing_kind = structurized ? "struct" : "raw";
+	char name[256], type_name[8];
+
+	LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
+	ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
+
+	snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
+		 indexing_kind, type_name);
+
+	return ac_build_intrinsic(ctx, name, type, args, idx,
+				  ac_get_load_intr_attribs(can_speculate));
+}
+
+/* Structurized ("struct") variant of ac_build_tbuffer_load(). */
+LLVMValueRef
+ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
+			     LLVMValueRef rsrc,
+			     LLVMValueRef vindex,
+			     LLVMValueRef voffset,
+			     LLVMValueRef soffset,
+			     LLVMValueRef immoffset,
+			     unsigned num_channels,
+			     unsigned dfmt,
+			     unsigned nfmt,
+			     unsigned cache_policy,
+			     bool can_speculate)
+{
+	const bool structurized = true;
+
+	return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset,
+				     immoffset, num_channels, dfmt, nfmt,
+				     cache_policy, can_speculate, structurized);
+}
+
+/* Raw variant of ac_build_tbuffer_load(): no vertex index is passed. */
+LLVMValueRef
+ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
+			  LLVMValueRef rsrc,
+			  LLVMValueRef voffset,
+			  LLVMValueRef soffset,
+			  LLVMValueRef immoffset,
+			  unsigned num_channels,
+			  unsigned dfmt,
+			  unsigned nfmt,
+			  unsigned cache_policy,
+			  bool can_speculate)
+{
+	const bool structurized = false;
+	LLVMValueRef no_vindex = NULL;
+
+	return ac_build_tbuffer_load(ctx, rsrc, no_vindex, voffset, soffset,
+				     immoffset, num_channels, dfmt, nfmt,
+				     cache_policy, can_speculate, structurized);
+}
+
+/* Load a single 16-bit value and return it as an i16.
+ *
+ * With LLVM 9 or newer a raw i16 buffer load is used (immoffset folded into
+ * voffset). Older LLVM versions go through a raw tbuffer load with the
+ * 16-bit UINT format and truncate the result to i16.
+ */
+LLVMValueRef
+ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+			    LLVMValueRef rsrc,
+			    LLVMValueRef voffset,
+			    LLVMValueRef soffset,
+			    LLVMValueRef immoffset,
+			    unsigned cache_policy)
+{
+	if (LLVM_VERSION_MAJOR < 9) {
+		LLVMValueRef loaded =
+			ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
+						  immoffset, 1,
+						  V_008F0C_BUF_DATA_FORMAT_16,
+						  V_008F0C_BUF_NUM_FORMAT_UINT,
+						  cache_policy, false);
+
+		return LLVMBuildTrunc(ctx->builder, loaded, ctx->i16, "");
+	}
+
+	/* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
+	LLVMValueRef total_voffset =
+		LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+
+	return ac_build_buffer_load_common(ctx, rsrc, NULL,
+					   total_voffset, soffset,
+					   1, ctx->i16, cache_policy,
+					   false, false, false);
+}
+
+/* Load a single 8-bit value and return it as an i8.
+ *
+ * With LLVM 9 or newer a raw i8 buffer load is used (immoffset folded into
+ * voffset). Older LLVM versions go through a raw tbuffer load with the
+ * 8-bit UINT format and truncate the result to i8.
+ */
+LLVMValueRef
+ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
+			   LLVMValueRef rsrc,
+			   LLVMValueRef voffset,
+			   LLVMValueRef soffset,
+			   LLVMValueRef immoffset,
+			   unsigned cache_policy)
+{
+	if (LLVM_VERSION_MAJOR < 9) {
+		LLVMValueRef loaded =
+			ac_build_raw_tbuffer_load(ctx, rsrc, voffset, soffset,
+						  immoffset, 1,
+						  V_008F0C_BUF_DATA_FORMAT_8,
+						  V_008F0C_BUF_NUM_FORMAT_UINT,
+						  cache_policy, false);
+
+		return LLVMBuildTrunc(ctx->builder, loaded, ctx->i8, "");
+	}
+
+	/* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
+	LLVMValueRef total_voffset =
+		LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+
+	return ac_build_buffer_load_common(ctx, rsrc, NULL,
+					   total_voffset, soffset,
+					   1, ctx->i8, cache_policy,
+					   false, false, false);
+}
+
+/**
+ * Convert an 11- or 10-bit unsigned floating point number to an f32.
+ *
+ * The input exponent is expected to be biased analogous to IEEE-754, i.e. by
+ * 2^(exp_bits-1) - 1 (as defined in OpenGL and other graphics APIs).
+ *
+ * \param src        the packed value as an i32 (asserted)
+ * \param exp_bits   number of exponent bits of the source format
+ * \param mant_bits  number of mantissa bits of the source format
+ * \return the converted value, bitcast with ac_to_float()
+ */
+static LLVMValueRef
+ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned exp_bits, unsigned mant_bits)
+{
+	assert(LLVMTypeOf(src) == ctx->i32);
+
+	LLVMBuilderRef b = ctx->builder;
+	LLVMTypeRef i32 = ctx->i32;
+
+	/* Distance between the source and f32 mantissa widths, and between
+	 * the source and f32 exponent biases.
+	 */
+	const unsigned normal_shift = 23 - mant_bits;
+	const unsigned bias_shift = 127 - ((1 << (exp_bits - 1)) - 1);
+	const unsigned denormal_exp = bias_shift + (32 - mant_bits) - 1;
+
+	LLVMValueRef mant_only = LLVMBuildAnd(b, src,
+			LLVMConstInt(i32, (1 << mant_bits) - 1, false), "");
+
+	/* Normal numbers: just a shift + correcting the exponent bias. */
+	LLVMValueRef widened = LLVMBuildShl(b, src,
+			LLVMConstInt(i32, normal_shift, false), "");
+	LLVMValueRef as_normal = LLVMBuildAdd(b, widened,
+			LLVMConstInt(i32, bias_shift << 23, false), "");
+
+	/* NaN/Inf: the same, but with the exponent field forced to all ones. */
+	LLVMValueRef as_naninf = LLVMBuildOr(b, as_normal,
+			LLVMConstInt(i32, 0xff << 23, false), "");
+
+	/* Denormals are the complex case: determine the leading zeros of the
+	 * mantissa to obtain the correct shift for the mantissa and the
+	 * exponent correction.
+	 */
+	LLVMValueRef ctlz_args[2];
+	ctlz_args[0] = mant_only;
+	ctlz_args[1] = ctx->i1true; /* result can be undef when arg is 0 */
+	LLVMValueRef lead_zeros = ac_build_intrinsic(ctx, "llvm.ctlz.i32", i32,
+						     ctlz_args, 2,
+						     AC_FUNC_ATTR_READNONE);
+
+	/* Shift such that the leading 1 ends up as the LSB of the exponent field. */
+	LLVMValueRef align_shift = LLVMBuildSub(b, lead_zeros,
+			LLVMConstInt(i32, 8, false), "");
+	LLVMValueRef as_denormal = LLVMBuildShl(b, mant_only, align_shift, "");
+
+	LLVMValueRef exp_field = LLVMBuildSub(b,
+			LLVMConstInt(i32, denormal_exp, false), lead_zeros, "");
+	exp_field = LLVMBuildShl(b, exp_field, LLVMConstInt(i32, 23, false), "");
+	as_denormal = LLVMBuildAdd(b, as_denormal, exp_field, "");
+
+	/* Select the final result, from the largest inputs down to zero. */
+	LLVMValueRef cond, picked;
+
+	/* Exponent field all ones -> NaN/Inf, otherwise a normal number. */
+	cond = LLVMBuildICmp(b, LLVMIntUGE, src,
+			     LLVMConstInt(i32, ((1 << exp_bits) - 1) << mant_bits, false), "");
+	picked = LLVMBuildSelect(b, cond, as_naninf, as_normal, "");
+
+	/* Exponent field zero -> denormal. */
+	cond = LLVMBuildICmp(b, LLVMIntUGE, src,
+			     LLVMConstInt(i32, 1 << mant_bits, false), "");
+	picked = LLVMBuildSelect(b, cond, picked, as_denormal, "");
+
+	/* Zero stays zero. */
+	cond = LLVMBuildICmp(b, LLVMIntNE, src, ctx->i32_0, "");
+	picked = LLVMBuildSelect(b, cond, picked, ctx->i32_0, "");
+
+	return ac_to_float(ctx, picked);
+}
+
+/**
+ * Generate a fully general open coded buffer format fetch with all required
+ * fixups suitable for vertex fetch, using non-format buffer loads.
+ *
+ * Some combinations of argument values have special interpretations:
+ * - size = 8 bytes, format = fixed indicates PIPE_FORMAT_R11G11B10_FLOAT
+ * - size = 8 bytes, format != {float,fixed} indicates a 2_10_10_10 data format
+ *
+ * \param log_size log(size of channel in bytes)
+ * \param num_channels number of channels (1 to 4)
+ * \param format AC_FETCH_FORMAT_xxx value
+ * \param reverse whether XYZ channels are reversed
+ * \param known_aligned whether the source is known to be aligned to hardware's
+ * effective element size for loading the given format
+ * (note: this means dword alignment for 8_8_8_8, 16_16, etc.)
+ * \param rsrc buffer resource descriptor
+ * \return the resulting vector of floats or integers bitcast to <4 x i32>
+ */
+LLVMValueRef
+ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
+                               unsigned log_size,
+                               unsigned num_channels,
+                               unsigned format,
+                               bool reverse,
+                               bool known_aligned,
+                               LLVMValueRef rsrc,
+                               LLVMValueRef vindex,
+                               LLVMValueRef voffset,
+                               LLVMValueRef soffset,
+                               unsigned cache_policy,
+                               bool can_speculate)
+{
+	LLVMValueRef tmp;
+
+	/* Stage 1: plan the raw loads. load_log_size is log2 of the bytes
+	 * fetched per element, load_num_channels the number of elements. */
+	unsigned load_log_size = log_size;
+	unsigned load_num_channels = num_channels;
+	if (log_size == 3) {
+		/* log_size == 3 is always fetched in dword units. */
+		load_log_size = 2;
+		if (format == AC_FETCH_FORMAT_FLOAT) {
+			load_num_channels = 2 * num_channels;
+		} else {
+			load_num_channels = 1; /* 10_11_11 or 2_10_10_10 */
+		}
+	}
+
+	/* log_recombine > 0: every element is fetched as (1 << log_recombine)
+	 * single bytes that are merged again in stage 3.
+	 * log_recombine < 0: several elements are fetched by one wider load
+	 * that is split again in stage 3. */
+	int log_recombine = 0;
+	if (ctx->chip_class == GFX6 && !known_aligned) {
+		/* Avoid alignment restrictions by loading one byte at a time. */
+		load_num_channels <<= load_log_size;
+		log_recombine = load_log_size;
+		load_log_size = 0;
+	} else if (load_num_channels == 2 || load_num_channels == 4) {
+		log_recombine = -util_logbase2(load_num_channels);
+		load_num_channels = 1;
+		load_log_size += -log_recombine;
+	}
+
+	/* Loads narrower than a dword are only accepted with LLVM 9+. */
+	assert(load_log_size >= 2 || LLVM_VERSION_MAJOR >= 9);
+
+	/* Stage 2: issue the loads. soffset must be a valid value because the
+	 * per-load byte offset is added to it. */
+	LLVMValueRef loads[32]; /* up to 32 bytes */
+	for (unsigned i = 0; i < load_num_channels; ++i) {
+		tmp = LLVMBuildAdd(ctx->builder, soffset,
+		                   LLVMConstInt(ctx->i32, i << load_log_size, false), "");
+		LLVMTypeRef channel_type = load_log_size == 0 ? ctx->i8 :
+		                           load_log_size == 1 ? ctx->i16 : ctx->i32;
+		/* Number of channel_type elements fetched by this one load;
+		 * deliberately not called num_channels so that the function
+		 * parameter stays visible. */
+		unsigned load_vec_size = 1 << (MAX2(load_log_size, 2) - 2);
+		loads[i] = ac_build_buffer_load_common(
+				ctx, rsrc, vindex, voffset, tmp,
+				load_vec_size, channel_type, cache_policy,
+				can_speculate, false, true);
+		if (load_log_size >= 2)
+			loads[i] = ac_to_integer(ctx, loads[i]);
+	}
+
+	/* Stage 3: bring loads[] back to one integer value per channel. */
+	if (log_recombine > 0) {
+		/* Recombine bytes if necessary (GFX6 only) */
+		LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16;
+
+		for (unsigned src = 0, dst = 0; src < load_num_channels; ++dst) {
+			LLVMValueRef accum = NULL;
+			/* Byte i lands in bits [8*i, 8*i + 7] of the result. */
+			for (unsigned i = 0; i < (1u << log_recombine); ++i, ++src) {
+				tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, "");
+				if (i == 0) {
+					accum = tmp;
+				} else {
+					tmp = LLVMBuildShl(ctx->builder, tmp,
+					                   LLVMConstInt(dst_type, 8 * i, false), "");
+					accum = LLVMBuildOr(ctx->builder, accum, tmp, "");
+				}
+			}
+			loads[dst] = accum;
+		}
+	} else if (log_recombine < 0) {
+		/* Split vectors of dwords */
+		if (load_log_size > 2) {
+			assert(load_num_channels == 1);
+			LLVMValueRef loaded = loads[0];
+			unsigned log_split = load_log_size - 2;
+			log_recombine += log_split;
+			load_num_channels = 1 << log_split;
+			load_log_size = 2;
+			for (unsigned i = 0; i < load_num_channels; ++i) {
+				tmp = LLVMConstInt(ctx->i32, i, false);
+				loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, "");
+			}
+		}
+
+		/* Further split dwords and shorts if required */
+		if (log_recombine < 0) {
+			/* Walk backwards so that expanding loads[] in place never
+			 * overwrites a source element that is still unread. */
+			for (unsigned src = load_num_channels,
+			              dst = load_num_channels << -log_recombine;
+			     src > 0; --src) {
+				unsigned dst_bits = 1 << (3 + load_log_size + log_recombine);
+				LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits);
+				LLVMValueRef loaded = loads[src - 1];
+				LLVMTypeRef loaded_type = LLVMTypeOf(loaded);
+				for (unsigned i = 1 << -log_recombine; i > 0; --i, --dst) {
+					tmp = LLVMConstInt(loaded_type, dst_bits * (i - 1), false);
+					tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, "");
+					loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, "");
+				}
+			}
+		}
+	}
+
+	/* Stage 4: unpack the special log_size == 3 encodings. */
+	if (log_size == 3) {
+		if (format == AC_FETCH_FORMAT_FLOAT) {
+			/* Two consecutive dwords form one f64 channel. */
+			for (unsigned i = 0; i < num_channels; ++i) {
+				tmp = ac_build_gather_values(ctx, &loads[2 * i], 2);
+				loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, "");
+			}
+		} else if (format == AC_FETCH_FORMAT_FIXED) {
+			/* 10_11_11_FLOAT */
+			LLVMValueRef data = loads[0];
+			LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false);
+			LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, "");
+			tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), "");
+			LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, "");
+			LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), "");
+
+			loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6));
+			loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6));
+			loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5));
+
+			/* From here on the data is three 32-bit floats. */
+			num_channels = 3;
+			log_size = 2;
+			format = AC_FETCH_FORMAT_FLOAT;
+		} else {
+			/* 2_10_10_10 data formats */
+			LLVMValueRef data = loads[0];
+			LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10);
+			LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2);
+			loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, "");
+			tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), "");
+			loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
+			tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), "");
+			loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
+			tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), "");
+			loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, "");
+
+			/* log_size stays 3 so that the narrow i10/i2 values are
+			 * still widened by the conversions below. */
+			num_channels = 4;
+		}
+	}
+
+	/* Stage 5: convert every channel to a 32-bit float or integer, kept
+	 * as i32 bit patterns in loads[]. */
+	if (format == AC_FETCH_FORMAT_FLOAT) {
+		if (log_size != 2) {
+			for (unsigned chan = 0; chan < num_channels; ++chan) {
+				tmp = ac_to_float(ctx, loads[chan]);
+				if (log_size == 3)
+					tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, "");
+				else if (log_size == 1)
+					tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, "");
+				loads[chan] = ac_to_integer(ctx, tmp);
+			}
+		}
+	} else if (format == AC_FETCH_FORMAT_UINT) {
+		if (log_size != 2) {
+			for (unsigned chan = 0; chan < num_channels; ++chan)
+				loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, "");
+		}
+	} else if (format == AC_FETCH_FORMAT_SINT) {
+		if (log_size != 2) {
+			for (unsigned chan = 0; chan < num_channels; ++chan)
+				loads[chan] = LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, "");
+		}
+	} else {
+		/* Normalized, scaled and fixed-point formats: convert the
+		 * integer to float, then apply a format-dependent scale. */
+		bool unsign = format == AC_FETCH_FORMAT_UNORM ||
+		              format == AC_FETCH_FORMAT_USCALED ||
+		              format == AC_FETCH_FORMAT_UINT;
+
+		for (unsigned chan = 0; chan < num_channels; ++chan) {
+			if (unsign) {
+				tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, "");
+			} else {
+				tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, "");
+			}
+
+			/* scale stays NULL for the *SCALED formats. */
+			LLVMValueRef scale = NULL;
+			if (format == AC_FETCH_FORMAT_FIXED) {
+				assert(log_size == 2);
+				scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000);
+			} else if (format == AC_FETCH_FORMAT_UNORM) {
+				unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
+				scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1));
+			} else if (format == AC_FETCH_FORMAT_SNORM) {
+				unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(loads[chan]));
+				scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1));
+			}
+			if (scale)
+				tmp = LLVMBuildFMul(ctx->builder, tmp, scale, "");
+
+			if (format == AC_FETCH_FORMAT_SNORM) {
+				/* Clamp to [-1, 1] */
+				LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
+				LLVMValueRef clamp =
+					LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, "");
+				tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, "");
+			}
+
+			loads[chan] = ac_to_integer(ctx, tmp);
+		}
+	}
+
+	/* Stage 6: fill missing channels with (0, 0, 0, 1), using integer
+	 * constants for UINT/SINT and float constants otherwise. */
+	while (num_channels < 4) {
+		if (format == AC_FETCH_FORMAT_UINT || format == AC_FETCH_FORMAT_SINT) {
+			loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0;
+		} else {
+			loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? ctx->f32_1 : ctx->f32_0);
+		}
+		num_channels++;
+	}
+
+	/* Reversed formats swap the first and third channel. */
+	if (reverse) {
+		tmp = loads[0];
+		loads[0] = loads[2];
+		loads[2] = tmp;
+	}
+
+	return ac_build_gather_values(ctx, loads, 4);
+}
+
+/**
+ * Emit an llvm.amdgcn.{struct,raw}.tbuffer.store.* intrinsic call.
+ *
+ * \param rsrc         buffer resource descriptor (bitcast to v4i32 here)
+ * \param vdata        value to store
+ * \param vindex       index operand, only passed when \p structurized;
+ *                     NULL means 0
+ * \param voffset      offset operand; NULL means 0
+ * \param soffset      second offset operand; NULL means 0
+ * \param immoffset    offset that is added to \p voffset; must not be NULL
+ * \param num_channels number of i32 channels in \p vdata
+ * \param dfmt, nfmt   data/numeric format, combined by ac_get_tbuffer_format()
+ * \param structurized selects the "struct" instead of the "raw" intrinsic
+ */
+static void
+ac_build_tbuffer_store(struct ac_llvm_context *ctx,
+                       LLVMValueRef rsrc,
+                       LLVMValueRef vdata,
+                       LLVMValueRef vindex,
+                       LLVMValueRef voffset,
+                       LLVMValueRef soffset,
+                       LLVMValueRef immoffset,
+                       unsigned num_channels,
+                       unsigned dfmt,
+                       unsigned nfmt,
+                       unsigned cache_policy,
+                       bool structurized)
+{
+	/* After this addition voffset always holds a valid value, so it needs
+	 * no further NULL check below. */
+	voffset = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
+	                       immoffset, "");
+
+	LLVMValueRef args[7];
+	int idx = 0;
+	args[idx++] = vdata;
+	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	if (structurized)
+		args[idx++] = vindex ? vindex : ctx->i32_0;
+	args[idx++] = voffset;
+	args[idx++] = soffset ? soffset : ctx->i32_0;
+	args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->chip_class, dfmt, nfmt), 0);
+	args[idx++] = LLVMConstInt(ctx->i32, cache_policy, 0);
+
+	/* Without vec3 support a 3-channel store uses the 4-channel overload. */
+	unsigned func = !ac_has_vec3_support(ctx->chip_class, true) && num_channels == 3 ? 4 : num_channels;
+	const char *indexing_kind = structurized ? "struct" : "raw";
+	char name[256], type_name[8];
+
+	LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
+	ac_build_type_name_for_intr(type, type_name, sizeof(type_name));
+
+	snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.store.%s",
+	         indexing_kind, type_name);
+
+	ac_build_intrinsic(ctx, name, ctx->voidt, args, idx,
+	                   AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
+}
+
+/* Typed buffer store through the "struct" intrinsic flavour, i.e. with a
+ * vindex operand. All arguments are forwarded to ac_build_tbuffer_store(). */
+void
+ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
+                              LLVMValueRef rsrc,
+                              LLVMValueRef vdata,
+                              LLVMValueRef vindex,
+                              LLVMValueRef voffset,
+                              LLVMValueRef soffset,
+                              LLVMValueRef immoffset,
+                              unsigned num_channels,
+                              unsigned dfmt,
+                              unsigned nfmt,
+                              unsigned cache_policy)
+{
+	const bool use_struct_intrinsic = true;
+
+	ac_build_tbuffer_store(ctx, rsrc, vdata, vindex,
+	                       voffset, soffset, immoffset,
+	                       num_channels, dfmt, nfmt,
+	                       cache_policy, use_struct_intrinsic);
+}
+
+/* Typed buffer store through the "raw" intrinsic flavour: no vindex operand
+ * is passed. Everything else is forwarded to ac_build_tbuffer_store(). */
+void
+ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
+                           LLVMValueRef rsrc,
+                           LLVMValueRef vdata,
+                           LLVMValueRef voffset,
+                           LLVMValueRef soffset,
+                           LLVMValueRef immoffset,
+                           unsigned num_channels,
+                           unsigned dfmt,
+                           unsigned nfmt,
+                           unsigned cache_policy)
+{
+	const bool use_struct_intrinsic = false;
+	LLVMValueRef no_vindex = NULL;
+
+	ac_build_tbuffer_store(ctx, rsrc, vdata, no_vindex,
+	                       voffset, soffset, immoffset,
+	                       num_channels, dfmt, nfmt,
+	                       cache_policy, use_struct_intrinsic);
+}
+
+/* Store the 16-bit value \p vdata (bitcast to i16 here) into a buffer. */
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+                             LLVMValueRef rsrc,
+                             LLVMValueRef vdata,
+                             LLVMValueRef voffset,
+                             LLVMValueRef soffset,
+                             unsigned cache_policy)
+{
+	LLVMValueRef half = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
+
+	if (LLVM_VERSION_MAJOR >= 9) {
+		/* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
+		ac_build_buffer_store_common(ctx, rsrc, half, NULL,
+		                             voffset, soffset, 1,
+		                             ctx->i16, cache_policy,
+		                             false, false);
+		return;
+	}
+
+	/* Older LLVM: widen to a dword and use a typed 16-bit UINT store. */
+	LLVMValueRef widened = LLVMBuildZExt(ctx->builder, half, ctx->i32, "");
+
+	ac_build_raw_tbuffer_store(ctx, rsrc, widened, voffset, soffset,
+	                           ctx->i32_0, 1,
+	                           V_008F0C_BUF_DATA_FORMAT_16,
+	                           V_008F0C_BUF_NUM_FORMAT_UINT,
+	                           cache_policy);
+}
+
+/* Store the 8-bit value \p vdata (bitcast to i8 here) into a buffer. */
+void
+ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
+                            LLVMValueRef rsrc,
+                            LLVMValueRef vdata,
+                            LLVMValueRef voffset,
+                            LLVMValueRef soffset,
+                            unsigned cache_policy)
+{
+	LLVMValueRef byte = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
+
+	if (LLVM_VERSION_MAJOR >= 9) {
+		/* LLVM 9+ supports i8/i16 with struct/raw intrinsics. */
+		ac_build_buffer_store_common(ctx, rsrc, byte, NULL,
+		                             voffset, soffset, 1,
+		                             ctx->i8, cache_policy,
+		                             false, false);
+		return;
+	}
+
+	/* Older LLVM: widen to a dword and use a typed 8-bit UINT store. */
+	LLVMValueRef widened = LLVMBuildZExt(ctx->builder, byte, ctx->i32, "");
+
+	ac_build_raw_tbuffer_store(ctx, rsrc, widened, voffset, soffset,
+	                           ctx->i32_0, 1,
+	                           V_008F0C_BUF_DATA_FORMAT_8,
+	                           V_008F0C_BUF_NUM_FORMAT_UINT,
+	                           cache_policy);
+}
+/**
+ * Attach range metadata [\p lo, \p hi) to an instruction. This can only be
+ * used on load and call instructions. For an instruction that can only
+ * produce the values 0, 1, 2, call set_range_metadata(ctx, value, 0, 3).
+ * \p lo is the minimum value inclusive.
+ * \p hi is the maximum value exclusive.
+ */
+static void set_range_metadata(struct ac_llvm_context *ctx,
+                               LLVMValueRef value, unsigned lo, unsigned hi)
+{
+	LLVMTypeRef value_type = LLVMTypeOf(value);
+
+	/* Both bounds are constants of the instruction's own type. */
+	LLVMValueRef bounds[2] = {
+		LLVMConstInt(value_type, lo, false),
+		LLVMConstInt(value_type, hi, false),
+	};
+	LLVMValueRef range_node =
+		LLVMMDNodeInContext(LLVMGetTypeContext(value_type), bounds, 2);
+
+	LLVMSetMetadata(value, ctx->range_md_kind, range_node);
+}
+
+/* Return the index of the current thread within its wave, built from the
+ * mbcnt.lo (and, unless wave_size is 32, mbcnt.hi) intrinsics with an
+ * all-ones mask. The result carries range metadata [0, wave_size). */
+LLVMValueRef
+ac_get_thread_id(struct ac_llvm_context *ctx)
+{
+	LLVMValueRef mbcnt_args[2] = {
+		LLVMConstInt(ctx->i32, 0xffffffff, false),
+		ctx->i32_0,
+	};
+	LLVMValueRef count_lo = ac_build_intrinsic(ctx,
+	                                           "llvm.amdgcn.mbcnt.lo", ctx->i32,
+	                                           mbcnt_args, 2, AC_FUNC_ATTR_READNONE);
+	LLVMValueRef tid = count_lo;
+
+	if (ctx->wave_size != 32) {
+		/* mbcnt.hi continues counting from the mbcnt.lo result. */
+		mbcnt_args[1] = count_lo;
+		tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
+		                         ctx->i32, mbcnt_args,
+		                         2, AC_FUNC_ATTR_READNONE);
+	}
+
+	set_range_metadata(ctx, tid, 0, ctx->wave_size);
+	return tid;
+}
+
+/*
+ * Build the difference of \p val between two lanes of a pixel quad, using
+ * quad swizzles.
+ *
+ * For the pixel shader threads are grouped into quads of four pixels.
+ * The TIDs of the pixels of a quad are:
+ *
+ * +------+------+
+ * |4n + 0|4n + 1|
+ * +------+------+
+ * |4n + 2|4n + 3|
+ * +------+------+
+ *
+ * For every lane i (0..3) of a quad the result is
+ *     val[(i & mask) + idx] - val[i & mask]
+ *
+ * So, a mask of 0xfffffffc selects the top left pixel of the quad, a mask
+ * of 0xfffffffd selects the top pixel of the current pixel's column, and a
+ * mask of 0xfffffffe selects the left pixel of the current pixel's row.
+ *
+ * An idx of 1 then addresses the pixel to the right of the left pixel, and
+ * an idx of 2 addresses the pixel below the top pixel.
+ *
+ * The difference is finally passed through llvm.amdgcn.wqm.
+ */
+LLVMValueRef
+ac_build_ddxy(struct ac_llvm_context *ctx,
+              uint32_t mask,
+              int idx,
+              LLVMValueRef val)
+{
+	LLVMTypeRef float_type = ac_to_float_type(ctx, LLVMTypeOf(val));
+	const bool is_half = float_type == ctx->f16;
+	unsigned first_lane[4], second_lane[4];
+
+	/* 16-bit values are swizzled as dwords and narrowed again below. */
+	if (is_half)
+		val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
+
+	for (unsigned lane = 0; lane < 4; ++lane) {
+		first_lane[lane] = lane & mask;
+		second_lane[lane] = (lane & mask) + idx;
+	}
+
+	LLVMValueRef first = ac_build_quad_swizzle(ctx, val,
+	                                           first_lane[0], first_lane[1],
+	                                           first_lane[2], first_lane[3]);
+	LLVMValueRef second = ac_build_quad_swizzle(ctx, val,
+	                                            second_lane[0], second_lane[1],
+	                                            second_lane[2], second_lane[3]);
+
+	if (is_half) {
+		first = LLVMBuildTrunc(ctx->builder, first, ctx->i16, "");
+		second = LLVMBuildTrunc(ctx->builder, second, ctx->i16, "");
+	}
+
+	first = LLVMBuildBitCast(ctx->builder, first, float_type, "");
+	second = LLVMBuildBitCast(ctx->builder, second, float_type, "");
+	LLVMValueRef diff = LLVMBuildFSub(ctx->builder, second, first, "");
+
+	char type_suffix[8];
+	char intr_name[32];
+	ac_build_type_name_for_intr(float_type, type_suffix, sizeof(type_suffix));
+	snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.wqm.%s", type_suffix);
+
+	return ac_build_intrinsic(ctx, intr_name, float_type, &diff, 1, 0);
+}
+
+/* Emit llvm.amdgcn.s.sendmsg with the constant message id \p msg and the
+ * value \p wave_id as operands. */
+void
+ac_build_sendmsg(struct ac_llvm_context *ctx,
+                 uint32_t msg,
+                 LLVMValueRef wave_id)
+{
+	LLVMValueRef operands[2] = {
+		LLVMConstInt(ctx->i32, msg, false),
+		wave_id,
+	};
+
+	ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, operands, 2, 0);
+}
+
+/* Signed "find most significant bit" of a 32-bit value. The result is a
+ * bit index counted from the LSB, or -1 when \p arg is 0 or -1.
+ * \p dst_type is used as the return type of the sffbh intrinsic call. */
+LLVMValueRef
+ac_build_imsb(struct ac_llvm_context *ctx,
+              LLVMValueRef arg,
+              LLVMTypeRef dst_type)
+{
+	LLVMValueRef from_msb = ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32",
+	                                           dst_type, &arg, 1,
+	                                           AC_FUNC_ATTR_READNONE);
+
+	/* The HW counts from the MSB, but NIR/TGSI wants the index from
+	 * the LSB: compute "31 - from_msb". */
+	LLVMValueRef thirty_one = LLVMConstInt(ctx->i32, 31, false);
+	LLVMValueRef from_lsb = LLVMBuildSub(ctx->builder, thirty_one, from_msb, "");
+
+	/* 0 and -1 contain no significant bit: select -1 for those inputs. */
+	LLVMValueRef minus_one = LLVMConstInt(ctx->i32, -1, true);
+	LLVMValueRef is_zero = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+	                                     arg, ctx->i32_0, "");
+	LLVMValueRef is_minus_one = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+	                                          arg, minus_one, "");
+	LLVMValueRef no_msb = LLVMBuildOr(ctx->builder, is_zero, is_minus_one, "");
+
+	return LLVMBuildSelect(ctx->builder, no_msb, minus_one, from_lsb, "");
+}
+
+/**
+ * Unsigned "find most significant bit" for 8-, 16-, 32- and 64-bit integers.
+ *
+ * \param arg      integer value to scan
+ * \param dst_type not used by this function
+ * \return i32 index of the highest set bit counted from the LSB, or -1 when
+ *         \p arg is zero
+ */
+LLVMValueRef
+ac_build_umsb(struct ac_llvm_context *ctx,
+              LLVMValueRef arg,
+              LLVMTypeRef dst_type)
+{
+	const char *intrin_name;
+	LLVMTypeRef type;
+	LLVMValueRef highest_bit;
+	LLVMValueRef zero;
+	unsigned bitsize;
+
+	bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
+	switch (bitsize) {
+	case 64:
+		intrin_name = "llvm.ctlz.i64";
+		type = ctx->i64;
+		highest_bit = LLVMConstInt(ctx->i64, 63, false);
+		zero = ctx->i64_0;
+		break;
+	case 32:
+		intrin_name = "llvm.ctlz.i32";
+		type = ctx->i32;
+		highest_bit = LLVMConstInt(ctx->i32, 31, false);
+		zero = ctx->i32_0;
+		break;
+	case 16:
+		intrin_name = "llvm.ctlz.i16";
+		type = ctx->i16;
+		highest_bit = LLVMConstInt(ctx->i16, 15, false);
+		zero = ctx->i16_0;
+		break;
+	case 8:
+		intrin_name = "llvm.ctlz.i8";
+		type = ctx->i8;
+		highest_bit = LLVMConstInt(ctx->i8, 7, false);
+		zero = ctx->i8_0;
+		break;
+	default:
+		/* Pass the message as a plain string, like every other
+		 * unreachable() call in this file. */
+		unreachable("invalid bitsize");
+		break;
+	}
+
+	/* The second ctlz operand is a constant true flag; the zero input is
+	 * handled separately by the select at the end of this function. */
+	LLVMValueRef params[2] = {
+		arg,
+		ctx->i1true,
+	};
+
+	LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type,
+	                                      params, 2,
+	                                      AC_FUNC_ATTR_READNONE);
+
+	/* ctlz counts from the MSB, but TGSI/NIR wants the index from the
+	 * LSB. Invert it by doing "(bitsize - 1) - msb". */
+	msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
+
+	/* The result is always returned as a 32-bit integer. */
+	if (bitsize == 64) {
+		msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
+	} else if (bitsize < 32) {
+		msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
+	}
+
+	/* check for zero */
+	return LLVMBuildSelect(ctx->builder,
+	                       LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
+	                       LLVMConstInt(ctx->i32, -1, true), msb, "");
+}
+
+/* Floating-point minimum of \p a and \p b through llvm.minnum.fN, where N
+ * is the element bit size of \p a. */
+LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
+                           LLVMValueRef b)
+{
+	LLVMTypeRef operand_type = LLVMTypeOf(a);
+	LLVMValueRef operands[2] = {a, b};
+	char intr_name[64];
+
+	snprintf(intr_name, sizeof(intr_name), "llvm.minnum.f%d", ac_get_elem_bits(ctx, operand_type));
+	return ac_build_intrinsic(ctx, intr_name, operand_type, operands, 2,
+	                          AC_FUNC_ATTR_READNONE);
+}
+
+/* Floating-point maximum of \p a and \p b through llvm.maxnum.fN, where N
+ * is the element bit size of \p a. */
+LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
+                           LLVMValueRef b)
+{
+	LLVMTypeRef operand_type = LLVMTypeOf(a);
+	LLVMValueRef operands[2] = {a, b};
+	char intr_name[64];
+
+	snprintf(intr_name, sizeof(intr_name), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, operand_type));
+	return ac_build_intrinsic(ctx, intr_name, operand_type, operands, 2,
+	                          AC_FUNC_ATTR_READNONE);
+}
+
+/* Signed integer minimum: select a when a <= b (signed), otherwise b. */
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+                           LLVMValueRef b)
+{
+	LLVMValueRef a_is_min;
+
+	a_is_min = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
+	return LLVMBuildSelect(ctx->builder, a_is_min, a, b, "");
+}
+
+/* Signed integer maximum: select a when a > b (signed), otherwise b. */
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+                           LLVMValueRef b)
+{
+	LLVMValueRef a_is_max;
+
+	a_is_max = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
+	return LLVMBuildSelect(ctx->builder, a_is_max, a, b, "");
+}
+
+/* Unsigned integer minimum: select a when a <= b (unsigned), otherwise b. */
+LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,
+                           LLVMValueRef b)
+{
+	LLVMValueRef a_is_min;
+
+	a_is_min = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
+	return LLVMBuildSelect(ctx->builder, a_is_min, a, b, "");
+}
+
+/* Unsigned integer maximum: select a when a >= b (unsigned), otherwise b. */
+LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a,
+                           LLVMValueRef b)
+{
+	LLVMValueRef a_is_max;
+
+	a_is_max = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
+	return LLVMBuildSelect(ctx->builder, a_is_max, a, b, "");
+}
+
+/* Clamp a floating-point value to [0, 1]: fmin(fmax(value, 0.0), 1.0). */
+LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+	LLVMTypeRef value_type = LLVMTypeOf(value);
+	LLVMValueRef lower_bound = LLVMConstReal(value_type, 0.0);
+	LLVMValueRef upper_bound = LLVMConstReal(value_type, 1.0);
+	LLVMValueRef at_least_zero = ac_build_fmax(ctx, value, lower_bound);
+
+	return ac_build_fmin(ctx, at_least_zero, upper_bound);
+}
+
+/* Emit an export intrinsic described by \p a: llvm.amdgcn.exp.compr.v2i16
+ * with two packed operands when a->compr is set, llvm.amdgcn.exp.f32 with
+ * four operands otherwise. */
+void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
+{
+	LLVMValueRef target = LLVMConstInt(ctx->i32, a->target, 0);
+	LLVMValueRef channel_mask = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
+
+	if (!a->compr) {
+		LLVMValueRef full_args[8] = {
+			target,
+			channel_mask,
+			a->out[0],
+			a->out[1],
+			a->out[2],
+			a->out[3],
+			LLVMConstInt(ctx->i1, a->done, 0),
+			LLVMConstInt(ctx->i1, a->valid_mask, 0),
+		};
+
+		ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32",
+		                   ctx->voidt, full_args, 8, 0);
+		return;
+	}
+
+	/* Compressed export: the first two outputs are reinterpreted as
+	 * <2 x i16> vectors. */
+	LLVMTypeRef v2i16 = LLVMVectorType(LLVMInt16TypeInContext(ctx->context), 2);
+	LLVMValueRef packed0 = LLVMBuildBitCast(ctx->builder, a->out[0], v2i16, "");
+	LLVMValueRef packed1 = LLVMBuildBitCast(ctx->builder, a->out[1], v2i16, "");
+	LLVMValueRef compr_args[6] = {
+		target,
+		channel_mask,
+		packed0,
+		packed1,
+		LLVMConstInt(ctx->i1, a->done, 0),
+		LLVMConstInt(ctx->i1, a->valid_mask, 0),
+	};
+
+	ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16",
+	                   ctx->voidt, compr_args, 6, 0);
+}
+
+/* Emit an uncompressed export to the NULL target with no channels enabled,
+ * the DONE and valid-mask bits set, and undefined data operands. */
+void ac_build_export_null(struct ac_llvm_context *ctx)
+{
+	LLVMValueRef undef = LLVMGetUndef(ctx->f32);
+	struct ac_export_args null_export = {
+		.target = V_008DFC_SQ_EXP_NULL,
+		.enabled_channels = 0x0,  /* enabled channels */
+		.compr = 0,               /* COMPR flag (0 = 32-bit export) */
+		.done = 1,                /* DONE bit */
+		.valid_mask = 1,          /* whether the EXEC mask is valid */
+		.out = { undef, undef, undef, undef }, /* R, G, B, A */
+	};
+
+	ac_build_export(ctx, &null_export);
+}
+
+/* Number of coordinate operands that an image of dimension \p dim takes. */
+static unsigned ac_num_coords(enum ac_image_dim dim)
+{
+	unsigned count;
+
+	switch (dim) {
+	case ac_image_1d:
+		count = 1;
+		break;
+	case ac_image_1darray:
+	case ac_image_2d:
+		count = 2;
+		break;
+	case ac_image_2darray:
+	case ac_image_2dmsaa:
+	case ac_image_3d:
+	case ac_image_cube:
+		count = 3;
+		break;
+	case ac_image_2darraymsaa:
+		count = 4;
+		break;
+	default:
+		unreachable("ac_num_coords: bad dim");
+	}
+
+	return count;
+}
+
+/* Number of derivative operands that an image of dimension \p dim takes.
+ * MSAA dimensions are not valid here. */
+static unsigned ac_num_derivs(enum ac_image_dim dim)
+{
+	unsigned count;
+
+	switch (dim) {
+	case ac_image_1darray:
+	case ac_image_1d:
+		count = 2;
+		break;
+	case ac_image_cube:
+	case ac_image_2darray:
+	case ac_image_2d:
+		count = 4;
+		break;
+	case ac_image_3d:
+		count = 6;
+		break;
+	case ac_image_2dmsaa:
+	case ac_image_2darraymsaa:
+	default:
+		unreachable("derivatives not supported");
+	}
+
+	return count;
+}
+
+/* Map an atomic operation to the name fragment used in intrinsic names. */
+static const char *get_atomic_name(enum ac_atomic_op op)
+{
+	const char *suffix = NULL;
+
+	switch (op) {
+	case ac_atomic_swap:     suffix = "swap"; break;
+	case ac_atomic_add:      suffix = "add";  break;
+	case ac_atomic_sub:      suffix = "sub";  break;
+	case ac_atomic_smin:     suffix = "smin"; break;
+	case ac_atomic_umin:     suffix = "umin"; break;
+	case ac_atomic_smax:     suffix = "smax"; break;
+	case ac_atomic_umax:     suffix = "umax"; break;
+	case ac_atomic_and:      suffix = "and";  break;
+	case ac_atomic_or:       suffix = "or";   break;
+	case ac_atomic_xor:      suffix = "xor";  break;
+	case ac_atomic_inc_wrap: suffix = "inc";  break;
+	case ac_atomic_dec_wrap: suffix = "dec";  break;
+	}
+
+	if (suffix)
+		return suffix;
+
+	unreachable("bad atomic op");
+}
+
+/**
+ * Build a call to one of the llvm.amdgcn.image.* intrinsics.
+ *
+ * The intrinsic name, its operand list and its return type are all derived
+ * from the fields of \p a (opcode, dim, bias/lod/level_zero/derivs, compare,
+ * offset, ...).
+ *
+ * \return the intrinsic's result: i32 for atomics, nothing useful for
+ *         stores, and a 4-component vector otherwise (v4f32 for sample-type
+ *         opcodes, bitcast to v4i32 for the others)
+ */
+LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
+ struct ac_image_args *a)
+{
+ const char *overload[3] = { "", "", "" };
+ unsigned num_overloads = 0;
+ LLVMValueRef args[18];
+ unsigned num_args = 0;
+ enum ac_image_dim dim = a->dim;
+
+ /* Sanity-check the combination of arguments: level_zero excludes a
+ * non-zero lod, resinfo/mip opcodes need a lod, compare/offset/bias are
+ * limited to sampling opcodes, and at most one of bias, lod, level_zero
+ * and derivs may be given. */
+ assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
+ !a->level_zero);
+ assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip &&
+ a->opcode != ac_image_store_mip) ||
+ a->lod);
+ assert(a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
+ (!a->compare && !a->offset));
+ assert((a->opcode == ac_image_sample || a->opcode == ac_image_gather4 ||
+ a->opcode == ac_image_get_lod) ||
+ !a->bias);
+ assert((a->bias ? 1 : 0) +
+ (a->lod ? 1 : 0) +
+ (a->level_zero ? 1 : 0) +
+ (a->derivs[0] ? 1 : 0) <= 1);
+
+ /* get_lod uses the non-array dimension: array and cube images are
+ * treated as their 1d/2d counterparts. */
+ if (a->opcode == ac_image_get_lod) {
+ switch (dim) {
+ case ac_image_1darray:
+ dim = ac_image_1d;
+ break;
+ case ac_image_2darray:
+ case ac_image_cube:
+ dim = ac_image_2d;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Classify the opcode; "sample" opcodes take float coordinates and a
+ * sampler, the others take integer coordinates. */
+ bool sample = a->opcode == ac_image_sample ||
+ a->opcode == ac_image_gather4 ||
+ a->opcode == ac_image_get_lod;
+ bool atomic = a->opcode == ac_image_atomic ||
+ a->opcode == ac_image_atomic_cmpswap;
+ bool load = a->opcode == ac_image_sample ||
+ a->opcode == ac_image_gather4 ||
+ a->opcode == ac_image_load ||
+ a->opcode == ac_image_load_mip;
+ LLVMTypeRef coord_type = sample ? ctx->f32 : ctx->i32;
+
+ /* Operands are appended in the order the intrinsic expects them:
+ * data, dmask, offset, bias, compare, derivatives, coordinates, lod,
+ * resource, sampler, unorm, texfailctrl, cache policy. */
+ if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
+ args[num_args++] = a->data[0];
+ if (a->opcode == ac_image_atomic_cmpswap)
+ args[num_args++] = a->data[1];
+ }
+
+ if (!atomic)
+ args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, false);
+
+ if (a->offset)
+ args[num_args++] = ac_to_integer(ctx, a->offset);
+ if (a->bias) {
+ args[num_args++] = ac_to_float(ctx, a->bias);
+ overload[num_overloads++] = ".f32";
+ }
+ if (a->compare)
+ args[num_args++] = ac_to_float(ctx, a->compare);
+ if (a->derivs[0]) {
+ unsigned count = ac_num_derivs(dim);
+ for (unsigned i = 0; i < count; ++i)
+ args[num_args++] = ac_to_float(ctx, a->derivs[i]);
+ overload[num_overloads++] = ".f32";
+ }
+ /* get_resinfo takes no coordinates, only the lod. */
+ unsigned num_coords =
+ a->opcode != ac_image_get_resinfo ? ac_num_coords(dim) : 0;
+ for (unsigned i = 0; i < num_coords; ++i)
+ args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
+ if (a->lod)
+ args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
+ /* The coordinate type always contributes one overload suffix. */
+ overload[num_overloads++] = sample ? ".f32" : ".i32";
+
+ args[num_args++] = a->resource;
+ if (sample) {
+ args[num_args++] = a->sampler;
+ args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
+ }
+
+ args[num_args++] = ctx->i32_0; /* texfailctrl */
+ /* Loads pass their cache policy through get_load_cache_policy(). */
+ args[num_args++] = LLVMConstInt(ctx->i32,
+ load ? get_load_cache_policy(ctx, a->cache_policy) :
+ a->cache_policy, false);
+
+ /* Base name of the intrinsic (plus the atomic sub-operation). */
+ const char *name;
+ const char *atomic_subop = "";
+ switch (a->opcode) {
+ case ac_image_sample: name = "sample"; break;
+ case ac_image_gather4: name = "gather4"; break;
+ case ac_image_load: name = "load"; break;
+ case ac_image_load_mip: name = "load.mip"; break;
+ case ac_image_store: name = "store"; break;
+ case ac_image_store_mip: name = "store.mip"; break;
+ case ac_image_atomic:
+ name = "atomic.";
+ atomic_subop = get_atomic_name(a->atomic);
+ break;
+ case ac_image_atomic_cmpswap:
+ name = "atomic.";
+ atomic_subop = "cmpswap";
+ break;
+ case ac_image_get_lod: name = "getlod"; break;
+ case ac_image_get_resinfo: name = "getresinfo"; break;
+ default: unreachable("invalid image opcode");
+ }
+
+ /* Dimension part of the intrinsic name. */
+ const char *dimname;
+ switch (dim) {
+ case ac_image_1d: dimname = "1d"; break;
+ case ac_image_2d: dimname = "2d"; break;
+ case ac_image_3d: dimname = "3d"; break;
+ case ac_image_cube: dimname = "cube"; break;
+ case ac_image_1darray: dimname = "1darray"; break;
+ case ac_image_2darray: dimname = "2darray"; break;
+ case ac_image_2dmsaa: dimname = "2dmsaa"; break;
+ case ac_image_2darraymsaa: dimname = "2darraymsaa"; break;
+ default: unreachable("invalid dim");
+ }
+
+ /* Only sample and gather4 spell an explicit lod as ".l" in the name. */
+ bool lod_suffix =
+ a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
+ char intr_name[96];
+ snprintf(intr_name, sizeof(intr_name),
+ "llvm.amdgcn.image.%s%s" /* base name */
+ "%s%s%s" /* sample/gather modifiers */
+ ".%s.%s%s%s%s", /* dimension and type overloads */
+ name, atomic_subop,
+ a->compare ? ".c" : "",
+ a->bias ? ".b" :
+ lod_suffix ? ".l" :
+ a->derivs[0] ? ".d" :
+ a->level_zero ? ".lz" : "",
+ a->offset ? ".o" : "",
+ dimname,
+ atomic ? "i32" : "v4f32",
+ overload[0], overload[1], overload[2]);
+
+ /* Return type: i32 for atomics, void for stores, v4f32 otherwise. */
+ LLVMTypeRef retty;
+ if (atomic)
+ retty = ctx->i32;
+ else if (a->opcode == ac_image_store || a->opcode == ac_image_store_mip)
+ retty = ctx->voidt;
+ else
+ retty = ctx->v4f32;
+
+ LLVMValueRef result =
+ ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
+ a->attributes);
+ /* Non-sampling opcodes hand their vector result back as integers. */
+ if (!sample && retty == ctx->v4f32) {
+ result = LLVMBuildBitCast(ctx->builder, result,
+ ctx->v4i32, "");
+ }
+ return result;
+}
+
+/* Compute the sample count of an image from its resource descriptor
+ * \p rsrc: 1 << ((rsrc[3] >> 16) & 0xf). */
+LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
+                                             LLVMValueRef rsrc)
+{
+	LLVMBuilderRef b = ctx->builder;
+
+	/* Read the samples from the descriptor directly.
+	 * Hardware doesn't have any instruction for this.
+	 */
+	LLVMValueRef dword3 =
+		LLVMBuildExtractElement(b, rsrc, LLVMConstInt(ctx->i32, 3, 0), "");
+	LLVMValueRef shifted =
+		LLVMBuildLShr(b, dword3, LLVMConstInt(ctx->i32, 16, 0), "");
+	LLVMValueRef log_samples =
+		LLVMBuildAnd(b, shifted, LLVMConstInt(ctx->i32, 0xf, 0), "");
+
+	return LLVMBuildShl(b, ctx->i32_1, log_samples, "");
+}
+
+/* Convert the two values in \p args to a <2 x half> vector with the
+ * llvm.amdgcn.cvt.pkrtz intrinsic. */
+LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
+                                    LLVMValueRef args[2])
+{
+	LLVMTypeRef half_type = LLVMHalfTypeInContext(ctx->context);
+	LLVMTypeRef packed_type = LLVMVectorType(half_type, 2);
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", packed_type,
+	                          args, 2, AC_FUNC_ATTR_READNONE);
+}
+
+/* Pack the two values in \p args with llvm.amdgcn.cvt.pknorm.i16 and return
+ * the <2 x i16> result bitcast to one i32. */
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+                                     LLVMValueRef args[2])
+{
+	LLVMValueRef packed;
+
+	packed = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
+	                            ctx->v2i16, args, 2,
+	                            AC_FUNC_ATTR_READNONE);
+	return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
+}
+
+/* Pack the two values in \p args with llvm.amdgcn.cvt.pknorm.u16 and return
+ * the <2 x i16> result bitcast to one i32. */
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+                                     LLVMValueRef args[2])
+{
+	LLVMValueRef packed;
+
+	packed = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
+	                            ctx->v2i16, args, 2,
+	                            AC_FUNC_ATTR_READNONE);
+	return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
+}
+
+/**
+ * Pack two signed 32-bit integers into one i32 holding two 16-bit halves,
+ * using llvm.amdgcn.cvt.pk.i16.
+ *
+ * The 8-bit and 10-bit clamping is for HW workarounds.
+ *
+ * \param args  the two values to pack; overwritten in place with the clamped
+ *              values when \p bits is 8 or 10
+ * \param bits  8, 10 or 16; with 8 or 10 both values are clamped first
+ * \param hi    when true, args[1] is treated as the alpha channel, which for
+ *              bits == 10 is clamped to [-2, 1] instead of [-512, 511]
+ * \return the packed <2 x i16> result bitcast to i32
+ */
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+                                 LLVMValueRef args[2], unsigned bits, bool hi)
+{
+	assert(bits == 8 || bits == 10 || bits == 16);
+
+	LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+		bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
+	/* Negative bounds are created with sign extension, like the other
+	 * negative constants in this file. */
+	LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
+		bits == 8 ? -128 : bits == 10 ? -512 : -32768, true);
+	LLVMValueRef max_alpha =
+		bits != 10 ? max_rgb : ctx->i32_1;
+	LLVMValueRef min_alpha =
+		bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, true);
+
+	/* Clamp. */
+	if (bits != 16) {
+		for (int i = 0; i < 2; i++) {
+			bool alpha = hi && i == 1;
+			args[i] = ac_build_imin(ctx, args[i],
+			                        alpha ? max_alpha : max_rgb);
+			args[i] = ac_build_imax(ctx, args[i],
+			                        alpha ? min_alpha : min_rgb);
+		}
+	}
+
+	LLVMValueRef res =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
+		                   ctx->v2i16, args, 2,
+		                   AC_FUNC_ATTR_READNONE);
+	return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
+}
+
+/* Pack two unsigned 32-bit integers into one i32 with
+ * llvm.amdgcn.cvt.pk.u16. For bits == 8 or 10 the values in \p args are
+ * first clamped in place; that clamping is for HW workarounds. When \p hi
+ * is set, args[1] is the alpha channel, which is limited to 3 for
+ * bits == 10. */
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+                                 LLVMValueRef args[2], unsigned bits, bool hi)
+{
+	assert(bits == 8 || bits == 10 || bits == 16);
+
+	LLVMValueRef rgb_limit = LLVMConstInt(ctx->i32,
+		bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
+	LLVMValueRef alpha_limit = rgb_limit;
+	if (bits == 10)
+		alpha_limit = LLVMConstInt(ctx->i32, 3, 0);
+
+	/* Clamp. */
+	if (bits != 16) {
+		args[0] = ac_build_umin(ctx, args[0], rgb_limit);
+		args[1] = ac_build_umin(ctx, args[1],
+		                        hi ? alpha_limit : rgb_limit);
+	}
+
+	LLVMValueRef packed =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
+		                   ctx->v2i16, args, 2,
+		                   AC_FUNC_ATTR_READNONE);
+	return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
+}
+
+/* Pass the boolean \p i1 through llvm.amdgcn.wqm.vote and return the i1
+ * result. */
+LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
+{
+	LLVMValueRef operands[1] = { i1 };
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
+	                          operands, 1, AC_FUNC_ATTR_READNONE);
+}
+
+/* Emit llvm.amdgcn.kill with the condition \p i1 as its only operand. */
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
+{
+	LLVMValueRef operands[1] = { i1 };
+
+	ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
+	                   operands, 1, 0);
+}
+
+/* 32-bit bitfield extract of \p width bits starting at \p offset from
+ * \p input, through the sbfe (signed) or ubfe (unsigned) intrinsic. */
+LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
+                          LLVMValueRef offset, LLVMValueRef width,
+                          bool is_signed)
+{
+	const char *intr_name = "llvm.amdgcn.ubfe.i32";
+	LLVMValueRef operands[3] = { input, offset, width };
+
+	if (is_signed)
+		intr_name = "llvm.amdgcn.sbfe.i32";
+
+	return ac_build_intrinsic(ctx, intr_name, ctx->i32, operands, 3,
+	                          AC_FUNC_ATTR_READNONE);
+}
+
+/* Integer multiply-add: s0 * s1 + s2. */
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+                           LLVMValueRef s1, LLVMValueRef s2)
+{
+	LLVMValueRef product = LLVMBuildMul(ctx->builder, s0, s1, "");
+
+	return LLVMBuildAdd(ctx->builder, product, s2, "");
+}
+
+/* Floating-point multiply-add: s0 * s1 + s2, emitted as llvm.fma.f32 on
+ * GFX10 and newer and as separate fmul + fadd on older chips. */
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+                           LLVMValueRef s1, LLVMValueRef s2)
+{
+	if (ctx->chip_class < GFX10) {
+		LLVMValueRef product = LLVMBuildFMul(ctx->builder, s0, s1, "");
+
+		return LLVMBuildFAdd(ctx->builder, product, s2, "");
+	}
+
+	/* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
+	LLVMValueRef operands[3] = { s0, s1, s2 };
+
+	return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32,
+	                          operands, 3,
+	                          AC_FUNC_ATTR_READNONE);
+}
+
+/* Emit a wait for the counters selected by \p wait_flags (a mask of
+ * AC_WAIT_LGKM, AC_WAIT_VLOAD and AC_WAIT_VSTORE). Nothing is emitted for
+ * an empty mask. A release fence is emitted instead of s_waitcnt when all
+ * three flags are set or when the store counter (GFX10+) must be waited
+ * for. */
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
+{
+	if (wait_flags == 0)
+		return;
+
+	const bool wait_lgkm = (wait_flags & AC_WAIT_LGKM) != 0;
+	const bool wait_vload = (wait_flags & AC_WAIT_VLOAD) != 0;
+	const bool wait_vstore = (wait_flags & AC_WAIT_VSTORE) != 0;
+
+	/* Stores have their own counter on GFX10+; before that they are
+	 * covered by vmcnt. */
+	const bool has_vscnt = ctx->chip_class >= GFX10;
+	const bool wait_vscnt = wait_vstore && has_vscnt;
+
+	/* There is no intrinsic for vscnt(0), so use a fence. */
+	if ((wait_lgkm && wait_vload && wait_vstore) || wait_vscnt) {
+		LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
+		return;
+	}
+
+	/* A counter that is not waited for keeps its maximum value. */
+	unsigned lgkmcnt = wait_lgkm ? 0 : 63;
+	unsigned vmcnt;
+	if (wait_vload || (wait_vstore && !has_vscnt))
+		vmcnt = 0;
+	else
+		vmcnt = ctx->chip_class >= GFX9 ? 63 : 15;
+
+	unsigned simm16 = (lgkmcnt << 8) |
+	                  (7 << 4) | /* expcnt */
+	                  (vmcnt & 0xf) |
+	                  ((vmcnt >> 4) << 14);
+
+	LLVMValueRef operand = LLVMConstInt(ctx->i32, simm16, false);
+
+	ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
+	                   ctx->voidt, &operand, 1, 0);
+}
+
+/**
+ * Build a call to llvm.amdgcn.fmed3 on three floating-point operands.
+ *
+ * \param bitsize 16 or 32 select the f16/f32 variant; any other value
+ *                selects the f64 variant
+ * \return the intrinsic's result, of the selected float type
+ */
+LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
+                            LLVMValueRef src1, LLVMValueRef src2,
+                            unsigned bitsize)
+{
+	LLVMTypeRef type;
+	/* Points at string literals, hence const. */
+	const char *intr;
+
+	if (bitsize == 16) {
+		intr = "llvm.amdgcn.fmed3.f16";
+		type = ctx->f16;
+	} else if (bitsize == 32) {
+		intr = "llvm.amdgcn.fmed3.f32";
+		type = ctx->f32;
+	} else {
+		intr = "llvm.amdgcn.fmed3.f64";
+		type = ctx->f64;
+	}
+
+	LLVMValueRef params[] = {
+		src0,
+		src1,
+		src2,
+	};
+	return ac_build_intrinsic(ctx, intr, type, params, 3,
+	                          AC_FUNC_ATTR_READNONE);
+}
+
+/**
+ * Build a call to llvm.amdgcn.fract on \p src0.
+ *
+ * \param bitsize 16 or 32 select the f16/f32 variant; any other value
+ *                selects the f64 variant
+ * \return the intrinsic's result, of the selected float type
+ */
+LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
+                            unsigned bitsize)
+{
+	LLVMTypeRef type;
+	/* Points at string literals, hence const. */
+	const char *intr;
+
+	if (bitsize == 16) {
+		intr = "llvm.amdgcn.fract.f16";
+		type = ctx->f16;
+	} else if (bitsize == 32) {
+		intr = "llvm.amdgcn.fract.f32";
+		type = ctx->f32;
+	} else {
+		intr = "llvm.amdgcn.fract.f64";
+		type = ctx->f64;
+	}
+
+	LLVMValueRef params[] = {
+		src0,
+	};
+	return ac_build_intrinsic(ctx, intr, type, params, 1,
+	                          AC_FUNC_ATTR_READNONE);
+}
+
+/**
+ * Integer sign: builds a value that is 1 for src0 > 0, -1 for src0 < 0 and
+ * src0 itself (i.e. 0) otherwise, using an integer type of `bitsize` bits.
+ */
+LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
+			    unsigned bitsize)
+{
+	LLVMTypeRef int_type = LLVMIntTypeInContext(ctx->context, bitsize);
+	LLVMValueRef zero = LLVMConstInt(int_type, 0, false);
+	LLVMValueRef pos_one = LLVMConstInt(int_type, 1, false);
+	LLVMValueRef neg_one = LLVMConstInt(int_type, -1, true);
+
+	/* Clamp from above: everything greater than zero becomes 1. */
+	LLVMValueRef is_positive =
+		LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
+	LLVMValueRef clamped =
+		LLVMBuildSelect(ctx->builder, is_positive, pos_one, src0, "");
+
+	/* Clamp from below: everything less than zero becomes -1. */
+	LLVMValueRef is_non_negative =
+		LLVMBuildICmp(ctx->builder, LLVMIntSGE, clamped, zero, "");
+
+	return LLVMBuildSelect(ctx->builder, is_non_negative, clamped,
+			       neg_one, "");
+}
+
+/**
+ * Float sign: values comparing greater than zero become 1.0; of the rest,
+ * values comparing greater-or-equal to zero are kept and all others
+ * become -1.0.
+ *
+ * bitsize 16 and 32 select f16 and f32; any other value selects f64.
+ */
+LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
+			    unsigned bitsize)
+{
+	LLVMValueRef zero, pos_one, neg_one;
+
+	switch (bitsize) {
+	case 16:
+		zero = ctx->f16_0;
+		pos_one = ctx->f16_1;
+		neg_one = LLVMConstReal(ctx->f16, -1.0);
+		break;
+	case 32:
+		zero = ctx->f32_0;
+		pos_one = ctx->f32_1;
+		neg_one = LLVMConstReal(ctx->f32, -1.0);
+		break;
+	default:
+		zero = ctx->f64_0;
+		pos_one = ctx->f64_1;
+		neg_one = LLVMConstReal(ctx->f64, -1.0);
+		break;
+	}
+
+	LLVMValueRef is_positive =
+		LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
+	LLVMValueRef clamped =
+		LLVMBuildSelect(ctx->builder, is_positive, pos_one, src0, "");
+	LLVMValueRef is_non_negative =
+		LLVMBuildFCmp(ctx->builder, LLVMRealOGE, clamped, zero, "");
+
+	return LLVMBuildSelect(ctx->builder, is_non_negative, clamped,
+			       neg_one, "");
+}
+
+/**
+ * Population count of src0 via llvm.ctpop, returned as an i32.
+ *
+ * The intrinsic variant is chosen from the element width of src0
+ * (8, 16, 32 or 64 bits); the result is truncated or zero-extended to i32.
+ */
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
+{
+	const unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+	const char *intrin;
+	LLVMTypeRef type;
+
+	switch (bitsize) {
+	case 64:
+		intrin = "llvm.ctpop.i64";
+		type = ctx->i64;
+		break;
+	case 32:
+		intrin = "llvm.ctpop.i32";
+		type = ctx->i32;
+		break;
+	case 16:
+		intrin = "llvm.ctpop.i16";
+		type = ctx->i16;
+		break;
+	case 8:
+		intrin = "llvm.ctpop.i8";
+		type = ctx->i8;
+		break;
+	default:
+		unreachable(!"invalid bitsize");
+		break;
+	}
+
+	LLVMValueRef count = ac_build_intrinsic(ctx, intrin, type, &src0, 1,
+						AC_FUNC_ATTR_READNONE);
+
+	/* Normalize the result width to 32 bits. */
+	if (bitsize == 64)
+		count = LLVMBuildTrunc(ctx->builder, count, ctx->i32, "");
+	else if (bitsize < 32)
+		count = LLVMBuildZExt(ctx->builder, count, ctx->i32, "");
+
+	return count;
+}
+
+/**
+ * Reverse the bits of src0 via llvm.bitreverse, returned as an i32.
+ *
+ * The intrinsic variant is chosen from the element width of src0
+ * (8, 16, 32 or 64 bits); the result is truncated or zero-extended to i32.
+ */
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+				       LLVMValueRef src0)
+{
+	const unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+	const char *intrin;
+	LLVMTypeRef type;
+
+	switch (bitsize) {
+	case 64:
+		intrin = "llvm.bitreverse.i64";
+		type = ctx->i64;
+		break;
+	case 32:
+		intrin = "llvm.bitreverse.i32";
+		type = ctx->i32;
+		break;
+	case 16:
+		intrin = "llvm.bitreverse.i16";
+		type = ctx->i16;
+		break;
+	case 8:
+		intrin = "llvm.bitreverse.i8";
+		type = ctx->i8;
+		break;
+	default:
+		unreachable(!"invalid bitsize");
+		break;
+	}
+
+	LLVMValueRef reversed = ac_build_intrinsic(ctx, intrin, type, &src0, 1,
+						   AC_FUNC_ATTR_READNONE);
+
+	/* Normalize the result width to 32 bits. */
+	if (bitsize == 64)
+		reversed = LLVMBuildTrunc(ctx->builder, reversed, ctx->i32, "");
+	else if (bitsize < 32)
+		reversed = LLVMBuildZExt(ctx->builder, reversed, ctx->i32, "");
+
+	return reversed;
+}
+
+/* Operand indices of an export call, as used with LLVMGetOperand() and
+ * LLVMSetOperand() below: the export target, the enabled-channels mask and
+ * the first of the four output channels.
+ */
+#define AC_EXP_TARGET 0
+#define AC_EXP_ENABLED_CHANNELS 1
+#define AC_EXP_OUT0 2
+
+/* Classification of a single exported channel operand. */
+enum ac_ir_type {
+ AC_IR_UNDEF, /* the operand is undef */
+ AC_IR_CONST, /* the operand is a floating-point constant */
+ AC_IR_VALUE, /* anything else */
+};
+
+/* One channel of a parsed export. */
+struct ac_vs_exp_chan
+{
+ LLVMValueRef value; /* the operand itself */
+ float const_float; /* constant value, filled in for AC_IR_CONST channels */
+ enum ac_ir_type type;
+};
+
+/* One parsed PARAM export instruction. */
+struct ac_vs_exp_inst {
+ unsigned offset; /* export target minus V_008DFC_SQ_EXP_PARAM */
+ LLVMValueRef inst; /* the export call instruction */
+ struct ac_vs_exp_chan chan[4];
+};
+
+/* List of the PARAM exports that were kept (not eliminated). */
+struct ac_vs_exports {
+ unsigned num; /* number of valid entries in exp[] */
+ struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
+};
+
+/**
+ * Try to replace a PARAM export whose channels are all 0, 1 or undef with
+ * one of the DEFAULT_VAL encodings.
+ *
+ * On success the export instruction is erased and the first entry of
+ * vs_output_param_offset that equals exp->offset is rewritten to
+ * AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val.
+ *
+ * Return true if the PARAM export has been eliminated.
+ */
+static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
+				      uint32_t num_outputs,
+				      struct ac_vs_exp_inst *exp)
+{
+	/* Bit i is set when channel i may be treated as 0 / as 1. */
+	unsigned zero_mask = 0, one_mask = 0;
+	unsigned default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+
+	for (unsigned chan = 0; chan < 4; chan++) {
+		const struct ac_vs_exp_chan *c = &exp->chan[chan];
+
+		switch (c->type) {
+		case AC_IR_UNDEF:
+			/* Undef matches anything, so it counts as both. */
+			zero_mask |= 1u << chan;
+			one_mask |= 1u << chan;
+			break;
+		case AC_IR_CONST:
+			if (c->const_float == 0)
+				zero_mask |= 1u << chan;
+			else if (c->const_float == 1)
+				one_mask |= 1u << chan;
+			else
+				return false; /* other constant */
+			break;
+		default:
+			return false; /* not a constant expression */
+		}
+	}
+
+	/* Only certain combinations of 0 and 1 can be eliminated:
+	 * xyz must be all zeros or all ones, w selects between the variants.
+	 */
+	const bool w_is_zero = (zero_mask & 0x8) != 0;
+
+	if ((zero_mask & 0x7) == 0x7)
+		default_val = w_is_zero ? 0 : 1;
+	else if ((one_mask & 0x7) == 0x7)
+		default_val = w_is_zero ? 2 : 3;
+	else
+		return false;
+
+	/* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+	LLVMInstructionEraseFromParent(exp->inst);
+
+	/* Change OFFSET to DEFAULT_VAL. */
+	for (unsigned out = 0; out < num_outputs; out++) {
+		if (vs_output_param_offset[out] != exp->offset)
+			continue;
+
+		vs_output_param_offset[out] =
+			AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+		break;
+	}
+	return true;
+}
+
+/**
+ * Try to remove a PARAM export that duplicates an already processed one.
+ *
+ * Two exports match when every channel is equal, where an undef channel on
+ * either side matches anything. If the processed export has undef in a
+ * channel where the new one has a real value, that value is copied into the
+ * processed export and its enabled-channels mask is extended.
+ *
+ * @param ctx
+ * @param vs_output_param_offset - per-output PARAM offsets; the first entry
+ *        equal to exp->offset is redirected to the matching export
+ * @param num_outputs - number of entries in vs_output_param_offset
+ * @param processed - the exports kept so far
+ * @param exp - the export being examined
+ * @return true if exp->inst has been erased, false if no match was found
+ */
+static bool ac_eliminate_duplicated_output(struct ac_llvm_context *ctx,
+					   uint8_t *vs_output_param_offset,
+					   uint32_t num_outputs,
+					   struct ac_vs_exports *processed,
+					   struct ac_vs_exp_inst *exp)
+{
+	unsigned p, copy_back_channels = 0;
+
+	/* See if the output is already in the list of processed outputs.
+	 * The LLVMValueRef comparison relies on SSA.
+	 */
+	for (p = 0; p < processed->num; p++) {
+		bool different = false;
+
+		for (unsigned j = 0; j < 4; j++) {
+			struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
+			struct ac_vs_exp_chan *c2 = &exp->chan[j];
+
+			/* Treat undef as a match. */
+			if (c2->type == AC_IR_UNDEF)
+				continue;
+
+			/* If c1 is undef but c2 isn't, we can copy c2 to c1
+			 * and consider the instruction duplicated.
+			 */
+			if (c1->type == AC_IR_UNDEF) {
+				copy_back_channels |= 1 << j;
+				continue;
+			}
+
+			/* Test whether the channels are not equal. */
+			if (c1->type != c2->type ||
+			    (c1->type == AC_IR_CONST &&
+			     c1->const_float != c2->const_float) ||
+			    (c1->type == AC_IR_VALUE &&
+			     c1->value != c2->value)) {
+				different = true;
+				break;
+			}
+		}
+		if (!different)
+			break;
+
+		/* The mask belongs to the candidate that was just rejected. */
+		copy_back_channels = 0;
+	}
+	if (p == processed->num)
+		return false;
+
+	/* If a match was found, but the matching export has undef where the new
+	 * one has a normal value, copy the normal value to the undef channel.
+	 */
+	struct ac_vs_exp_inst *match = &processed->exp[p];
+
+	/* Get current enabled channels mask. */
+	LLVMValueRef arg = LLVMGetOperand(match->inst, AC_EXP_ENABLED_CHANNELS);
+	unsigned enabled_channels = LLVMConstIntGetZExtValue(arg);
+
+	while (copy_back_channels) {
+		/* Pops the lowest set bit from the mask and yields its index. */
+		unsigned chan = u_bit_scan(&copy_back_channels);
+
+		assert(match->chan[chan].type == AC_IR_UNDEF);
+		LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
+			       exp->chan[chan].value);
+		match->chan[chan] = exp->chan[chan];
+
+		/* Update number of enabled channels because the original mask
+		 * is not always 0xf.
+		 */
+		enabled_channels |= (1 << chan);
+		LLVMSetOperand(match->inst, AC_EXP_ENABLED_CHANNELS,
+			       LLVMConstInt(ctx->i32, enabled_channels, 0));
+	}
+
+	/* The PARAM export is duplicated. Kill it. */
+	LLVMInstructionEraseFromParent(exp->inst);
+
+	/* Change OFFSET to the matching export. */
+	for (unsigned i = 0; i < num_outputs; i++) {
+		if (vs_output_param_offset[i] == exp->offset) {
+			vs_output_param_offset[i] = match->offset;
+			break;
+		}
+	}
+	return true;
+}
+
+/**
+ * Remove redundant PARAM exports from main_fn.
+ *
+ * Walks every instruction of every basic block, picks out calls to
+ * "llvm.SI.export" / "llvm.amdgcn.exp.f32" whose target is at least
+ * V_008DFC_SQ_EXP_PARAM, and tries to eliminate each one with
+ * ac_eliminate_const_output() and then ac_eliminate_duplicated_output().
+ * If anything was removed, the remaining PARAM exports are renumbered
+ * consecutively from 0.
+ *
+ * @param ctx
+ * @param main_fn - the function whose exports are optimized
+ * @param vs_output_param_offset - per-output PARAM offsets, updated in place
+ * @param num_outputs - number of valid entries in vs_output_param_offset
+ * @param num_param_exports - receives the number of remaining PARAM exports;
+ *        only written when at least one export was removed
+ */
+void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
+ LLVMValueRef main_fn,
+ uint8_t *vs_output_param_offset,
+ uint32_t num_outputs,
+ uint8_t *num_param_exports)
+{
+ LLVMBasicBlockRef bb;
+ bool removed_any = false;
+ struct ac_vs_exports exports;
+
+ /* Only the count is initialized; entries are written as they are added. */
+ exports.num = 0;
+
+ /* Process all LLVM instructions. */
+ bb = LLVMGetFirstBasicBlock(main_fn);
+ while (bb) {
+ LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+ while (inst) {
+ /* Advance first: cur may be erased by the elimination helpers. */
+ LLVMValueRef cur = inst;
+ inst = LLVMGetNextInstruction(inst);
+ struct ac_vs_exp_inst exp;
+
+ if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+ continue;
+
+ LLVMValueRef callee = ac_llvm_get_called_value(cur);
+
+ if (!ac_llvm_is_function(callee))
+ continue;
+
+ const char *name = LLVMGetValueName(callee);
+ unsigned num_args = LLVMCountParams(callee);
+
+ /* Check if this is an export instruction. */
+ if ((num_args != 9 && num_args != 8) ||
+ (strcmp(name, "llvm.SI.export") &&
+ strcmp(name, "llvm.amdgcn.exp.f32")))
+ continue;
+
+ LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
+ unsigned target = LLVMConstIntGetZExtValue(arg);
+
+ /* Skip exports whose target is below the PARAM range. */
+ if (target < V_008DFC_SQ_EXP_PARAM)
+ continue;
+
+ /* From here on, target is the PARAM index. */
+ target -= V_008DFC_SQ_EXP_PARAM;
+
+ /* Parse the instruction. */
+ memset(&exp, 0, sizeof(exp));
+ exp.offset = target;
+ exp.inst = cur;
+
+ /* Classify each of the four channel operands. */
+ for (unsigned i = 0; i < 4; i++) {
+ LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
+
+ exp.chan[i].value = v;
+
+ if (LLVMIsUndef(v)) {
+ exp.chan[i].type = AC_IR_UNDEF;
+ } else if (LLVMIsAConstantFP(v)) {
+ LLVMBool loses_info;
+ exp.chan[i].type = AC_IR_CONST;
+ /* The double is narrowed to float; loses_info is not inspected. */
+ exp.chan[i].const_float =
+ LLVMConstRealGetDouble(v, &loses_info);
+ } else {
+ exp.chan[i].type = AC_IR_VALUE;
+ }
+ }
+
+ /* Eliminate constant and duplicated PARAM exports. */
+ if (ac_eliminate_const_output(vs_output_param_offset,
+ num_outputs, &exp) ||
+ ac_eliminate_duplicated_output(ctx,
+ vs_output_param_offset,
+ num_outputs, &exports,
+ &exp)) {
+ removed_any = true;
+ } else {
+ /* NOTE(review): no bounds check against VARYING_SLOT_MAX here;
+ * presumably the number of PARAM exports can never exceed it.
+ */
+ exports.exp[exports.num++] = exp;
+ }
+ }
+ bb = LLVMGetNextBasicBlock(bb);
+ }
+
+ /* Remove holes in export memory due to removed PARAM exports.
+ * This is done by renumbering all PARAM exports.
+ */
+ if (removed_any) {
+ uint8_t old_offset[VARYING_SLOT_MAX];
+ unsigned out, i;
+
+ /* Make a copy of the offsets. We need the old version while
+ * we are modifying some of them. */
+ /* NOTE(review): this copies VARYING_SLOT_MAX bytes regardless of
+ * num_outputs, so it assumes the caller's array is at least that
+ * large - confirm against callers.
+ */
+ memcpy(old_offset, vs_output_param_offset,
+ sizeof(old_offset));
+
+ for (i = 0; i < exports.num; i++) {
+ unsigned offset = exports.exp[i].offset;
+
+ /* Update vs_output_param_offset. Multiple outputs can
+ * have the same offset.
+ */
+ for (out = 0; out < num_outputs; out++) {
+ if (old_offset[out] == offset)
+ vs_output_param_offset[out] = i;
+ }
+
+ /* Change the PARAM offset in the instruction. */
+ LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
+ LLVMConstInt(ctx->i32,
+ V_008DFC_SQ_EXP_PARAM + i, 0));
+ }
+ *num_param_exports = exports.num;
+ }
+}
+
+/**
+ * Emit llvm.amdgcn.init.exec with an all-ones 64-bit mask.
+ */
+void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
+{
+	LLVMValueRef args[1] = {
+		LLVMConstInt(ctx->i64, ~0ull, 0),
+	};
+
+	ac_build_intrinsic(ctx, "llvm.amdgcn.init.exec", ctx->voidt,
+			   args, 1, AC_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * Set ctx->lds to address 0 cast to a pointer to an i32 array in the LDS
+ * address space. The array covers 65536 bytes on GFX7+ and 32768 bytes on
+ * older chips.
+ */
+void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
+{
+	unsigned lds_size;
+
+	if (ctx->chip_class >= GFX7)
+		lds_size = 65536;
+	else
+		lds_size = 32768;
+
+	LLVMTypeRef dword_array = LLVMArrayType(ctx->i32, lds_size / 4);
+	LLVMTypeRef lds_ptr_type = LLVMPointerType(dword_array, AC_ADDR_SPACE_LDS);
+
+	ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0, lds_ptr_type,
+				     "lds");
+}
+
+/**
+ * Load the element of ctx->lds at index dw_addr.
+ */
+LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
+			 LLVMValueRef dw_addr)
+{
+	LLVMValueRef elem_ptr = ac_build_gep0(ctx, ctx->lds, dw_addr);
+
+	return LLVMBuildLoad(ctx->builder, elem_ptr, "");
+}
+
+/**
+ * Convert value to an integer type and store it to ctx->lds at index
+ * dw_addr.
+ */
+void ac_lds_store(struct ac_llvm_context *ctx,
+		  LLVMValueRef dw_addr,
+		  LLVMValueRef value)
+{
+	LLVMValueRef int_value = ac_to_integer(ctx, value);
+
+	ac_build_indexed_store(ctx, ctx->lds, dw_addr, int_value);
+}
+
+/**
+ * Index of the least significant set bit of src0 as an i32, or -1 when
+ * src0 is zero.
+ *
+ * The llvm.cttz variant is chosen from the element width of src0
+ * (8, 16, 32 or 64 bits). dst_type is not used by this implementation.
+ */
+LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
+			 LLVMTypeRef dst_type,
+			 LLVMValueRef src0)
+{
+	const unsigned bits = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+	const char *cttz_name;
+	LLVMTypeRef src_type;
+	LLVMValueRef src_zero;
+
+	if (bits == 64) {
+		cttz_name = "llvm.cttz.i64";
+		src_type = ctx->i64;
+		src_zero = ctx->i64_0;
+	} else if (bits == 32) {
+		cttz_name = "llvm.cttz.i32";
+		src_type = ctx->i32;
+		src_zero = ctx->i32_0;
+	} else if (bits == 16) {
+		cttz_name = "llvm.cttz.i16";
+		src_type = ctx->i16;
+		src_zero = ctx->i16_0;
+	} else if (bits == 8) {
+		cttz_name = "llvm.cttz.i8";
+		src_type = ctx->i8;
+		src_zero = ctx->i8_0;
+	} else {
+		unreachable(!"invalid bitsize");
+	}
+
+	/* The second argument:
+	 *
+	 * The value of 1 means that ffs(x=0) = undef, so LLVM won't
+	 * add special code to check for x=0. The reason is that
+	 * the LLVM behavior for x=0 is different from what we
+	 * need here. However, LLVM also assumes that ffs(x) is
+	 * in [0, 31], but GLSL expects that ffs(0) = -1, so
+	 * a conditional assignment to handle 0 is still required.
+	 *
+	 * The hardware already implements the correct behavior.
+	 */
+	LLVMValueRef cttz_args[2] = { src0, ctx->i1true };
+
+	LLVMValueRef lsb = ac_build_intrinsic(ctx, cttz_name, src_type,
+					      cttz_args, 2,
+					      AC_FUNC_ATTR_READNONE);
+
+	/* Bring the bit index to 32 bits. */
+	if (bits == 64)
+		lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
+	else if (bits < 32)
+		lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, "");
+
+	/* TODO: We need an intrinsic to skip this conditional. */
+	/* Check for zero: */
+	LLVMValueRef src_is_zero = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
+						 src0, src_zero, "");
+
+	return LLVMBuildSelect(ctx->builder, src_is_zero,
+			       LLVMConstInt(ctx->i32, -1, 0), lsb, "");
+}
+
+/**
+ * Pointer-to-elem_type in address space AC_ADDR_SPACE_CONST.
+ */
+LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
+{
+	LLVMTypeRef ptr_type = LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST);
+
+	return ptr_type;
+}
+
+/**
+ * Pointer-to-elem_type in address space AC_ADDR_SPACE_CONST_32BIT.
+ */
+LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
+{
+	LLVMTypeRef ptr_type =
+		LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT);
+
+	return ptr_type;
+}
+
+/* Top entry of the control-flow stack, or NULL when the stack is empty. */
+static struct ac_llvm_flow *
+get_current_flow(struct ac_llvm_context *ctx)
+{
+	return ctx->flow->depth > 0
+		? &ctx->flow->stack[ctx->flow->depth - 1]
+		: NULL;
+}
+
+/* Search the control-flow stack from the top down for the first entry that
+ * has a loop entry block. Returns NULL when there is none.
+ */
+static struct ac_llvm_flow *
+get_innermost_loop(struct ac_llvm_context *ctx)
+{
+	unsigned level = ctx->flow->depth;
+
+	while (level > 0) {
+		struct ac_llvm_flow *entry = &ctx->flow->stack[level - 1];
+
+		if (entry->loop_entry_block)
+			return entry;
+		level--;
+	}
+	return NULL;
+}
+
+/* Push a new entry onto the control-flow stack and return it with both block
+ * pointers cleared. The stack storage is grown when it is full.
+ */
+static struct ac_llvm_flow *
+push_flow(struct ac_llvm_context *ctx)
+{
+	if (ctx->flow->depth >= ctx->flow->depth_max) {
+		/* Double the capacity, starting at AC_LLVM_INITIAL_CF_DEPTH. */
+		unsigned capacity = MAX2(ctx->flow->depth << 1,
+					 AC_LLVM_INITIAL_CF_DEPTH);
+		size_t bytes = capacity * sizeof(*ctx->flow->stack);
+
+		/* NOTE(review): the realloc() result is not checked. */
+		ctx->flow->stack = realloc(ctx->flow->stack, bytes);
+		ctx->flow->depth_max = capacity;
+	}
+
+	struct ac_llvm_flow *top = &ctx->flow->stack[ctx->flow->depth++];
+
+	top->next_block = NULL;
+	top->loop_entry_block = NULL;
+	return top;
+}
+
+/* Name the basic block "<base><label_id>". The name is formatted into a
+ * 32-byte buffer, so longer names are truncated by snprintf().
+ */
+static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base,
+				int label_id)
+{
+	char label[32];
+	LLVMValueRef bb_value = LLVMBasicBlockAsValue(bb);
+
+	snprintf(label, sizeof(label), "%s%d", base, label_id);
+	LLVMSetValueName(bb_value, label);
+}
+
+/* Create a basic block at the level of the parent flow.
+ *
+ * With a parent flow on the stack, the block is inserted before the parent's
+ * next_block; otherwise it is appended to the end of the current function.
+ */
+static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx,
+					    const char *name)
+{
+	assert(ctx->flow->depth >= 1);
+
+	if (ctx->flow->depth < 2) {
+		/* No parent flow: append to the function being built. */
+		LLVMBasicBlockRef cur_block = LLVMGetInsertBlock(ctx->builder);
+		LLVMValueRef main_fn = LLVMGetBasicBlockParent(cur_block);
+
+		return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name);
+	}
+
+	struct ac_llvm_flow *parent = &ctx->flow->stack[ctx->flow->depth - 2];
+
+	return LLVMInsertBasicBlockInContext(ctx->context,
+					     parent->next_block, name);
+}
+
+/* Branch to `target` from the current block unless that block already has a
+ * terminator, e.g. the branch emitted for a break or continue.
+ */
+static void emit_default_branch(LLVMBuilderRef builder,
+				LLVMBasicBlockRef target)
+{
+	LLVMBasicBlockRef cur_block = LLVMGetInsertBlock(builder);
+
+	if (LLVMGetBasicBlockTerminator(cur_block))
+		return;
+
+	LLVMBuildBr(builder, target);
+}
+
+/**
+ * Open a loop: push a flow entry, create the loop body block and the block
+ * following the loop, branch into the body and continue building there.
+ */
+void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
+{
+	struct ac_llvm_flow *loop = push_flow(ctx);
+
+	/* The body must be created before the exit block. */
+	LLVMBasicBlockRef body = append_basic_block(ctx, "LOOP");
+	LLVMBasicBlockRef exit_block = append_basic_block(ctx, "ENDLOOP");
+
+	loop->loop_entry_block = body;
+	loop->next_block = exit_block;
+
+	set_basicblock_name(body, "loop", label_id);
+	LLVMBuildBr(ctx->builder, body);
+	LLVMPositionBuilderAtEnd(ctx->builder, body);
+}
+
+/**
+ * Emit a branch to the block following the innermost open loop.
+ *
+ * Must only be called while a loop is open.
+ */
+void ac_build_break(struct ac_llvm_context *ctx)
+{
+	struct ac_llvm_flow *flow = get_innermost_loop(ctx);
+
+	/* get_innermost_loop() returns NULL when no loop is open. */
+	assert(flow);
+	LLVMBuildBr(ctx->builder, flow->next_block);
+}
+
+/**
+ * Emit a branch back to the entry block of the innermost open loop.
+ *
+ * Must only be called while a loop is open.
+ */
+void ac_build_continue(struct ac_llvm_context *ctx)
+{
+	struct ac_llvm_flow *flow = get_innermost_loop(ctx);
+
+	/* get_innermost_loop() returns NULL when no loop is open. */
+	assert(flow);
+	LLVMBuildBr(ctx->builder, flow->loop_entry_block);
+}
+
+/**
+ * Switch from the "then" part of the current if to its "else" part.
+ *
+ * A new ENDIF block is created, the "then" part branches to it unless it
+ * already has a terminator, and building continues in the previously
+ * created else block.
+ */
+void ac_build_else(struct ac_llvm_context *ctx, int label_id)
+{
+	struct ac_llvm_flow *branch = get_current_flow(ctx);
+
+	assert(!branch->loop_entry_block);
+
+	LLVMBasicBlockRef else_block = branch->next_block;
+	LLVMBasicBlockRef merge_block = append_basic_block(ctx, "ENDIF");
+
+	emit_default_branch(ctx->builder, merge_block);
+	branch->next_block = merge_block;
+
+	LLVMPositionBuilderAtEnd(ctx->builder, else_block);
+	set_basicblock_name(else_block, "else", label_id);
+}
+
+/**
+ * Close the current if: branch to its next_block unless the current block
+ * already has a terminator, continue building there and pop the flow entry.
+ */
+void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
+{
+	struct ac_llvm_flow *branch = get_current_flow(ctx);
+
+	assert(!branch->loop_entry_block);
+
+	LLVMBasicBlockRef merge_block = branch->next_block;
+
+	emit_default_branch(ctx->builder, merge_block);
+	LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
+	set_basicblock_name(merge_block, "endif", label_id);
+
+	ctx->flow->depth--;
+}
+
+/**
+ * Close the current loop: branch back to the loop entry unless the current
+ * block already has a terminator, continue building in the block after the
+ * loop and pop the flow entry.
+ */
+void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
+{
+	struct ac_llvm_flow *loop = get_current_flow(ctx);
+
+	assert(loop->loop_entry_block);
+
+	LLVMBasicBlockRef exit_block = loop->next_block;
+
+	emit_default_branch(ctx->builder, loop->loop_entry_block);
+
+	LLVMPositionBuilderAtEnd(ctx->builder, exit_block);
+	set_basicblock_name(exit_block, "endloop", label_id);
+	ctx->flow->depth--;
+}
+
+/**
+ * Open an if on the i1 condition `cond`: push a flow entry, create the
+ * "then" and "else" blocks, emit the conditional branch and continue
+ * building in the "then" block.
+ */
+void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
+{
+	struct ac_llvm_flow *branch = push_flow(ctx);
+
+	/* The "then" block must be created before the "else" block. */
+	LLVMBasicBlockRef then_block = append_basic_block(ctx, "IF");
+	LLVMBasicBlockRef else_block = append_basic_block(ctx, "ELSE");
+
+	branch->next_block = else_block;
+
+	set_basicblock_name(then_block, "if", label_id);
+	LLVMBuildCondBr(ctx->builder, cond, then_block, else_block);
+	LLVMPositionBuilderAtEnd(ctx->builder, then_block);
+}
+
+/**
+ * Open an if whose condition is "value != 0.0" (unordered-or-not-equal
+ * comparison against ctx->f32_0).
+ */
+void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
+		 int label_id)
+{
+	LLVMValueRef is_nonzero;
+
+	is_nonzero = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, value,
+				   ctx->f32_0, "");
+	ac_build_ifcc(ctx, is_nonzero, label_id);
+}
+
+/**
+ * Open an if whose condition is "value, converted to integer, != 0"
+ * (compared against ctx->i32_0).
+ */
+void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
+		  int label_id)
+{
+	LLVMValueRef int_value = ac_to_integer(ctx, value);
+	LLVMValueRef is_nonzero;
+
+	is_nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE, int_value,
+				   ctx->i32_0, "");
+	ac_build_ifcc(ctx, is_nonzero, label_id);
+}
+
+/**
+ * Build an alloca of `type` at the very start of the entry block of the
+ * function currently being built. The allocated memory is not initialized.
+ *
+ * A temporary builder is used, so the position of ac->builder is untouched.
+ */
+LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
+				   const char *name)
+{
+	LLVMBasicBlockRef cur_block = LLVMGetInsertBlock(ac->builder);
+	LLVMValueRef fn = LLVMGetBasicBlockParent(cur_block);
+	LLVMBasicBlockRef entry_block = LLVMGetEntryBasicBlock(fn);
+	LLVMValueRef entry_first = LLVMGetFirstInstruction(entry_block);
+	LLVMBuilderRef tmp_builder = LLVMCreateBuilderInContext(ac->context);
+
+	if (!entry_first)
+		LLVMPositionBuilderAtEnd(tmp_builder, entry_block);
+	else
+		LLVMPositionBuilderBefore(tmp_builder, entry_first);
+
+	LLVMValueRef slot = LLVMBuildAlloca(tmp_builder, type, name);
+
+	LLVMDisposeBuilder(tmp_builder);
+	return slot;
+}
+
+/**
+ * Like ac_build_alloca_undef(), but additionally stores a null constant of
+ * `type` into the new slot at the current builder position.
+ */
+LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac,
+			     LLVMTypeRef type, const char *name)
+{
+	LLVMValueRef slot = ac_build_alloca_undef(ac, type, name);
+	LLVMValueRef null_value = LLVMConstNull(type);
+
+	LLVMBuildStore(ac->builder, null_value, slot);
+	return slot;
+}
+
+/**
+ * Bitcast `ptr` to a pointer to `type`, keeping its address space.
+ */
+LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+			 LLVMTypeRef type)
+{
+	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+	LLVMTypeRef new_ptr_type = LLVMPointerType(type, addr_space);
+
+	return LLVMBuildBitCast(ctx->builder, ptr, new_ptr_type, "");
+}
+
+/**
+ * Return the first `count` components of `value`.
+ *
+ * `value` is returned unchanged when it already has `count` components.
+ * count == 1 yields a scalar (extractelement); other counts yield a vector
+ * built with a shuffle.
+ */
+LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
+			    unsigned count)
+{
+	if (ac_get_llvm_num_components(value) == count)
+		return value;
+
+	/* At least two slots, because indices 0 and 1 are always filled in. */
+	LLVMValueRef indices[MAX2(count, 2)];
+
+	indices[0] = ctx->i32_0;
+	indices[1] = ctx->i32_1;
+	for (unsigned comp = 2; comp < count; comp++)
+		indices[comp] = LLVMConstInt(ctx->i32, comp, false);
+
+	if (count != 1) {
+		LLVMValueRef swizzle = LLVMConstVector(indices, count);
+
+		return LLVMBuildShuffleVector(ctx->builder, value, value,
+					      swizzle, "");
+	}
+
+	return LLVMBuildExtractElement(ctx->builder, value, indices[0], "");
+}
+
+/**
+ * Extract the bitfield [rshift, rshift + bitwidth) from a 32-bit `param`.
+ *
+ * @param ctx
+ * @param param
+ * @param rshift - position of the lowest bit of the field; no shift is
+ *                 emitted when it is 0
+ * @param bitwidth - width of the field; no mask is emitted when the field
+ *                   reaches bit 31, since the shift alone isolates it
+ * @return the extracted field
+ */
+LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
+			     unsigned rshift, unsigned bitwidth)
+{
+	LLVMValueRef value = param;
+	if (rshift)
+		value = LLVMBuildLShr(ctx->builder, value,
+				      LLVMConstInt(ctx->i32, rshift, false), "");
+
+	if (rshift + bitwidth < 32) {
+		/* Shift an unsigned 1: bitwidth can be 31 here, and 1 << 31
+		 * overflows a signed int.
+		 */
+		unsigned mask = (1u << bitwidth) - 1;
+		value = LLVMBuildAnd(ctx->builder, value,
+				     LLVMConstInt(ctx->i32, mask, false), "");
+	}
+	return value;
+}
+
+/* Adjust the sample index according to FMASK.
+ *
+ * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
+ * which is the identity mapping. Each nibble says which physical sample
+ * should be fetched to get that sample.
+ *
+ * For example, 0x11111100 means there are only 2 samples stored and
+ * the second sample covers 3/4 of the pixel. When reading samples 0
+ * and 1, return physical sample 0 (determined by the first two 0s
+ * in FMASK), otherwise return physical sample 1.
+ *
+ * The sample index should be adjusted as follows:
+ *   addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
+ *
+ * The sample index lives in addr[3] for array textures and in addr[2]
+ * otherwise; it is rewritten in place.
+ */
+void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
+			      LLVMValueRef *addr, bool is_array_tex)
+{
+	const unsigned sample_chan = is_array_tex ? 3 : 2;
+
+	/* Load the FMASK value at the given coordinates. */
+	struct ac_image_args load_args = {};
+	load_args.opcode = ac_image_load;
+	load_args.resource = fmask;
+	load_args.dmask = 0xf;
+	load_args.attributes = AC_FUNC_ATTR_READNONE;
+	load_args.coords[0] = addr[0];
+	load_args.coords[1] = addr[1];
+	if (is_array_tex) {
+		load_args.dim = ac_image_2darray;
+		load_args.coords[2] = addr[2];
+	} else {
+		load_args.dim = ac_image_2d;
+	}
+
+	LLVMValueRef texel = ac_build_image_opcode(ac, &load_args);
+	LLVMValueRef fmask_bits =
+		LLVMBuildExtractElement(ac->builder, texel, ac->i32_0, "");
+
+	/* Apply the formula. */
+	LLVMValueRef nibble_shift =
+		LLVMBuildMul(ac->builder, addr[sample_chan],
+			     LLVMConstInt(ac->i32, 4, 0), "");
+	LLVMValueRef remapped =
+		LLVMBuildLShr(ac->builder, fmask_bits, nibble_shift, "");
+	/* Mask the sample index by 0x7, because 0x8 means an unknown value
+	 * with EQAA, so those will map to 0. */
+	remapped = LLVMBuildAnd(ac->builder, remapped,
+				LLVMConstInt(ac->i32, 0x7, 0), "");
+
+	/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
+	 * resource descriptor is 0 (invalid).
+	 */
+	LLVMValueRef desc = LLVMBuildBitCast(ac->builder, fmask, ac->v8i32, "");
+	LLVMValueRef word1 =
+		LLVMBuildExtractElement(ac->builder, desc, ac->i32_1, "");
+	LLVMValueRef fmask_valid =
+		LLVMBuildICmp(ac->builder, LLVMIntNE, word1, ac->i32_0, "");
+
+	/* Replace the MSAA sample index. */
+	addr[sample_chan] = LLVMBuildSelect(ac->builder, fmask_valid, remapped,
+					    addr[sample_chan], "");
+}
+
+/* Emit llvm.amdgcn.readlane (lane != NULL) or llvm.amdgcn.readfirstlane
+ * (lane == NULL) on `src`. An optimization barrier is applied to `src`
+ * first.
+ */
+static LLVMValueRef
+_ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+{
+	const char *intrin;
+	unsigned num_args;
+
+	ac_build_optimization_barrier(ctx, &src);
+
+	if (lane) {
+		intrin = "llvm.amdgcn.readlane";
+		num_args = 2;
+	} else {
+		intrin = "llvm.amdgcn.readfirstlane";
+		num_args = 1;
+	}
+
+	/* The second slot is only passed on when num_args is 2. */
+	LLVMValueRef args[2] = { src, lane };
+
+	return ac_build_intrinsic(ctx, intrin, LLVMTypeOf(src), args, num_args,
+				  AC_FUNC_ATTR_READNONE |
+				  AC_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * Builds the "llvm.amdgcn.readlane" or "llvm.amdgcn.readfirstlane" intrinsic.
+ *
+ * Sources wider than 32 bits are split into dwords that are read one by one;
+ * the result is converted back to the type of src.
+ *
+ * @param ctx
+ * @param src
+ * @param lane - id of the lane or NULL for the first active lane
+ * @return value of the lane
+ */
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
+{
+	LLVMTypeRef orig_type = LLVMTypeOf(src);
+	LLVMValueRef int_src = ac_to_integer(ctx, src);
+	unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(int_src));
+	LLVMValueRef result;
+
+	if (bits != 32) {
+		assert(bits % 32 == 0);
+
+		unsigned num_dwords = bits / 32;
+		LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, num_dwords);
+		LLVMValueRef src_dwords =
+			LLVMBuildBitCast(ctx->builder, int_src, vec_type, "");
+
+		result = LLVMGetUndef(vec_type);
+		for (unsigned i = 0; i < num_dwords; i++) {
+			LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
+			LLVMValueRef dword =
+				LLVMBuildExtractElement(ctx->builder,
+							src_dwords, idx, "");
+			LLVMValueRef read = _ac_build_readlane(ctx, dword, lane);
+
+			result = LLVMBuildInsertElement(ctx->builder, result,
+							read, idx, "");
+		}
+	} else {
+		result = _ac_build_readlane(ctx, int_src, lane);
+	}
+
+	/* Pointers need inttoptr; everything else is a plain bitcast. */
+	if (LLVMGetTypeKind(orig_type) != LLVMPointerTypeKind)
+		return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+	return LLVMBuildIntToPtr(ctx->builder, result, orig_type, "");
+}
+
+/**
+ * Emit llvm.amdgcn.writelane with the operands (value, lane, src) and an
+ * i32 result.
+ */
+LLVMValueRef
+ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane)
+{
+	LLVMValueRef args[3] = { value, lane, src };
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32,
+				  args, 3,
+				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * Emit llvm.amdgcn.mbcnt.lo on `mask`, followed by llvm.amdgcn.mbcnt.hi
+ * when the wave size is not 32.
+ *
+ * With wave size 32 the mask is passed to mbcnt.lo as is; otherwise it is
+ * split into two dwords that feed mbcnt.lo and mbcnt.hi.
+ */
+LLVMValueRef
+ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
+{
+	LLVMValueRef lo_bits = mask;
+	LLVMValueRef hi_bits = NULL;
+
+	if (ctx->wave_size != 32) {
+		LLVMValueRef halves =
+			LLVMBuildBitCast(ctx->builder, mask,
+					 LLVMVectorType(ctx->i32, 2), "");
+
+		lo_bits = LLVMBuildExtractElement(ctx->builder, halves,
+						  ctx->i32_0, "");
+		hi_bits = LLVMBuildExtractElement(ctx->builder, halves,
+						  ctx->i32_1, "");
+	}
+
+	LLVMValueRef lo_args[2] = { lo_bits, ctx->i32_0 };
+	LLVMValueRef count =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
+				   lo_args, 2, AC_FUNC_ATTR_READNONE);
+
+	if (hi_bits) {
+		/* Chain the running count through the upper half. */
+		LLVMValueRef hi_args[2] = { hi_bits, count };
+
+		count = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32,
+					   hi_args, 2, AC_FUNC_ATTR_READNONE);
+	}
+	return count;
+}
+
+/* Values for the dpp_ctrl operand passed to llvm.amdgcn.update.dpp.
+ *
+ * The entries with a leading underscore are base values: the
+ * dpp_quad_perm(), dpp_row_sl() and dpp_row_sr() helpers OR lane selectors
+ * or a shift amount into them. No helper for _dpp_row_rr is visible here;
+ * presumably it is combined with an amount the same way - TODO confirm.
+ * The remaining entries are complete control values.
+ */
+enum dpp_ctrl {
+ _dpp_quad_perm = 0x000,
+ _dpp_row_sl = 0x100,
+ _dpp_row_sr = 0x110,
+ _dpp_row_rr = 0x120,
+ dpp_wf_sl1 = 0x130,
+ dpp_wf_rl1 = 0x134,
+ dpp_wf_sr1 = 0x138,
+ dpp_wf_rr1 = 0x13C,
+ dpp_row_mirror = 0x140,
+ dpp_row_half_mirror = 0x141,
+ dpp_row_bcast15 = 0x142,
+ dpp_row_bcast31 = 0x143
+};
+
+/* Build a quad-permute control value from four lane selectors, each in
+ * [0, 3]; lane0 occupies the lowest two bits.
+ */
+static inline enum dpp_ctrl
+dpp_quad_perm(unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
+{
+	assert(lane0 < 4 && lane1 < 4 && lane2 < 4 && lane3 < 4);
+
+	unsigned selectors = lane0 | (lane1 << 2) | (lane2 << 4) | (lane3 << 6);
+
+	return _dpp_quad_perm | selectors;
+}
+
+/* Combine _dpp_row_sl with an amount in [1, 15]. */
+static inline enum dpp_ctrl
+dpp_row_sl(unsigned amount)
+{
+	assert(amount > 0 && amount < 16);
+
+	unsigned ctrl = _dpp_row_sl | amount;
+
+	return ctrl;
+}
+
+/* Combine _dpp_row_sr with an amount in [1, 15]. */
+static inline enum dpp_ctrl
+dpp_row_sr(unsigned amount)
+{
+	assert(amount > 0 && amount < 16);
+
+	unsigned ctrl = _dpp_row_sr | amount;
+
+	return ctrl;
+}
+
+/* Emit one llvm.amdgcn.update.dpp.i32 call. The control fields are passed
+ * as i32 constants, bound_ctrl as an i1 constant; the result has the type
+ * of `old`.
+ */
+static LLVMValueRef
+_ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
+	      enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
+	      bool bound_ctrl)
+{
+	LLVMValueRef args[6];
+
+	args[0] = old;
+	args[1] = src;
+	args[2] = LLVMConstInt(ctx->i32, dpp_ctrl, 0);
+	args[3] = LLVMConstInt(ctx->i32, row_mask, 0);
+	args[4] = LLVMConstInt(ctx->i32, bank_mask, 0);
+	args[5] = LLVMConstInt(ctx->i1, bound_ctrl, 0);
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.update.dpp.i32",
+				  LLVMTypeOf(old), args, 6,
+				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+}
+
+/* DPP on a value of any multiple-of-32-bit width.
+ *
+ * `src` and `old` are converted to integers; wider values are split into
+ * dwords that each go through _ac_build_dpp(). The result is bitcast back
+ * to the type of `src`.
+ */
+static LLVMValueRef
+ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
+	     enum dpp_ctrl dpp_ctrl, unsigned row_mask, unsigned bank_mask,
+	     bool bound_ctrl)
+{
+	LLVMTypeRef orig_type = LLVMTypeOf(src);
+	LLVMValueRef int_src = ac_to_integer(ctx, src);
+	LLVMValueRef int_old = ac_to_integer(ctx, old);
+	unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(int_src));
+	LLVMValueRef result;
+
+	if (bits != 32) {
+		assert(bits % 32 == 0);
+
+		unsigned num_dwords = bits / 32;
+		LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, num_dwords);
+		LLVMValueRef src_dwords =
+			LLVMBuildBitCast(ctx->builder, int_src, vec_type, "");
+		LLVMValueRef old_dwords =
+			LLVMBuildBitCast(ctx->builder, int_old, vec_type, "");
+
+		result = LLVMGetUndef(vec_type);
+		for (unsigned i = 0; i < num_dwords; i++) {
+			LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
+			LLVMValueRef src_dword =
+				LLVMBuildExtractElement(ctx->builder,
+							src_dwords, idx, "");
+			LLVMValueRef old_dword =
+				LLVMBuildExtractElement(ctx->builder,
+							old_dwords, idx, "");
+			LLVMValueRef part =
+				_ac_build_dpp(ctx, old_dword, src_dword,
+					      dpp_ctrl, row_mask, bank_mask,
+					      bound_ctrl);
+
+			result = LLVMBuildInsertElement(ctx->builder, result,
+							part, idx, "");
+		}
+	} else {
+		result = _ac_build_dpp(ctx, int_old, int_src, dpp_ctrl,
+				       row_mask, bank_mask, bound_ctrl);
+	}
+	return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+}
+
+/* Emit one llvm.amdgcn.permlane16 call, or llvm.amdgcn.permlanex16 when
+ * exchange_rows is set. The 64-bit `sel` is passed as two i32 constants,
+ * low half first; `src` is passed for both of the first two operands.
+ */
+static LLVMValueRef
+_ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
+		     bool exchange_rows, bool bound_ctrl)
+{
+	const char *intrin;
+
+	if (exchange_rows)
+		intrin = "llvm.amdgcn.permlanex16";
+	else
+		intrin = "llvm.amdgcn.permlane16";
+
+	LLVMValueRef sel_lo = LLVMConstInt(ctx->i32, sel, false);
+	LLVMValueRef sel_hi = LLVMConstInt(ctx->i32, sel >> 32, false);
+	LLVMValueRef bc = bound_ctrl ? ctx->i1true : ctx->i1false;
+
+	LLVMValueRef args[6] = {
+		src,
+		src,
+		sel_lo,
+		sel_hi,
+		ctx->i1true, /* fi */
+		bc,
+	};
+
+	return ac_build_intrinsic(ctx, intrin, ctx->i32, args, 6,
+				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+}
+
+/* permlane16/permlanex16 on a value of any multiple-of-32-bit width.
+ *
+ * `src` is converted to an integer; wider values are split into dwords
+ * that each go through _ac_build_permlane16(). The result is bitcast back
+ * to the type of `src`.
+ */
+static LLVMValueRef
+ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
+		    bool exchange_rows, bool bound_ctrl)
+{
+	LLVMTypeRef orig_type = LLVMTypeOf(src);
+	LLVMValueRef int_src = ac_to_integer(ctx, src);
+	unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(int_src));
+	LLVMValueRef result;
+
+	if (bits != 32) {
+		assert(bits % 32 == 0);
+
+		unsigned num_dwords = bits / 32;
+		LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, num_dwords);
+		LLVMValueRef src_dwords =
+			LLVMBuildBitCast(ctx->builder, int_src, vec_type, "");
+
+		result = LLVMGetUndef(vec_type);
+		for (unsigned i = 0; i < num_dwords; i++) {
+			LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
+			LLVMValueRef dword =
+				LLVMBuildExtractElement(ctx->builder,
+							src_dwords, idx, "");
+			LLVMValueRef part =
+				_ac_build_permlane16(ctx, dword, sel,
+						     exchange_rows, bound_ctrl);
+
+			result = LLVMBuildInsertElement(ctx->builder, result,
+							part, idx, "");
+		}
+	} else {
+		result = _ac_build_permlane16(ctx, int_src, sel, exchange_rows,
+					      bound_ctrl);
+	}
+	return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+}
+
+/* Pack three 5-bit masks into one swizzle pattern: and_mask in bits 0-4,
+ * or_mask in bits 5-9 and xor_mask in bits 10-14.
+ */
+static inline unsigned
+ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
+{
+	assert(and_mask < 32 && or_mask < 32 && xor_mask < 32);
+
+	unsigned pattern = and_mask;
+
+	pattern |= or_mask << 5;
+	pattern |= xor_mask << 10;
+	return pattern;
+}
+
+/* Emit one llvm.amdgcn.ds.swizzle call with `mask` as an i32 constant; the
+ * result has the type of `src`.
+ */
+static LLVMValueRef
+_ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
+{
+	LLVMValueRef args[2] = {
+		src,
+		LLVMConstInt(ctx->i32, mask, 0),
+	};
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle",
+				  LLVMTypeOf(src), args, 2,
+				  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * ds_swizzle on a value of any multiple-of-32-bit width.
+ *
+ * `src` is converted to an integer; wider values are split into dwords
+ * that each go through llvm.amdgcn.ds.swizzle. The result is bitcast back
+ * to the type of `src`.
+ */
+LLVMValueRef
+ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
+{
+	LLVMTypeRef orig_type = LLVMTypeOf(src);
+	LLVMValueRef int_src = ac_to_integer(ctx, src);
+	unsigned bits = LLVMGetIntTypeWidth(LLVMTypeOf(int_src));
+	LLVMValueRef result;
+
+	if (bits != 32) {
+		assert(bits % 32 == 0);
+
+		unsigned num_dwords = bits / 32;
+		LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, num_dwords);
+		LLVMValueRef src_dwords =
+			LLVMBuildBitCast(ctx->builder, int_src, vec_type, "");
+
+		result = LLVMGetUndef(vec_type);
+		for (unsigned i = 0; i < num_dwords; i++) {
+			LLVMValueRef idx = LLVMConstInt(ctx->i32, i, 0);
+			LLVMValueRef dword =
+				LLVMBuildExtractElement(ctx->builder,
+							src_dwords, idx, "");
+			LLVMValueRef part = _ac_build_ds_swizzle(ctx, dword, mask);
+
+			result = LLVMBuildInsertElement(ctx->builder, result,
+							part, idx, "");
+		}
+	} else {
+		result = _ac_build_ds_swizzle(ctx, int_src, mask);
+	}
+	return LLVMBuildBitCast(ctx->builder, result, orig_type, "");
+}
+
+/* Emit llvm.amdgcn.wwm.<type> on `src`, where the suffix is derived from
+ * the type of `src`.
+ */
+static LLVMValueRef
+ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
+{
+	LLVMTypeRef src_type = LLVMTypeOf(src);
+	char type_suffix[8];
+	char intrin[32];
+
+	ac_build_type_name_for_intr(src_type, type_suffix, sizeof(type_suffix));
+	snprintf(intrin, sizeof(intrin), "llvm.amdgcn.wwm.%s", type_suffix);
+
+	return ac_build_intrinsic(ctx, intrin, src_type, &src, 1,
+				  AC_FUNC_ATTR_READNONE);
+}
+
+/**
+ * Emit llvm.amdgcn.set.inactive.<type> with \p src and \p inactive as its two
+ * operands. Both operands are converted with ac_to_integer first, and the
+ * result is bitcast back to the original type of \p src.
+ */
+static LLVMValueRef
+ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
+		      LLVMValueRef inactive)
+{
+	LLVMTypeRef orig_type = LLVMTypeOf(src);
+	char type_suffix[8];
+	char intr_name[33];
+
+	src = ac_to_integer(ctx, src);
+	inactive = ac_to_integer(ctx, inactive);
+
+	ac_build_type_name_for_intr(LLVMTypeOf(src), type_suffix, sizeof(type_suffix));
+	snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.set.inactive.%s", type_suffix);
+
+	LLVMValueRef args[] = { src, inactive };
+	LLVMValueRef call =
+		ac_build_intrinsic(ctx, intr_name, LLVMTypeOf(src), args, 2,
+				   AC_FUNC_ATTR_READNONE |
+				   AC_FUNC_ATTR_CONVERGENT);
+
+	return LLVMBuildBitCast(ctx->builder, call, orig_type, "");
+}
+
+/**
+ * Return the constant that leaves the other operand unchanged when combined
+ * with reduction operator \p op (0 for add/or/xor/umax, 1 for mul, the
+ * extreme value of the type for min/max, all ones for and).
+ *
+ * \param type_size 4 selects the 32-bit constants; every other value selects
+ * the 64-bit ones.
+ */
+static LLVMValueRef
+get_reduction_identity(struct ac_llvm_context *ctx, nir_op op, unsigned type_size)
+{
+	const bool is_32bit = type_size == 4;
+	LLVMTypeRef int_type = is_32bit ? ctx->i32 : ctx->i64;
+	LLVMTypeRef float_type = is_32bit ? ctx->f32 : ctx->f64;
+
+	switch (op) {
+	case nir_op_iadd:
+	case nir_op_umax:
+	case nir_op_ior:
+	case nir_op_ixor:
+		return is_32bit ? ctx->i32_0 : ctx->i64_0;
+	case nir_op_imul:
+		return is_32bit ? ctx->i32_1 : ctx->i64_1;
+	case nir_op_fadd:
+		return is_32bit ? ctx->f32_0 : ctx->f64_0;
+	case nir_op_fmul:
+		return is_32bit ? ctx->f32_1 : ctx->f64_1;
+	case nir_op_imin:
+		return LLVMConstInt(int_type, is_32bit ? INT32_MAX : INT64_MAX, 0);
+	case nir_op_umin:
+		return LLVMConstInt(int_type, is_32bit ? UINT32_MAX : UINT64_MAX, 0);
+	case nir_op_imax:
+		return LLVMConstInt(int_type, is_32bit ? INT32_MIN : INT64_MIN, 0);
+	case nir_op_iand:
+		return LLVMConstInt(int_type, -1, 0);
+	case nir_op_fmin:
+		return LLVMConstReal(float_type, INFINITY);
+	case nir_op_fmax:
+		return LLVMConstReal(float_type, -INFINITY);
+	default:
+		unreachable("bad reduction intrinsic");
+	}
+}
+
+/* Emit "lhs <pred> rhs ? lhs : rhs" as an icmp followed by a select. */
+static LLVMValueRef
+alu_op_int_select(struct ac_llvm_context *ctx, LLVMIntPredicate pred,
+		  LLVMValueRef lhs, LLVMValueRef rhs)
+{
+	LLVMValueRef cond = LLVMBuildICmp(ctx->builder, pred, lhs, rhs, "");
+
+	return LLVMBuildSelect(ctx->builder, cond, lhs, rhs, "");
+}
+
+/* Call a two-operand float intrinsic, picking the f32 or f64 flavour. */
+static LLVMValueRef
+alu_op_float_intr(struct ac_llvm_context *ctx, bool is_64bit,
+		  const char *name_f32, const char *name_f64,
+		  LLVMValueRef lhs, LLVMValueRef rhs)
+{
+	LLVMValueRef args[] = { lhs, rhs };
+
+	return ac_build_intrinsic(ctx, is_64bit ? name_f64 : name_f32,
+				  is_64bit ? ctx->f64 : ctx->f32,
+				  args, 2, AC_FUNC_ATTR_READNONE);
+}
+
+/**
+ * Combine \p lhs and \p rhs with the reduction operator \p op.
+ *
+ * Integer min/max are built as compare + select; float min/max use the
+ * llvm.minnum / llvm.maxnum intrinsics, with the f64 variant chosen when
+ * ac_get_type_size reports 8 for the type of \p lhs.
+ */
+static LLVMValueRef
+ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs, nir_op op)
+{
+	LLVMBuilderRef builder = ctx->builder;
+	const bool is_64bit = ac_get_type_size(LLVMTypeOf(lhs)) == 8;
+
+	switch (op) {
+	case nir_op_iadd:
+		return LLVMBuildAdd(builder, lhs, rhs, "");
+	case nir_op_fadd:
+		return LLVMBuildFAdd(builder, lhs, rhs, "");
+	case nir_op_imul:
+		return LLVMBuildMul(builder, lhs, rhs, "");
+	case nir_op_fmul:
+		return LLVMBuildFMul(builder, lhs, rhs, "");
+	case nir_op_imin:
+		return alu_op_int_select(ctx, LLVMIntSLT, lhs, rhs);
+	case nir_op_umin:
+		return alu_op_int_select(ctx, LLVMIntULT, lhs, rhs);
+	case nir_op_fmin:
+		return alu_op_float_intr(ctx, is_64bit, "llvm.minnum.f32",
+					 "llvm.minnum.f64", lhs, rhs);
+	case nir_op_imax:
+		return alu_op_int_select(ctx, LLVMIntSGT, lhs, rhs);
+	case nir_op_umax:
+		return alu_op_int_select(ctx, LLVMIntUGT, lhs, rhs);
+	case nir_op_fmax:
+		return alu_op_float_intr(ctx, is_64bit, "llvm.maxnum.f32",
+					 "llvm.maxnum.f64", lhs, rhs);
+	case nir_op_iand:
+		return LLVMBuildAnd(builder, lhs, rhs, "");
+	case nir_op_ior:
+		return LLVMBuildOr(builder, lhs, rhs, "");
+	case nir_op_ixor:
+		return LLVMBuildXor(builder, lhs, rhs, "");
+	default:
+		unreachable("bad reduction intrinsic");
+	}
+}
+
+/**
+ * Build a prefix scan of \p src across the lanes of a wave, combining values
+ * with ac_build_alu_op(\p op). Partial results are combined in steps that
+ * shift by 1, 2, 3, 4 and 8 lanes within a row; beyond 16 lanes, GFX10+
+ * uses permlane16/readlane plus selects on the thread id, while older chips
+ * use the dpp_row_bcast15/31 controls.
+ *
+ * \param identity passed as the second argument of every ac_build_dpp call
+ * (NOTE(review): presumably the value seen by lanes that have no source
+ * lane; confirm against ac_build_dpp) and used as the initial result of an
+ * exclusive scan on GFX10+
+ * \param maxprefix specifies that the result only needs to be correct for a
+ * prefix of this many threads
+ * \param inclusive true for an inclusive scan, false for an exclusive one
+ *
+ * TODO: add inclusive and exclusive scan functions for GFX6.
+ */
+static LLVMValueRef
+ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src, LLVMValueRef identity,
+ unsigned maxprefix, bool inclusive)
+{
+ LLVMValueRef result, tmp;
+
+ if (ctx->chip_class >= GFX10) {
+ result = inclusive ? src : identity;
+ } else {
+ if (!inclusive) /* exclusive scan: start from src moved with dpp_wf_sr1 */
+ src = ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
+ result = src;
+ }
+ if (maxprefix <= 1)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 2)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 3)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 4)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false); /* from here on the running result, not src, is shifted */
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 8)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 16)
+ return result;
+
+ if (ctx->chip_class >= GFX10) {
+ /* dpp_row_bcast{15,31} are not supported on gfx10. */
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ LLVMValueRef cc;
+ /* TODO-GFX10: Can we get better code-gen by putting this into
+ * a branch so that LLVM generates EXEC mask manipulations? */
+ if (inclusive)
+ tmp = result;
+ else
+ tmp = ac_build_alu_op(ctx, result, src, op);
+ tmp = ac_build_permlane16(ctx, tmp, ~(uint64_t)0, true, false);
+ tmp = ac_build_alu_op(ctx, result, tmp, op);
+ cc = LLVMBuildAnd(builder, tid, LLVMConstInt(ctx->i32, 16, false), ""); /* bit 4 of the thread id */
+ cc = LLVMBuildICmp(builder, LLVMIntNE, cc, ctx->i32_0, "");
+ result = LLVMBuildSelect(builder, cc, tmp, result, ""); /* only threads with bit 4 set take the combined value */
+ if (maxprefix <= 32)
+ return result;
+
+ if (inclusive)
+ tmp = result;
+ else
+ tmp = ac_build_alu_op(ctx, result, src, op);
+ tmp = ac_build_readlane(ctx, tmp, LLVMConstInt(ctx->i32, 31, false));
+ tmp = ac_build_alu_op(ctx, result, tmp, op);
+ cc = LLVMBuildICmp(builder, LLVMIntUGE, tid,
+ LLVMConstInt(ctx->i32, 32, false), "");
+ result = LLVMBuildSelect(builder, cc, tmp, result, ""); /* only threads 32 and up take the value combined with lane 31 */
+ return result;
+ }
+
+ tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ if (maxprefix <= 32)
+ return result;
+ tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
+ result = ac_build_alu_op(ctx, result, tmp, op);
+ return result;
+}
+
+/**
+ * Inclusive scan of \p src over the wave with operator \p op.
+ *
+ * An i1 source combined with nir_op_iadd takes a short path: the value is
+ * zero-extended to i32, and the result is ac_build_mbcnt of its ballot plus
+ * the zero-extended value itself. Everything else goes through
+ * set.inactive (with the operator's identity), ac_build_scan over
+ * ctx->wave_size lanes, and a final wwm wrapper.
+ */
+LLVMValueRef
+ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
+{
+	if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
+		LLVMValueRef as_i32 = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+		LLVMValueRef ballot = ac_build_ballot(ctx, as_i32);
+		LLVMValueRef count = ac_build_mbcnt(ctx, ballot);
+
+		return LLVMBuildAdd(ctx->builder, count, as_i32, "");
+	}
+
+	ac_build_optimization_barrier(ctx, &src);
+
+	const unsigned size = ac_get_type_size(LLVMTypeOf(src));
+	LLVMValueRef identity = get_reduction_identity(ctx, op, size);
+	LLVMValueRef masked = ac_build_set_inactive(ctx, src, identity);
+	LLVMValueRef scan =
+		LLVMBuildBitCast(ctx->builder, masked, LLVMTypeOf(identity), "");
+
+	scan = ac_build_scan(ctx, op, scan, identity, ctx->wave_size, true);
+	return ac_build_wwm(ctx, scan);
+}
+
+/**
+ * Exclusive scan of \p src over the wave with operator \p op.
+ *
+ * An i1 source combined with nir_op_iadd takes a short path: the result is
+ * ac_build_mbcnt of the ballot of the zero-extended value. Everything else
+ * goes through set.inactive (with the operator's identity), an exclusive
+ * ac_build_scan over ctx->wave_size lanes, and a final wwm wrapper.
+ */
+LLVMValueRef
+ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
+{
+	if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
+		LLVMValueRef as_i32 = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
+		LLVMValueRef ballot = ac_build_ballot(ctx, as_i32);
+
+		return ac_build_mbcnt(ctx, ballot);
+	}
+
+	ac_build_optimization_barrier(ctx, &src);
+
+	const unsigned size = ac_get_type_size(LLVMTypeOf(src));
+	LLVMValueRef identity = get_reduction_identity(ctx, op, size);
+	LLVMValueRef masked = ac_build_set_inactive(ctx, src, identity);
+	LLVMValueRef scan =
+		LLVMBuildBitCast(ctx->builder, masked, LLVMTypeOf(identity), "");
+
+	scan = ac_build_scan(ctx, op, scan, identity, ctx->wave_size, false);
+	return ac_build_wwm(ctx, scan);
+}
+
+/**
+ * Reduce \p src with operator \p op over clusters of \p cluster_size lanes.
+ *
+ * A cluster size of 1 returns \p src untouched. Otherwise the value is
+ * combined with progressively more distant lanes (pairs, quads, 8, 16, 32,
+ * then the whole wave), returning through ac_build_wwm as soon as the
+ * requested cluster size is reached. The lane exchange used for each step
+ * depends on ctx->chip_class (DPP on GFX8+, ds_swizzle before that,
+ * permlane16/readlane on GFX10+).
+ */
+LLVMValueRef
+ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size)
+{
+	if (cluster_size == 1)
+		return src;
+
+	const bool gfx8_plus = ctx->chip_class >= GFX8;
+	const bool gfx10_plus = ctx->chip_class >= GFX10;
+	LLVMValueRef acc, other;
+
+	ac_build_optimization_barrier(ctx, &src);
+
+	LLVMValueRef identity =
+		get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
+
+	acc = ac_build_set_inactive(ctx, src, identity);
+	acc = LLVMBuildBitCast(ctx->builder, acc, LLVMTypeOf(identity), "");
+
+	/* Neighbouring lanes within each pair. */
+	other = ac_build_quad_swizzle(ctx, acc, 1, 0, 3, 2);
+	acc = ac_build_alu_op(ctx, acc, other, op);
+	if (cluster_size == 2)
+		return ac_build_wwm(ctx, acc);
+
+	/* The two pairs of each quad. */
+	other = ac_build_quad_swizzle(ctx, acc, 2, 3, 0, 1);
+	acc = ac_build_alu_op(ctx, acc, other, op);
+	if (cluster_size == 4)
+		return ac_build_wwm(ctx, acc);
+
+	if (gfx8_plus) {
+		other = ac_build_dpp(ctx, identity, acc, dpp_row_half_mirror,
+				     0xf, 0xf, false);
+	} else {
+		other = ac_build_ds_swizzle(ctx, acc,
+					    ds_pattern_bitmode(0x1f, 0, 0x04));
+	}
+	acc = ac_build_alu_op(ctx, acc, other, op);
+	if (cluster_size == 8)
+		return ac_build_wwm(ctx, acc);
+
+	if (gfx8_plus) {
+		other = ac_build_dpp(ctx, identity, acc, dpp_row_mirror,
+				     0xf, 0xf, false);
+	} else {
+		other = ac_build_ds_swizzle(ctx, acc,
+					    ds_pattern_bitmode(0x1f, 0, 0x08));
+	}
+	acc = ac_build_alu_op(ctx, acc, other, op);
+	if (cluster_size == 16)
+		return ac_build_wwm(ctx, acc);
+
+	if (gfx10_plus) {
+		other = ac_build_permlane16(ctx, acc, 0, true, false);
+	} else if (gfx8_plus && cluster_size != 32) {
+		other = ac_build_dpp(ctx, identity, acc, dpp_row_bcast15,
+				     0xa, 0xf, false);
+	} else {
+		other = ac_build_ds_swizzle(ctx, acc,
+					    ds_pattern_bitmode(0x1f, 0, 0x10));
+	}
+	acc = ac_build_alu_op(ctx, acc, other, op);
+	if (cluster_size == 32)
+		return ac_build_wwm(ctx, acc);
+
+	/* Remaining case: combine the two halves of the wave. */
+	if (gfx8_plus) {
+		if (gfx10_plus) {
+			other = ac_build_readlane(ctx, acc,
+						  LLVMConstInt(ctx->i32, 31, false));
+		} else {
+			other = ac_build_dpp(ctx, identity, acc, dpp_row_bcast31,
+					     0xc, 0xf, false);
+		}
+		acc = ac_build_alu_op(ctx, acc, other, op);
+		acc = ac_build_readlane(ctx, acc, LLVMConstInt(ctx->i32, 63, 0));
+	} else {
+		LLVMValueRef lane0 = ac_build_readlane(ctx, acc, ctx->i32_0);
+		LLVMValueRef lane32 =
+			ac_build_readlane(ctx, acc, LLVMConstInt(ctx->i32, 32, 0));
+
+		acc = ac_build_alu_op(ctx, lane32, lane0, op);
+	}
+	return ac_build_wwm(ctx, acc);
+}
+
+/**
+ * First ("top") half of a workgroup-wide scan over per-wave values.
+ *
+ * The lane whose thread id equals ctx->wave_size - 1 stores ws->src into
+ * ws->scratch at index ws->waveidx. The source value therefore has to be
+ * present in that highest lane, and the lane has to be live.
+ *
+ * Nothing is emitted when ws->maxwaves <= 1.
+ */
+void
+ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+	if (ws->maxwaves > 1) {
+		LLVMBuilderRef builder = ctx->builder;
+		const LLVMValueRef highest_lane =
+			LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
+		LLVMValueRef lane_id = ac_get_thread_id(ctx);
+		LLVMValueRef is_highest =
+			LLVMBuildICmp(builder, LLVMIntEQ, lane_id, highest_lane, "");
+
+		ac_build_ifcc(ctx, is_highest, 1000);
+		{
+			LLVMValueRef slot =
+				LLVMBuildGEP(builder, ws->scratch, &ws->waveidx, 1, "");
+
+			LLVMBuildStore(builder, ws->src, slot);
+		}
+		ac_build_endif(ctx, 1000);
+	}
+}
+
+/**
+ * "Bottom half" of a scan that reduces per-wave values across an entire
+ * workgroup.
+ *
+ * The caller must place a barrier between the top and bottom halves.
+ *
+ * Lanes selected by the thread-id comparison below load ws->scratch[tid] and
+ * run an inclusive ac_build_scan over at most ws->maxwaves values; the
+ * requested outputs are then read back with ac_build_readlane:
+ * ws->result_reduce from lane ws->numwaves - 1, ws->result_inclusive from
+ * lane ws->waveidx, and ws->result_exclusive from lane ws->waveidx - 1
+ * (replaced by the identity when ws->waveidx is 0).
+ *
+ * With ws->maxwaves <= 1 no code is emitted: the reduce and inclusive
+ * results are ws->src and the exclusive result is the identity.
+ */
+void
+ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+ const LLVMTypeRef type = LLVMTypeOf(ws->src);
+ const LLVMValueRef identity =
+ get_reduction_identity(ctx, ws->op, ac_get_type_size(type));
+
+ if (ws->maxwaves <= 1) {
+ ws->result_reduce = ws->src;
+ ws->result_inclusive = ws->src;
+ ws->result_exclusive = identity;
+ return;
+ }
+ assert(ws->maxwaves <= 32);
+
+ LLVMBuilderRef builder = ctx->builder;
+ LLVMValueRef tid = ac_get_thread_id(ctx);
+ LLVMBasicBlockRef bbs[2];
+ LLVMValueRef phivalues_scan[2];
+ LLVMValueRef tmp, tmp2;
+
+ bbs[0] = LLVMGetInsertBlock(builder); /* incoming block for lanes that skip the scan */
+ phivalues_scan[0] = LLVMGetUndef(type);
+
+ if (ws->enable_reduce) /* pick how many lanes need to take part in the scan */
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->numwaves, "");
+ else if (ws->enable_inclusive)
+ tmp = LLVMBuildICmp(builder, LLVMIntULE, tid, ws->waveidx, "");
+ else
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, ws->waveidx, "");
+ ac_build_ifcc(ctx, tmp, 1001);
+ {
+ tmp = LLVMBuildLoad(builder, LLVMBuildGEP(builder, ws->scratch, &tid, 1, ""), "");
+
+ ac_build_optimization_barrier(ctx, &tmp);
+
+ bbs[1] = LLVMGetInsertBlock(builder); /* incoming block for lanes that ran the scan */
+ phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);
+ }
+ ac_build_endif(ctx, 1001);
+
+ const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs);
+
+ if (ws->enable_reduce) {
+ tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, "");
+ ws->result_reduce = ac_build_readlane(ctx, scan, tmp);
+ }
+ if (ws->enable_inclusive)
+ ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx);
+ if (ws->enable_exclusive) {
+ tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, "");
+ tmp = ac_build_readlane(ctx, scan, tmp);
+ tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, ""); /* wave 0 has no predecessor */
+ ws->result_exclusive = LLVMBuildSelect(builder, tmp2, identity, tmp, "");
+ }
+}
+
+/**
+ * Inclusive scan of a per-wave value across an entire workgroup.
+ *
+ * This implies an s_barrier instruction.
+ *
+ * Unlike ac_build_inclusive_scan, the caller \em must ensure that all threads
+ * of the workgroup are live. (This requirement cannot easily be relaxed in a
+ * useful manner because of the barrier in the algorithm.)
+ *
+ * Emits ac_build_wg_wavescan_top, ac_build_s_barrier and
+ * ac_build_wg_wavescan_bottom, in that order, on the same \p ws.
+ */
+void
+ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+ ac_build_wg_wavescan_top(ctx, ws);
+ ac_build_s_barrier(ctx);
+ ac_build_wg_wavescan_bottom(ctx, ws);
+}
+
+/**
+ * First ("top") half of a workgroup-wide scan over per-thread values.
+ *
+ * Runs the wave-level scan on ws->src: when an exclusive result is wanted,
+ * the exclusive scan is kept in ws->extra and ws->src becomes that result
+ * combined with the original source; otherwise ws->src becomes the inclusive
+ * scan. The per-wave top half is then emitted with the inclusive flag cleared
+ * and the exclusive flag set if either flag was requested; both flags are
+ * restored before returning.
+ *
+ * All lanes must be active when this code runs.
+ */
+void
+ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+	if (!ws->enable_exclusive) {
+		ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op);
+	} else {
+		ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op);
+		if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd)
+			ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, "");
+		ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op);
+	}
+
+	const bool saved_inclusive = ws->enable_inclusive;
+	const bool saved_exclusive = ws->enable_exclusive;
+
+	ws->enable_inclusive = false;
+	ws->enable_exclusive = saved_exclusive || saved_inclusive;
+	ac_build_wg_wavescan_top(ctx, ws);
+
+	ws->enable_inclusive = saved_inclusive;
+	ws->enable_exclusive = saved_exclusive;
+}
+
+/**
+ * Second ("bottom") half of a workgroup-wide scan over per-thread values.
+ *
+ * Emits the per-wave bottom half with the inclusive flag cleared and the
+ * exclusive flag set if either flag was requested, restores both flags, and
+ * then folds the per-thread values back in: the inclusive result is combined
+ * with ws->src and the exclusive result with ws->extra.
+ *
+ * The caller must place a barrier between the top and bottom halves.
+ */
+void
+ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+	const bool saved_inclusive = ws->enable_inclusive;
+	const bool saved_exclusive = ws->enable_exclusive;
+
+	ws->enable_inclusive = false;
+	ws->enable_exclusive = saved_exclusive || saved_inclusive;
+	ac_build_wg_wavescan_bottom(ctx, ws);
+
+	ws->enable_inclusive = saved_inclusive;
+	ws->enable_exclusive = saved_exclusive;
+
+	/* ws->result_reduce needs no further work. */
+	if (saved_inclusive) {
+		ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive,
+						       ws->src, ws->op);
+	}
+	if (saved_exclusive) {
+		ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive,
+						       ws->extra, ws->op);
+	}
+}
+
+/**
+ * A scan that reduces per-thread values across an entire workgroup.
+ *
+ * The caller must ensure that all lanes are active when this code runs
+ * (WWM is insufficient!), because there is an implied barrier.
+ *
+ * Emits ac_build_wg_scan_top, ac_build_s_barrier and
+ * ac_build_wg_scan_bottom, in that order, on the same \p ws.
+ */
+void
+ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
+{
+ ac_build_wg_scan_top(ctx, ws);
+ ac_build_s_barrier(ctx);
+ ac_build_wg_scan_bottom(ctx, ws);
+}
+
+/**
+ * Permute \p src within each group of four lanes; \p lane0..\p lane3 are
+ * forwarded to dpp_quad_perm to form the permutation.
+ *
+ * GFX8 and newer use ac_build_dpp with that control value; older chips use
+ * ac_build_ds_swizzle with bit 15 set in the pattern.
+ */
+LLVMValueRef
+ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
+		      unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
+{
+	const unsigned perm = dpp_quad_perm(lane0, lane1, lane2, lane3);
+
+	if (ctx->chip_class < GFX8)
+		return ac_build_ds_swizzle(ctx, src, (1 << 15) | perm);
+
+	return ac_build_dpp(ctx, src, src, perm, 0xf, 0xf, false);
+}
+
+/**
+ * Emit llvm.amdgcn.ds.bpermute on \p src. \p index is multiplied by 4 before
+ * being passed as the first operand; the call is declared to return i32.
+ */
+LLVMValueRef
+ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
+{
+	LLVMValueRef scaled_index =
+		LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
+	LLVMValueRef args[] = { scaled_index, src };
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32,
+				  args, 2,
+				  AC_FUNC_ATTR_READNONE |
+				  AC_FUNC_ATTR_CONVERGENT);
+}
+
+/**
+ * Emit the llvm.amdgcn.frexp.exp intrinsic for \p src0.
+ *
+ * \param bitsize 16 selects the i16.f16 variant (returns i16), 32 the
+ * i32.f32 variant; any other value selects the i32.f64 variant. The last
+ * two return i32.
+ */
+LLVMValueRef
+ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
+		   unsigned bitsize)
+{
+	LLVMTypeRef type;
+	/* Points at string literals only, so it must be const-qualified. */
+	const char *intr;
+
+	if (bitsize == 16) {
+		intr = "llvm.amdgcn.frexp.exp.i16.f16";
+		type = ctx->i16;
+	} else if (bitsize == 32) {
+		intr = "llvm.amdgcn.frexp.exp.i32.f32";
+		type = ctx->i32;
+	} else {
+		intr = "llvm.amdgcn.frexp.exp.i32.f64";
+		type = ctx->i32;
+	}
+
+	LLVMValueRef params[] = {
+		src0,
+	};
+	return ac_build_intrinsic(ctx, intr, type, params, 1,
+				  AC_FUNC_ATTR_READNONE);
+}
+
+/**
+ * Emit the llvm.amdgcn.frexp.mant intrinsic for \p src0.
+ *
+ * \param bitsize 16 selects the f16 variant, 32 the f32 variant; any other
+ * value selects the f64 variant. The return type matches the variant.
+ */
+LLVMValueRef
+ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
+		    unsigned bitsize)
+{
+	LLVMTypeRef type;
+	/* Points at string literals only, so it must be const-qualified. */
+	const char *intr;
+
+	if (bitsize == 16) {
+		intr = "llvm.amdgcn.frexp.mant.f16";
+		type = ctx->f16;
+	} else if (bitsize == 32) {
+		intr = "llvm.amdgcn.frexp.mant.f32";
+		type = ctx->f32;
+	} else {
+		intr = "llvm.amdgcn.frexp.mant.f64";
+		type = ctx->f64;
+	}
+
+	LLVMValueRef params[] = {
+		src0,
+	};
+	return ac_build_intrinsic(ctx, intr, type, params, 1,
+				  AC_FUNC_ATTR_READNONE);
+}
+
+/*
+ * Given an (I, J) coordinate pair in \p interp_ij, compute its derivatives
+ * along X and Y. The returned 4-element value holds
+ * DDX(I), DDX(J), DDY(I), DDY(J), in that order.
+ */
+LLVMValueRef
+ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij)
+{
+	LLVMValueRef derivs[4];
+
+	for (unsigned comp = 0; comp < 2; comp++) {
+		LLVMValueRef comp_idx = LLVMConstInt(ctx->i32, comp, false);
+		LLVMValueRef coord =
+			LLVMBuildExtractElement(ctx->builder, interp_ij, comp_idx, "");
+
+		/* idx 1 fills the DDX slots, idx 2 the DDY slots. */
+		derivs[comp] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, coord);
+		derivs[2 + comp] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, coord);
+	}
+
+	return ac_build_gather_values(ctx, derivs, 4);
+}
+
+/**
+ * Return the negation of llvm.amdgcn.ps.live, sign-extended from i1 to i32
+ * (so the value is all ones when ps.live is false, and zero otherwise).
+ */
+LLVMValueRef
+ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
+{
+	LLVMValueRef live = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live",
+					       ctx->i1, NULL, 0,
+					       AC_FUNC_ATTR_READNONE);
+	LLVMValueRef not_live = LLVMBuildNot(ctx->builder, live, "");
+
+	return LLVMBuildSExt(ctx->builder, not_live, ctx->i32, "");
+}
+
+/**
+ * Build a call to \p func with \p num_args arguments from \p args and give
+ * the call instruction the same calling convention as the callee.
+ */
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+			   LLVMValueRef *args, unsigned num_args)
+{
+	LLVMValueRef call = LLVMBuildCall(ctx->builder, func, args, num_args, "");
+	const unsigned callconv = LLVMGetFunctionCallConv(func);
+
+	LLVMSetInstructionCallConv(call, callconv);
+	return call;
+}
+
+/**
+ * Fill \p args for an export to the MRTZ target.
+ *
+ * At least one of \p depth, \p stencil and \p samplemask must be non-NULL;
+ * which of them are present selects the export format through
+ * ac_get_spi_shader_z_format. \p args is zeroed first, so every field not
+ * set here ends up 0.
+ */
+void
+ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
+		LLVMValueRef stencil, LLVMValueRef samplemask,
+		struct ac_export_args *args)
+{
+	const unsigned format = ac_get_spi_shader_z_format(depth != NULL,
+							    stencil != NULL,
+							    samplemask != NULL);
+	unsigned channels = 0;
+
+	assert(depth || stencil || samplemask);
+
+	memset(args, 0, sizeof(*args));
+
+	args->valid_mask = 1; /* the EXEC mask is valid */
+	args->done = 1; /* DONE bit */
+	args->target = V_008DFC_SQ_EXP_MRTZ; /* export target */
+	args->compr = 0; /* COMPR flag, uncompressed unless changed below */
+
+	/* R = depth, G = stencil test val[0:7] / stencil op val[8:15],
+	 * B = sample mask, A = alpha to mask. All start out undefined. */
+	for (unsigned chan = 0; chan < 4; chan++)
+		args->out[chan] = LLVMGetUndef(ctx->f32);
+
+	if (format != V_028710_SPI_SHADER_UINT16_ABGR) {
+		if (depth) {
+			args->out[0] = depth;
+			channels |= 0x1;
+		}
+		if (stencil) {
+			args->out[1] = stencil;
+			channels |= 0x2;
+		}
+		if (samplemask) {
+			args->out[2] = samplemask;
+			channels |= 0x4;
+		}
+	} else {
+		assert(!depth);
+		args->compr = 1; /* COMPR flag */
+
+		if (stencil) {
+			/* The stencil value belongs in X[23:16]. */
+			LLVMValueRef shifted = ac_to_integer(ctx, stencil);
+
+			shifted = LLVMBuildShl(ctx->builder, shifted,
+					       LLVMConstInt(ctx->i32, 16, 0), "");
+			args->out[0] = ac_to_float(ctx, shifted);
+			channels |= 0x3;
+		}
+		if (samplemask) {
+			/* The sample mask belongs in Y[15:0]. */
+			args->out[1] = samplemask;
+			channels |= 0xc;
+		}
+	}
+
+	/* GFX6 other than OLAND and HAINAN has a bug: only the X writemask
+	 * component is looked at, so it always has to be enabled there. */
+	const bool x_writemask_bug = ctx->chip_class == GFX6 &&
+				     ctx->family != CHIP_OLAND &&
+				     ctx->family != CHIP_HAINAN;
+	if (x_writemask_bug)
+		channels |= 0x1;
+
+	/* Components to enable. */
+	args->enabled_channels = channels;
+}
+
--- /dev/null
+/*
+ * Copyright 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef AC_LLVM_BUILD_H
+#define AC_LLVM_BUILD_H
+
+#include <stdbool.h>
+#include <llvm-c/Core.h>
+#include "compiler/nir/nir.h"
+#include "amd_family.h"
+#include "ac_shader_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum { /* Address-space identifiers with explicit numeric values; 5 is not defined here. */
+ AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
+ AC_ADDR_SPACE_GLOBAL = 1,
+ AC_ADDR_SPACE_GDS = 2,
+ AC_ADDR_SPACE_LDS = 3,
+ AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
+ AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
+};
+
+#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
+#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */
+#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */ /* the three AC_WAIT_* values are distinct single bits */
+
+struct ac_llvm_flow; /* forward declaration; used only through pointers in this header */
+struct ac_llvm_compiler; /* forward declaration; passed by pointer to ac_llvm_context_init */
+enum ac_float_mode; /* forward declaration; passed to ac_llvm_context_init */
+
+struct ac_llvm_flow_state { /* control-flow stack state; referenced through ac_llvm_context::flow */
+ struct ac_llvm_flow *stack;
+ unsigned depth_max; /* NOTE(review): presumably the allocated capacity of stack; confirm in the .c file */
+ unsigned depth; /* NOTE(review): presumably the number of entries in use; confirm in the .c file */
+};
+
+struct ac_llvm_context { /* state handed to every ac_build_* helper: LLVM handles, cached types/constants, target info */
+ LLVMContextRef context;
+ LLVMModuleRef module;
+ LLVMBuilderRef builder; /* builder that the helpers emit instructions with */
+
+ LLVMTypeRef voidt; /* cached LLVM type handles start here */
+ LLVMTypeRef i1;
+ LLVMTypeRef i8;
+ LLVMTypeRef i16;
+ LLVMTypeRef i32;
+ LLVMTypeRef i64;
+ LLVMTypeRef intptr;
+ LLVMTypeRef f16;
+ LLVMTypeRef f32;
+ LLVMTypeRef f64;
+ LLVMTypeRef v2i16;
+ LLVMTypeRef v2i32;
+ LLVMTypeRef v3i32;
+ LLVMTypeRef v4i32;
+ LLVMTypeRef v2f32;
+ LLVMTypeRef v3f32;
+ LLVMTypeRef v4f32;
+ LLVMTypeRef v8i32;
+ LLVMTypeRef iN_wavemask;
+ LLVMTypeRef iN_ballotmask;
+
+ LLVMValueRef i8_0; /* cached constants start here; the 32/64-bit _0/_1 values serve as add/mul identities in the reduction code */
+ LLVMValueRef i8_1;
+ LLVMValueRef i16_0;
+ LLVMValueRef i16_1;
+ LLVMValueRef i32_0;
+ LLVMValueRef i32_1;
+ LLVMValueRef i64_0;
+ LLVMValueRef i64_1;
+ LLVMValueRef f16_0;
+ LLVMValueRef f16_1;
+ LLVMValueRef f32_0;
+ LLVMValueRef f32_1;
+ LLVMValueRef f64_0;
+ LLVMValueRef f64_1;
+ LLVMValueRef i1true;
+ LLVMValueRef i1false;
+
+ /* Since ac_nir_translate makes a local copy of ac_llvm_context, there
+ * are two ac_llvm_contexts. Declare a pointer here, so that the control
+ * flow stack is shared by both ac_llvm_contexts.
+ */
+ struct ac_llvm_flow_state *flow;
+
+ unsigned range_md_kind;
+ unsigned invariant_load_md_kind;
+ unsigned uniform_md_kind;
+ unsigned fpmath_md_kind;
+ LLVMValueRef fpmath_md_2p5_ulp;
+ LLVMValueRef empty_md;
+
+ enum chip_class chip_class; /* compared against GFX6/GFX8/GFX10 to choose code paths */
+ enum radeon_family family; /* compared against CHIP_OLAND/CHIP_HAINAN in ac_export_mrt_z */
+
+ unsigned wave_size; /* wave_size - 1 is used as the index of the last lane */
+ unsigned ballot_mask_bits;
+
+ LLVMValueRef lds;
+};
+
+void
+ac_llvm_context_init(struct ac_llvm_context *ctx,
+ struct ac_llvm_compiler *compiler,
+ enum chip_class chip_class, enum radeon_family family,
+ enum ac_float_mode float_mode, unsigned wave_size,
+ unsigned ballot_mask_bits);
+
+void
+ac_llvm_context_dispose(struct ac_llvm_context *ctx);
+
+int
+ac_get_llvm_num_components(LLVMValueRef value);
+
+int
+ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
+
+LLVMValueRef
+ac_llvm_extract_elem(struct ac_llvm_context *ac,
+ LLVMValueRef value,
+ int index);
+
+unsigned ac_get_type_size(LLVMTypeRef type); /* callers compare the result against 4 and 8 to tell 32-bit from 64-bit types */
+
+LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
+LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
+LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
+
+LLVMValueRef
+ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
+ LLVMTypeRef return_type, LLVMValueRef *params,
+ unsigned param_count, unsigned attrib_mask); /* attrib_mask is an OR of AC_FUNC_ATTR_* flags at the call sites */
+
+void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize); /* callers append buf to an intrinsic name as its type suffix */
+
+LLVMValueRef
+ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+ unsigned count_incoming, LLVMValueRef *values,
+ LLVMBasicBlockRef *blocks);
+
+void ac_build_s_barrier(struct ac_llvm_context *ctx);
+void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
+ LLVMValueRef *pvgpr); /* takes the value by pointer; callers pass &value */
+
+LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx);
+
+LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
+LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
+ LLVMValueRef value);
+
+LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
+
+LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value);
+
+LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
+
+LLVMValueRef
+ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
+ unsigned value_count, unsigned component);
+
+LLVMValueRef
+ac_build_gather_values_extended(struct ac_llvm_context *ctx,
+ LLVMValueRef *values,
+ unsigned value_count,
+ unsigned value_stride,
+ bool load,
+ bool always_vector);
+LLVMValueRef
+ac_build_gather_values(struct ac_llvm_context *ctx,
+ LLVMValueRef *values,
+ unsigned value_count);
+
+LLVMValueRef
+ac_extract_components(struct ac_llvm_context *ctx,
+ LLVMValueRef value,
+ unsigned start,
+ unsigned channels);
+
+LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
+ LLVMValueRef value,
+ unsigned num_channels);
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
+
+LLVMValueRef
+ac_build_fdiv(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef den);
+
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef pre_shift,
+ LLVMValueRef post_shift,
+ LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef pre_shift,
+ LLVMValueRef post_shift,
+ LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef post_shift);
+
+void
+ac_prepare_cube_coords(struct ac_llvm_context *ctx,
+ bool is_deriv, bool is_array, bool is_lod,
+ LLVMValueRef *coords_arg,
+ LLVMValueRef *derivs_arg);
+
+
+LLVMValueRef
+ac_build_fs_interp(struct ac_llvm_context *ctx,
+ LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number,
+ LLVMValueRef params,
+ LLVMValueRef i,
+ LLVMValueRef j);
+
+LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+ LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number,
+ LLVMValueRef params,
+ LLVMValueRef i,
+ LLVMValueRef j);
+
+LLVMValueRef
+ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
+ LLVMValueRef parameter,
+ LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number,
+ LLVMValueRef params);
+
+LLVMValueRef
+ac_build_gep_ptr(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr,
+ LLVMValueRef index);
+
+LLVMValueRef
+ac_build_gep0(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr,
+ LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+ LLVMValueRef index);
+
+void
+ac_build_indexed_store(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index,
+ LLVMValueRef value);
+
+LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
+ LLVMValueRef index);
+LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index);
+
+void
+ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ unsigned num_channels,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned inst_offset,
+ unsigned cache_policy,
+ bool swizzle_enable_hint);
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef data,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ unsigned cache_policy);
+
+LLVMValueRef
+ac_build_buffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ int num_channels,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned inst_offset,
+ unsigned cache_policy,
+ bool can_speculate,
+ bool allow_smem);
+
+LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ unsigned cache_policy,
+ bool can_speculate);
+
+LLVMValueRef
+ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned cache_policy);
+
+LLVMValueRef
+ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned cache_policy);
+
+LLVMValueRef
+ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ unsigned cache_policy,
+ bool can_speculate);
+
+LLVMValueRef
+ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ unsigned cache_policy,
+ bool can_speculate);
+
+/* For ac_build_fetch_format.
+ *
+ * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi).
+ *
+ * NOTE(review): no ac_build_fetch_format is declared in the visible part of
+ * this header; ac_build_opencoded_load_format below takes an
+ * "unsigned format" parameter. Confirm which function consumes these values.
+ */
+enum {
+ AC_FETCH_FORMAT_FLOAT = 0,
+ AC_FETCH_FORMAT_FIXED,
+ AC_FETCH_FORMAT_UNORM,
+ AC_FETCH_FORMAT_SNORM,
+ AC_FETCH_FORMAT_USCALED,
+ AC_FETCH_FORMAT_SSCALED,
+ AC_FETCH_FORMAT_UINT,
+ AC_FETCH_FORMAT_SINT,
+};
+
+LLVMValueRef
+ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
+ unsigned log_size,
+ unsigned num_channels,
+ unsigned format,
+ bool reverse,
+ bool known_aligned,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned cache_policy,
+ bool can_speculate);
+
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned cache_policy);
+
+void
+ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned cache_policy);
+
+void
+ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ unsigned cache_policy);
+
+void
+ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ unsigned cache_policy);
+
+LLVMValueRef
+ac_get_thread_id(struct ac_llvm_context *ctx);
+
+#define AC_TID_MASK_TOP_LEFT 0xfffffffc
+#define AC_TID_MASK_TOP 0xfffffffd
+#define AC_TID_MASK_LEFT 0xfffffffe
+
+LLVMValueRef
+ac_build_ddxy(struct ac_llvm_context *ctx,
+ uint32_t mask,
+ int idx,
+ LLVMValueRef val);
+
+#define AC_SENDMSG_GS 2
+#define AC_SENDMSG_GS_DONE 3
+#define AC_SENDMSG_GS_ALLOC_REQ 9
+
+#define AC_SENDMSG_GS_OP_NOP (0 << 4)
+#define AC_SENDMSG_GS_OP_CUT (1 << 4)
+#define AC_SENDMSG_GS_OP_EMIT (2 << 4)
+#define AC_SENDMSG_GS_OP_EMIT_CUT (3 << 4)
+
+void ac_build_sendmsg(struct ac_llvm_context *ctx,
+ uint32_t msg,
+ LLVMValueRef wave_id);
+
+LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx,
+ LLVMValueRef arg,
+ LLVMTypeRef dst_type);
+
+LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
+ LLVMValueRef arg,
+ LLVMTypeRef dst_type);
+LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b);
+LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b);
+LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b);
+LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
+ LLVMValueRef b);
+LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
+LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
+LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
+
+struct ac_export_args {
+ LLVMValueRef out[4];
+ unsigned target;
+ unsigned enabled_channels;
+ bool compr;
+ bool done;
+ bool valid_mask;
+};
+
+void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a);
+
+void ac_build_export_null(struct ac_llvm_context *ctx);
+
+enum ac_image_opcode {
+ ac_image_sample,
+ ac_image_gather4,
+ ac_image_load,
+ ac_image_load_mip,
+ ac_image_store,
+ ac_image_store_mip,
+ ac_image_get_lod,
+ ac_image_get_resinfo,
+ ac_image_atomic,
+ ac_image_atomic_cmpswap,
+};
+
+enum ac_atomic_op {
+ ac_atomic_swap,
+ ac_atomic_add,
+ ac_atomic_sub,
+ ac_atomic_smin,
+ ac_atomic_umin,
+ ac_atomic_smax,
+ ac_atomic_umax,
+ ac_atomic_and,
+ ac_atomic_or,
+ ac_atomic_xor,
+ ac_atomic_inc_wrap,
+ ac_atomic_dec_wrap,
+};
+
+/* These cache policy bits match the definitions used by the LLVM intrinsics. */
+enum ac_image_cache_policy {
+ ac_glc = 1 << 0, /* per-CU cache control */
+ ac_slc = 1 << 1, /* global L2 cache control */
+ ac_dlc = 1 << 2, /* per-shader-array cache control */
+};
+
+/* Argument bundle consumed by ac_build_image_opcode(). */
+struct ac_image_args {
+ enum ac_image_opcode opcode : 4;
+ enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
+ enum ac_image_dim dim : 3;
+ unsigned dmask : 4;
+ /* 3 bits wide, same as the number of ac_image_cache_policy flags;
+  * presumably a mask of those flags - TODO confirm. */
+ unsigned cache_policy : 3;
+ bool unorm : 1;
+ bool level_zero : 1;
+ unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
+
+ LLVMValueRef resource;
+ LLVMValueRef sampler;
+ LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
+ LLVMValueRef offset;
+ LLVMValueRef bias;
+ LLVMValueRef compare;
+ LLVMValueRef derivs[6];
+ LLVMValueRef coords[4];
+ LLVMValueRef lod; /* also used by ac_image_get_resinfo */
+};
+
+LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
+ struct ac_image_args *a);
+LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc);
+LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2]);
+LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2], unsigned bits, bool hi);
+LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
+ LLVMValueRef args[2], unsigned bits, bool hi);
+LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
+void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
+LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
+ LLVMValueRef offset, LLVMValueRef width,
+ bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+ LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+ LLVMValueRef s1, LLVMValueRef s2);
+
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
+
+LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef src1, LLVMValueRef src2,
+ unsigned bitsize);
+
+LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
+
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+ LLVMValueRef src0);
+
+void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
+ LLVMValueRef main_fn,
+ uint8_t *vs_output_param_offset,
+ uint32_t num_outputs,
+ uint8_t *num_param_exports);
+void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
+
+void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
+LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
+ LLVMValueRef dw_addr);
+void ac_lds_store(struct ac_llvm_context *ctx,
+ LLVMValueRef dw_addr, LLVMValueRef value);
+
+LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
+ LLVMTypeRef dst_type,
+ LLVMValueRef src0);
+
+LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
+LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
+
+/* Structured control-flow builders (loop / if / else / endif).
+ *
+ * Every helper that opens or closes a construct takes an integer label_id.
+ * NOTE(review): presumably the id is only used to name the generated basic
+ * blocks - confirm against the implementation.
+ */
+void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id);
+void ac_build_break(struct ac_llvm_context *ctx);
+void ac_build_continue(struct ac_llvm_context *ctx);
+void ac_build_else(struct ac_llvm_context *ctx, int label_id);
+void ac_build_endif(struct ac_llvm_context *ctx, int label_id);
+void ac_build_endloop(struct ac_llvm_context *ctx, int label_id);
+void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
+void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
+		 int label_id);
+void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
+		  int label_id);
+
+LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
+ const char *name);
+LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
+ const char *name);
+
+LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+ LLVMTypeRef type);
+
+LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
+ unsigned count);
+
+LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
+ unsigned rshift, unsigned bitwidth);
+
+void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
+ LLVMValueRef *addr, bool is_array_tex);
+
+LLVMValueRef
+ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
+
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
+
+LLVMValueRef
+ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
+
+LLVMValueRef
+ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
+
+LLVMValueRef
+ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
+
+LLVMValueRef
+ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
+
+LLVMValueRef
+ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
+
+/**
+ * Common arguments for a scan/reduce operation that accumulates per-wave
+ * values across an entire workgroup, while respecting the order of waves.
+ */
+struct ac_wg_scan {
+ bool enable_reduce;
+ bool enable_exclusive;
+ bool enable_inclusive;
+ nir_op op;
+ LLVMValueRef src; /* clobbered! */
+ LLVMValueRef result_reduce;
+ LLVMValueRef result_exclusive;
+ LLVMValueRef result_inclusive;
+ LLVMValueRef extra;
+ LLVMValueRef waveidx;
+ LLVMValueRef numwaves; /* only needed for "reduce" operations */
+
+ /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
+ LLVMValueRef scratch;
+ unsigned maxwaves;
+};
+
+void
+ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+
+void
+ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+
+LLVMValueRef
+ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
+ unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
+
+LLVMValueRef
+ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
+
+LLVMValueRef
+ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef
+ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef
+ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
+
+LLVMValueRef
+ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
+
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+ LLVMValueRef *args, unsigned num_args);
+
+LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
+ LLVMValueRef ptr, LLVMValueRef val,
+ const char *sync_scope);
+
+LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+ LLVMValueRef cmp, LLVMValueRef val,
+ const char *sync_scope);
+
+void
+ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
+ LLVMValueRef stencil, LLVMValueRef samplemask,
+ struct ac_export_args *args);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include "ac_llvm_cull.h"
+#include <llvm-c/Core.h>
+
+/* Per-primitive facts about the W components of the three vertices,
+ * filled in by ac_analyze_position_w(). All members are i1 values.
+ */
+struct ac_position_w_info {
+ /* If a primitive intersects the W=0 plane, it causes a reflection
+ * of the determinant used for face culling. Every vertex behind
+ * the W=0 plane negates the determinant, so having 2 vertices behind
+ * the plane has no effect. This is i1 true if the determinant should be
+ * negated.
+ */
+ LLVMValueRef w_reflection;
+
+ /* If we simplify the "-w <= p <= w" view culling equation, we get
+ * "-w <= w", which can't be satisfied when w is negative.
+ * In perspective projection, a negative W means that the primitive
+ * is behind the viewer, but the equation is independent of the type
+ * of projection.
+ *
+ * w_accepted is false when all W are negative and therefore
+ * the primitive is invisible.
+ */
+ LLVMValueRef w_accepted;
+
+ /* Logical NOT of any_w_negative, i.e. "no vertex has W < 0".
+ * Despite the name, a W of exactly 0 still counts as "positive" here.
+ */
+ LLVMValueRef all_w_positive;
+ /* True if at least one vertex has W < 0 (ordered less-than compare). */
+ LLVMValueRef any_w_negative;
+};
+
+/* Inspect the W component of each of the three vertices in pos and fill
+ * every member of *w with the resulting i1 values.
+ */
+static void ac_analyze_position_w(struct ac_llvm_context *ctx,
+				  LLVMValueRef pos[3][4],
+				  struct ac_position_w_info *w)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMValueRef reflection = ctx->i1false;
+	LLVMValueRef any_negative = ctx->i1false;
+	LLVMValueRef all_negative = ctx->i1true;
+
+	for (unsigned vtx = 0; vtx < 3; vtx++) {
+		LLVMValueRef is_negative =
+			LLVMBuildFCmp(b, LLVMRealOLT, pos[vtx][3], ctx->f32_0, "");
+
+		/* Each vertex behind W=0 flips the reflection bit once. */
+		reflection = LLVMBuildXor(b, reflection, is_negative, "");
+		any_negative = LLVMBuildOr(b, any_negative, is_negative, "");
+		all_negative = LLVMBuildAnd(b, all_negative, is_negative, "");
+	}
+
+	w->w_reflection = reflection;
+	w->any_w_negative = any_negative;
+	w->all_w_positive = LLVMBuildNot(b, any_negative, "");
+	w->w_accepted = LLVMBuildNot(b, all_negative, "");
+}
+
+/* Front-face, back-face and zero-area culling.
+ *
+ * Returns an i1 that is true when the triangle passes the enabled tests.
+ * The sign of the 2D determinant of the XY positions decides the facing;
+ * w->w_reflection says whether that sign has to be flipped first.
+ */
+static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
+				 LLVMValueRef pos[3][4],
+				 struct ac_position_w_info *w,
+				 bool cull_front,
+				 bool cull_back,
+				 bool cull_zero_area)
+{
+	/* Culling both faces rejects everything. */
+	if (cull_front && cull_back)
+		return ctx->i1false;
+
+	/* Nothing enabled: everything passes and no code is emitted. */
+	if (!(cull_front || cull_back || cull_zero_area))
+		return ctx->i1true;
+
+	LLVMBuilderRef b = ctx->builder;
+
+	/* Determinant of the triangle; a value of 0 means the area is 0. */
+	LLVMValueRef dx_20 = LLVMBuildFSub(b, pos[2][0], pos[0][0], "");
+	LLVMValueRef dy_10 = LLVMBuildFSub(b, pos[1][1], pos[0][1], "");
+	LLVMValueRef dx_01 = LLVMBuildFSub(b, pos[0][0], pos[1][0], "");
+	LLVMValueRef dy_02 = LLVMBuildFSub(b, pos[0][1], pos[2][1], "");
+	LLVMValueRef prod_a = LLVMBuildFMul(b, dx_20, dy_10, "");
+	LLVMValueRef prod_b = LLVMBuildFMul(b, dx_01, dy_02, "");
+	LLVMValueRef area = LLVMBuildFSub(b, prod_a, prod_b, "");
+
+	/* An odd number of vertices with negative W mirrors the triangle. */
+	LLVMValueRef mirrored = LLVMBuildFNeg(b, area, "");
+	area = LLVMBuildSelect(b, w->w_reflection, mirrored, area, "");
+
+	/* The guards above leave exactly three cases: front only, back only,
+	 * or zero-area only. Strict compares also reject a zero determinant.
+	 */
+	LLVMRealPredicate pred;
+	if (cull_front)
+		pred = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
+	else if (cull_back)
+		pred = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
+	else
+		pred = LLVMRealONE;
+
+	return LLVMBuildFCmp(b, pred, area, ctx->f32_0, "");
+}
+
+/* Perform view culling and small primitive elimination and return true
+ * if the primitive is accepted and initially_accepted == true.
+ *
+ * \param pos                  Vertex positions 3x vec4
+ * \param initially_accepted   i1 result of the earlier culling stages; the
+ *                             returned value is never true when this is false
+ * \param w                    W analysis from ac_analyze_position_w()
+ * \param vp_scale             Viewport scale XY (small primitive test only)
+ * \param vp_translate         Viewport translation XY (small primitive test only)
+ * \param small_prim_precision Amount by which the screen-space bounding box
+ *                             is grown before rounding
+ * \param cull_view_xy         Reject if the bbox is outside [-1, 1] in X or Y
+ * \param cull_view_near_z     Reject if the bbox is in front of the near plane
+ * \param cull_view_far_z      Reject if the bbox is beyond the far plane (Z > 1)
+ * \param cull_small_prims     Reject if the bbox covers no sample position
+ * \param use_halfz_clip_space Near plane is at Z = 0 instead of Z = -1
+ */
+static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
+			      LLVMValueRef pos[3][4],
+			      LLVMValueRef initially_accepted,
+			      struct ac_position_w_info *w,
+			      LLVMValueRef vp_scale[2],
+			      LLVMValueRef vp_translate[2],
+			      LLVMValueRef small_prim_precision,
+			      bool cull_view_xy,
+			      bool cull_view_near_z,
+			      bool cull_view_far_z,
+			      bool cull_small_prims,
+			      bool use_halfz_clip_space)
+{
+	LLVMBuilderRef builder = ctx->builder;
+
+	/* Nothing to test here: pass the incoming state through unchanged.
+	 * Returning a constant "true" would discard the results of W and
+	 * face culling, because the caller returns this value directly.
+	 */
+	if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
+		return initially_accepted;
+
+	/* Skip the culling if the primitive has already been rejected or
+	 * if any W is negative. The bounding box culling doesn't work when
+	 * W is negative.
+	 */
+	LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
+					 w->all_w_positive, "");
+	/* The result lives in a stack slot so that the skipped path keeps
+	 * the incoming value and the taken path can overwrite it.
+	 */
+	LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
+	LLVMBuildStore(builder, initially_accepted, accepted_var);
+
+	ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
+	{
+		LLVMValueRef bbox_min[3], bbox_max[3];
+		LLVMValueRef accepted = initially_accepted;
+
+		/* Compute the primitive bounding box for easy culling. */
+		for (unsigned chan = 0; chan < 3; chan++) {
+			bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
+			bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
+
+			bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
+			bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
+		}
+
+		/* View culling. */
+		if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
+			for (unsigned chan = 0; chan < 3; chan++) {
+				LLVMValueRef visible;
+
+				/* Lower bound: -1, or 0 for Z in half-Z clip space. */
+				if ((cull_view_xy && chan <= 1) ||
+				    (cull_view_near_z && chan == 2)) {
+					float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
+					visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
+								LLVMConstReal(ctx->f32, t), "");
+					accepted = LLVMBuildAnd(builder, accepted, visible, "");
+				}
+
+				/* Upper bound: always 1. */
+				if ((cull_view_xy && chan <= 1) ||
+				    (cull_view_far_z && chan == 2)) {
+					visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
+								ctx->f32_1, "");
+					accepted = LLVMBuildAnd(builder, accepted, visible, "");
+				}
+			}
+		}
+
+		/* Small primitive elimination. */
+		if (cull_small_prims) {
+			/* Assuming a sample position at (0.5, 0.5), if we round
+			 * the bounding box min/max extents and the results of
+			 * the rounding are equal in either the X or Y direction,
+			 * the bounding box does not intersect the sample.
+			 *
+			 * See these GDC slides for pictures:
+			 * https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
+			 */
+			LLVMValueRef min, max, not_equal[2], visible;
+
+			for (unsigned chan = 0; chan < 2; chan++) {
+				/* Convert the position to screen-space coordinates. */
+				min = ac_build_fmad(ctx, bbox_min[chan],
+						    vp_scale[chan], vp_translate[chan]);
+				max = ac_build_fmad(ctx, bbox_max[chan],
+						    vp_scale[chan], vp_translate[chan]);
+				/* Scale the bounding box according to the precision of
+				 * the rasterizer and the number of MSAA samples. */
+				min = LLVMBuildFSub(builder, min, small_prim_precision, "");
+				max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
+
+				/* Determine if the bbox intersects the sample point.
+				 * It also works for MSAA, but vp_scale, vp_translate,
+				 * and small_prim_precision are computed differently.
+				 */
+				min = ac_build_round(ctx, min);
+				max = ac_build_round(ctx, max);
+				not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
+			}
+			visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
+			accepted = LLVMBuildAnd(builder, accepted, visible, "");
+		}
+
+		LLVMBuildStore(builder, accepted, accepted_var);
+	}
+	ac_build_endif(ctx, 10000000);
+
+	return LLVMBuildLoad(builder, accepted_var, "");
+}
+
+/**
+ * Emit the culling tests for one triangle and return an i1 that is true
+ * when the triangle is accepted (not culled).
+ *
+ * \param pos                   Vertex positions 3x vec4
+ * \param initially_accepted    AND'ed with the result. Some computations can be
+ *                              skipped if this is false.
+ * \param vp_scale              Viewport scale XY.
+ *                              For MSAA, multiply them by the number of samples.
+ * \param vp_translate          Viewport translation XY.
+ *                              For MSAA, multiply them by the number of samples.
+ * \param small_prim_precision  Precision of small primitive culling. This should
+ *                              be the same as or greater than the precision of
+ *                              the rasterizer. Set to num_samples / 2^subpixel_bits.
+ *                              subpixel_bits are defined by the quantization mode.
+ * \param options               See ac_cull_options.
+ */
+LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
+			      LLVMValueRef pos[3][4],
+			      LLVMValueRef initially_accepted,
+			      LLVMValueRef vp_scale[2],
+			      LLVMValueRef vp_translate[2],
+			      LLVMValueRef small_prim_precision,
+			      struct ac_cull_options *options)
+{
+	LLVMBuilderRef b = ctx->builder;
+	struct ac_position_w_info w_info;
+	LLVMValueRef result;
+
+	ac_analyze_position_w(ctx, pos, &w_info);
+
+	/* Stage 1: W culling, combined with the caller's initial state. */
+	result = ctx->i1true;
+	if (options->cull_w)
+		result = w_info.w_accepted;
+	result = LLVMBuildAnd(b, result, initially_accepted, "");
+
+	/* Stage 2: front/back face and zero-area culling. */
+	LLVMValueRef face_ok = ac_cull_face(ctx, pos, &w_info,
+					    options->cull_front,
+					    options->cull_back,
+					    options->cull_zero_area);
+	result = LLVMBuildAnd(b, result, face_ok, "");
+
+	/* Stage 3: view culling and small primitive elimination. */
+	return cull_bbox(ctx, pos, result, &w_info, vp_scale, vp_translate,
+			 small_prim_precision,
+			 options->cull_view_xy,
+			 options->cull_view_near_z,
+			 options->cull_view_far_z,
+			 options->cull_small_prims,
+			 options->use_halfz_clip_space);
+}
--- /dev/null
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef AC_LLVM_CULL_H
+#define AC_LLVM_CULL_H
+
+#include "ac_llvm_build.h"
+
+/* Compile-time switches selecting which tests ac_cull_triangle() emits. */
+struct ac_cull_options {
+ /* In general, I recommend setting all to true except view Z culling,
+ * which isn't so effective because W culling is cheaper and partially
+ * replaces near Z culling, and you don't need to set Position.z
+ * if Z culling is disabled.
+ *
+ * If something doesn't work, turn some of these off to find out what.
+ */
+ bool cull_front; /* keep only triangles whose XY determinant is > 0 (>= 0 without cull_zero_area) */
+ bool cull_back; /* keep only triangles whose XY determinant is < 0 (<= 0 without cull_zero_area) */
+ bool cull_view_xy; /* reject if the bounding box is outside [-1, 1] in X or Y */
+ bool cull_view_near_z; /* reject if the bounding box max Z is below the near plane */
+ bool cull_view_far_z; /* reject if the bounding box min Z is above 1 */
+ bool cull_small_prims; /* reject if the screen-space bounding box covers no sample */
+ bool cull_zero_area; /* reject if the XY determinant is 0 */
+ bool cull_w; /* cull primitives with all W < 0 */
+
+ /* Near plane for cull_view_near_z is Z = 0 when true, Z = -1 otherwise. */
+ bool use_halfz_clip_space;
+};
+
+LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
+ LLVMValueRef pos[3][4],
+ LLVMValueRef initially_accepted,
+ LLVMValueRef vp_scale[2],
+ LLVMValueRef vp_translate[2],
+ LLVMValueRef small_prim_precision,
+ struct ac_cull_options *options);
+
+#endif
--- /dev/null
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#include <cstring>
+
+#include "ac_binary.h"
+#include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
+
+#include "util/macros.h"
+
+#include <llvm-c/Core.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/Analysis/TargetLibraryInfo.h>
+#include <llvm/Transforms/IPO.h>
+
+#include <llvm/IR/LegacyPassManager.h>
+
+/* Attach a dereferenceable(bytes) attribute to a function argument.
+ * val must refer to an llvm::Argument.
+ */
+void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
+{
+	llvm::Argument *arg = llvm::unwrap<llvm::Argument>(val);
+	llvm::Attribute deref =
+		llvm::Attribute::getWithDereferenceableBytes(arg->getContext(), bytes);
+
+	arg->addAttr(deref);
+}
+
+/* Return true if the function argument arg carries the InReg attribute.
+ * arg must refer to an llvm::Argument.
+ */
+bool ac_is_sgpr_param(LLVMValueRef arg)
+{
+	llvm::Argument *param = llvm::unwrap<llvm::Argument>(arg);
+	/* Attribute index 0 is the return value; parameters start at 1. */
+	const unsigned attr_index = param->getArgNo() + 1;
+
+	return param->getParent()->getAttributes()
+		.hasAttribute(attr_index, llvm::Attribute::InReg);
+}
+
+/* Thin wrapper: return LLVMGetCalledValue(call). */
+LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
+{
+ return LLVMGetCalledValue(call);
+}
+
+/* Return true if the value kind of v is LLVMFunctionValueKind. */
+bool ac_llvm_is_function(LLVMValueRef v)
+{
+ return LLVMGetValueKind(v) == LLVMFunctionValueKind;
+}
+
+/* Create a module named "mesa-shader" in ctx and copy the target triple
+ * and data layout from the target machine tm into it.
+ */
+LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
+{
+	llvm::TargetMachine *machine = reinterpret_cast<llvm::TargetMachine*>(tm);
+	LLVMModuleRef module_ref = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
+	llvm::Module *mod = llvm::unwrap(module_ref);
+
+	mod->setTargetTriple(machine->getTargetTriple().getTriple());
+	mod->setDataLayout(machine->createDataLayout());
+	return module_ref;
+}
+
+/* Create an IR builder in ctx and give it the fast-math flags that
+ * correspond to float_mode. AC_FLOAT_MODE_DEFAULT leaves the builder's
+ * flags untouched.
+ */
+LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
+				 enum ac_float_mode float_mode)
+{
+	LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
+	llvm::FastMathFlags fmf;
+	bool apply_flags = false;
+
+	switch (float_mode) {
+	case AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH:
+		fmf.setNoSignedZeros();
+		apply_flags = true;
+		break;
+	case AC_FLOAT_MODE_UNSAFE_FP_MATH:
+		fmf.setFast();
+		apply_flags = true;
+		break;
+	case AC_FLOAT_MODE_DEFAULT:
+		break;
+	}
+
+	if (apply_flags)
+		llvm::unwrap(builder)->setFastMathFlags(fmf);
+
+	return builder;
+}
+
+/* Allocate a TargetLibraryInfoImpl for the given target triple string.
+ * Release it with ac_dispose_target_library_info().
+ */
+LLVMTargetLibraryInfoRef
+ac_create_target_library_info(const char *triple)
+{
+	llvm::Triple target(triple);
+	llvm::TargetLibraryInfoImpl *impl = new llvm::TargetLibraryInfoImpl(target);
+
+	return reinterpret_cast<LLVMTargetLibraryInfoRef>(impl);
+}
+
+/* Free an object returned by ac_create_target_library_info(). */
+void
+ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
+{
+	llvm::TargetLibraryInfoImpl *impl =
+		reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
+
+	delete impl;
+}
+
+/* A raw_pwrite_stream whose backing store is a malloc()-family buffer, so
+ * that the result can be handed to C code and released with free().
+ */
+struct raw_memory_ostream : public llvm::raw_pwrite_stream {
+	char *buffer;   /* backing store, grown with realloc() */
+	size_t written; /* number of valid bytes in buffer */
+	size_t bufsize; /* allocated size of buffer */
+
+	raw_memory_ostream()
+		: buffer(NULL), written(0), bufsize(0)
+	{
+		SetUnbuffered();
+	}
+
+	~raw_memory_ostream()
+	{
+		free(buffer);
+	}
+
+	/* Forget the written data but keep the allocation for reuse. */
+	void clear()
+	{
+		written = 0;
+	}
+
+	/* Transfer ownership of the buffer to the caller and reset the stream. */
+	void take(char *&out_buffer, size_t &out_size)
+	{
+		out_buffer = buffer;
+		out_size = written;
+
+		buffer = NULL;
+		bufsize = 0;
+		written = 0;
+	}
+
+	void flush() = delete;
+
+	/* Append size bytes, growing the buffer if needed. Aborts on size
+	 * overflow or allocation failure.
+	 */
+	void write_impl(const char *ptr, size_t size) override
+	{
+		const size_t needed = written + size;
+
+		/* Unsigned wrap-around means the request cannot be satisfied. */
+		if (unlikely(needed < written))
+			abort();
+
+		if (needed > bufsize) {
+			/* At least 1 KiB, at least what is needed, and at
+			 * least 4/3 of the previous size.
+			 */
+			bufsize = MAX3(1024, needed, bufsize / 3 * 4);
+			buffer = (char *)realloc(buffer, bufsize);
+			if (!buffer) {
+				fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
+				abort();
+			}
+		}
+
+		memcpy(buffer + written, ptr, size);
+		written = needed;
+	}
+
+	/* Overwrite already-written bytes; the range must lie inside them. */
+	void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
+	{
+		assert(offset == (size_t)offset &&
+		       offset + size >= offset && offset + size <= written);
+		memcpy(buffer + offset, ptr, size);
+	}
+
+	uint64_t current_pos() const override
+	{
+		return written;
+	}
+};
+
+/* The LLVM compiler is represented as a pass manager containing passes for
+ * optimizations, instruction selection, and code generation.
+ *
+ * Member order matters: ac_create_llvm_passes() hands ostream to
+ * addPassesToEmitFile() together with passmgr. C++ destroys members in
+ * reverse declaration order, so passmgr goes away before ostream does.
+ * NOTE(review): presumably the emit passes keep a reference to the stream,
+ * which is why ostream must be declared first - confirm.
+ */
+struct ac_compiler_passes {
+ raw_memory_ostream ostream; /* ELF shader binary stream */
+ llvm::legacy::PassManager passmgr; /* list of passes */
+};
+
+/* Build the pass pipeline that compiles a module for target machine tm
+ * into an object file held in memory.
+ *
+ * Returns NULL (after printing a message to stderr) if the target machine
+ * cannot emit object files. Free the result with ac_destroy_llvm_passes().
+ */
+struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
+{
+	struct ac_compiler_passes *passes = new ac_compiler_passes();
+	if (!passes)
+		return NULL;
+
+	llvm::TargetMachine *machine = reinterpret_cast<llvm::TargetMachine*>(tm);
+
+	/* addPassesToEmitFile() returns true when emission is NOT possible. */
+	const bool unsupported =
+		machine->addPassesToEmitFile(passes->passmgr, passes->ostream,
+					     nullptr,
+					     llvm::TargetMachine::CGFT_ObjectFile);
+	if (!unsupported)
+		return passes;
+
+	fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
+	delete passes;
+	return NULL;
+}
+
+/* Destroy a pass set created by ac_create_llvm_passes(). The stream's
+ * destructor also frees any ELF data that was never taken out of it.
+ */
+void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
+{
+ delete p;
+}
+
+/* Run the pass pipeline in p on module and hand the emitted bytes to the
+ * caller.
+ *
+ * On return *pelf_buffer owns the stream's realloc()-allocated buffer and
+ * *pelf_size holds the number of bytes written; the stream is reset.
+ *
+ * NOTE(review): the return value is documented as "false on failure", but
+ * this implementation has no failure path and always returns true.
+ */
+bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
+ char **pelf_buffer, size_t *pelf_size)
+{
+ p->passmgr.run(*llvm::unwrap(module));
+ p->ostream.take(*pelf_buffer, *pelf_size);
+ return true;
+}
+
+/* Append the pass returned by llvm::createBarrierNoopPass() to passmgr. */
+void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
+{
+ llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
+}
+
+/* Turn on GlobalISel for the given target machine. */
+void ac_enable_global_isel(LLVMTargetMachineRef tm)
+{
+ reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
+}
+
+/* Build a sequentially consistent atomicrmw instruction with an explicit
+ * synchronization scope.
+ *
+ * \param op          Operation, as an LLVM-C LLVMAtomicRMWBinOp value
+ * \param ptr         Address operand
+ * \param val         Value operand
+ * \param sync_scope  Scope name, looked up (or registered) in ctx->context
+ * \return the new instruction
+ */
+LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
+				 LLVMValueRef ptr, LLVMValueRef val,
+				 const char *sync_scope) {
+	llvm::AtomicRMWInst::BinOp binop;
+	switch (op) {
+	case LLVMAtomicRMWBinOpXchg:
+		binop = llvm::AtomicRMWInst::Xchg;
+		break;
+	case LLVMAtomicRMWBinOpAdd:
+		binop = llvm::AtomicRMWInst::Add;
+		break;
+	case LLVMAtomicRMWBinOpSub:
+		binop = llvm::AtomicRMWInst::Sub;
+		break;
+	case LLVMAtomicRMWBinOpAnd:
+		binop = llvm::AtomicRMWInst::And;
+		break;
+	case LLVMAtomicRMWBinOpNand:
+		binop = llvm::AtomicRMWInst::Nand;
+		break;
+	case LLVMAtomicRMWBinOpOr:
+		binop = llvm::AtomicRMWInst::Or;
+		break;
+	case LLVMAtomicRMWBinOpXor:
+		binop = llvm::AtomicRMWInst::Xor;
+		break;
+	case LLVMAtomicRMWBinOpMax:
+		binop = llvm::AtomicRMWInst::Max;
+		break;
+	case LLVMAtomicRMWBinOpMin:
+		binop = llvm::AtomicRMWInst::Min;
+		break;
+	case LLVMAtomicRMWBinOpUMax:
+		binop = llvm::AtomicRMWInst::UMax;
+		break;
+	case LLVMAtomicRMWBinOpUMin:
+		binop = llvm::AtomicRMWInst::UMin;
+		break;
+	default:
+		/* unreachable() negates its argument inside an assert, so
+		 * it must be given the plain string: a pre-negated string
+		 * makes the assertion always pass.
+		 */
+		unreachable("invalid LLVMAtomicRMWBinOp");
+	}
+	unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+	return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
+				  binop, llvm::unwrap(ptr), llvm::unwrap(val),
+				  llvm::AtomicOrdering::SequentiallyConsistent, SSID));
+}
+
+/* Build a cmpxchg instruction that is sequentially consistent on both the
+ * success and the failure path, with an explicit synchronization scope.
+ *
+ * \param ptr         Address operand
+ * \param cmp         Expected value
+ * \param val         Replacement value
+ * \param sync_scope  Scope name, looked up (or registered) in ctx->context
+ * \return the new instruction
+ */
+LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+				      LLVMValueRef cmp, LLVMValueRef val,
+				      const char *sync_scope) {
+	const llvm::AtomicOrdering seq_cst = llvm::AtomicOrdering::SequentiallyConsistent;
+	unsigned scope_id = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
+	auto *irb = llvm::unwrap(ctx->builder);
+	auto *inst = irb->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
+					      llvm::unwrap(val),
+					      seq_cst, seq_cst, scope_id);
+
+	return llvm::wrap(inst);
+}
--- /dev/null
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
+#include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
+#include "util/bitscan.h"
+#include <llvm-c/Core.h>
+#include <llvm-c/Support.h>
+#include <llvm-c/Transforms/IPO.h>
+#include <llvm-c/Transforms/Scalar.h>
+#include <llvm-c/Transforms/Utils.h>
+#include "c11/threads.h"
+#include "gallivm/lp_bld_misc.h"
+#include "util/u_math.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Register the AMDGPU backend components and set the process-wide LLVM
+ * command-line options. Invoked through call_once() by ac_init_llvm_once().
+ */
+static void ac_init_llvm_target()
+{
+	LLVMInitializeAMDGPUTargetInfo();
+	LLVMInitializeAMDGPUTarget();
+	LLVMInitializeAMDGPUTargetMC();
+	LLVMInitializeAMDGPUAsmPrinter();
+	/* The assembly parser is needed for inline assembly. */
+	LLVMInitializeAMDGPUAsmParser();
+	/* The disassembler is needed for ACO disassembly. */
+	LLVMInitializeAMDGPUDisassembler();
+
+	/* Option list handed to LLVM's command-line parser:
+	 *
+	 * - "mesa" is the prefix used for error messages.
+	 * - -simplifycfg-sink-common=false works around a bug in llvm 4.0
+	 *   that causes image intrinsics to disappear
+	 *   (https://reviews.llvm.org/D26348).
+	 * - -global-isel-abort=2 is a no-op unless global isel has been
+	 *   enabled; it makes the backend fall back to SelectionDAG and
+	 *   print a diagnostic message if global isel fails.
+	 */
+	const char *llvm_options[] = {
+		"mesa",
+		"-simplifycfg-sink-common=false",
+		"-global-isel-abort=2",
+#if LLVM_VERSION_MAJOR >= 10
+		/* Atomic optimizations require LLVM 10.0 for gfx10 support. */
+		"-amdgpu-atomic-optimizations=true",
+#endif
+	};
+	const unsigned num_options = ARRAY_SIZE(llvm_options);
+
+	LLVMParseCommandLineOptions(num_options, llvm_options, NULL);
+}
+
+/* Guards ac_init_llvm_target() so it runs a single time per process. */
+static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
+
+/* Initialize the AMDGPU LLVM target; repeated calls go through call_once()
+ * and therefore run the initializer only the first time.
+ */
+void
+ac_init_llvm_once(void)
+{
+	call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
+}
+
+/* Look up the LLVM target for a triple.
+ *
+ * Returns the target, or NULL after printing a diagnostic to stderr when
+ * LLVM does not know the triple.
+ */
+static LLVMTargetRef ac_get_llvm_target(const char *triple)
+{
+	LLVMTargetRef target = NULL;
+	char *err_message = NULL;
+
+	if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
+		/* Always terminate the line, even when LLVM supplied no
+		 * message, so later stderr output does not run into it.
+		 */
+		fprintf(stderr, "Cannot find target for triple %s %s\n", triple,
+			err_message ? err_message : "");
+		LLVMDisposeMessage(err_message);
+		return NULL;
+	}
+	return target;
+}
+
+/* Map a radeon_family value to the processor name string passed to LLVM.
+ * Families without an entry yield the empty string.
+ */
+const char *ac_get_llvm_processor_name(enum radeon_family family)
+{
+	switch (family) {
+	case CHIP_TAHITI:	return "tahiti";
+	case CHIP_PITCAIRN:	return "pitcairn";
+	case CHIP_VERDE:	return "verde";
+	case CHIP_OLAND:	return "oland";
+	case CHIP_HAINAN:	return "hainan";
+	case CHIP_BONAIRE:	return "bonaire";
+	case CHIP_KABINI:	return "kabini";
+	case CHIP_KAVERI:	return "kaveri";
+	case CHIP_HAWAII:	return "hawaii";
+	case CHIP_TONGA:	return "tonga";
+	case CHIP_ICELAND:	return "iceland";
+	case CHIP_CARRIZO:	return "carrizo";
+	case CHIP_FIJI:		return "fiji";
+	case CHIP_STONEY:	return "stoney";
+	case CHIP_POLARIS10:	return "polaris10";
+	/* These three families all use the polaris11 processor name. */
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGAM:	return "polaris11";
+	case CHIP_VEGA10:	return "gfx900";
+	case CHIP_RAVEN:	return "gfx902";
+	case CHIP_VEGA12:	return "gfx904";
+	case CHIP_VEGA20:	return "gfx906";
+	/* Raven2 and Renoir share one processor name. */
+	case CHIP_RAVEN2:
+	case CHIP_RENOIR:	return "gfx909";
+	case CHIP_ARCTURUS:	return "gfx908";
+	case CHIP_NAVI10:	return "gfx1010";
+	case CHIP_NAVI12:	return "gfx1011";
+	case CHIP_NAVI14:	return "gfx1012";
+	default:		return "";
+	}
+}
+
+/* Create an LLVM target machine for the given chip family.
+ *
+ * family:     chip to compile for; must be CHIP_TAHITI or newer.
+ * tm_options: AC_TM_* bits selecting the triple and the feature string.
+ * level:      LLVM code generation optimization level.
+ * out_triple: if non-NULL, receives the triple string that was selected
+ *             (a string literal, so the caller must not free it). It is
+ *             written even when creation fails.
+ *
+ * Returns the target machine, or NULL when the target cannot be found or
+ * LLVM fails to create the machine.
+ */
+static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
+						     enum ac_target_machine_options tm_options,
+						     LLVMCodeGenOptLevel level,
+						     const char **out_triple)
+{
+	assert(family >= CHIP_TAHITI);
+	char features[256];
+	const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
+	LLVMTargetRef target = ac_get_llvm_target(triple);
+
+	if (out_triple)
+		*out_triple = triple;
+
+	/* ac_get_llvm_target() has already reported the failure; do not hand
+	 * a NULL target to LLVMCreateTargetMachine().
+	 */
+	if (!target)
+		return NULL;
+
+	/* Wave64 is requested explicitly on Navi10 and newer unless the
+	 * caller asked for wave32.
+	 */
+	snprintf(features, sizeof(features),
+		 "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s",
+		 family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
+			",+wavefrontsize64,-wavefrontsize32" : "",
+		 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
+		 tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
+		 tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
+		 tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "",
+		 tm_options & AC_TM_NO_LOAD_STORE_OPT ? ",-load-store-opt" : "");
+
+	LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
+					target,
+					triple,
+					ac_get_llvm_processor_name(family),
+					features,
+					level,
+					LLVMRelocDefault,
+					LLVMCodeModelDefault);
+
+	/* Only touch the machine if it was actually created. */
+	if (tm && (tm_options & AC_TM_ENABLE_GLOBAL_ISEL))
+		ac_enable_global_isel(tm);
+	return tm;
+}
+
+/* Build the pass manager holding the fixed optimization pipeline.
+ *
+ * target_library_info: optional; added to the manager when non-NULL.
+ * check_ir:            when true, the verifier pass is inserted first.
+ *
+ * Returns the pass manager, or NULL if LLVM could not create one.
+ */
+static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
+					    bool check_ir)
+{
+	LLVMPassManagerRef pm = LLVMCreatePassManager();
+
+	if (pm == NULL)
+		return NULL;
+
+	if (target_library_info != NULL)
+		LLVMAddTargetLibraryInfo(target_library_info, pm);
+
+	if (check_ir)
+		LLVMAddVerifierPass(pm);
+
+	LLVMAddAlwaysInlinerPass(pm);
+
+	/* A pass manager normally runs every pass on one function before it
+	 * moves to the next one. The no-op barrier forces the inliner to
+	 * finish on all functions first, so the passes below only process
+	 * the functions that remain and do no work on dead inline functions.
+	 */
+	ac_llvm_add_barrier_noop_pass(pm);
+
+	/* This pass should eliminate all the load and store instructions. */
+	LLVMAddPromoteMemoryToRegisterPass(pm);
+	LLVMAddScalarReplAggregatesPass(pm);
+	LLVMAddLICMPass(pm);
+	LLVMAddAggressiveDCEPass(pm);
+	LLVMAddCFGSimplificationPass(pm);
+
+	/* This is recommended by the instruction combining pass. */
+	LLVMAddEarlyCSEMemSSAPass(pm);
+	LLVMAddInstructionCombiningPass(pm);
+
+	return pm;
+}
+
+/* Translate one AC_FUNC_ATTR_* flag into the LLVM attribute name.
+ * An unknown flag is reported on stderr and yields NULL.
+ */
+static const char *attr_to_str(enum ac_func_attr attr)
+{
+	static const struct {
+		enum ac_func_attr attr;
+		const char *name;
+	} attr_names[] = {
+		{ AC_FUNC_ATTR_ALWAYSINLINE,		"alwaysinline" },
+		{ AC_FUNC_ATTR_INREG,			"inreg" },
+		{ AC_FUNC_ATTR_NOALIAS,			"noalias" },
+		{ AC_FUNC_ATTR_NOUNWIND,		"nounwind" },
+		{ AC_FUNC_ATTR_READNONE,		"readnone" },
+		{ AC_FUNC_ATTR_READONLY,		"readonly" },
+		{ AC_FUNC_ATTR_WRITEONLY,		"writeonly" },
+		{ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY,	"inaccessiblememonly" },
+		{ AC_FUNC_ATTR_CONVERGENT,		"convergent" },
+	};
+
+	for (unsigned i = 0; i < ARRAY_SIZE(attr_names); i++) {
+		if (attr_names[i].attr == attr)
+			return attr_names[i].name;
+	}
+
+	fprintf(stderr, "Unhandled function attribute: %x\n", attr);
+	return NULL;
+}
+
+/* Attach a single enum attribute to a function or to a call site.
+ *
+ * ctx:      LLVM context used to create the attribute.
+ * function: a function value, or otherwise a call instruction.
+ * attr_idx: LLVM attribute index the attribute is added at.
+ * attr:     exactly one AC_FUNC_ATTR_* flag.
+ *
+ * An attribute that attr_to_str() does not know is skipped; attr_to_str()
+ * has already reported it on stderr.
+ */
+void
+ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+		     int attr_idx, enum ac_func_attr attr)
+{
+	const char *attr_name = attr_to_str(attr);
+
+	/* attr_to_str() returns NULL for unknown flags; strlen(NULL) would
+	 * crash, so bail out instead.
+	 */
+	if (!attr_name)
+		return;
+
+	unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
+							   strlen(attr_name));
+	LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
+
+	if (LLVMIsAFunction(function))
+		LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+	else
+		LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
+}
+
+/* Add every attribute in attrib_mask to a function or call site.
+ * AC_FUNC_ATTR_NOUNWIND is always included and the AC_FUNC_ATTR_LEGACY
+ * marker bit is dropped before the attributes are applied.
+ */
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+			    unsigned attrib_mask)
+{
+	unsigned pending = (attrib_mask | AC_FUNC_ATTR_NOUNWIND) & ~AC_FUNC_ATTR_LEGACY;
+
+	/* u_bit_scan() hands back one set bit index per iteration and
+	 * updates the mask it is given.
+	 */
+	while (pending != 0) {
+		int bit = u_bit_scan(&pending);
+
+		ac_add_function_attr(ctx, function, -1,
+				     (enum ac_func_attr)(1u << bit));
+	}
+}
+
+/* Print the textual form of a module to stderr. */
+void
+ac_dump_module(LLVMModuleRef module)
+{
+	char *ir_text = LLVMPrintModuleToString(module);
+
+	fputs(ir_text, stderr);
+	LLVMDisposeMessage(ir_text);
+}
+
+/* Set a target-dependent function attribute whose value is the given
+ * unsigned integer, formatted as a "0x"-prefixed hexadecimal string.
+ */
+void
+ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
+				     const char *name, unsigned value)
+{
+	char hex_value[16];
+
+	snprintf(hex_value, sizeof(hex_value), "0x%x", value);
+	LLVMAddTargetDependentFunctionAttr(F, name, hex_value);
+}
+
+/* Set "amdgpu-flat-work-group-size" on F to "size,size".
+ * A size of zero leaves the function untouched.
+ */
+void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
+{
+	if (size != 0) {
+		char size_range[32];
+
+		snprintf(size_range, sizeof(size_range), "%u,%u", size, size);
+		LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size",
+						   size_range);
+	}
+}
+
+/* Sum the sizes of all alloca instructions in a function.
+ *
+ * Each alloca contributes its allocated type size in dwords, rounded up to
+ * the instruction's alignment. Returns the accumulated total.
+ */
+unsigned
+ac_count_scratch_private_memory(LLVMValueRef function)
+{
+	unsigned total_dwords = 0;
+
+	/* Walk every instruction of every basic block. */
+	for (LLVMBasicBlockRef block = LLVMGetFirstBasicBlock(function);
+	     block != NULL;
+	     block = LLVMGetNextBasicBlock(block)) {
+		for (LLVMValueRef inst = LLVMGetFirstInstruction(block);
+		     inst != NULL;
+		     inst = LLVMGetNextInstruction(inst)) {
+			if (LLVMGetInstructionOpcode(inst) == LLVMAlloca) {
+				LLVMTypeRef allocated = LLVMGetElementType(LLVMTypeOf(inst));
+				/* No idea why LLVM aligns allocas to 4 elements. */
+				unsigned alignment = LLVMGetAlignment(inst);
+				unsigned size_dw = ac_get_type_size(allocated) / 4;
+
+				total_dwords += align(size_dw, alignment);
+			}
+		}
+	}
+
+	return total_dwords;
+}
+
+/* Populate *compiler with the persistent LLVM objects for a chip family.
+ *
+ * The structure is zeroed first. The default target machine is always
+ * created; the low-optimization machine only with AC_TM_CREATE_LOW_OPT and
+ * the wave32 machine only for CHIP_NAVI10 and newer. The target library
+ * info and the pass manager are created last.
+ *
+ * Returns true on success. On failure everything created so far is
+ * destroyed and false is returned.
+ */
+bool
+ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
+		      enum radeon_family family,
+		      enum ac_target_machine_options tm_options)
+{
+	const char *triple;
+	const bool want_low_opt = (tm_options & AC_TM_CREATE_LOW_OPT) != 0;
+	const bool want_wave32 = family >= CHIP_NAVI10;
+
+	memset(compiler, 0, sizeof(*compiler));
+
+	/* Nothing else exists yet, so a failure here needs no cleanup. */
+	compiler->tm = ac_create_target_machine(family, tm_options,
+						LLVMCodeGenLevelDefault,
+						&triple);
+	if (compiler->tm == NULL)
+		return false;
+
+	if (want_low_opt) {
+		compiler->low_opt_tm = ac_create_target_machine(family, tm_options,
+								LLVMCodeGenLevelLess,
+								NULL);
+		if (compiler->low_opt_tm == NULL)
+			goto fail;
+	}
+
+	if (want_wave32) {
+		assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
+		compiler->tm_wave32 = ac_create_target_machine(family,
+							       tm_options | AC_TM_WAVE32,
+							       LLVMCodeGenLevelDefault,
+							       NULL);
+		if (compiler->tm_wave32 == NULL)
+			goto fail;
+	}
+
+	compiler->target_library_info = ac_create_target_library_info(triple);
+	if (compiler->target_library_info == NULL)
+		goto fail;
+
+	compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
+					      tm_options & AC_TM_CHECK_IR);
+	if (compiler->passmgr == NULL)
+		goto fail;
+
+	return true;
+
+fail:
+	ac_destroy_llvm_compiler(compiler);
+	return false;
+}
+
+/* Release every LLVM object owned by *compiler. Members that are still
+ * NULL are skipped for the pass manager, library info and target machines;
+ * the three pass objects are handed to ac_destroy_llvm_passes() as they are.
+ * The structure's fields are not reset afterwards.
+ */
+void
+ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
+{
+	/* Code generation pass objects first. */
+	ac_destroy_llvm_passes(compiler->passes);
+	ac_destroy_llvm_passes(compiler->passes_wave32);
+	ac_destroy_llvm_passes(compiler->low_opt_passes);
+
+	if (compiler->passmgr != NULL)
+		LLVMDisposePassManager(compiler->passmgr);
+
+	if (compiler->target_library_info != NULL)
+		ac_dispose_target_library_info(compiler->target_library_info);
+
+	/* Target machines last. */
+	if (compiler->low_opt_tm != NULL)
+		LLVMDisposeTargetMachine(compiler->low_opt_tm);
+
+	if (compiler->tm != NULL)
+		LLVMDisposeTargetMachine(compiler->tm);
+
+	if (compiler->tm_wave32 != NULL)
+		LLVMDisposeTargetMachine(compiler->tm_wave32);
+}
--- /dev/null
+/*
+ * Copyright 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+
+#ifndef AC_LLVM_UTIL_H
+#define AC_LLVM_UTIL_H
+
+#include <stdbool.h>
+#include <llvm-c/TargetMachine.h>
+#include <llvm/Config/llvm-config.h>
+
+#include "amd_family.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ac_compiler_passes;
+
+/* Bit flags naming LLVM attributes. Several flags can be OR-ed into the
+ * mask taken by ac_add_func_attributes(); ac_add_function_attr() takes a
+ * single flag.
+ */
+enum ac_func_attr {
+ AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
+ /* Bit 1 is not assigned to any flag in this enum. */
+ AC_FUNC_ATTR_INREG = (1 << 2),
+ AC_FUNC_ATTR_NOALIAS = (1 << 3),
+ AC_FUNC_ATTR_NOUNWIND = (1 << 4),
+ AC_FUNC_ATTR_READNONE = (1 << 5),
+ AC_FUNC_ATTR_READONLY = (1 << 6),
+ AC_FUNC_ATTR_WRITEONLY = (1 << 7),
+ AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
+ AC_FUNC_ATTR_CONVERGENT = (1 << 9),
+
+ /* Legacy intrinsic that needs attributes on function declarations
+ * and they must match the internal LLVM definition exactly, otherwise
+ * intrinsic selection fails.
+ */
+ AC_FUNC_ATTR_LEGACY = (1u << 31),
+};
+
+/* Option bits accepted by ac_init_llvm_compiler(). The per-bit notes
+ * describe what the target machine / pass manager setup does with each bit.
+ */
+enum ac_target_machine_options {
+ /* Selects the "amdgcn-mesa-mesa3d" triple instead of "amdgcn--". */
+ AC_TM_SUPPORTS_SPILL = (1 << 0),
+ /* Adds "+si-scheduler" to the feature string. */
+ AC_TM_SISCHED = (1 << 1),
+ /* Adds "+xnack" to the feature string. */
+ AC_TM_FORCE_ENABLE_XNACK = (1 << 2),
+ /* Adds "-xnack" to the feature string. */
+ AC_TM_FORCE_DISABLE_XNACK = (1 << 3),
+ /* Adds "-promote-alloca" to the feature string. */
+ AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
+ /* Inserts the IR verifier pass into the pass manager. */
+ AC_TM_CHECK_IR = (1 << 5),
+ /* Calls ac_enable_global_isel() on each created target machine. */
+ AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
+ /* Also creates the low-optimization target machine (low_opt_tm). */
+ AC_TM_CREATE_LOW_OPT = (1 << 7),
+ /* Adds "-load-store-opt" to the feature string. */
+ AC_TM_NO_LOAD_STORE_OPT = (1 << 8),
+ /* Suppresses the explicit wave64 features on Navi10 and newer;
+ * ac_init_llvm_compiler() sets it itself when creating tm_wave32.
+ */
+ AC_TM_WAVE32 = (1 << 9),
+};
+
+/* Floating-point mode selector passed to ac_create_builder().
+ * NOTE(review): the exact fast-math flags each value enables are set where
+ * ac_create_builder() is defined, which is not in this header - confirm there.
+ */
+enum ac_float_mode {
+ AC_FLOAT_MODE_DEFAULT,
+ AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
+ AC_FLOAT_MODE_UNSAFE_FP_MATH,
+};
+
+/* Per-thread persistent LLVM objects.
+ *
+ * Set up by ac_init_llvm_compiler() and released by
+ * ac_destroy_llvm_compiler(). ac_init_llvm_compiler() zeroes the whole
+ * structure and does not create any of the "passes" members itself; they
+ * are only destroyed by ac_destroy_llvm_compiler().
+ */
+struct ac_llvm_compiler {
+ /* Library info created from the target triple; added to passmgr. */
+ LLVMTargetLibraryInfoRef target_library_info;
+ /* Pass manager holding the fixed optimization pipeline. */
+ LLVMPassManagerRef passmgr;
+
+ /* Default compiler. */
+ LLVMTargetMachineRef tm;
+ struct ac_compiler_passes *passes;
+
+ /* Wave32 compiler for GFX10. */
+ LLVMTargetMachineRef tm_wave32;
+ struct ac_compiler_passes *passes_wave32;
+
+ /* Optional compiler for faster compilation with fewer optimizations.
+ * LLVM modules can be created with "tm" too. There is no difference.
+ */
+ LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
+ struct ac_compiler_passes *low_opt_passes;
+};
+
+/* Returns the LLVM processor name for a chip family, or "" if unknown. */
+const char *ac_get_llvm_processor_name(enum radeon_family family);
+void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
+bool ac_is_sgpr_param(LLVMValueRef param);
+/* Adds one AC_FUNC_ATTR_* attribute at attr_idx; "function" may be a
+ * function or a call site.
+ */
+void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
+ int attr_idx, enum ac_func_attr attr);
+/* Adds every attribute set in attrib_mask; NOUNWIND is always added. */
+void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
+ unsigned attrib_mask);
+/* Prints the module's textual IR to stderr. */
+void ac_dump_module(LLVMModuleRef module);
+
+LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
+bool ac_llvm_is_function(LLVMValueRef v);
+LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
+
+LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
+ enum ac_float_mode float_mode);
+
+/* Sets target-dependent attribute "name" on F to value formatted as hex. */
+void
+ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
+ const char *name, unsigned value);
+/* Sets "amdgpu-flat-work-group-size" to "size,size"; no-op when size is 0. */
+void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
+
+/* Choose the memory attribute for a load intrinsic.
+ *
+ * READNONE means writes can't affect the load, while READONLY means that
+ * writes can affect it; a load that may be speculated gets READNONE.
+ */
+static inline unsigned
+ac_get_load_intr_attribs(bool can_speculate)
+{
+	if (can_speculate)
+		return AC_FUNC_ATTR_READNONE;
+
+	return AC_FUNC_ATTR_READONLY;
+}
+
+/* Returns the summed, alignment-padded dword size of all allocas in
+ * "function".
+ */
+unsigned
+ac_count_scratch_private_memory(LLVMValueRef function);
+
+LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple);
+void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
+/* One-time initialization of the AMDGPU LLVM target and LLVM options. */
+void ac_init_llvm_once(void);
+
+
+/* Fills *compiler for the given family; returns false (after cleaning up)
+ * if any LLVM object could not be created.
+ */
+bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
+ enum radeon_family family,
+ enum ac_target_machine_options tm_options);
+/* Releases everything held by *compiler. */
+void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
+
+struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
+void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
+bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
+ char **pelf_buffer, size_t *pelf_size);
+void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
+void ac_enable_global_isel(LLVMTargetMachineRef tm);
+
+/* Report whether 3-component vector loads/stores can be used.
+ *
+ * GFX6 only supports vec3 with load/store format; every other case depends
+ * on building against LLVM 9 or newer.
+ */
+static inline bool
+ac_has_vec3_support(enum chip_class chip, bool use_format)
+{
+	const bool gfx6_without_format = chip == GFX6 && !use_format;
+
+	return !gfx6_without_format && LLVM_VERSION_MAJOR >= 9;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* AC_LLVM_UTIL_H */
--- /dev/null
+/*
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <llvm/Config/llvm-config.h>
+
+#include "ac_nir_to_llvm.h"
+#include "ac_llvm_build.h"
+#include "ac_llvm_util.h"
+#include "ac_binary.h"
+#include "sid.h"
+#include "nir/nir.h"
+#include "nir/nir_deref.h"
+#include "util/bitscan.h"
+#include "util/u_math.h"
+#include "ac_shader_abi.h"
+#include "ac_shader_util.h"
+
+/* State carried while translating one NIR shader to LLVM IR. */
+struct ac_nir_context {
+ /* LLVM builder state (context, builder, cached types and constants). */
+ struct ac_llvm_context ac;
+ struct ac_shader_abi *abi;
+
+ gl_shader_stage stage;
+ shader_info *info;
+
+ /* LLVM value of each NIR SSA def, indexed by the def's index. */
+ LLVMValueRef *ssa_defs;
+
+ LLVMValueRef scratch;
+ LLVMValueRef constant_data;
+
+ /* Looked up with a nir_block to get its LLVMBasicBlockRef (see
+ * get_block()). NOTE(review): may hold other keys as well - confirm.
+ */
+ struct hash_table *defs;
+ struct hash_table *phis;
+ struct hash_table *vars;
+
+ LLVMValueRef main_function;
+ /* NOTE(review): presumably the targets of continue/break in the loop
+ * currently being emitted - confirm against the control-flow visitor.
+ */
+ LLVMBasicBlockRef continue_block;
+ LLVMBasicBlockRef break_block;
+
+ int num_locals;
+ LLVMValueRef *locals;
+};
+
+/* Forward declaration; the definition is not in this part of the file. */
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ enum ac_descriptor_type desc_type,
+ const nir_instr *instr,
+ bool image, bool write);
+
+/* Scatter the first value_count elements of the vector "vec" to memory.
+ * Element i is stored through the pointer values[i * value_stride].
+ */
+static void
+build_store_values_extended(struct ac_llvm_context *ac,
+			     LLVMValueRef *values,
+			     unsigned value_count,
+			     unsigned value_stride,
+			     LLVMValueRef vec)
+{
+	for (unsigned elem = 0; elem < value_count; elem++) {
+		LLVMValueRef dst_ptr = values[elem * value_stride];
+		LLVMValueRef lane = LLVMConstInt(ac->i32, elem, false);
+		LLVMValueRef scalar = LLVMBuildExtractElement(ac->builder, vec, lane, "");
+
+		LLVMBuildStore(ac->builder, scalar, dst_ptr);
+	}
+}
+
+/* LLVM type of a NIR SSA def: an integer of the def's bit size, wrapped in
+ * a vector when the def has more than one component.
+ */
+static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
+				const nir_ssa_def *def)
+{
+	LLVMTypeRef scalar_type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
+
+	if (def->num_components <= 1)
+		return scalar_type;
+
+	return LLVMVectorType(scalar_type, def->num_components);
+}
+
+/* Fetch the LLVM value recorded for a NIR source, which must be SSA. */
+static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
+{
+	assert(src.is_ssa);
+
+	const unsigned def_index = src.ssa->index;
+	return nir->ssa_defs[def_index];
+}
+
+/* Turn a NIR source holding an offset into an i32 pointer: the offset is
+ * used as a GEP index on ctx->ac.lds and the resulting pointer is cast to
+ * "i32 *" in the same address space.
+ */
+static LLVMValueRef
+get_memory_ptr(struct ac_nir_context *ctx, nir_src src)
+{
+	LLVMValueRef offset = get_src(ctx, src);
+	LLVMValueRef addr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &offset, 1, "");
+	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(addr));
+	LLVMTypeRef i32_ptr_type = LLVMPointerType(ctx->ac.i32, addr_space);
+
+	return LLVMBuildBitCast(ctx->ac.builder, addr, i32_ptr_type, "");
+}
+
+/* Look up the LLVM basic block recorded for a NIR block in nir->defs.
+ * The block must already be present in the table.
+ */
+static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
+				   const struct nir_block *b)
+{
+	struct hash_entry *found = _mesa_hash_table_search(nir->defs, b);
+	LLVMBasicBlockRef llvm_block = (LLVMBasicBlockRef)found->data;
+
+	return llvm_block;
+}
+
+/* Fetch an ALU source and apply its swizzle so that the returned value has
+ * num_components components. Source modifiers (negate/abs) are not
+ * supported and are asserted to be absent.
+ */
+static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
+ nir_alu_src src,
+ unsigned num_components)
+{
+ LLVMValueRef value = get_src(ctx, src.src);
+ bool need_swizzle = false;
+
+ assert(value);
+ unsigned src_components = ac_get_llvm_num_components(value);
+ /* An identity swizzle over the used components needs no shuffle. */
+ for (unsigned i = 0; i < num_components; ++i) {
+ assert(src.swizzle[i] < src_components);
+ if (src.swizzle[i] != i)
+ need_swizzle = true;
+ }
+
+ if (need_swizzle || num_components != src_components) {
+ /* All four swizzle entries are read here regardless of
+ * num_components; only the first num_components are used below.
+ */
+ LLVMValueRef masks[] = {
+ LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
+ LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
+
+ if (src_components > 1 && num_components == 1) {
+ /* Vector to scalar: pick the single selected element. */
+ value = LLVMBuildExtractElement(ctx->ac.builder, value,
+ masks[0], "");
+ } else if (src_components == 1 && num_components > 1) {
+ /* Scalar to vector: replicate the scalar. */
+ LLVMValueRef values[] = {value, value, value, value};
+ value = ac_build_gather_values(&ctx->ac, values, num_components);
+ } else {
+ /* Vector to vector: shuffle with the swizzle as the mask. */
+ LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
+ value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
+ swizzle, "");
+ }
+ }
+ assert(!src.negate);
+ assert(!src.abs);
+ return value;
+}
+
+/* Integer comparison producing a 32-bit boolean: 0xFFFFFFFF when the
+ * predicate holds, 0 otherwise.
+ */
+static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
+				 LLVMIntPredicate pred, LLVMValueRef src0,
+				 LLVMValueRef src1)
+{
+	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, 0xFFFFFFFF, false);
+	LLVMValueRef is_true = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
+
+	return LLVMBuildSelect(ctx->builder, is_true, all_ones, ctx->i32_0, "");
+}
+
+/* Floating-point comparison producing a 32-bit boolean: both operands are
+ * converted with ac_to_float() and the result is 0xFFFFFFFF when the
+ * predicate holds, 0 otherwise.
+ */
+static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
+				   LLVMRealPredicate pred, LLVMValueRef src0,
+				   LLVMValueRef src1)
+{
+	LLVMValueRef lhs = ac_to_float(ctx, src0);
+	LLVMValueRef rhs = ac_to_float(ctx, src1);
+	LLVMValueRef is_true = LLVMBuildFCmp(ctx->builder, pred, lhs, rhs, "");
+	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, 0xFFFFFFFF, false);
+
+	return LLVMBuildSelect(ctx->builder, is_true, all_ones, ctx->i32_0, "");
+}
+
+/* Call a one-operand float intrinsic. The full intrinsic name is "intrin"
+ * followed by ".f<bits>", where <bits> is the element size of result_type.
+ */
+static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
+					 const char *intrin,
+					 LLVMTypeRef result_type,
+					 LLVMValueRef src0)
+{
+	char full_name[64];
+	LLVMValueRef operand = ac_to_float(ctx, src0);
+
+	ASSERTED const int length = snprintf(full_name, sizeof(full_name), "%s.f%d", intrin,
+					     ac_get_elem_bits(ctx, result_type));
+	assert(length < sizeof(full_name));
+
+	return ac_build_intrinsic(ctx, full_name, result_type, &operand, 1,
+				  AC_FUNC_ATTR_READNONE);
+}
+
+/* Call a two-operand float intrinsic. The full intrinsic name is "intrin"
+ * followed by ".f<bits>", where <bits> is the element size of result_type.
+ */
+static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
+					 const char *intrin,
+					 LLVMTypeRef result_type,
+					 LLVMValueRef src0, LLVMValueRef src1)
+{
+	char full_name[64];
+	LLVMValueRef operands[2];
+
+	operands[0] = ac_to_float(ctx, src0);
+	operands[1] = ac_to_float(ctx, src1);
+
+	ASSERTED const int length = snprintf(full_name, sizeof(full_name), "%s.f%d", intrin,
+					     ac_get_elem_bits(ctx, result_type));
+	assert(length < sizeof(full_name));
+
+	return ac_build_intrinsic(ctx, full_name, result_type, operands, 2,
+				  AC_FUNC_ATTR_READNONE);
+}
+
+/* Call a three-operand float intrinsic. The full intrinsic name is "intrin"
+ * followed by ".f<bits>", where <bits> is the element size of result_type.
+ */
+static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
+					 const char *intrin,
+					 LLVMTypeRef result_type,
+					 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
+{
+	char full_name[64];
+	LLVMValueRef operands[3];
+
+	operands[0] = ac_to_float(ctx, src0);
+	operands[1] = ac_to_float(ctx, src1);
+	operands[2] = ac_to_float(ctx, src2);
+
+	ASSERTED const int length = snprintf(full_name, sizeof(full_name), "%s.f%d", intrin,
+					     ac_get_elem_bits(ctx, result_type));
+	assert(length < sizeof(full_name));
+
+	return ac_build_intrinsic(ctx, full_name, result_type, operands, 3,
+				  AC_FUNC_ATTR_READNONE);
+}
+
+/* Scalar select: yields src1 when src0 is non-zero and src2 otherwise.
+ * Both candidates are converted with ac_to_integer_or_pointer() first.
+ */
+static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
+			       LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
+{
+	assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
+
+	LLVMValueRef is_set = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
+					    ctx->i32_0, "");
+	LLVMValueRef if_true = ac_to_integer_or_pointer(ctx, src1);
+	LLVMValueRef if_false = ac_to_integer_or_pointer(ctx, src2);
+
+	return LLVMBuildSelect(ctx->builder, is_set, if_true, if_false, "");
+}
+
+/* Integer absolute value, computed as max(x, -x). */
+static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
+			      LLVMValueRef src0)
+{
+	LLVMValueRef negated = LLVMBuildNeg(ctx->builder, src0, "");
+
+	return ac_build_imax(ctx, src0, negated);
+}
+
+/* Call an intrinsic returning the packed struct {i32, i1} and return its
+ * second member (the i1 flag) zero-extended to i32.
+ */
+static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
+				    const char *intrin,
+				    LLVMValueRef src0, LLVMValueRef src1)
+{
+	LLVMTypeRef members[] = { ctx->i32, ctx->i1 };
+	LLVMValueRef operands[] = { src0, src1 };
+	LLVMTypeRef pair_type = LLVMStructTypeInContext(ctx->context, members, 2, true);
+
+	LLVMValueRef pair = ac_build_intrinsic(ctx, intrin, pair_type, operands, 2,
+					       AC_FUNC_ATTR_READNONE);
+	LLVMValueRef flag = LLVMBuildExtractValue(ctx->builder, pair, 1, "");
+
+	return LLVMBuildZExt(ctx->builder, flag, ctx->i32, "");
+}
+
+/* Convert a 32-bit boolean to a float of the requested bit size.
+ * The boolean is ANDed with the bit pattern of 1.0f, which gives 1.0f or
+ * 0.0f, and the result is then truncated or extended to 16 or 64 bits.
+ */
+static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
+			     LLVMValueRef src0,
+			     unsigned bitsize)
+{
+	LLVMValueRef one_bits = LLVMBuildBitCast(ctx->builder,
+						 LLVMConstReal(ctx->f32, 1.0),
+						 ctx->i32, "");
+	LLVMValueRef masked = LLVMBuildAnd(ctx->builder, src0, one_bits, "");
+	LLVMValueRef as_f32 = LLVMBuildBitCast(ctx->builder, masked, ctx->f32, "");
+
+	if (bitsize == 32)
+		return as_f32;
+	if (bitsize == 16)
+		return LLVMBuildFPTrunc(ctx->builder, as_f32, ctx->f16, "");
+	if (bitsize == 64)
+		return LLVMBuildFPExt(ctx->builder, as_f32, ctx->f64, "");
+
+	unreachable("Unsupported bit size.");
+}
+
+/* Convert a float to a 32-bit boolean: the "unordered or not equal to
+ * zero" comparison result is sign-extended to i32.
+ */
+static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
+			     LLVMValueRef src0)
+{
+	LLVMValueRef value = ac_to_float(ctx, src0);
+	LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(value));
+	LLVMValueRef nonzero = LLVMBuildFCmp(ctx->builder, LLVMRealUNE, value, zero, "");
+
+	return LLVMBuildSExt(ctx->builder, nonzero, ctx->i32, "");
+}
+
+/* Convert a 32-bit boolean to an integer 0/1 of the requested bit size. */
+static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
+			     LLVMValueRef src0,
+			     unsigned bitsize)
+{
+	/* Keep only the lowest bit of the boolean. */
+	LLVMValueRef bit = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
+
+	if (bitsize == 32)
+		return bit;
+	if (bitsize == 8)
+		return LLVMBuildTrunc(ctx->builder, bit, ctx->i8, "");
+	if (bitsize == 16)
+		return LLVMBuildTrunc(ctx->builder, bit, ctx->i16, "");
+	if (bitsize == 64)
+		return LLVMBuildZExt(ctx->builder, bit, ctx->i64, "");
+
+	unreachable("Unsupported bit size.");
+}
+
+/* Convert an integer to a 32-bit boolean: the "not equal to zero"
+ * comparison result is sign-extended to i32.
+ */
+static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
+			     LLVMValueRef src0)
+{
+	LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
+	LLVMValueRef nonzero = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, "");
+
+	return LLVMBuildSExt(ctx->builder, nonzero, ctx->i32, "");
+}
+
+/* Round a float through f16 precision and return it as f32 again, with
+ * results that are denormal in f16 flushed to zero. The denormal test
+ * depends on the chip class: an f16 class intrinsic on GFX8 and newer, a
+ * magnitude comparison on older chips.
+ */
+static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
+ LLVMValueRef src0)
+{
+ LLVMValueRef result;
+ LLVMValueRef cond = NULL;
+
+ src0 = ac_to_float(ctx, src0);
+ result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
+
+ if (ctx->chip_class >= GFX8) {
+ LLVMValueRef args[2];
+ /* Check if the result is a denormal - and flush to 0 if so. */
+ args[0] = result;
+ args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
+ cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
+ }
+
+ /* need to convert back up to f32 */
+ result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+
+ if (ctx->chip_class >= GFX8)
+ /* cond was computed on the f16 value above. */
+ result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
+ else {
+ /* for GFX6-GFX7 */
+ /* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
+ * so compare the result and flush to 0 if it's smaller.
+ */
+ LLVMValueRef temp, cond2;
+ temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
+ /* NOTE(review): both predicates are the unordered variants, so
+ * they also hold for NaN; confirm whether flushing NaN to zero
+ * is intended here.
+ */
+ cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
+ LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
+ temp, "");
+ /* Exclude exact zero so it is not rewritten. */
+ cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
+ temp, ctx->f32_0, "");
+ cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
+ result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
+ }
+ return result;
+}
+
+/* High 32 bits of the unsigned product: both operands are zero-extended to
+ * i64, multiplied, shifted right logically by 32 and truncated to i32.
+ */
+static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
+				   LLVMValueRef src0, LLVMValueRef src1)
+{
+	LLVMValueRef lhs64 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
+	LLVMValueRef rhs64 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
+	LLVMValueRef product = LLVMBuildMul(ctx->builder, lhs64, rhs64, "");
+	LLVMValueRef shift = LLVMConstInt(ctx->i64, 32, false);
+	LLVMValueRef high = LLVMBuildLShr(ctx->builder, product, shift, "");
+
+	return LLVMBuildTrunc(ctx->builder, high, ctx->i32, "");
+}
+
+/* High 32 bits of the signed product: both operands are sign-extended to
+ * i64, multiplied, shifted right arithmetically by 32 and truncated to i32.
+ */
+static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
+				   LLVMValueRef src0, LLVMValueRef src1)
+{
+	LLVMValueRef lhs64 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
+	LLVMValueRef rhs64 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
+	LLVMValueRef product = LLVMBuildMul(ctx->builder, lhs64, rhs64, "");
+	LLVMValueRef shift = LLVMConstInt(ctx->i64, 32, false);
+	LLVMValueRef high = LLVMBuildAShr(ctx->builder, product, shift, "");
+
+	return LLVMBuildTrunc(ctx->builder, high, ctx->i32, "");
+}
+
+/* Build a bitfield mask: ((1 << bits) - 1) << offset. */
+static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
+			     LLVMValueRef bits, LLVMValueRef offset)
+{
+	LLVMValueRef pow2 = LLVMBuildShl(ctx->builder, ctx->i32_1, bits, "");
+	LLVMValueRef low_mask = LLVMBuildSub(ctx->builder, pow2, ctx->i32_1, "");
+
+	return LLVMBuildShl(ctx->builder, low_mask, offset, "");
+}
+
+/* Bitwise select: take "insert" where mask bits are set, "base" elsewhere.
+ *
+ * (mask & insert) | (~mask & base) equals base ^ (mask & (insert ^ base));
+ * the second form is emitted because the LLVM backend can convert it to
+ * V_BFI.
+ */
+static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
+					 LLVMValueRef mask, LLVMValueRef insert,
+					 LLVMValueRef base)
+{
+	LLVMValueRef differing = LLVMBuildXor(ctx->builder, insert, base, "");
+	LLVMValueRef selected = LLVMBuildAnd(ctx->builder, mask, differing, "");
+
+	return LLVMBuildXor(ctx->builder, base, selected, "");
+}
+
+/* Pack the first two components of a float vector into one i32: the two
+ * components are handed to the "pack" callback and its result is bitcast
+ * to i32.
+ */
+static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx,
+				   LLVMValueRef src0,
+				   LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
+							LLVMValueRef args[2]))
+{
+	LLVMValueRef pair[2];
+	LLVMValueRef vec = ac_to_float(ctx, src0);
+
+	pair[0] = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_0, "");
+	pair[1] = LLVMBuildExtractElement(ctx->builder, vec, ctx->i32_1, "");
+
+	LLVMValueRef packed = pack(ctx, pair);
+	return LLVMBuildBitCast(ctx->builder, packed, ctx->i32, "");
+}
+
+/* Unpack two f16 values from one i32 into a 2-component f32 vector.
+ * Component 0 comes from the low 16 bits, component 1 from the high 16.
+ */
+static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
+					  LLVMValueRef src0)
+{
+	LLVMValueRef shift16 = LLVMConstInt(ctx->i32, 16, false);
+	LLVMValueRef halves[2];
+
+	for (int half = 0; half < 2; half++) {
+		LLVMValueRef bits = src0;
+
+		/* Only the upper half needs to be shifted down first. */
+		if (half == 1)
+			bits = LLVMBuildLShr(ctx->builder, src0, shift16, "");
+
+		bits = LLVMBuildTrunc(ctx->builder, bits, ctx->i16, "");
+		bits = LLVMBuildBitCast(ctx->builder, bits, ctx->f16, "");
+		halves[half] = LLVMBuildFPExt(ctx->builder, bits, ctx->f32, "");
+	}
+
+	return ac_build_gather_values(ctx, halves, 2);
+}
+
+/* Emit a screen-space derivative for one of the fddx/fddy NIR opcodes.
+ * The opcode selects the thread-id mask and the neighbour index that are
+ * passed to ac_build_ddxy().
+ */
+static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
+			      nir_op op,
+			      LLVMValueRef src0)
+{
+	unsigned mask;
+	int idx;
+
+	/* The fine variants use a per-axis mask, everything else TOP_LEFT. */
+	switch (op) {
+	case nir_op_fddx_fine:
+		mask = AC_TID_MASK_LEFT;
+		break;
+	case nir_op_fddy_fine:
+		mask = AC_TID_MASK_TOP;
+		break;
+	default:
+		mask = AC_TID_MASK_TOP_LEFT;
+		break;
+	}
+
+	/* For DDX we want the next X pixel, for DDY the next Y pixel. */
+	switch (op) {
+	case nir_op_fddx_fine:
+	case nir_op_fddx_coarse:
+	case nir_op_fddx:
+		idx = 1;
+		break;
+	default:
+		idx = 2;
+		break;
+	}
+
+	return ac_build_ddxy(&ctx->ac, mask, idx, src0);
+}
+
+/* Translate one NIR ALU instruction into LLVM IR.
+ *
+ * The sources are fetched with get_alu_src() using a per-opcode component
+ * count, the opcode is lowered in the large switch below, and a non-NULL
+ * result is converted with ac_to_integer_or_pointer() and stored in
+ * ctx->ssa_defs[] under the destination SSA index.
+ *
+ * Opcodes without a case are printed to stderr and the process aborts.
+ */
+static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
+{
+ LLVMValueRef src[4], result = NULL;
+ unsigned num_components = instr->dest.dest.ssa.num_components;
+ unsigned src_components;
+ LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
+
+ assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
+ /* Pick how many components get_alu_src() fetches from each source. */
+ switch (instr->op) {
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ src_components = 1;
+ break;
+ case nir_op_pack_half_2x16:
+ case nir_op_pack_snorm_2x16:
+ case nir_op_pack_unorm_2x16:
+ src_components = 2;
+ break;
+ case nir_op_unpack_half_2x16:
+ src_components = 1;
+ break;
+ case nir_op_cube_face_coord:
+ case nir_op_cube_face_index:
+ src_components = 3;
+ break;
+ default:
+ /* Every other opcode reads as many components as it writes. */
+ src_components = num_components;
+ break;
+ }
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ src[i] = get_alu_src(ctx, instr->src[i], src_components);
+
+ /* Lower the opcode itself. */
+ switch (instr->op) {
+ case nir_op_mov:
+ result = src[0];
+ break;
+ case nir_op_fneg:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
+ break;
+ case nir_op_ineg:
+ result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
+ break;
+ case nir_op_inot:
+ result = LLVMBuildNot(ctx->ac.builder, src[0], "");
+ break;
+ case nir_op_iadd:
+ result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_fadd:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_fsub:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_isub:
+ result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_imul:
+ result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
+ break;
+ /* NOTE(review): imod and irem (below) are both lowered to SRem here.
+ * Confirm that imod is either lowered earlier or never reaches this
+ * point with operands of differing sign.
+ */
+ case nir_op_imod:
+ result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_umod:
+ result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_irem:
+ result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_idiv:
+ result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_udiv:
+ result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_fmul:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_frcp:
+ /* Reciprocal is emitted as 1.0 / x. */
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
+ break;
+ case nir_op_iand:
+ result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ior:
+ result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ixor:
+ result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
+ break;
+ /* For the three shifts the shift amount (src[1]) is first brought to
+ * the element width of the shifted value (src[0]): zero-extended when
+ * narrower, truncated when wider.
+ */
+ case nir_op_ishl:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
+ LLVMTypeOf(src[0]), "");
+ else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
+ LLVMTypeOf(src[0]), "");
+ result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ishr:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
+ LLVMTypeOf(src[0]), "");
+ else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
+ LLVMTypeOf(src[0]), "");
+ result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ushr:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
+ LLVMTypeOf(src[0]), "");
+ else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
+ src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
+ LLVMTypeOf(src[0]), "");
+ result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
+ break;
+ case nir_op_ilt32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
+ break;
+ case nir_op_ine32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
+ break;
+ case nir_op_ieq32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
+ break;
+ case nir_op_ige32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
+ break;
+ case nir_op_ult32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
+ break;
+ case nir_op_uge32:
+ result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
+ break;
+ case nir_op_feq32:
+ result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
+ break;
+ case nir_op_fne32:
+ result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
+ break;
+ case nir_op_flt32:
+ result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
+ break;
+ case nir_op_fge32:
+ result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
+ break;
+ case nir_op_fabs:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_iabs:
+ result = emit_iabs(&ctx->ac, src[0]);
+ break;
+ case nir_op_imax:
+ result = ac_build_imax(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_imin:
+ result = ac_build_imin(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_umax:
+ result = ac_build_umax(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_umin:
+ result = ac_build_umin(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_isign:
+ result = ac_build_isign(&ctx->ac, src[0],
+ instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_fsign:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_fsign(&ctx->ac, src[0],
+ instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_ffloor:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_ftrunc:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fceil:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fround_even:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
+ ac_to_float_type(&ctx->ac, def_type),src[0]);
+ break;
+ case nir_op_ffract:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_fract(&ctx->ac, src[0],
+ instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_fsin:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fcos:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fsqrt:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_fexp2:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_flog2:
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ break;
+ case nir_op_frsq:
+ /* Reciprocal square root is emitted as 1.0 / sqrt(x). */
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
+ ac_to_float_type(&ctx->ac, def_type), src[0]);
+ result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
+ break;
+ case nir_op_frexp_exp:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_frexp_exp(&ctx->ac, src[0],
+ ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
+ /* For a 16-bit source the exponent is sign-extended to i32. */
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
+ result = LLVMBuildSExt(ctx->ac.builder, result,
+ ctx->ac.i32, "");
+ break;
+ case nir_op_frexp_sig:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = ac_build_frexp_mant(&ctx->ac, src[0],
+ instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_fpow:
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
+ break;
+ case nir_op_fmax:
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
+ if (ctx->ac.chip_class < GFX9 &&
+ instr->dest.dest.ssa.bit_size == 32) {
+ /* Only pre-GFX9 chips do not flush denorms. */
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
+ ac_to_float_type(&ctx->ac, def_type),
+ result);
+ }
+ break;
+ case nir_op_fmin:
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
+ if (ctx->ac.chip_class < GFX9 &&
+ instr->dest.dest.ssa.bit_size == 32) {
+ /* Only pre-GFX9 chips do not flush denorms. */
+ result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
+ ac_to_float_type(&ctx->ac, def_type),
+ result);
+ }
+ break;
+ case nir_op_ffma:
+ /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
+ result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
+ break;
+ case nir_op_ldexp:
+ /* Pick the intrinsic by destination element width; anything
+ * that is neither 32 nor 16 bits takes the f64 variant.
+ */
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
+ else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
+ else
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
+ break;
+ case nir_op_bfm:
+ result = emit_bfm(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_bitfield_select:
+ result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
+ break;
+ case nir_op_ubfe:
+ result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
+ break;
+ case nir_op_ibfe:
+ result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
+ break;
+ case nir_op_bitfield_reverse:
+ result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
+ break;
+ case nir_op_bit_count:
+ result = ac_build_bit_count(&ctx->ac, src[0]);
+ break;
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ /* Each source was fetched as a single component; gather them
+ * into one vector after converting them with ac_to_integer().
+ */
+ for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
+ src[i] = ac_to_integer(&ctx->ac, src[i]);
+ result = ac_build_gather_values(&ctx->ac, src, num_components);
+ break;
+ case nir_op_f2i8:
+ case nir_op_f2i16:
+ case nir_op_f2i32:
+ case nir_op_f2i64:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
+ break;
+ case nir_op_f2u8:
+ case nir_op_f2u16:
+ case nir_op_f2u32:
+ case nir_op_f2u64:
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
+ break;
+ case nir_op_i2f16:
+ case nir_op_i2f32:
+ case nir_op_i2f64:
+ result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ break;
+ case nir_op_u2f16:
+ case nir_op_u2f32:
+ case nir_op_u2f64:
+ result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ break;
+ case nir_op_f2f16_rtz:
+ /* An f64 source is first truncated to f32. The value is then
+ * packed together with 0.0 by ac_build_cvt_pkrtz_f16() and
+ * element 0 of the packed result is extracted.
+ */
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+ src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
+ LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
+ result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ break;
+ case nir_op_f2f16_rtne:
+ case nir_op_f2f16:
+ case nir_op_f2f32:
+ case nir_op_f2f64:
+ /* Extend when the source is narrower than the destination,
+ * otherwise truncate.
+ */
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ else
+ result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
+ break;
+ case nir_op_u2u8:
+ case nir_op_u2u16:
+ case nir_op_u2u32:
+ case nir_op_u2u64:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
+ else
+ result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
+ break;
+ case nir_op_i2i8:
+ case nir_op_i2i16:
+ case nir_op_i2i32:
+ case nir_op_i2i64:
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
+ result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
+ else
+ result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
+ break;
+ case nir_op_b32csel:
+ result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
+ break;
+ case nir_op_find_lsb:
+ result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
+ break;
+ case nir_op_ufind_msb:
+ result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
+ break;
+ case nir_op_ifind_msb:
+ result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
+ break;
+ case nir_op_uadd_carry:
+ result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
+ break;
+ case nir_op_usub_borrow:
+ result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
+ break;
+ case nir_op_b2f16:
+ case nir_op_b2f32:
+ case nir_op_b2f64:
+ result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_f2b32:
+ result = emit_f2b(&ctx->ac, src[0]);
+ break;
+ case nir_op_b2i8:
+ case nir_op_b2i16:
+ case nir_op_b2i32:
+ case nir_op_b2i64:
+ result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
+ break;
+ case nir_op_i2b32:
+ result = emit_i2b(&ctx->ac, src[0]);
+ break;
+ case nir_op_fquantize2f16:
+ result = emit_f2f16(&ctx->ac, src[0]);
+ break;
+ case nir_op_umul_high:
+ result = emit_umul_high(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_imul_high:
+ result = emit_imul_high(&ctx->ac, src[0], src[1]);
+ break;
+ case nir_op_pack_half_2x16:
+ result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
+ break;
+ case nir_op_pack_snorm_2x16:
+ result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
+ break;
+ case nir_op_pack_unorm_2x16:
+ result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
+ break;
+ case nir_op_unpack_half_2x16:
+ result = emit_unpack_half_2x16(&ctx->ac, src[0]);
+ break;
+ case nir_op_fddx:
+ case nir_op_fddy:
+ case nir_op_fddx_fine:
+ case nir_op_fddy_fine:
+ case nir_op_fddx_coarse:
+ case nir_op_fddy_coarse:
+ result = emit_ddxy(ctx, instr->op, src[0]);
+ break;
+
+ /* The 64-bit split/pack opcodes go through a v2i32 bitcast: element 0
+ * is returned for split_x and element 1 for split_y.
+ */
+ case nir_op_unpack_64_2x32_split_x: {
+ assert(ac_get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+ ctx->ac.v2i32,
+ "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ ctx->ac.i32_0, "");
+ break;
+ }
+
+ case nir_op_unpack_64_2x32_split_y: {
+ assert(ac_get_llvm_num_components(src[0]) == 1);
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+ ctx->ac.v2i32,
+ "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ ctx->ac.i32_1, "");
+ break;
+ }
+
+ case nir_op_pack_64_2x32_split: {
+ LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
+ result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
+ break;
+ }
+
+ /* Same scheme for the 32-bit variants, using a v2i16 bitcast. */
+ case nir_op_pack_32_2x16_split: {
+ LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
+ result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_x: {
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+ ctx->ac.v2i16,
+ "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ ctx->ac.i32_0, "");
+ break;
+ }
+
+ case nir_op_unpack_32_2x16_split_y: {
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
+ ctx->ac.v2i16,
+ "");
+ result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
+ ctx->ac.i32_1, "");
+ break;
+ }
+
+ case nir_op_cube_face_coord: {
+ /* Result is (cubesc / cubema + 0.5, cubetc / cubema + 0.5). */
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ LLVMValueRef results[2];
+ LLVMValueRef in[3];
+ for (unsigned chan = 0; chan < 3; chan++)
+ in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
+ results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
+ ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
+ results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
+ ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
+ LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema",
+ ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
+ results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
+ results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
+ LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
+ results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
+ results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
+ result = ac_build_gather_values(&ctx->ac, results, 2);
+ break;
+ }
+
+ case nir_op_cube_face_index: {
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ LLVMValueRef in[3];
+ for (unsigned chan = 0; chan < 3; chan++)
+ in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
+ result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid",
+ ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
+ break;
+ }
+
+ /* Three-operand min/max are built from two chained two-operand ops.
+ * NOTE(review): unlike nir_op_fmin/nir_op_fmax above, fmin3/fmax3 do
+ * not add llvm.canonicalize on pre-GFX9 chips; confirm this is
+ * intended.
+ */
+ case nir_op_fmin3:
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
+ ac_to_float_type(&ctx->ac, def_type), result, src[2]);
+ break;
+ case nir_op_umin3:
+ result = ac_build_umin(&ctx->ac, src[0], src[1]);
+ result = ac_build_umin(&ctx->ac, result, src[2]);
+ break;
+ case nir_op_imin3:
+ result = ac_build_imin(&ctx->ac, src[0], src[1]);
+ result = ac_build_imin(&ctx->ac, result, src[2]);
+ break;
+ case nir_op_fmax3:
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
+ ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
+ result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
+ ac_to_float_type(&ctx->ac, def_type), result, src[2]);
+ break;
+ case nir_op_umax3:
+ result = ac_build_umax(&ctx->ac, src[0], src[1]);
+ result = ac_build_umax(&ctx->ac, result, src[2]);
+ break;
+ case nir_op_imax3:
+ result = ac_build_imax(&ctx->ac, src[0], src[1]);
+ result = ac_build_imax(&ctx->ac, result, src[2]);
+ break;
+ case nir_op_fmed3: {
+ src[0] = ac_to_float(&ctx->ac, src[0]);
+ src[1] = ac_to_float(&ctx->ac, src[1]);
+ src[2] = ac_to_float(&ctx->ac, src[2]);
+ result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2],
+ instr->dest.dest.ssa.bit_size);
+ break;
+ }
+ /* Integer median: max(min(a, b), min(max(a, b), c)). */
+ case nir_op_imed3: {
+ LLVMValueRef tmp1 = ac_build_imin(&ctx->ac, src[0], src[1]);
+ LLVMValueRef tmp2 = ac_build_imax(&ctx->ac, src[0], src[1]);
+ tmp2 = ac_build_imin(&ctx->ac, tmp2, src[2]);
+ result = ac_build_imax(&ctx->ac, tmp1, tmp2);
+ break;
+ }
+ case nir_op_umed3: {
+ LLVMValueRef tmp1 = ac_build_umin(&ctx->ac, src[0], src[1]);
+ LLVMValueRef tmp2 = ac_build_umax(&ctx->ac, src[0], src[1]);
+ tmp2 = ac_build_umin(&ctx->ac, tmp2, src[2]);
+ result = ac_build_umax(&ctx->ac, tmp1, tmp2);
+ break;
+ }
+
+ default:
+ /* Unsupported opcode: dump the instruction and abort. */
+ fprintf(stderr, "Unknown NIR alu instr: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ abort();
+ }
+
+ /* Record the value under the destination SSA index. */
+ if (result) {
+ assert(instr->dest.dest.is_ssa);
+ result = ac_to_integer_or_pointer(&ctx->ac, result);
+ ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
+ }
+}
+
+/* Materialise a NIR load_const instruction as an LLVM integer constant.
+ *
+ * Each component is built with LLVMConstInt() from the union member that
+ * matches the definition's bit size (8, 16, 32 or 64). Multi-component
+ * definitions become a constant vector, single-component ones stay scalar.
+ * The value is stored in ctx->ssa_defs[] under the definition's index.
+ *
+ * Any other bit size is reported on stderr and the process aborts.
+ */
+static void visit_load_const(struct ac_nir_context *ctx,
+			     const nir_load_const_instr *instr)
+{
+	const unsigned count = instr->def.num_components;
+	LLVMValueRef comps[4];
+	LLVMTypeRef int_type =
+		LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
+
+	for (unsigned c = 0; c < count; ++c) {
+		uint64_t raw;
+
+		/* Read the component at the width the definition declares. */
+		switch (instr->def.bit_size) {
+		case 8:
+			raw = instr->value[c].u8;
+			break;
+		case 16:
+			raw = instr->value[c].u16;
+			break;
+		case 32:
+			raw = instr->value[c].u32;
+			break;
+		case 64:
+			raw = instr->value[c].u64;
+			break;
+		default:
+			fprintf(stderr,
+				"unsupported nir load_const bit_size: %d\n",
+				instr->def.bit_size);
+			abort();
+		}
+
+		comps[c] = LLVMConstInt(int_type, raw, false);
+	}
+
+	ctx->ssa_defs[instr->def.index] =
+		count > 1 ? LLVMConstVector(comps, count) : comps[0];
+}
+
+/* Return the size stored in element 2 of a buffer descriptor.
+ *
+ * When in_elements is set on GFX8 the size is divided by the stride, which is
+ * read from bits [29:16] of descriptor element 1. On every other chip, or when
+ * in_elements is false, element 2 is returned unchanged.
+ */
+static LLVMValueRef
+get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef size =
+		LLVMBuildExtractElement(builder, descriptor,
+					LLVMConstInt(ctx->ac.i32, 2, false), "");
+
+	/* Only GFX8 needs the bytes-to-elements conversion. */
+	if (ctx->ac.chip_class != GFX8 || !in_elements)
+		return size;
+
+	/* On GFX8, the descriptor contains the size in bytes,
+	 * but TXQ must return the size in elements.
+	 * The stride is always non-zero for resources using TXQ.
+	 */
+	LLVMValueRef dword1 =
+		LLVMBuildExtractElement(builder, descriptor, ctx->ac.i32_1, "");
+	LLVMValueRef shifted =
+		LLVMBuildLShr(builder, dword1,
+			      LLVMConstInt(ctx->ac.i32, 16, false), "");
+	LLVMValueRef stride =
+		LLVMBuildAnd(builder, shifted,
+			     LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
+
+	return LLVMBuildUDiv(builder, size, stride, "");
+}
+
+/* Gather4 should follow the same rules as bilinear filtering, but the hardware
+ * incorrectly forces nearest filtering if the texture format is integer.
+ * The only effect it has on Gather4, which always returns 4 texels for
+ * bilinear filtering, is that the final coordinates are off by 0.5 of
+ * the texel size.
+ *
+ * The workaround is to subtract 0.5 from the unnormalized coordinates,
+ * or (0.5 / size) from the normalized coordinates.
+ *
+ * However, cube textures with 8_8_8_8 data formats require a different
+ * workaround of overriding the num format to USCALED/SSCALED. This would lose
+ * precision in 32-bit data formats, so it needs to be applied dynamically at
+ * runtime. In this case an i1 value (wa_8888) records whether the descriptor
+ * was overridden, and the sampler result is fixed up accordingly.
+ *
+ * var is the sampler variable and must have an integer result type.
+ * args is modified in place: element 1 of args->resource may be rewritten,
+ * args->coords[0] and args->coords[1] get the offset added, and
+ * args->attributes is set. instr supplies sampler_dim and is_array.
+ *
+ * Returns the result of ac_build_image_opcode() on the adjusted args, with
+ * the per-channel cube fixup applied when the sampler dimension is CUBE.
+ */
+static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
+ nir_variable *var,
+ struct ac_image_args *args,
+ const nir_tex_instr *instr)
+{
+ const struct glsl_type *type = glsl_without_array(var->type);
+ enum glsl_base_type stype = glsl_get_sampler_result_type(type);
+ LLVMValueRef wa_8888 = NULL;
+ LLVMValueRef half_texel[2];
+ LLVMValueRef result;
+
+ assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ LLVMValueRef formats;
+ LLVMValueRef data_format;
+ LLVMValueRef wa_formats;
+
+ formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
+
+ /* The data format is the 6-bit field starting at bit 20. */
+ data_format = LLVMBuildLShr(ctx->builder, formats,
+ LLVMConstInt(ctx->i32, 20, false), "");
+ data_format = LLVMBuildAnd(ctx->builder, data_format,
+ LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
+ wa_8888 = LLVMBuildICmp(
+ ctx->builder, LLVMIntEQ, data_format,
+ LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
+ "");
+
+ /* Build the descriptor dword with the num format replaced by
+ * USCALED or SSCALED, depending on the sampler result type.
+ */
+ uint32_t wa_num_format =
+ stype == GLSL_TYPE_UINT ?
+ S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) :
+ S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
+ wa_formats = LLVMBuildAnd(ctx->builder, formats,
+ LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false),
+ "");
+ wa_formats = LLVMBuildOr(ctx->builder, wa_formats,
+ LLVMConstInt(ctx->i32, wa_num_format, false), "");
+
+ /* Use the overridden dword only when the format is 8_8_8_8. */
+ formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
+ args->resource = LLVMBuildInsertElement(
+ ctx->builder, args->resource, formats, ctx->i32_1, "");
+ }
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
+ /* RECT uses a constant -0.5 offset; no size query is emitted. */
+ assert(!wa_8888);
+ half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
+ } else {
+ struct ac_image_args resinfo = {};
+ LLVMBasicBlockRef bbs[2];
+
+ LLVMValueRef unnorm = NULL;
+ LLVMValueRef default_offset = ctx->f32_0;
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D &&
+ !instr->is_array) {
+ /* In vulkan, whether the sampler uses unnormalized
+ * coordinates or not is a dynamic property of the
+ * sampler. Hence, to figure out whether or not we
+ * need to divide by the texture size, we need to test
+ * the sampler at runtime. This tests the bit set by
+ * radv_init_sampler().
+ */
+ LLVMValueRef sampler0 =
+ LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
+ sampler0 = LLVMBuildLShr(ctx->builder, sampler0,
+ LLVMConstInt(ctx->i32, 15, false), "");
+ sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
+ unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
+ default_offset = LLVMConstReal(ctx->f32, -0.5);
+ }
+
+ /* bbs[0] is the block before the optional branch and bbs[1]
+ * the block inside it; both feed the phi built further down.
+ * bbs[1] is only written and read when a branch is emitted.
+ */
+ bbs[0] = LLVMGetInsertBlock(ctx->builder);
+ if (wa_8888 || unnorm) {
+ assert(!(wa_8888 && unnorm));
+ LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
+ /* Skip the texture size query entirely if we don't need it. */
+ ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
+ bbs[1] = LLVMGetInsertBlock(ctx->builder);
+ }
+
+ /* Query the texture size. */
+ resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
+ resinfo.opcode = ac_image_get_resinfo;
+ resinfo.dmask = 0xf;
+ resinfo.lod = ctx->i32_0;
+ resinfo.resource = args->resource;
+ resinfo.attributes = AC_FUNC_ATTR_READNONE;
+ LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
+
+ /* Compute -0.5 / size. */
+ for (unsigned c = 0; c < 2; c++) {
+ half_texel[c] =
+ LLVMBuildExtractElement(ctx->builder, size,
+ LLVMConstInt(ctx->i32, c, 0), "");
+ half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
+ half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
+ half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
+ LLVMConstReal(ctx->f32, -0.5), "");
+ }
+
+ if (wa_8888 || unnorm) {
+ ac_build_endif(ctx, 2000);
+
+ /* Merge the two paths: default_offset when the query
+ * was skipped (coming from bbs[0]), the computed
+ * -0.5 / size otherwise (coming from bbs[1]).
+ */
+ for (unsigned c = 0; c < 2; c++) {
+ LLVMValueRef values[2] = { default_offset, half_texel[c] };
+ half_texel[c] = ac_build_phi(ctx, ctx->f32, 2,
+ values, bbs);
+ }
+ }
+ }
+
+ /* Apply the offset to the first two coordinates. */
+ for (unsigned c = 0; c < 2; c++) {
+ LLVMValueRef tmp;
+ tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
+ args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
+ }
+
+ args->attributes = AC_FUNC_ATTR_READNONE;
+ result = ac_build_image_opcode(ctx, args);
+
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
+ LLVMValueRef tmp, tmp2;
+
+ /* if the cube workaround is in place, f2i the result. */
+ for (unsigned c = 0; c < 4; c++) {
+ tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
+ if (stype == GLSL_TYPE_UINT)
+ tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
+ else
+ tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
+ /* Select per channel between the converted integer
+ * (workaround active) and the untouched raw bits.
+ */
+ tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
+ tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
+ tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
+ tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
+ result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
+ }
+ }
+ return result;
+}
+
+/* Find the texture deref among the sources of a texture instruction.
+ *
+ * Returns the deref of the last source whose type is
+ * nir_tex_src_texture_deref, or NULL when the instruction has none.
+ */
+static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
+{
+	nir_deref_instr *found = NULL;
+
+	for (unsigned s = 0; s < instr->num_srcs; s++) {
+		if (instr->src[s].src_type != nir_tex_src_texture_deref)
+			continue;
+
+		/* Keep scanning: a later match replaces an earlier one. */
+		found = nir_src_as_deref(instr->src[s].src);
+	}
+
+	return found;
+}
+
+/* Emit the LLVM code for a NIR texture instruction whose arguments have
+ * already been collected in args.
+ *
+ * Buffer samplers are handled with ac_build_buffer_load_format(). For all
+ * other dimensions the image opcode, level_zero flag and attributes in args
+ * are filled in from instr->op and the result of ac_build_image_opcode() is
+ * returned. Integer tg4 on GFX8 and older is routed through
+ * lower_gather4_integer() instead.
+ */
+static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
+					const nir_tex_instr *instr,
+					struct ac_image_args *args)
+{
+	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+		/* Only load up to the last component that is actually read. */
+		unsigned read_mask = nir_ssa_def_components_read(&instr->dest.ssa);
+		unsigned num_channels = util_last_bit(read_mask);
+
+		return ac_build_buffer_load_format(&ctx->ac,
+						   args->resource,
+						   args->coords[0],
+						   ctx->ac.i32_0,
+						   num_channels,
+						   0, true);
+	}
+
+	/* Plain sampling unless the texture op says otherwise. */
+	args->opcode = ac_image_sample;
+
+	switch (instr->op) {
+	case nir_texop_txf:
+	case nir_texop_txf_ms:
+	case nir_texop_samples_identical:
+		if (args->level_zero ||
+		    instr->sampler_dim == GLSL_SAMPLER_DIM_MS)
+			args->opcode = ac_image_load;
+		else
+			args->opcode = ac_image_load_mip;
+		args->level_zero = false;
+		break;
+	case nir_texop_txs:
+	case nir_texop_query_levels:
+		args->opcode = ac_image_get_resinfo;
+		if (!args->lod)
+			args->lod = ctx->ac.i32_0;
+		args->level_zero = false;
+		break;
+	case nir_texop_tex:
+		if (ctx->stage != MESA_SHADER_FRAGMENT) {
+			assert(!args->lod);
+			args->level_zero = true;
+		}
+		break;
+	case nir_texop_tg4:
+		args->opcode = ac_image_gather4;
+		args->level_zero = true;
+		break;
+	case nir_texop_lod:
+		args->opcode = ac_image_get_lod;
+		break;
+	default:
+		break;
+	}
+
+	/* Integer gather4 on GFX8 and older goes through the workaround path. */
+	if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
+		nir_variable *var =
+			nir_deref_instr_get_variable(get_tex_texture_deref(instr));
+		enum glsl_base_type stype =
+			glsl_get_sampler_result_type(glsl_without_array(var->type));
+
+		if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT)
+			return lower_gather4_integer(&ctx->ac, var, args, instr);
+	}
+
+	/* Fixup for GFX9 which allocates 1D textures as 2D. */
+	if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9 &&
+	    (args->dim == ac_image_2darray || args->dim == ac_image_2d) &&
+	    !args->coords[1])
+		args->coords[1] = ctx->ac.i32_0;
+
+	args->attributes = AC_FUNC_ATTR_READNONE;
+
+	const bool derivs_available =
+		ctx->stage == MESA_SHADER_FRAGMENT ||
+		(ctx->stage == MESA_SHADER_COMPUTE &&
+		 ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE);
+	const bool implicit_derivs = instr->op == nir_texop_tex ||
+				     instr->op == nir_texop_txb ||
+				     instr->op == nir_texop_lod;
+
+	/* Prevent texture instructions with implicit derivatives from being
+	 * sinked into branches. */
+	if (derivs_available && implicit_derivs)
+		args->attributes |= AC_FUNC_ATTR_CONVERGENT;
+
+	return ac_build_image_opcode(&ctx->ac, args);
+}
+
+/* Lower a vulkan_resource_reindex intrinsic.
+ *
+ * Builds a single-index GEP from the pointer in source 0 and the index in
+ * source 1, tags it with the uniform metadata kind, and returns it.
+ */
+static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx,
+						  nir_intrinsic_instr *instr)
+{
+	LLVMValueRef base = get_src(ctx, instr->src[0]);
+	LLVMValueRef indices[1] = { get_src(ctx, instr->src[1]) };
+	LLVMValueRef gep = LLVMBuildGEP(ctx->ac.builder, base, indices, 1, "");
+
+	LLVMSetMetadata(gep, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+
+	return gep;
+}
+
+/* Lower a load_push_constant intrinsic.
+ *
+ * The byte address is nir_intrinsic_base(instr) plus source 0. Three paths
+ * exist:
+ *  - 32-bit loads at a constant offset that lie entirely inside the inline
+ *    push-constant range are gathered from ctx->abi->inline_push_consts;
+ *  - 8-bit and 16-bit loads read one or more whole dwords from memory and
+ *    then extract the requested components;
+ *  - everything else is a plain load through a pointer cast to the
+ *    destination type.
+ *
+ * Returns the loaded value.
+ */
+static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
+					     nir_intrinsic_instr *instr)
+{
+	LLVMValueRef ptr, addr;
+	LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+	unsigned index = nir_intrinsic_base(instr);
+
+	addr = LLVMConstInt(ctx->ac.i32, index, 0);
+	addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
+
+	/* Load constant values from user SGPRS when possible, otherwise
+	 * fallback to the default path that loads directly from memory.
+	 */
+	if (LLVMIsConstant(src0) &&
+	    instr->dest.ssa.bit_size == 32) {
+		unsigned count = instr->dest.ssa.num_components;
+		unsigned offset = index;
+		unsigned base = ctx->abi->base_inline_push_consts;
+
+		offset += LLVMConstIntGetZExtValue(src0);
+		offset /= 4;
+
+		/* Check the lower bound before subtracting: with unsigned
+		 * arithmetic "offset - base" wraps when offset < base, and
+		 * adding count could wrap it back below the limit, which
+		 * would index outside inline_push_consts.
+		 */
+		if (offset >= base &&
+		    offset - base + count <= ctx->abi->num_inline_push_consts) {
+			return ac_build_gather_values(&ctx->ac,
+						      ctx->abi->inline_push_consts + (offset - base),
+						      count);
+		}
+	}
+
+	ptr = LLVMBuildGEP(ctx->ac.builder, ctx->abi->push_constants, &addr, 1, "");
+
+	if (instr->dest.ssa.bit_size == 8) {
+		/* Load one dword for a scalar, two for a vector, then use
+		 * alignbyte with the address to shift the wanted bytes down
+		 * to the bottom of a 32-bit value.
+		 */
+		unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
+		LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
+		ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
+		LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+
+		LLVMValueRef params[3];
+		if (load_dwords > 1) {
+			LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
+			params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
+			params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
+		} else {
+			res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
+			params[0] = ctx->ac.i32_0;
+			params[1] = res;
+		}
+		params[2] = addr;
+		res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
+
+		res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
+		if (instr->dest.ssa.num_components > 1)
+			res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
+		return res;
+	} else if (instr->dest.ssa.bit_size == 16) {
+		/* Load enough dwords to cover the vector at either 16-bit
+		 * alignment, build both the aligned and the one-element
+		 * shifted shuffle, and select between them on bit 1 of the
+		 * address.
+		 */
+		unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
+		LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
+		ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
+		LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+		res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
+		LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
+		cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
+		LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
+					LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
+					LLVMConstInt(ctx->ac.i32, 4, false)};
+		LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
+		LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
+		LLVMValueRef shuffle_aligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
+		LLVMValueRef shuffle_unaligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
+		res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
+		return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
+	}
+
+	ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));
+
+	return LLVMBuildLoad(ctx->ac.builder, ptr, "");
+}
+
+static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
+					  const nir_intrinsic_instr *instr)
+{ /* Returns get_buffer_size() of the SSBO descriptor selected by src[0]. */
+	/* The last load_ssbo argument is false here, as in the SSBO load visitor; the store and atomic visitors pass true. */
+	LLVMValueRef ssbo_desc = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[0]), false);
+	return get_buffer_size(ctx, ssbo_desc, false);
+}
+
+static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
+{ /* Expands every set bit i of `mask` into `multiplier` consecutive set bits starting at bit i * multiplier; bits that would land at position 32 or above are dropped. */
+	const unsigned limit = multiplier ? 31 / multiplier + 1 : 32; /* number of source bits with i * multiplier < 32, so no shift below reaches the type width */
+	uint32_t new_mask = 0, chunk = multiplier >= 32 ? ~0u : (1u << multiplier) - 1u; /* run of `multiplier` ones, saturated at 32 bits */
+	for (unsigned i = 0; i < limit && (1u << i) <= mask; ++i) /* limit <= 32, so 1u << i stays defined */
+		new_mask |= (mask & (1u << i)) ? chunk << (i * multiplier) : 0u;
+	return new_mask;
+}
+
+static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
+					 unsigned start, unsigned count)
+{ /* Returns `count` consecutive components of `src`, beginning at component `start`. */
+	const unsigned total = ac_get_llvm_num_components(src);
+	LLVMValueRef lane[4]; /* constant i32 lane indices 0..3 */
+	lane[0] = ctx->i32_0;
+	lane[1] = ctx->i32_1;
+	lane[2] = LLVMConstInt(ctx->i32, 2, false);
+	lane[3] = LLVMConstInt(ctx->i32, 3, false);
+
+	if (count == total) { /* the whole value was requested: hand it back untouched */
+		assert(start == 0);
+		return src;
+	}
+	if (count == 1) { /* a single component is returned as a scalar, not as a 1-element vector */
+		assert(start < total);
+		return LLVMBuildExtractElement(ctx->builder, src, lane[start], "");
+	}
+	assert(start + count <= total);
+	assert(count <= 4); /* only four lane constants are available */
+	return LLVMBuildShuffleVector(ctx->builder, src, src, LLVMConstVector(&lane[start], count), "");
+}
+
+static unsigned get_cache_policy(struct ac_nir_context *ctx,
+				 enum gl_access_qualifier access,
+				 bool may_store_unaligned,
+				 bool writeonly_memory)
+{ /* Builds the ac_glc / ac_slc bitmask for one memory access from its access qualifiers and the two store hints. */
+	/* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
+	 * store opcodes not aligned to a dword are affected. The only way to
+	 * get unaligned stores is through shader images.
+	 */
+	const bool gfx6_unaligned = may_store_unaligned && ctx->ac.chip_class == GFX6;
+	/* Coherent and volatile accesses request ac_glc as well. */
+	const bool coherent_or_volatile = (access & (ACCESS_COHERENT | ACCESS_VOLATILE)) != 0;
+	const bool streaming = (access & ACCESS_STREAM_CACHE_POLICY) != 0;
+	unsigned cache_policy = 0;
+
+	/* If this is write-only, don't keep data in L1 to prevent
+	 * evicting L1 cache lines that may be needed by other
+	 * instructions.
+	 */
+	if (gfx6_unaligned || writeonly_memory || coherent_or_volatile)
+		cache_policy |= ac_glc;
+	if (streaming)
+		cache_policy |= ac_slc;
+
+	return cache_policy;
+}
+
+static void visit_store_ssbo(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr)
+{ /* Emits the buffer stores for an SSBO store intrinsic: src[0] is the data, src[1] selects the buffer, src[2] is the base offset. */
+ LLVMValueRef src_data = get_src(ctx, instr->src[0]);
+ int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
+ unsigned writemask = nir_intrinsic_write_mask(instr);
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
+ bool writeonly_memory = access & ACCESS_NON_READABLE;
+ unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
+ /* The trailing `true` is also what the atomic visitor passes to load_ssbo; the load visitors pass false. */
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
+ get_src(ctx, instr->src[1]), true);
+ LLVMValueRef base_data = src_data;
+ base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
+ LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
+ /* One store is emitted per iteration; components that cannot go into the current store are OR-ed back into writemask and handled by a later iteration. */
+ while (writemask) {
+ int start, count;
+ LLVMValueRef data, offset;
+ LLVMTypeRef data_type;
+ /* NOTE(review): presumably removes the lowest run of consecutive set bits from writemask and reports it as [start, start + count) - confirm against the helper. */
+ u_bit_scan_consecutive_range(&writemask, &start, &count);
+
+ /* Due to an LLVM limitation with LLVM < 9, split 3-element
+ * writes into a 2-element and a 1-element write. */
+ if (count == 3 &&
+ (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
+ writemask |= 1 << (start + 2); /* re-queue the third element */
+ count = 2;
+ }
+ int num_bytes = count * elem_size_bytes; /* count in bytes */
+
+ /* we can only store 4 DWords at the same time.
+ * can only happen for 64 Bit vectors. */
+ if (num_bytes > 16) {
+ writemask |= ((1u << (count - 2)) - 1u) << (start + 2); /* re-queue everything after the first two elements */
+ count = 2;
+ num_bytes = 16;
+ }
+
+ /* check alignment of 16 Bit stores */
+ if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
+ writemask |= ((1u << (count - 1)) - 1u) << (start + 1); /* store only the odd-indexed first element now, re-queue the rest */
+ count = 1;
+ num_bytes = 2;
+ }
+ data = extract_vector_range(&ctx->ac, base_data, start, count);
+ /* Offset of this range: the base offset plus `start` elements of elem_size_bytes each. */
+ offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
+ LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
+ /* Pick the store helper by total size: one byte, one short, or 1-4 dwords. */
+ if (num_bytes == 1) {
+ ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data,
+ offset, ctx->ac.i32_0,
+ cache_policy);
+ } else if (num_bytes == 2) {
+ ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
+ offset, ctx->ac.i32_0,
+ cache_policy);
+ } else {
+ int num_channels = num_bytes / 4;
+
+ switch (num_bytes) {
+ case 16: /* v4f32 */
+ data_type = ctx->ac.v4f32;
+ break;
+ case 12: /* v3f32 */
+ data_type = ctx->ac.v3f32;
+ break;
+ case 8: /* v2f32 */
+ data_type = ctx->ac.v2f32;
+ break;
+ case 4: /* f32 */
+ data_type = ctx->ac.f32;
+ break;
+ default:
+ unreachable("Malformed vector store.");
+ }
+ data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, ""); /* reinterpret the range as 32-bit floats of the same total size */
+
+ ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
+ num_channels, offset,
+ ctx->ac.i32_0, 0,
+ cache_policy, false);
+ }
+ }
+}
+
+static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
+ LLVMValueRef descriptor,
+ LLVMValueRef offset,
+ LLVMValueRef compare,
+ LLVMValueRef exchange)
+{ /* Emits a 64-bit compare-and-swap at `offset` inside the buffer described by `descriptor`, using a pointer rebuilt from the descriptor rather than a buffer intrinsic. */
+ LLVMBasicBlockRef start_block = NULL, then_block = NULL;
+ if (ctx->abi->robust_buffer_access) {
+ LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
+ /* Only run the atomic when offset < descriptor element 2 (named `size` here). */
+ LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
+ start_block = LLVMGetInsertBlock(ctx->ac.builder);
+
+ ac_build_ifcc(&ctx->ac, cond, -1);
+
+ then_block = LLVMGetInsertBlock(ctx->ac.builder);
+ }
+ /* Address parts: descriptor element 0, and the low 16 bits of element 1. */
+ LLVMValueRef ptr_parts[2] = {
+ ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
+ LLVMBuildAnd(ctx->ac.builder,
+ ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
+ LLVMConstInt(ctx->ac.i32, 65535, 0), "")
+ };
+ /* Sign-extend those 16 bits back to 32 bits. */
+ ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
+ ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");
+
+ offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
+ /* Combine both parts into one i64, add the offset and turn the sum into an i64 pointer in AC_ADDR_SPACE_GLOBAL. */
+ LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
+ ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
+ ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL), "");
+
+ LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
+ result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, ""); /* keep element 0 of the aggregate returned by the cmpxchg */
+
+ if (ctx->abi->robust_buffer_access) {
+ ac_build_endif(&ctx->ac, -1);
+ /* Merge the two paths: constant 0 when the atomic was skipped, otherwise its result. */
+ LLVMBasicBlockRef incoming_blocks[2] = {
+ start_block,
+ then_block,
+ };
+
+ LLVMValueRef incoming_values[2] = {
+ LLVMConstInt(ctx->ac.i64, 0, 0),
+ result,
+ };
+ LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
+ LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
+ return ret;
+ } else {
+ return result;
+ }
+}
+
+static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
+ const nir_intrinsic_instr *instr)
+{ /* Emits an SSBO atomic: src[0] selects the buffer, src[1] is the offset, src[2] is the data operand, src[3] is only read for comp_swap. The result has the type of src[2]. */
+ LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
+ const char *op;
+ char name[64], type[8];
+ LLVMValueRef params[6], descriptor;
+ int arg_count = 0;
+ /* Map the NIR intrinsic to the operation name used in the LLVM intrinsic name built below. */
+ switch (instr->intrinsic) {
+ case nir_intrinsic_ssbo_atomic_add:
+ op = "add";
+ break;
+ case nir_intrinsic_ssbo_atomic_imin:
+ op = "smin";
+ break;
+ case nir_intrinsic_ssbo_atomic_umin:
+ op = "umin";
+ break;
+ case nir_intrinsic_ssbo_atomic_imax:
+ op = "smax";
+ break;
+ case nir_intrinsic_ssbo_atomic_umax:
+ op = "umax";
+ break;
+ case nir_intrinsic_ssbo_atomic_and:
+ op = "and";
+ break;
+ case nir_intrinsic_ssbo_atomic_or:
+ op = "or";
+ break;
+ case nir_intrinsic_ssbo_atomic_xor:
+ op = "xor";
+ break;
+ case nir_intrinsic_ssbo_atomic_exchange:
+ op = "swap";
+ break;
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ op = "cmpswap";
+ break;
+ default:
+ abort();
+ }
+
+ descriptor = ctx->abi->load_ssbo(ctx->abi,
+ get_src(ctx, instr->src[0]),
+ true);
+ /* A 64-bit comp_swap takes a separate path: src[2] is passed as the compare value and src[3] as the exchange value. */
+ if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap &&
+ return_type == ctx->ac.i64) {
+ return emit_ssbo_comp_swap_64(ctx, descriptor,
+ get_src(ctx, instr->src[1]),
+ get_src(ctx, instr->src[2]),
+ get_src(ctx, instr->src[3]));
+ }
+ if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
+ params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0); /* for cmpswap, src[3] is pushed ahead of src[2] */
+ }
+ params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
+ params[arg_count++] = descriptor;
+ /* Two intrinsic families with different trailing operands; the choice depends on the LLVM version. */
+ if (LLVM_VERSION_MAJOR >= 9) {
+ /* XXX: The new raw/struct atomic intrinsics are buggy with
+ * LLVM 8, see r358579.
+ */
+ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+ params[arg_count++] = ctx->ac.i32_0; /* soffset */
+ params[arg_count++] = ctx->ac.i32_0; /* slc */
+ /* The raw intrinsic name carries a type suffix derived from return_type. */
+ ac_build_type_name_for_intr(return_type, type, sizeof(type));
+ snprintf(name, sizeof(name),
+ "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
+ } else {
+ params[arg_count++] = ctx->ac.i32_0; /* vindex */
+ params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
+ params[arg_count++] = ctx->ac.i1false; /* slc */
+ /* The legacy intrinsic name has no type suffix, so only i32 is accepted here. */
+ assert(return_type == ctx->ac.i32);
+ snprintf(name, sizeof(name),
+ "llvm.amdgcn.buffer.atomic.%s", op);
+ }
+
+ return ac_build_intrinsic(&ctx->ac, name, return_type, params,
+ arg_count, 0);
+}
+
+static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
+ const nir_intrinsic_instr *instr)
+{ /* Loads instr->num_components values from an SSBO (src[0] selects the buffer, src[1] is the offset) in chunks of at most 16 bytes, then gathers them into one result. */
+ int elem_size_bytes = instr->dest.ssa.bit_size / 8;
+ int num_components = instr->num_components;
+ enum gl_access_qualifier access = nir_intrinsic_access(instr);
+ unsigned cache_policy = get_cache_policy(ctx, access, false, false);
+
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
+ get_src(ctx, instr->src[0]), false);
+ LLVMValueRef vindex = ctx->ac.i32_0;
+
+ LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
+ LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
+ /* `i` is advanced at the bottom of the loop by the number of elements loaded in that iteration. */
+ LLVMValueRef results[4];
+ for (int i = 0; i < num_components;) {
+ int num_elems = num_components - i;
+ if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
+ num_elems = 1; /* sub-dword elements whose alignment is not a multiple of 4 are loaded one at a time */
+ if (num_elems * elem_size_bytes > 16)
+ num_elems = 16 / elem_size_bytes; /* cap a single load at 16 bytes */
+ int load_bytes = num_elems * elem_size_bytes;
+ /* Constant offset of element i relative to `offset`. */
+ LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
+
+ LLVMValueRef ret;
+
+ if (load_bytes == 1) {
+ ret = ac_build_tbuffer_load_byte(&ctx->ac,
+ rsrc,
+ offset,
+ ctx->ac.i32_0,
+ immoffset,
+ cache_policy);
+ } else if (load_bytes == 2) {
+ ret = ac_build_tbuffer_load_short(&ctx->ac,
+ rsrc,
+ offset,
+ ctx->ac.i32_0,
+ immoffset,
+ cache_policy);
+ } else {
+ int num_channels = util_next_power_of_two(load_bytes) / 4; /* load size is rounded up to a power of two; the surplus is trimmed below */
+ bool can_speculate = access & ACCESS_CAN_REORDER;
+
+ ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
+ vindex, offset, immoffset, 0,
+ cache_policy, can_speculate, false);
+ }
+ /* View the loaded value as bytes and keep only the load_bytes that were asked for. */
+ LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
+ ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
+ /* Reinterpret those bytes as num_elems elements of the destination element type. */
+ LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
+ ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
+
+ for (unsigned j = 0; j < num_elems; j++) {
+ results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
+ }
+ i += num_elems;
+ }
+
+ return ac_build_gather_values(&ctx->ac, results, num_components);
+}
+
+static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
+ const nir_intrinsic_instr *instr)
+{ /* Loads instr->num_components values from a UBO (src[0] = buffer, src[1] = offset) and returns them bitcast to the destination type. */
+ LLVMValueRef ret;
+ LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ int num_components = instr->num_components;
+ /* load_ubo is an optional ABI hook; without it src[0] is used as the resource directly. */
+ if (ctx->abi->load_ubo)
+ rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
+ /* A 64-bit component is fetched as two components of the buffer load; the final bitcast restores the 64-bit type. */
+ if (instr->dest.ssa.bit_size == 64)
+ num_components *= 2;
+ /* 8- and 16-bit values are loaded one component at a time with the byte/short helpers. */
+ if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
+ unsigned load_bytes = instr->dest.ssa.bit_size / 8;
+ LLVMValueRef results[num_components];
+ for (unsigned i = 0; i < num_components; ++i) {
+ LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32,
+ load_bytes * i, 0);
+
+ if (load_bytes == 1) {
+ results[i] = ac_build_tbuffer_load_byte(&ctx->ac,
+ rsrc,
+ offset,
+ ctx->ac.i32_0,
+ immoffset,
+ 0);
+ } else {
+ assert(load_bytes == 2);
+ results[i] = ac_build_tbuffer_load_short(&ctx->ac,
+ rsrc,
+ offset,
+ ctx->ac.i32_0,
+ immoffset,
+ 0);
+ }
+ }
+ ret = ac_build_gather_values(&ctx->ac, results, num_components);
+ } else {
+ ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
+ NULL, 0, 0, true, true);
+ /* Trim the result down to exactly num_components components. */
+ ret = ac_trim_vector(&ctx->ac, ret, num_components);
+ }
+
+ return LLVMBuildBitCast(ctx->ac.builder, ret,
+ get_def_type(ctx, &instr->dest.ssa), "");
+}
+
+static void
+get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr,
+ bool vs_in, unsigned *vertex_index_out,
+ LLVMValueRef *vertex_index_ref,
+ unsigned *const_out, LLVMValueRef *indir_out)
+{ /* Walks the deref chain of `instr` and reports its offset in attribute slots: the constant part in *const_out and, when any array index is dynamic, an LLVM value in *indir_out (NULL otherwise). */
+ nir_variable *var = nir_deref_instr_get_variable(instr);
+ nir_deref_path path;
+ unsigned idx_lvl = 1;
+ /* idx_lvl starts at 1: path.path[idx_lvl - 1] is used below as the parent of each visited level. */
+ nir_deref_path_init(&path, instr, NULL);
+ /* When a vertex index is requested, the first array level is consumed as that index (as a value via vertex_index_ref, or as a constant via vertex_index_out). */
+ if (vertex_index_out != NULL || vertex_index_ref != NULL) {
+ if (vertex_index_ref) {
+ *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
+ if (vertex_index_out)
+ *vertex_index_out = 0;
+ } else {
+ *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
+ }
+ ++idx_lvl;
+ }
+
+ uint32_t const_offset = 0;
+ LLVMValueRef offset = NULL;
+ /* Compact variables: the array index of `instr` itself is the whole (constant) offset. */
+ if (var->data.compact) {
+ assert(instr->deref_type == nir_deref_type_array);
+ const_offset = nir_src_as_uint(instr->arr.index);
+ goto out;
+ }
+
+ for (; path.path[idx_lvl]; ++idx_lvl) {
+ const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
+ if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
+ unsigned index = path.path[idx_lvl]->strct.index;
+ /* Struct member: skip the slots of all preceding fields. */
+ for (unsigned i = 0; i < index; i++) {
+ const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
+ const_offset += glsl_count_attribute_slots(ft, vs_in);
+ }
+ } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
+ unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in); /* slots per array element */
+ if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
+ const_offset += size *
+ nir_src_as_uint(path.path[idx_lvl]->arr.index);
+ } else {
+ LLVMValueRef array_off = LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0),
+ get_src(ctx, path.path[idx_lvl]->arr.index), "");
+ if (offset)
+ offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, "");
+ else
+ offset = array_off;
+ }
+ } else
+ unreachable("Uhandled deref type in get_deref_instr_offset");
+ }
+
+out:
+ nir_deref_path_finish(&path);
+ /* When both parts exist, the constant part is also folded into the dynamic value; *const_out still receives it separately. */
+ if (const_offset && offset)
+ offset = LLVMBuildAdd(ctx->ac.builder, offset,
+ LLVMConstInt(ctx->ac.i32, const_offset, 0),
+ "");
+
+ *const_out = const_offset;
+ *indir_out = offset;
+}
+
+static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr,
+ bool load_inputs)
+{ /* Loads a tessellation varying through the ABI load_tess_varyings hook; `load_inputs` is forwarded unchanged as the hook's last argument. */
+ LLVMValueRef result;
+ LLVMValueRef vertex_index = NULL;
+ LLVMValueRef indir_index = NULL;
+ unsigned const_index = 0;
+ /* src[0] is the deref being loaded; its variable supplies location, patch and compact information. */
+ nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+
+ unsigned location = var->data.location;
+ unsigned driver_location = var->data.driver_location;
+ const bool is_patch = var->data.patch;
+ const bool is_compact = var->data.compact;
+ /* Patch variables request no vertex index, so vertex_index stays NULL for them. */
+ get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
+ false, NULL, is_patch ? NULL : &vertex_index,
+ &const_index, &indir_index);
+
+ LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
+ /* The hook is given the scalar component type of the destination. */
+ LLVMTypeRef src_component_type;
+ if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
+ src_component_type = LLVMGetElementType(dest_type);
+ else
+ src_component_type = dest_type;
+
+ result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type,
+ vertex_index, indir_index,
+ const_index, location, driver_location,
+ var->data.location_frac,
+ instr->num_components,
+ is_patch, is_compact, load_inputs);
+ if (instr->dest.ssa.bit_size == 16) { /* 16-bit destinations: truncate the integer form of the result to the destination type */
+ result = ac_to_integer(&ctx->ac, result);
+ result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
+ }
+ return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
+}
+
+static unsigned
+type_scalar_size_bytes(const struct glsl_type *type)
+{ /* Size in bytes of one scalar of a scalar, vector or matrix type; booleans count as 4 bytes. */
+	assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));
+	const unsigned scalar_bits = glsl_type_is_boolean(type) ? 32 : glsl_get_bit_size(type);
+	return scalar_bits / 8;
+}
+
+static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr)
+{ /* Loads the value behind the deref in src[0]. The handling is selected by variable mode: var->data.mode when the deref resolves to a variable, deref->mode otherwise. */
+ nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ /* values[] collects the individual channels that are gathered into the result at the end. */
+ LLVMValueRef values[8];
+ int idx = 0;
+ int ve = instr->dest.ssa.num_components;
+ unsigned comp = 0;
+ LLVMValueRef indir_index;
+ LLVMValueRef ret;
+ unsigned const_index;
+ unsigned stride = 4;
+ int mode = deref->mode;
+ /* NOTE(review): indir_index and const_index are only assigned when var is non-NULL; the in/out/temp cases below read them, so they presumably always have a variable - confirm. */
+ if (var) {
+ bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
+ var->data.mode == nir_var_shader_in;
+ idx = var->data.driver_location;
+ comp = var->data.location_frac;
+ mode = var->data.mode;
+
+ get_deref_offset(ctx, deref, vs_in, NULL, NULL,
+ &const_index, &indir_index);
+ /* Compact variables: stride 1, and the component offset is folded into const_index. */
+ if (var->data.compact) {
+ stride = 1;
+ const_index += comp;
+ comp = 0;
+ }
+ }
+ /* 64-bit inputs, outputs and temporaries occupy two channels per component. */
+ if (instr->dest.ssa.bit_size == 64 &&
+ (deref->mode == nir_var_shader_in ||
+ deref->mode == nir_var_shader_out ||
+ deref->mode == nir_var_function_temp))
+ ve *= 2;
+
+ switch (mode) {
+ case nir_var_shader_in:
+ if (ctx->stage == MESA_SHADER_TESS_CTRL ||
+ ctx->stage == MESA_SHADER_TESS_EVAL) {
+ return load_tess_varyings(ctx, instr, true);
+ }
+ /* Geometry shader inputs go through the ABI load_inputs hook with a constant vertex index. */
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMValueRef indir_index; /* these two declarations shadow the outer indir_index / const_index */
+ unsigned const_index, vertex_index;
+ get_deref_offset(ctx, deref, false, &vertex_index, NULL,
+ &const_index, &indir_index);
+ assert(indir_index == NULL);
+
+ return ctx->abi->load_inputs(ctx->abi, var->data.location,
+ var->data.driver_location,
+ var->data.location_frac,
+ instr->num_components, vertex_index, const_index, type);
+ }
+ /* Other stages: read each channel from ctx->abi->inputs. */
+ for (unsigned chan = comp; chan < ve + comp; chan++) {
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(
+ var->type,
+ ctx->stage == MESA_SHADER_VERTEX);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->abi->inputs + idx + chan, count,
+ stride, false, true);
+ /* Dynamic index: gather this channel from `count` slots and pick the element at indir_index. */
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
+ tmp_vec,
+ indir_index, "");
+ } else
+ values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
+ }
+ break;
+ case nir_var_function_temp:
+ for (unsigned chan = 0; chan < ve; chan++) {
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(
+ var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->locals + idx + chan, count,
+ stride, true, true);
+ /* Same gather-then-extract scheme as for inputs, applied to ctx->locals. */
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
+ tmp_vec,
+ indir_index, "");
+ } else {
+ values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
+ }
+ }
+ break;
+ case nir_var_mem_shared: { /* shared memory: src[0] is used directly as the address to load from */
+ LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+ return LLVMBuildBitCast(ctx->ac.builder, val,
+ get_def_type(ctx, &instr->dest.ssa),
+ "");
+ }
+ case nir_var_shader_out:
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ return load_tess_varyings(ctx, instr, false);
+ }
+ /* Fragment outputs flagged fb_fetch_output are read through the optional emit_fbfetch hook. */
+ if (ctx->stage == MESA_SHADER_FRAGMENT &&
+ var->data.fb_fetch_output &&
+ ctx->abi->emit_fbfetch)
+ return ctx->abi->emit_fbfetch(ctx->abi);
+ /* Otherwise read the output channels back from ctx->abi->outputs. */
+ for (unsigned chan = comp; chan < ve + comp; chan++) {
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(
+ var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->abi->outputs + idx + chan, count,
+ stride, true, true);
+
+ values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
+ tmp_vec,
+ indir_index, "");
+ } else {
+ values[chan] = LLVMBuildLoad(ctx->ac.builder,
+ ctx->abi->outputs[idx + chan + const_index * stride],
+ "");
+ }
+ }
+ break;
+ case nir_var_mem_global: {
+ LLVMValueRef address = get_src(ctx, instr->src[0]);
+ unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
+ unsigned natural_stride = type_scalar_size_bytes(deref->type);
+ unsigned stride = explicit_stride ? explicit_stride : natural_stride; /* shadows the outer `stride` */
+ /* Components are loaded one by one only when the explicit stride differs from the scalar size; otherwise a single load is used. */
+ LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
+ if (stride != natural_stride) {
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(result_type),
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
+ /* The index is in units of the scalar type, hence the division by natural_stride. */
+ for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
+ values[i] = LLVMBuildLoad(ctx->ac.builder,
+ ac_build_gep_ptr(&ctx->ac, address, offset), "");
+ }
+ return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
+ } else {
+ LLVMTypeRef ptr_type = LLVMPointerType(result_type,
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
+ LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+ return val;
+ }
+ }
+ default:
+ unreachable("unhandle variable mode");
+ }
+ ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp); /* only the in/out/temp cases reach this point; the others return early */
+ return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
+}
+
+static void
+visit_store_var(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr)
+{ /* Stores src[1] through the deref in src[0], honouring the write mask in const_index[0]. The handling is selected by deref->mode. */
+ nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+
+ LLVMValueRef temp_ptr, value;
+ int idx = 0;
+ unsigned comp = 0;
+ LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
+ int writemask = instr->const_index[0];
+ LLVMValueRef indir_index;
+ unsigned const_index;
+ /* NOTE(review): indir_index and const_index are only assigned when var is non-NULL; the output/temp cases read them, so they presumably always have a variable - confirm. */
+ if (var) {
+ get_deref_offset(ctx, deref, false,
+ NULL, NULL, &const_index, &indir_index);
+ idx = var->data.driver_location;
+ comp = var->data.location_frac;
+ /* Compact variables: the component offset is folded into const_index. */
+ if (var->data.compact) {
+ const_index += comp;
+ comp = 0;
+ }
+ }
+ /* 64-bit outputs and temporaries: view the data as twice as many 32-bit floats and widen the write mask to match. */
+ if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&
+ (deref->mode == nir_var_shader_out ||
+ deref->mode == nir_var_function_temp)) {
+
+ src = LLVMBuildBitCast(ctx->ac.builder, src,
+ LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
+ "");
+
+ writemask = widen_mask(writemask, 2);
+ }
+ /* From here on the mask is expressed in channels of the slot, i.e. shifted by the first component. */
+ writemask = writemask << comp;
+
+ switch (deref->mode) {
+ case nir_var_shader_out:
+ /* Tessellation control outputs are handed to the ABI store_tcs_outputs hook. */
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ LLVMValueRef vertex_index = NULL;
+ LLVMValueRef indir_index = NULL; /* this and const_index shadow the outer locals */
+ unsigned const_index = 0;
+ const bool is_patch = var->data.patch;
+
+ get_deref_offset(ctx, deref, false, NULL,
+ is_patch ? NULL : &vertex_index,
+ &const_index, &indir_index);
+
+ ctx->abi->store_tcs_outputs(ctx->abi, var,
+ vertex_index, indir_index,
+ const_index, src, writemask);
+ return;
+ }
+ /* Other stages: store every enabled channel into ctx->abi->outputs. */
+ for (unsigned chan = 0; chan < 8; chan++) {
+ int stride = 4;
+ if (!(writemask & (1 << chan)))
+ continue;
+ /* The mask was shifted by comp above, so the source element is chan - comp. */
+ value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
+
+ if (var->data.compact)
+ stride = 1;
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(
+ var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->abi->outputs + idx + chan, count,
+ stride, true, true);
+ /* Dynamic index: gather this channel from `count` slots, replace the element at indir_index, write all of them back. */
+ tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
+ value, indir_index, "");
+ build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan,
+ count, stride, tmp_vec);
+
+ } else {
+ temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];
+
+ LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
+ }
+ }
+ break;
+ case nir_var_function_temp:
+ for (unsigned chan = 0; chan < 8; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+
+ value = ac_llvm_extract_elem(&ctx->ac, src, chan);
+ if (indir_index) {
+ unsigned count = glsl_count_attribute_slots(
+ var->type, false);
+ count -= chan / 4;
+ LLVMValueRef tmp_vec = ac_build_gather_values_extended(
+ &ctx->ac, ctx->locals + idx + chan, count,
+ 4, true, true);
+ /* Same gather / insert / write-back scheme as for outputs, with a fixed stride of 4 on ctx->locals. */
+ tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
+ value, indir_index, "");
+ build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
+ count, 4, tmp_vec);
+ } else {
+ temp_ptr = ctx->locals[idx + chan + const_index * 4];
+
+ LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
+ }
+ }
+ break;
+
+ case nir_var_mem_global:
+ case nir_var_mem_shared: {
+ int writemask = instr->const_index[0]; /* shadows the outer mask: the raw, unshifted mask is used here */
+ LLVMValueRef address = get_src(ctx, instr->src[0]);
+ LLVMValueRef val = get_src(ctx, instr->src[1]); /* raw source value, not the ac_to_float() version in `src` */
+
+ unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
+ unsigned natural_stride = type_scalar_size_bytes(deref->type);
+ unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
+ /* Every component written and no stride padding: one store of the whole value. */
+ if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
+ stride == natural_stride) {
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , ""); /* repeats the cast made just before the `if` */
+
+ val = LLVMBuildBitCast(ctx->ac.builder, val,
+ LLVMGetElementType(LLVMTypeOf(address)), "");
+ LLVMBuildStore(ctx->ac.builder, val, address);
+ } else { /* partial mask or padded stride: store the enabled components one by one */
+ LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)),
+ LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+ address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
+ for (unsigned chan = 0; chan < 4; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+ /* The index is in units of the scalar type, hence the division by natural_stride. */
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0);
+
+ LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset);
+ LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
+ chan);
+ src = LLVMBuildBitCast(ctx->ac.builder, src,
+ LLVMGetElementType(LLVMTypeOf(ptr)), "");
+ LLVMBuildStore(ctx->ac.builder, src, ptr);
+ }
+ }
+ break;
+ }
+ default:
+ abort();
+ break;
+ }
+}
+
+static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
+{ /* Number of address components used for an image of dimensionality `dim`; 0 for a dimension not listed here. */
+	const int layer = array ? 1 : 0; /* 1D, 2D and MS images take one more component when arrayed */
+
+	switch (dim) {
+	case GLSL_SAMPLER_DIM_BUF:
+		return 1;
+	case GLSL_SAMPLER_DIM_1D:
+		return 1 + layer;
+	case GLSL_SAMPLER_DIM_2D:
+		return 2 + layer;
+	case GLSL_SAMPLER_DIM_MS:
+		return 3 + layer;
+	case GLSL_SAMPLER_DIM_RECT:
+	case GLSL_SAMPLER_DIM_SUBPASS:
+		return 2;
+	case GLSL_SAMPLER_DIM_3D:
+	case GLSL_SAMPLER_DIM_CUBE:
+	case GLSL_SAMPLER_DIM_SUBPASS_MS:
+		return 3;
+	default:
+		return 0;
+	}
+}
+
+static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
+						     LLVMValueRef coord_x, LLVMValueRef coord_y,
+						     LLVMValueRef coord_z,
+						     LLVMValueRef sample_index,
+						     LLVMValueRef fmask_desc_ptr)
+{ /* Runs ac_apply_fmask_to_sample() on {x, y, [z,] sample} and returns the sample slot afterwards; a NULL coord_z means there is no third coordinate. */
+	const bool has_z = coord_z != NULL;
+	const unsigned sample_slot = has_z ? 3 : 2; /* the sample index sits right after the last coordinate */
+	LLVMValueRef addr[4] = { coord_x, coord_y, has_z ? coord_z : sample_index, has_z ? sample_index : NULL };
+
+	ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, has_z);
+	return addr[sample_slot];
+}
+
+static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
+{ /* Returns the instruction that produced src[0], converted with nir_instr_as_deref(); only meaningful when that producer really is a deref. */
+ assert(instr->src[0].is_ssa); /* the parent instruction is reached through the SSA definition */
+ return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+}
+
+static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
+					  const nir_intrinsic_instr *instr,
+					  enum ac_descriptor_type desc_type,
+					  bool write)
+{ /* Fetches the descriptor of kind `desc_type` for the image operand in src[0] via get_sampler_desc(). */
+	nir_instr *producer = instr->src[0].ssa->parent_instr;
+	nir_deref_instr *image_deref = NULL; /* stays NULL when src[0] is not produced by a deref instruction */
+	if (producer->type == nir_instr_type_deref)
+		image_deref = nir_instr_as_deref(producer);
+	return get_sampler_desc(ctx, image_deref, desc_type, &instr->instr, true, write);
+}
+
+static void get_image_coords(struct ac_nir_context *ctx,
+			     const nir_intrinsic_instr *instr,
+			     struct ac_image_args *args,
+			     enum glsl_sampler_dim dim,
+			     bool is_array)
+{ /* Fills args->coords[] for an image intrinsic from the coordinate vector in src[1] and the sample index in src[2]; args->resource must already be set (it is read for GFX9 2D images). */
+	LLVMValueRef src0 = get_src(ctx, instr->src[1]);
+	LLVMValueRef masks[] = {
+		LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
+		LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
+	};
+	LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
+
+	int count;
+	ASSERTED bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
+				      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
+	bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
+		      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
+	bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
+	assert(!add_frag_pos && "Input attachments should be lowered by this point.");
+	count = image_type_to_components_count(dim, is_array);
+	/* Multisampled loads: remap the requested sample index through the FMASK before it is used as a coordinate. */
+	if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
+		      instr->intrinsic == nir_intrinsic_bindless_image_load)) {
+		LLVMValueRef fmask_load_address[3];
+
+		fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
+		fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
+		if (is_array)
+			fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
+		else
+			fmask_load_address[2] = NULL;
+		/* Go through get_image_descriptor(): for a bindless load src[0] need not come from a deref, so nir_instr_as_deref() must not be applied to it unconditionally. */
+		sample_index = adjust_sample_index_using_fmask(&ctx->ac,
+							       fmask_load_address[0],
+							       fmask_load_address[1],
+							       fmask_load_address[2],
+							       sample_index,
+							       get_image_descriptor(ctx, instr,
+										    AC_DESC_FMASK, false));
+	}
+	if (count == 1 && !gfx9_1d) {
+		if (instr->src[1].ssa->num_components)
+			args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
+		else
+			args->coords[0] = src0;
+	} else {
+		int chan;
+		if (is_ms)
+			count--; /* the sample index is appended separately at the end */
+		for (chan = 0; chan < count; ++chan) {
+			args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
+		}
+		/* GFX9 1D images get a zero inserted as coordinate 1; for arrays the layer moves to coordinate 2. */
+		if (gfx9_1d) {
+			if (is_array) {
+				args->coords[2] = args->coords[1];
+				args->coords[1] = ctx->ac.i32_0;
+			} else
+				args->coords[1] = ctx->ac.i32_0;
+			count++;
+		}
+		if (ctx->ac.chip_class == GFX9 &&
+		    dim == GLSL_SAMPLER_DIM_2D &&
+		    !is_array) {
+			/* The hw can't bind a slice of a 3D image as a 2D
+			 * image, because it ignores BASE_ARRAY if the target
+			 * is 3D. The workaround is to read BASE_ARRAY and set
+			 * it as the 3rd address operand for all 2D images.
+			 */
+			LLVMValueRef first_layer, const5, mask;
+
+			const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
+			mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
+			first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
+			first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");
+
+			args->coords[count] = first_layer;
+			count++;
+		}
+
+		/* The (possibly FMASK-adjusted) sample index is always the last coordinate. */
+		if (is_ms) {
+			args->coords[count] = sample_index;
+			count++;
+		}
+	}
+}
+
+/**
+ * Fetch the AC_DESC_BUFFER descriptor for an image intrinsic.
+ *
+ * For atomic accesses on GFX9 when building against LLVM older than 9, the
+ * descriptor is patched: element 2 is replaced by the unsigned maximum of
+ * itself and the upper 16 bits of element 1 (named "stride" here).
+ *
+ * \param ctx     translation context
+ * \param instr   image intrinsic whose descriptor is wanted
+ * \param write   forwarded to get_image_descriptor()
+ * \param atomic  true when the caller emits an atomic operation
+ * \return the (possibly patched) descriptor value
+ */
+static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
+						const nir_intrinsic_instr *instr,
+						bool write, bool atomic)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef desc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
+
+	/* Only the GFX9 + old-LLVM + atomic combination needs patching. */
+	if (!atomic || ctx->ac.chip_class != GFX9 || LLVM_VERSION_MAJOR >= 9)
+		return desc;
+
+	LLVMValueRef idx1 = LLVMConstInt(ctx->ac.i32, 1, 0);
+	LLVMValueRef idx2 = LLVMConstInt(ctx->ac.i32, 2, 0);
+	LLVMValueRef sixteen = LLVMConstInt(ctx->ac.i32, 16, 0);
+
+	LLVMValueRef num_elems = LLVMBuildExtractElement(builder, desc, idx2, "");
+	LLVMValueRef stride = LLVMBuildExtractElement(builder, desc, idx1, "");
+	stride = LLVMBuildLShr(builder, stride, sixteen, "");
+
+	/* max(num_elems, stride), unsigned */
+	LLVMValueRef elems_larger = LLVMBuildICmp(builder, LLVMIntUGT, num_elems, stride, "");
+	LLVMValueRef patched = LLVMBuildSelect(builder, elems_larger, num_elems, stride, "");
+
+	return LLVMBuildInsertElement(builder, desc, patched, idx2, "");
+}
+
+/**
+ * Translate an image load intrinsic (deref or bindless flavour).
+ *
+ * Buffer images go through ac_build_buffer_load_format(); every other
+ * dimensionality is emitted as an ac_image_load opcode.
+ *
+ * \param ctx       translation context
+ * \param instr     the image load intrinsic
+ * \param bindless  true if dim/access/array come from the intrinsic's
+ *                  indices, false if they come from the image variable
+ * \return the loaded value
+ */
+static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
+				     const nir_intrinsic_instr *instr,
+				     bool bindless)
+{
+	enum glsl_sampler_dim dim;
+	enum gl_access_qualifier access;
+	bool is_array;
+
+	if (!bindless) {
+		const nir_deref_instr *deref = get_image_deref(instr);
+		const nir_variable *image_var = nir_deref_instr_get_variable(deref);
+
+		dim = glsl_get_sampler_dim(deref->type);
+		access = image_var->data.image.access;
+		is_array = glsl_sampler_type_is_array(deref->type);
+	} else {
+		dim = nir_intrinsic_image_dim(instr);
+		access = nir_intrinsic_access(instr);
+		is_array = nir_intrinsic_image_array(instr);
+	}
+
+	struct ac_image_args args = {};
+	args.cache_policy = get_cache_policy(ctx, access, false, false);
+
+	if (dim != GLSL_SAMPLER_DIM_BUF) {
+		args.opcode = ac_image_load;
+		args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
+		get_image_coords(ctx, instr, &args, dim, is_array);
+		args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+		args.dmask = 15;
+		args.attributes = AC_FUNC_ATTR_READONLY;
+
+		return ac_build_image_opcode(&ctx->ac, &args);
+	}
+
+	/* Buffer image: only fetch up to the last channel that is actually read. */
+	const unsigned read_mask = nir_ssa_def_components_read(&instr->dest.ssa);
+	const unsigned num_channels = util_last_bit(read_mask);
+	const bool can_speculate = access & ACCESS_CAN_REORDER;
+
+	LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
+	LLVMValueRef coord = get_src(ctx, instr->src[1]);
+	LLVMValueRef vindex = LLVMBuildExtractElement(ctx->ac.builder, coord,
+						      ctx->ac.i32_0, "");
+
+	LLVMValueRef texel = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
+							 ctx->ac.i32_0, num_channels,
+							 args.cache_policy,
+							 can_speculate);
+	texel = ac_build_expand_to_vec4(&ctx->ac, texel, num_channels);
+	texel = ac_trim_vector(&ctx->ac, texel, instr->dest.ssa.num_components);
+
+	return ac_to_integer(&ctx->ac, texel);
+}
+
+/**
+ * Translate an image store intrinsic (deref or bindless flavour).
+ *
+ * Buffer images are written with ac_build_buffer_store_format(); every
+ * other dimensionality is emitted as an ac_image_store opcode. The value to
+ * store is taken from source 3 of the intrinsic.
+ *
+ * \param ctx       translation context
+ * \param instr     the image store intrinsic
+ * \param bindless  true if dim/access/array come from the intrinsic's
+ *                  indices, false if they come from the image variable
+ */
+static void visit_image_store(struct ac_nir_context *ctx,
+			      nir_intrinsic_instr *instr,
+			      bool bindless)
+{
+	enum glsl_sampler_dim dim;
+	enum gl_access_qualifier access;
+	bool is_array;
+
+	if (!bindless) {
+		const nir_deref_instr *deref = get_image_deref(instr);
+		const nir_variable *image_var = nir_deref_instr_get_variable(deref);
+
+		dim = glsl_get_sampler_dim(deref->type);
+		access = image_var->data.image.access;
+		is_array = glsl_sampler_type_is_array(deref->type);
+	} else {
+		dim = nir_intrinsic_image_dim(instr);
+		access = nir_intrinsic_access(instr);
+		is_array = nir_intrinsic_image_array(instr);
+	}
+
+	const bool writeonly_memory = access & ACCESS_NON_READABLE;
+	struct ac_image_args args = {};
+
+	args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
+
+	if (dim != GLSL_SAMPLER_DIM_BUF) {
+		args.opcode = ac_image_store;
+		args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
+		args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
+		get_image_coords(ctx, instr, &args, dim, is_array);
+		args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+		args.dmask = 15;
+
+		ac_build_image_opcode(&ctx->ac, &args);
+		return;
+	}
+
+	LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
+	LLVMValueRef value = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
+	const unsigned num_channels = ac_get_llvm_num_components(value);
+
+	/* A 3-component value is widened to 4; the channel count passed to the
+	 * store below stays at the original count.
+	 */
+	if (num_channels == 3)
+		value = ac_build_expand_to_vec4(&ctx->ac, value, 3);
+
+	LLVMValueRef coord = get_src(ctx, instr->src[1]);
+	LLVMValueRef vindex = LLVMBuildExtractElement(ctx->ac.builder, coord,
+						      ctx->ac.i32_0, "");
+
+	ac_build_buffer_store_format(&ctx->ac, rsrc, value, vindex,
+				     ctx->ac.i32_0, num_channels,
+				     args.cache_policy);
+}
+
+/**
+ * Translate an image atomic intrinsic (deref or bindless flavour).
+ *
+ * Buffer images are lowered to an "llvm.amdgcn.*buffer.atomic.*" intrinsic
+ * call; all other dimensionalities are emitted through
+ * ac_build_image_opcode() as ac_image_atomic / ac_image_atomic_cmpswap.
+ *
+ * Source 3 of the intrinsic is the data operand and, for compare-and-swap,
+ * source 4 is passed ahead of it in the parameter list.
+ *
+ * \param ctx       translation context
+ * \param instr     the image atomic intrinsic
+ * \param bindless  true if dim/array come from the intrinsic's indices,
+ *                  false if they come from the image deref's type
+ * \return the value produced by the emitted atomic
+ */
+static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
+				       const nir_intrinsic_instr *instr,
+				       bool bindless)
+{
+	LLVMValueRef params[7];
+	int param_count = 0;
+
+	bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
+		       instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
+	const char *atomic_name;
+	char intrinsic_name[64];
+	enum ac_atomic_op atomic_subop;
+	ASSERTED int length;
+	/* Adjusted data operand, if the opcode needs one. It is kept local on
+	 * purpose: writing it back into ctx->ssa_defs would change the value
+	 * seen by every other user of the same SSA def.
+	 */
+	LLVMValueRef data = NULL;
+
+	enum glsl_sampler_dim dim;
+	bool is_array;
+	if (bindless) {
+		if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_imin ||
+		    instr->intrinsic == nir_intrinsic_bindless_image_atomic_umin ||
+		    instr->intrinsic == nir_intrinsic_bindless_image_atomic_imax ||
+		    instr->intrinsic == nir_intrinsic_bindless_image_atomic_umax) {
+			/* Only read by the assert below. */
+			ASSERTED const GLenum format = nir_intrinsic_format(instr);
+			assert(format == GL_R32UI || format == GL_R32I);
+		}
+		dim = nir_intrinsic_image_dim(instr);
+		is_array = nir_intrinsic_image_array(instr);
+	} else {
+		const struct glsl_type *type = get_image_deref(instr)->type;
+		dim = glsl_get_sampler_dim(type);
+		is_array = glsl_sampler_type_is_array(type);
+	}
+
+	switch (instr->intrinsic) {
+	case nir_intrinsic_bindless_image_atomic_add:
+	case nir_intrinsic_image_deref_atomic_add:
+		atomic_name = "add";
+		atomic_subop = ac_atomic_add;
+		break;
+	case nir_intrinsic_bindless_image_atomic_imin:
+	case nir_intrinsic_image_deref_atomic_imin:
+		atomic_name = "smin";
+		atomic_subop = ac_atomic_smin;
+		break;
+	case nir_intrinsic_bindless_image_atomic_umin:
+	case nir_intrinsic_image_deref_atomic_umin:
+		atomic_name = "umin";
+		atomic_subop = ac_atomic_umin;
+		break;
+	case nir_intrinsic_bindless_image_atomic_imax:
+	case nir_intrinsic_image_deref_atomic_imax:
+		atomic_name = "smax";
+		atomic_subop = ac_atomic_smax;
+		break;
+	case nir_intrinsic_bindless_image_atomic_umax:
+	case nir_intrinsic_image_deref_atomic_umax:
+		atomic_name = "umax";
+		atomic_subop = ac_atomic_umax;
+		break;
+	case nir_intrinsic_bindless_image_atomic_and:
+	case nir_intrinsic_image_deref_atomic_and:
+		atomic_name = "and";
+		atomic_subop = ac_atomic_and;
+		break;
+	case nir_intrinsic_bindless_image_atomic_or:
+	case nir_intrinsic_image_deref_atomic_or:
+		atomic_name = "or";
+		atomic_subop = ac_atomic_or;
+		break;
+	case nir_intrinsic_bindless_image_atomic_xor:
+	case nir_intrinsic_image_deref_atomic_xor:
+		atomic_name = "xor";
+		atomic_subop = ac_atomic_xor;
+		break;
+	case nir_intrinsic_bindless_image_atomic_exchange:
+	case nir_intrinsic_image_deref_atomic_exchange:
+		atomic_name = "swap";
+		atomic_subop = ac_atomic_swap;
+		break;
+	case nir_intrinsic_bindless_image_atomic_comp_swap:
+	case nir_intrinsic_image_deref_atomic_comp_swap:
+		atomic_name = "cmpswap";
+		atomic_subop = 0; /* not used */
+		break;
+	case nir_intrinsic_bindless_image_atomic_inc_wrap:
+	case nir_intrinsic_image_deref_atomic_inc_wrap: {
+		atomic_name = "inc";
+		atomic_subop = ac_atomic_inc_wrap;
+		/* ATOMIC_INC instruction does:
+		 *      value = (value + 1) % (data + 1)
+		 * but we want:
+		 *      value = (value + 1) % data
+		 * So replace 'data' by 'data - 1' for this instruction only.
+		 */
+		data = LLVMBuildSub(ctx->ac.builder,
+				    ctx->ssa_defs[instr->src[3].ssa->index],
+				    ctx->ac.i32_1, "");
+		break;
+	}
+	case nir_intrinsic_bindless_image_atomic_dec_wrap:
+	case nir_intrinsic_image_deref_atomic_dec_wrap:
+		atomic_name = "dec";
+		atomic_subop = ac_atomic_dec_wrap;
+		break;
+	default:
+		abort();
+	}
+
+	if (cmpswap)
+		params[param_count++] = get_src(ctx, instr->src[4]);
+	/* Use the adjusted operand when one was computed above. */
+	params[param_count++] = data ? data : get_src(ctx, instr->src[3]);
+
+	if (dim == GLSL_SAMPLER_DIM_BUF) {
+		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
+		params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
+								ctx->ac.i32_0, ""); /* vindex */
+		params[param_count++] = ctx->ac.i32_0; /* voffset */
+		if (LLVM_VERSION_MAJOR >= 9) {
+			/* XXX: The new raw/struct atomic intrinsics are buggy
+			 * with LLVM 8, see r358579.
+			 */
+			params[param_count++] = ctx->ac.i32_0; /* soffset */
+			params[param_count++] = ctx->ac.i32_0; /* slc */
+
+			length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+					  "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name);
+		} else {
+			params[param_count++] = ctx->ac.i1false; /* slc */
+
+			length = snprintf(intrinsic_name, sizeof(intrinsic_name),
+					  "llvm.amdgcn.buffer.atomic.%s", atomic_name);
+		}
+
+		/* The formatted name must have fit without truncation. */
+		assert(length < sizeof(intrinsic_name));
+		return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
+					  params, param_count, 0);
+	} else {
+		struct ac_image_args args = {};
+		args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
+		args.atomic = atomic_subop;
+		args.data[0] = params[0];
+		if (cmpswap)
+			args.data[1] = params[1];
+		args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
+		get_image_coords(ctx, instr, &args, dim, is_array);
+		args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+
+		return ac_build_image_opcode(&ctx->ac, &args);
+	}
+}
+
+/**
+ * Translate an image "samples" query: fetch the image descriptor and hand
+ * it to ac_build_image_get_sample_count().
+ */
+static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
+					const nir_intrinsic_instr *instr)
+{
+	LLVMValueRef image_desc;
+
+	image_desc = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
+	return ac_build_image_get_sample_count(&ctx->ac, image_desc);
+}
+
+/**
+ * Translate an image size query (deref or bindless flavour).
+ *
+ * Buffer images return get_buffer_size() of their buffer descriptor. Other
+ * images issue an ac_image_get_resinfo opcode with LOD 0, after which:
+ *  - for cube arrays, component 2 is divided by 6;
+ *  - on GFX9, for 1D arrays, component 2 is copied into component 1.
+ *
+ * \param ctx       translation context
+ * \param instr     the image size intrinsic
+ * \param bindless  true if dim/array come from the intrinsic's indices,
+ *                  false if they come from the image deref's type
+ * \return the size value
+ */
+static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
+				     const nir_intrinsic_instr *instr,
+				     bool bindless)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	enum glsl_sampler_dim dim;
+	bool is_array;
+
+	if (!bindless) {
+		const struct glsl_type *image_type = get_image_deref(instr)->type;
+
+		dim = glsl_get_sampler_dim(image_type);
+		is_array = glsl_sampler_type_is_array(image_type);
+	} else {
+		dim = nir_intrinsic_image_dim(instr);
+		is_array = nir_intrinsic_image_array(instr);
+	}
+
+	if (dim == GLSL_SAMPLER_DIM_BUF) {
+		LLVMValueRef buf_desc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, false);
+
+		return get_buffer_size(ctx, buf_desc, true);
+	}
+
+	struct ac_image_args args = { 0 };
+
+	args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
+	args.dmask = 0xf;
+	args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
+	args.opcode = ac_image_get_resinfo;
+	args.lod = ctx->ac.i32_0;
+	args.attributes = AC_FUNC_ATTR_READNONE;
+
+	LLVMValueRef size = ac_build_image_opcode(&ctx->ac, &args);
+	LLVMValueRef idx2 = LLVMConstInt(ctx->ac.i32, 2, false);
+
+	if (is_array && dim == GLSL_SAMPLER_DIM_CUBE) {
+		/* Component 2 of the query result is divided by 6. */
+		LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+		LLVMValueRef depth = LLVMBuildExtractElement(builder, size, idx2, "");
+
+		depth = LLVMBuildSDiv(builder, depth, six, "");
+		size = LLVMBuildInsertElement(builder, size, depth, idx2, "");
+	}
+
+	if (is_array && dim == GLSL_SAMPLER_DIM_1D && ctx->ac.chip_class == GFX9) {
+		/* Move component 2 down into component 1. */
+		LLVMValueRef layer_count = LLVMBuildExtractElement(builder, size, idx2, "");
+
+		size = LLVMBuildInsertElement(builder, size, layer_count,
+					      ctx->ac.i32_1, "");
+	}
+
+	return size;
+}
+
+/**
+ * Emit the wait-count instruction for a NIR memory barrier intrinsic.
+ *
+ * The wait flags depend on the barrier flavour; intrinsics not listed in
+ * the switch emit a wait with no flags set.
+ */
+static void emit_membar(struct ac_llvm_context *ac,
+			const nir_intrinsic_instr *instr)
+{
+	const unsigned vmem_waits = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
+	unsigned wait_flags;
+
+	switch (instr->intrinsic) {
+	case nir_intrinsic_memory_barrier_shared:
+		wait_flags = AC_WAIT_LGKM;
+		break;
+	case nir_intrinsic_memory_barrier_atomic_counter:
+	case nir_intrinsic_memory_barrier_buffer:
+	case nir_intrinsic_memory_barrier_image:
+		wait_flags = vmem_waits;
+		break;
+	case nir_intrinsic_memory_barrier:
+	case nir_intrinsic_group_memory_barrier:
+		wait_flags = AC_WAIT_LGKM | vmem_waits;
+		break;
+	default:
+		wait_flags = 0;
+		break;
+	}
+
+	ac_build_waitcnt(ac, wait_flags);
+}
+
+/**
+ * Emit an execution barrier for the given shader stage.
+ *
+ * On GFX6 tessellation control shaders only a wait-count is emitted instead
+ * of the real barrier instruction: thanks to a hw bug workaround, an entire
+ * patch always fits into a single wave there, so the barrier isn't needed.
+ */
+void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
+{
+	const bool gfx6_tcs = ac->chip_class == GFX6 &&
+			      stage == MESA_SHADER_TESS_CTRL;
+
+	if (!gfx6_tcs) {
+		ac_build_s_barrier(ac);
+		return;
+	}
+
+	ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
+}
+
+/**
+ * Translate nir_intrinsic_discard / nir_intrinsic_discard_if.
+ *
+ * The value handed to the ABI's emit_kill() callback is an i1:
+ *  - for an unconditional discard it is constant false;
+ *  - for discard_if it is "condition == false".
+ *
+ * \param ctx    translation context
+ * \param instr  the discard intrinsic
+ */
+static void emit_discard(struct ac_nir_context *ctx,
+			 const nir_intrinsic_instr *instr)
+{
+	LLVMValueRef cond;
+
+	if (instr->intrinsic == nir_intrinsic_discard_if) {
+		/* Compare against an i1 zero: both icmp operands must have the
+		 * same type, and the other branch shows emit_kill() takes an i1.
+		 * NOTE(review): assumes the NIR condition is a 1-bit boolean.
+		 */
+		cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
+				     get_src(ctx, instr->src[0]),
+				     ctx->ac.i1false, "");
+	} else {
+		assert(instr->intrinsic == nir_intrinsic_discard);
+		cond = ctx->ac.i1false;
+	}
+
+	ctx->abi->emit_kill(ctx->abi, cond);
+}
+
+/**
+ * Compute the local invocation index as
+ * (abi->tg_size & 0xfc0) + thread id within the wave.
+ */
+static LLVMValueRef
+visit_load_local_invocation_index(struct ac_nir_context *ctx)
+{
+	LLVMValueRef field_mask = LLVMConstInt(ctx->ac.i32, 0xfc0, false);
+	LLVMValueRef lane = ac_get_thread_id(&ctx->ac);
+	LLVMValueRef base = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
+					 field_mask, "");
+
+	return LLVMBuildAdd(ctx->ac.builder, base, lane, "");
+}
+
+/**
+ * Return the subgroup id: (abi->tg_size & 0xfc0) >> 6 for compute shaders,
+ * constant 0 for every other stage.
+ */
+static LLVMValueRef
+visit_load_subgroup_id(struct ac_nir_context *ctx)
+{
+	if (ctx->stage != MESA_SHADER_COMPUTE)
+		return LLVMConstInt(ctx->ac.i32, 0, false);
+
+	LLVMValueRef field_mask = LLVMConstInt(ctx->ac.i32, 0xfc0, false);
+	LLVMValueRef field_shift = LLVMConstInt(ctx->ac.i32, 6, false);
+	LLVMValueRef masked = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
+					   field_mask, "");
+
+	return LLVMBuildLShr(ctx->ac.builder, masked, field_shift, "");
+}
+
+/**
+ * Return the number of subgroups: abi->tg_size & 0x3f for compute shaders,
+ * constant 1 for every other stage.
+ */
+static LLVMValueRef
+visit_load_num_subgroups(struct ac_nir_context *ctx)
+{
+	if (ctx->stage != MESA_SHADER_COMPUTE)
+		return LLVMConstInt(ctx->ac.i32, 1, false);
+
+	LLVMValueRef count_mask = LLVMConstInt(ctx->ac.i32, 0x3f, false);
+
+	return LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size, count_mask, "");
+}
+
+/**
+ * Return the index of the first active invocation: count the trailing
+ * zeros of a ballot over all active lanes and truncate the result to i32.
+ */
+static LLVMValueRef
+visit_first_invocation(struct ac_nir_context *ctx)
+{
+	LLVMValueRef active_lanes = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
+	const char *cttz_name;
+
+	/* The intrinsic overload follows the wave size. */
+	if (ctx->ac.wave_size == 32)
+		cttz_name = "llvm.cttz.i32";
+	else
+		cttz_name = "llvm.cttz.i64";
+
+	/* The second argument is whether cttz(0) should be defined, but we do not care. */
+	LLVMValueRef cttz_args[2];
+	cttz_args[0] = active_lanes;
+	cttz_args[1] = ctx->ac.i1false;
+
+	LLVMValueRef first_lane =
+		ac_build_intrinsic(&ctx->ac, cttz_name, ctx->ac.iN_wavemask,
+				   cttz_args, 2,
+				   AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE);
+
+	return LLVMBuildTrunc(ctx->ac.builder, first_lane, ctx->ac.i32, "");
+}
+
+/**
+ * Translate nir_intrinsic_load_shared: load instr->num_components
+ * consecutive elements starting at the pointer derived from source 0,
+ * gather them, and bitcast the result to the destination's type.
+ */
+static LLVMValueRef
+visit_load_shared(struct ac_nir_context *ctx,
+		  const nir_intrinsic_instr *instr)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef base = get_memory_ptr(ctx, instr->src[0]);
+	LLVMValueRef elems[4];
+	int chan = 0;
+
+	while (chan < instr->num_components) {
+		LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan, 0);
+		LLVMValueRef elem_ptr = LLVMBuildGEP(builder, base, &offset, 1, "");
+
+		elems[chan] = LLVMBuildLoad(builder, elem_ptr, "");
+		chan++;
+	}
+
+	LLVMValueRef gathered = ac_build_gather_values(&ctx->ac, elems,
+						       instr->num_components);
+
+	return LLVMBuildBitCast(builder, gathered,
+				get_def_type(ctx, &instr->dest.ssa), "");
+}
+
+/**
+ * Translate nir_intrinsic_store_shared: for each of the first four channels
+ * enabled in the intrinsic's write mask, store that element of source 0 to
+ * the pointer derived from source 1, offset by the channel index.
+ */
+static void
+visit_store_shared(struct ac_nir_context *ctx,
+		   const nir_intrinsic_instr *instr)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef base = get_memory_ptr(ctx, instr->src[1]);
+	LLVMValueRef value = get_src(ctx, instr->src[0]);
+	const int writemask = nir_intrinsic_write_mask(instr);
+
+	for (int chan = 0; chan < 4; chan++) {
+		if (writemask & (1 << chan)) {
+			LLVMValueRef elem = ac_llvm_extract_elem(&ctx->ac, value, chan);
+			LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan, 0);
+			LLVMValueRef elem_ptr = LLVMBuildGEP(builder, base, &offset, 1, "");
+
+			LLVMBuildStore(builder, elem, elem_ptr);
+		}
+	}
+}
+
+/**
+ * Translate a shared-memory or deref atomic intrinsic operating on \p ptr.
+ *
+ * Compare-and-swap is emitted through ac_build_atomic_cmp_xchg() and
+ * returns element 0 of its result; all other supported opcodes map to an
+ * LLVM atomicrmw operation.
+ *
+ * \param ctx      translation context
+ * \param instr    the atomic intrinsic
+ * \param ptr      pointer the atomic operates on
+ * \param src_idx  index of the first data source in instr->src; for
+ *                 compare-and-swap the following source is used as well
+ * \return the value produced by the atomic, or NULL if the intrinsic is
+ *         not one of the handled opcodes
+ */
+static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
+				     const nir_intrinsic_instr *instr,
+				     LLVMValueRef ptr, int src_idx)
+{
+	LLVMValueRef operand = get_src(ctx, instr->src[src_idx]);
+	const char *scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
+	LLVMAtomicRMWBinOp rmw_op;
+
+	switch (instr->intrinsic) {
+	case nir_intrinsic_shared_atomic_comp_swap:
+	case nir_intrinsic_deref_atomic_comp_swap: {
+		LLVMValueRef second = get_src(ctx, instr->src[src_idx + 1]);
+		LLVMValueRef xchg = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, operand,
+							     second, scope);
+
+		return LLVMBuildExtractValue(ctx->ac.builder, xchg, 0, "");
+	}
+	case nir_intrinsic_shared_atomic_add:
+	case nir_intrinsic_deref_atomic_add:
+		rmw_op = LLVMAtomicRMWBinOpAdd;
+		break;
+	case nir_intrinsic_shared_atomic_umin:
+	case nir_intrinsic_deref_atomic_umin:
+		rmw_op = LLVMAtomicRMWBinOpUMin;
+		break;
+	case nir_intrinsic_shared_atomic_umax:
+	case nir_intrinsic_deref_atomic_umax:
+		rmw_op = LLVMAtomicRMWBinOpUMax;
+		break;
+	case nir_intrinsic_shared_atomic_imin:
+	case nir_intrinsic_deref_atomic_imin:
+		rmw_op = LLVMAtomicRMWBinOpMin;
+		break;
+	case nir_intrinsic_shared_atomic_imax:
+	case nir_intrinsic_deref_atomic_imax:
+		rmw_op = LLVMAtomicRMWBinOpMax;
+		break;
+	case nir_intrinsic_shared_atomic_and:
+	case nir_intrinsic_deref_atomic_and:
+		rmw_op = LLVMAtomicRMWBinOpAnd;
+		break;
+	case nir_intrinsic_shared_atomic_or:
+	case nir_intrinsic_deref_atomic_or:
+		rmw_op = LLVMAtomicRMWBinOpOr;
+		break;
+	case nir_intrinsic_shared_atomic_xor:
+	case nir_intrinsic_deref_atomic_xor:
+		rmw_op = LLVMAtomicRMWBinOpXor;
+		break;
+	case nir_intrinsic_shared_atomic_exchange:
+	case nir_intrinsic_deref_atomic_exchange:
+		rmw_op = LLVMAtomicRMWBinOpXchg;
+		break;
+	default:
+		/* Not an atomic this function knows about. */
+		return NULL;
+	}
+
+	return ac_build_atomic_rmw(&ctx->ac, rmw_op, ptr,
+				   ac_to_integer(&ctx->ac, operand), scope);
+}
+
+/**
+ * Build the sample position as a 2-component value holding the fractional
+ * parts of abi->frag_pos[0] and abi->frag_pos[1].
+ */
+static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
+{
+	LLVMValueRef frag_x = ac_to_float(&ctx->ac, ctx->abi->frag_pos[0]);
+	LLVMValueRef frag_y = ac_to_float(&ctx->ac, ctx->abi->frag_pos[1]);
+	LLVMValueRef fract[2];
+
+	fract[0] = ac_build_fract(&ctx->ac, frag_x, 32);
+	fract[1] = ac_build_fract(&ctx->ac, frag_y, 32);
+
+	return ac_build_gather_values(&ctx->ac, fract, 2);
+}
+
+/**
+ * Select the ABI-provided interpolation parameter for an interpolation
+ * mode and location.
+ *
+ * INTERP_MODE_SMOOTH and INTERP_MODE_NONE use the persp_* values,
+ * INTERP_MODE_NOPERSPECTIVE uses the linear_* values.
+ *
+ * \return the matching abi value, or NULL for any other mode (including
+ *         INTERP_MODE_FLAT) or an unrecognised location
+ */
+static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx,
+					enum glsl_interp_mode interp, unsigned location)
+{
+	const bool perspective = interp == INTERP_MODE_SMOOTH ||
+				 interp == INTERP_MODE_NONE;
+	const bool linear = interp == INTERP_MODE_NOPERSPECTIVE;
+
+	if (!perspective && !linear)
+		return NULL;
+
+	if (location == INTERP_CENTER)
+		return perspective ? ctx->abi->persp_center
+				   : ctx->abi->linear_center;
+
+	if (location == INTERP_CENTROID)
+		return perspective ? ctx->abi->persp_centroid
+				   : ctx->abi->linear_centroid;
+
+	if (location == INTERP_SAMPLE)
+		return perspective ? ctx->abi->persp_sample
+				   : ctx->abi->linear_sample;
+
+	return NULL;
+}
+
+/**
+ * Return the INTERP_CENTER interpolation parameter for \p mode, bitcast to
+ * a 2 x i32 vector.
+ */
+static LLVMValueRef barycentric_center(struct ac_nir_context *ctx,
+				       unsigned mode)
+{
+	LLVMValueRef ij = lookup_interp_param(ctx, mode, INTERP_CENTER);
+
+	return LLVMBuildBitCast(ctx->ac.builder, ij, ctx->ac.v2i32, "");
+}
+
+/**
+ * Compute the interpolation parameter for \p mode displaced by \p offset
+ * from the INTERP_CENTER one.
+ *
+ * For each of I (element 0) and J (element 1):
+ *     tmp    = ddx * offset.x + param
+ *     result = ddy * offset.y + tmp
+ * where ddx comes from element i and ddy from element i + 2 of the
+ * ac_build_ddxy_interp() result.
+ *
+ * \param ctx     translation context
+ * \param mode    interpolation mode passed to lookup_interp_param()
+ * \param offset  2-component offset
+ * \return the displaced parameter as a 2 x i32 vector
+ */
+static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx,
+				       unsigned mode,
+				       LLVMValueRef offset)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef center_ij = lookup_interp_param(ctx, mode, INTERP_CENTER);
+
+	LLVMValueRef off_x = LLVMBuildExtractElement(builder, offset, ctx->ac.i32_0, "");
+	off_x = ac_to_float(&ctx->ac, off_x);
+	LLVMValueRef off_y = LLVMBuildExtractElement(builder, offset, ctx->ac.i32_1, "");
+	off_y = ac_to_float(&ctx->ac, off_y);
+
+	LLVMValueRef derivs = ac_build_ddxy_interp(&ctx->ac, center_ij);
+	LLVMValueRef shifted[2];
+
+	for (unsigned comp = 0; comp < 2; comp++) {
+		LLVMValueRef ddx_idx = LLVMConstInt(ctx->ac.i32, comp, false);
+		LLVMValueRef ddy_idx = LLVMConstInt(ctx->ac.i32, comp + 2, false);
+
+		LLVMValueRef ddx = LLVMBuildExtractElement(builder, derivs, ddx_idx, "");
+		LLVMValueRef ddy = LLVMBuildExtractElement(builder, derivs, ddy_idx, "");
+		LLVMValueRef param = LLVMBuildExtractElement(builder, center_ij, ddx_idx, "");
+
+		param = LLVMBuildBitCast(builder, param, ctx->ac.f32, "");
+
+		/* ddx * offset.x + param, then ddy * offset.y + that. */
+		LLVMValueRef along_x = ac_build_fmad(&ctx->ac, ddx, off_x, param);
+		LLVMValueRef along_xy = ac_build_fmad(&ctx->ac, ddy, off_y, along_x);
+
+		shifted[comp] = LLVMBuildBitCast(builder, along_xy, ctx->ac.i32, "");
+	}
+
+	LLVMValueRef result = ac_build_gather_values(&ctx->ac, shifted, 2);
+
+	return LLVMBuildBitCast(builder, result, ctx->ac.v2i32, "");
+}
+
+/**
+ * Return the INTERP_CENTROID interpolation parameter for \p mode, bitcast
+ * to a 2 x i32 vector.
+ */
+static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx,
+					 unsigned mode)
+{
+	LLVMValueRef ij = lookup_interp_param(ctx, mode, INTERP_CENTROID);
+
+	return LLVMBuildBitCast(ctx->ac.builder, ij, ctx->ac.v2i32, "");
+}
+
+/**
+ * Compute the interpolation parameter for \p mode at the position of
+ * sample \p sample_id.
+ *
+ * If the ABI sets interp_at_sample_force_center, the center parameter is
+ * returned instead. Otherwise the sample position is fetched through the
+ * ABI, 0.5 is subtracted from both of its components, and the result is
+ * used as the offset for barycentric_offset().
+ */
+static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx,
+					  unsigned mode,
+					  LLVMValueRef sample_id)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+
+	if (ctx->abi->interp_at_sample_force_center)
+		return barycentric_center(ctx, mode);
+
+	LLVMValueRef half = LLVMConstReal(ctx->ac.f32, 0.5f);
+
+	/* Look up the position of the requested sample. */
+	LLVMValueRef position = ctx->abi->load_sample_position(ctx->abi, sample_id);
+	LLVMValueRef delta[2];
+
+	delta[0] = LLVMBuildExtractElement(builder, position, ctx->ac.i32_0, "");
+	delta[0] = LLVMBuildFSub(builder, delta[0], half, "");
+	delta[1] = LLVMBuildExtractElement(builder, position, ctx->ac.i32_1, "");
+	delta[1] = LLVMBuildFSub(builder, delta[1], half, "");
+
+	return barycentric_offset(ctx, mode,
+				  ac_build_gather_values(&ctx->ac, delta, 2));
+}
+
+
+/**
+ * Return the INTERP_SAMPLE interpolation parameter for \p mode, bitcast to
+ * a 2 x i32 vector.
+ */
+static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx,
+				       unsigned mode)
+{
+	LLVMValueRef ij = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
+
+	return LLVMBuildBitCast(ctx->ac.builder, ij, ctx->ac.v2i32, "");
+}
+
+/**
+ * Load an interpolated fragment shader input.
+ *
+ * \param ctx             translation context
+ * \param interp_param    2-component interpolation parameter (I, J); it is
+ *                        bitcast to 2 x f32 before use
+ * \param index           attribute number
+ * \param comp_start      first channel of the attribute to read
+ * \param num_components  number of consecutive channels to read
+ * \param bitsize         16 or 32; selects the f16 or f32 interpolation
+ *                        helper
+ * \return the gathered channels converted with ac_to_integer()
+ */
+static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx,
+					    LLVMValueRef interp_param,
+					    unsigned index, unsigned comp_start,
+					    unsigned num_components,
+					    unsigned bitsize)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef attr = LLVMConstInt(ctx->ac.i32, index, false);
+	LLVMValueRef ij = LLVMBuildBitCast(builder, interp_param, ctx->ac.v2f32, "");
+	LLVMValueRef bary_i = LLVMBuildExtractElement(builder, ij, ctx->ac.i32_0, "");
+	LLVMValueRef bary_j = LLVMBuildExtractElement(builder, ij, ctx->ac.i32_1, "");
+	LLVMValueRef channels[4];
+
+	assert(bitsize == 16 || bitsize == 32);
+
+	for (unsigned c = 0; c < num_components; c++) {
+		LLVMValueRef chan = LLVMConstInt(ctx->ac.i32, comp_start + c, false);
+
+		channels[c] = bitsize == 16
+			? ac_build_fs_interp_f16(&ctx->ac, chan, attr,
+						 ctx->abi->prim_mask, bary_i, bary_j)
+			: ac_build_fs_interp(&ctx->ac, chan, attr,
+					     ctx->abi->prim_mask, bary_i, bary_j);
+	}
+
+	LLVMValueRef gathered = ac_build_gather_values(&ctx->ac, channels,
+						       num_components);
+
+	return ac_to_integer(&ctx->ac, gathered);
+}
+
+/**
+ * Load a flat (non-interpolated) fragment shader input.
+ *
+ * Each 32-bit channel is fetched with ac_build_fs_interp_mov(). A 64-bit
+ * component occupies two consecutive channels, so a 64-bit load may run
+ * past channel 3 of attribute \p index into attribute \p index + 1.
+ *
+ * \param ctx             translation context
+ * \param index           attribute number of the first channel
+ * \param comp_start      first channel within that attribute
+ * \param num_components  number of components to load
+ * \param bit_size        16, 32 or 64; 16-bit values are truncated from the
+ *                        fetched 32-bit channel, 64-bit values are formed by
+ *                        bitcasting pairs of 32-bit channels
+ * \return the gathered value (i64 or an i64 vector for 64-bit loads)
+ */
+static LLVMValueRef load_flat_input(struct ac_nir_context *ctx,
+				    unsigned index, unsigned comp_start,
+				    unsigned num_components,
+				    unsigned bit_size)
+{
+	LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
+
+	LLVMValueRef values[8];
+
+	/* Each component of a 64-bit value takes up two GL-level channels. */
+	unsigned channels =
+		bit_size == 64 ? num_components * 2 : num_components;
+
+	for (unsigned chan = 0; chan < channels; chan++) {
+		/* The channel index below wraps with % 4, so the attribute
+		 * must advance as soon as the running channel reaches 4
+		 * (not only once it exceeds 4).
+		 */
+		if (comp_start + chan >= 4)
+			attr_number = LLVMConstInt(ctx->ac.i32, index + 1, false);
+		LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (comp_start + chan) % 4, false);
+		values[chan] = ac_build_fs_interp_mov(&ctx->ac,
+						      LLVMConstInt(ctx->ac.i32, 2, false),
+						      llvm_chan,
+						      attr_number,
+						      ctx->abi->prim_mask);
+		values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
+		values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
+						       bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
+	}
+
+	LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, channels);
+	if (bit_size == 64) {
+		/* Reinterpret pairs of 32-bit channels as 64-bit components. */
+		LLVMTypeRef type = num_components == 1 ? ctx->ac.i64 :
+			LLVMVectorType(ctx->ac.i64, num_components);
+		result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
+	}
+	return result;
+}
+
+static void visit_intrinsic(struct ac_nir_context *ctx,
+ nir_intrinsic_instr *instr)
+{
+ LLVMValueRef result = NULL;
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_ballot:
+ result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
+ if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
+ result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
+ break;
+ case nir_intrinsic_read_invocation:
+ result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
+ get_src(ctx, instr->src[1]));
+ break;
+ case nir_intrinsic_read_first_invocation:
+ result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
+ break;
+ case nir_intrinsic_load_subgroup_invocation:
+ result = ac_get_thread_id(&ctx->ac);
+ break;
+ case nir_intrinsic_load_work_group_id: {
+ LLVMValueRef values[3];
+
+ for (int i = 0; i < 3; i++) {
+ values[i] = ctx->abi->workgroup_ids[i] ?
+ ctx->abi->workgroup_ids[i] : ctx->ac.i32_0;
+ }
+
+ result = ac_build_gather_values(&ctx->ac, values, 3);
+ break;
+ }
+ case nir_intrinsic_load_base_vertex:
+ case nir_intrinsic_load_first_vertex:
+ result = ctx->abi->load_base_vertex(ctx->abi);
+ break;
+ case nir_intrinsic_load_local_group_size:
+ result = ctx->abi->load_local_group_size(ctx->abi);
+ break;
+ case nir_intrinsic_load_vertex_id:
+ result = LLVMBuildAdd(ctx->ac.builder, ctx->abi->vertex_id,
+ ctx->abi->base_vertex, "");
+ break;
+ case nir_intrinsic_load_vertex_id_zero_base: {
+ result = ctx->abi->vertex_id;
+ break;
+ }
+ case nir_intrinsic_load_local_invocation_id: {
+ result = ctx->abi->local_invocation_ids;
+ break;
+ }
+ case nir_intrinsic_load_base_instance:
+ result = ctx->abi->start_instance;
+ break;
+ case nir_intrinsic_load_draw_id:
+ result = ctx->abi->draw_id;
+ break;
+ case nir_intrinsic_load_view_index:
+ result = ctx->abi->view_index;
+ break;
+ case nir_intrinsic_load_invocation_id:
+ if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ result = ac_unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5);
+ } else {
+ if (ctx->ac.chip_class >= GFX10) {
+ result = LLVMBuildAnd(ctx->ac.builder,
+ ctx->abi->gs_invocation_id,
+ LLVMConstInt(ctx->ac.i32, 127, 0), "");
+ } else {
+ result = ctx->abi->gs_invocation_id;
+ }
+ }
+ break;
+ case nir_intrinsic_load_primitive_id:
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ result = ctx->abi->gs_prim_id;
+ } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
+ result = ctx->abi->tcs_patch_id;
+ } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ result = ctx->abi->tes_patch_id;
+ } else
+ fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
+ break;
+ case nir_intrinsic_load_sample_id:
+ result = ac_unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
+ break;
+ case nir_intrinsic_load_sample_pos:
+ result = load_sample_pos(ctx);
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ result = ctx->abi->load_sample_mask_in(ctx->abi);
+ break;
+ case nir_intrinsic_load_frag_coord: {
+ LLVMValueRef values[4] = {
+ ctx->abi->frag_pos[0],
+ ctx->abi->frag_pos[1],
+ ctx->abi->frag_pos[2],
+ ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
+ };
+ result = ac_to_integer(&ctx->ac,
+ ac_build_gather_values(&ctx->ac, values, 4));
+ break;
+ }
+ case nir_intrinsic_load_layer_id:
+ result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ break;
+ case nir_intrinsic_load_front_face:
+ result = ctx->abi->front_face;
+ break;
+ case nir_intrinsic_load_helper_invocation:
+ result = ac_build_load_helper_invocation(&ctx->ac);
+ break;
+ case nir_intrinsic_load_color0:
+ result = ctx->abi->color0;
+ break;
+ case nir_intrinsic_load_color1:
+ result = ctx->abi->color1;
+ break;
+ case nir_intrinsic_load_user_data_amd:
+ assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
+ result = ctx->abi->user_data;
+ break;
+ case nir_intrinsic_load_instance_id:
+ result = ctx->abi->instance_id;
+ break;
+ case nir_intrinsic_load_num_work_groups:
+ result = ctx->abi->num_work_groups;
+ break;
+ case nir_intrinsic_load_local_invocation_index:
+ result = visit_load_local_invocation_index(ctx);
+ break;
+ case nir_intrinsic_load_subgroup_id:
+ result = visit_load_subgroup_id(ctx);
+ break;
+ case nir_intrinsic_load_num_subgroups:
+ result = visit_load_num_subgroups(ctx);
+ break;
+ case nir_intrinsic_first_invocation:
+ result = visit_first_invocation(ctx);
+ break;
+ case nir_intrinsic_load_push_constant:
+ result = visit_load_push_constant(ctx, instr);
+ break;
+ case nir_intrinsic_vulkan_resource_index: {
+ LLVMValueRef index = get_src(ctx, instr->src[0]);
+ unsigned desc_set = nir_intrinsic_desc_set(instr);
+ unsigned binding = nir_intrinsic_binding(instr);
+
+ result = ctx->abi->load_resource(ctx->abi, index, desc_set,
+ binding);
+ break;
+ }
+ case nir_intrinsic_vulkan_resource_reindex:
+ result = visit_vulkan_resource_reindex(ctx, instr);
+ break;
+ case nir_intrinsic_store_ssbo:
+ visit_store_ssbo(ctx, instr);
+ break;
+ case nir_intrinsic_load_ssbo:
+ result = visit_load_buffer(ctx, instr);
+ break;
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ result = visit_atomic_ssbo(ctx, instr);
+ break;
+ case nir_intrinsic_load_ubo:
+ result = visit_load_ubo_buffer(ctx, instr);
+ break;
+ case nir_intrinsic_get_buffer_size:
+ result = visit_get_buffer_size(ctx, instr);
+ break;
+ case nir_intrinsic_load_deref:
+ result = visit_load_var(ctx, instr);
+ break;
+ case nir_intrinsic_store_deref:
+ visit_store_var(ctx, instr);
+ break;
+ case nir_intrinsic_load_shared:
+ result = visit_load_shared(ctx, instr);
+ break;
+ case nir_intrinsic_store_shared:
+ visit_store_shared(ctx, instr);
+ break;
+ case nir_intrinsic_bindless_image_samples:
+ case nir_intrinsic_image_deref_samples:
+ result = visit_image_samples(ctx, instr);
+ break;
+ case nir_intrinsic_bindless_image_load:
+ result = visit_image_load(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_load:
+ result = visit_image_load(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_store:
+ visit_image_store(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_store:
+ visit_image_store(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_atomic_add:
+ case nir_intrinsic_bindless_image_atomic_imin:
+ case nir_intrinsic_bindless_image_atomic_umin:
+ case nir_intrinsic_bindless_image_atomic_imax:
+ case nir_intrinsic_bindless_image_atomic_umax:
+ case nir_intrinsic_bindless_image_atomic_and:
+ case nir_intrinsic_bindless_image_atomic_or:
+ case nir_intrinsic_bindless_image_atomic_xor:
+ case nir_intrinsic_bindless_image_atomic_exchange:
+ case nir_intrinsic_bindless_image_atomic_comp_swap:
+ case nir_intrinsic_bindless_image_atomic_inc_wrap:
+ case nir_intrinsic_bindless_image_atomic_dec_wrap:
+ result = visit_image_atomic(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_imin:
+ case nir_intrinsic_image_deref_atomic_umin:
+ case nir_intrinsic_image_deref_atomic_imax:
+ case nir_intrinsic_image_deref_atomic_umax:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_atomic_inc_wrap:
+ case nir_intrinsic_image_deref_atomic_dec_wrap:
+ result = visit_image_atomic(ctx, instr, false);
+ break;
+ case nir_intrinsic_bindless_image_size:
+ result = visit_image_size(ctx, instr, true);
+ break;
+ case nir_intrinsic_image_deref_size:
+ result = visit_image_size(ctx, instr, false);
+ break;
+ case nir_intrinsic_shader_clock:
+ result = ac_build_shader_clock(&ctx->ac);
+ break;
+ case nir_intrinsic_discard:
+ case nir_intrinsic_discard_if:
+ emit_discard(ctx, instr);
+ break;
+ case nir_intrinsic_memory_barrier:
+ case nir_intrinsic_group_memory_barrier:
+ case nir_intrinsic_memory_barrier_atomic_counter:
+ case nir_intrinsic_memory_barrier_buffer:
+ case nir_intrinsic_memory_barrier_image:
+ case nir_intrinsic_memory_barrier_shared:
+ emit_membar(&ctx->ac, instr);
+ break;
+ case nir_intrinsic_barrier:
+ ac_emit_barrier(&ctx->ac, ctx->stage);
+ break;
+ case nir_intrinsic_shared_atomic_add:
+ case nir_intrinsic_shared_atomic_imin:
+ case nir_intrinsic_shared_atomic_umin:
+ case nir_intrinsic_shared_atomic_imax:
+ case nir_intrinsic_shared_atomic_umax:
+ case nir_intrinsic_shared_atomic_and:
+ case nir_intrinsic_shared_atomic_or:
+ case nir_intrinsic_shared_atomic_xor:
+ case nir_intrinsic_shared_atomic_exchange:
+ case nir_intrinsic_shared_atomic_comp_swap: {
+ LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
+ result = visit_var_atomic(ctx, instr, ptr, 1);
+ break;
+ }
+ case nir_intrinsic_deref_atomic_add:
+ case nir_intrinsic_deref_atomic_imin:
+ case nir_intrinsic_deref_atomic_umin:
+ case nir_intrinsic_deref_atomic_imax:
+ case nir_intrinsic_deref_atomic_umax:
+ case nir_intrinsic_deref_atomic_and:
+ case nir_intrinsic_deref_atomic_or:
+ case nir_intrinsic_deref_atomic_xor:
+ case nir_intrinsic_deref_atomic_exchange:
+ case nir_intrinsic_deref_atomic_comp_swap: {
+ LLVMValueRef ptr = get_src(ctx, instr->src[0]);
+ result = visit_var_atomic(ctx, instr, ptr, 1);
+ break;
+ }
+ case nir_intrinsic_load_barycentric_pixel:
+ result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_centroid:
+ result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_sample:
+ result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
+ break;
+ case nir_intrinsic_load_barycentric_at_offset: {
+ LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
+ result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
+ break;
+ }
+ case nir_intrinsic_load_barycentric_at_sample: {
+ LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
+ result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
+ break;
+ }
+ case nir_intrinsic_load_interpolated_input: {
+ /* We assume any indirect loads have been lowered away */
+ ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
+ assert(offset);
+ assert(offset[0].i32 == 0);
+
+ LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
+ unsigned index = nir_intrinsic_base(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ result = load_interpolated_input(ctx, interp_param, index,
+ component,
+ instr->dest.ssa.num_components,
+ instr->dest.ssa.bit_size);
+ break;
+ }
+ case nir_intrinsic_load_input: {
+ /* We only lower inputs for fragment shaders ATM */
+ ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[0]);
+ assert(offset);
+ assert(offset[0].i32 == 0);
+
+ unsigned index = nir_intrinsic_base(instr);
+ unsigned component = nir_intrinsic_component(instr);
+ result = load_flat_input(ctx, index, component,
+ instr->dest.ssa.num_components,
+ instr->dest.ssa.bit_size);
+ break;
+ }
+ case nir_intrinsic_emit_vertex:
+ ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
+ break;
+ case nir_intrinsic_end_primitive:
+ ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
+ break;
+ case nir_intrinsic_load_tess_coord:
+ result = ctx->abi->load_tess_coord(ctx->abi);
+ break;
+ case nir_intrinsic_load_tess_level_outer:
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
+ break;
+ case nir_intrinsic_load_tess_level_inner:
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
+ break;
+ case nir_intrinsic_load_tess_level_outer_default:
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
+ break;
+ case nir_intrinsic_load_tess_level_inner_default:
+ result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
+ break;
+ case nir_intrinsic_load_patch_vertices_in:
+ result = ctx->abi->load_patch_vertices_in(ctx->abi);
+ break;
+ case nir_intrinsic_vote_all: {
+ LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
+ result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+ case nir_intrinsic_vote_any: {
+ LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
+ result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
+ break;
+ }
+ case nir_intrinsic_shuffle:
+ result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
+ get_src(ctx, instr->src[1]));
+ break;
+ case nir_intrinsic_reduce:
+ result = ac_build_reduce(&ctx->ac,
+ get_src(ctx, instr->src[0]),
+ instr->const_index[0],
+ instr->const_index[1]);
+ break;
+ case nir_intrinsic_inclusive_scan:
+ result = ac_build_inclusive_scan(&ctx->ac,
+ get_src(ctx, instr->src[0]),
+ instr->const_index[0]);
+ break;
+ case nir_intrinsic_exclusive_scan:
+ result = ac_build_exclusive_scan(&ctx->ac,
+ get_src(ctx, instr->src[0]),
+ instr->const_index[0]);
+ break;
+ case nir_intrinsic_quad_broadcast: {
+ unsigned lane = nir_src_as_uint(instr->src[1]);
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
+ lane, lane, lane, lane);
+ break;
+ }
+ case nir_intrinsic_quad_swap_horizontal:
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3 ,2);
+ break;
+ case nir_intrinsic_quad_swap_vertical:
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0 ,1);
+ break;
+ case nir_intrinsic_quad_swap_diagonal:
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1 ,0);
+ break;
+ case nir_intrinsic_quad_swizzle_amd: {
+ uint32_t mask = nir_intrinsic_swizzle_mask(instr);
+ result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
+ mask & 0x3, (mask >> 2) & 0x3,
+ (mask >> 4) & 0x3, (mask >> 6) & 0x3);
+ break;
+ }
+ case nir_intrinsic_masked_swizzle_amd: {
+ uint32_t mask = nir_intrinsic_swizzle_mask(instr);
+ result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
+ break;
+ }
+ case nir_intrinsic_write_invocation_amd:
+ result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
+ get_src(ctx, instr->src[1]),
+ get_src(ctx, instr->src[2]));
+ break;
+ case nir_intrinsic_mbcnt_amd:
+ result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
+ break;
+ case nir_intrinsic_load_scratch: {
+ LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+ offset);
+ LLVMTypeRef comp_type =
+ LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMTypeRef vec_type =
+ instr->dest.ssa.num_components == 1 ? comp_type :
+ LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+ LLVMPointerType(vec_type, addr_space), "");
+ result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
+ case nir_intrinsic_store_scratch: {
+ LLVMValueRef offset = get_src(ctx, instr->src[1]);
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
+ offset);
+ LLVMTypeRef comp_type =
+ LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+ LLVMPointerType(comp_type, addr_space), "");
+ LLVMValueRef src = get_src(ctx, instr->src[0]);
+ unsigned wrmask = nir_intrinsic_write_mask(instr);
+ while (wrmask) {
+ int start, count;
+ u_bit_scan_consecutive_range(&wrmask, &start, &count);
+
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
+ LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
+ LLVMTypeRef vec_type =
+ count == 1 ? comp_type : LLVMVectorType(comp_type, count);
+ offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
+ offset_ptr,
+ LLVMPointerType(vec_type, addr_space),
+ "");
+ LLVMValueRef offset_src =
+ ac_extract_components(&ctx->ac, src, start, count);
+ LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
+ }
+ break;
+ }
+ case nir_intrinsic_load_constant: {
+ LLVMValueRef offset = get_src(ctx, instr->src[0]);
+ LLVMValueRef base = LLVMConstInt(ctx->ac.i32,
+ nir_intrinsic_base(instr),
+ false);
+ offset = LLVMBuildAdd(ctx->ac.builder, offset, base, "");
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data,
+ offset);
+ LLVMTypeRef comp_type =
+ LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
+ LLVMTypeRef vec_type =
+ instr->dest.ssa.num_components == 1 ? comp_type :
+ LLVMVectorType(comp_type, instr->dest.ssa.num_components);
+ unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
+ LLVMPointerType(vec_type, addr_space), "");
+ result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
+ break;
+ }
+ default:
+ fprintf(stderr, "Unknown intrinsic: ");
+ nir_print_instr(&instr->instr, stderr);
+ fprintf(stderr, "\n");
+ break;
+ }
+ if (result) {
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+ }
+}
+
+/* Read the 32-bit value stored for a bindless uniform.
+ *
+ * The byte offset that is loaded is
+ *     base_index * 4 + (dynamic_index + constant_index) * 8
+ * inside the buffer that abi->load_ubo() returns for index 0. The loaded
+ * value is returned bitcast to i32.
+ */
+static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx,
+						    unsigned base_index,
+						    unsigned constant_index,
+						    LLVMValueRef dynamic_index)
+{
+	LLVMBuilderRef builder = ctx->ac.builder;
+	LLVMValueRef base_bytes = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
+	LLVMValueRef const_part = LLVMConstInt(ctx->ac.i32, constant_index, 0);
+	LLVMValueRef handle_size = LLVMConstInt(ctx->ac.i32, 8, 0);
+
+	/* Bindless uniforms are 64-bit, so multiply the element index by 8. */
+	LLVMValueRef element = LLVMBuildAdd(builder, dynamic_index, const_part, "");
+	LLVMValueRef element_bytes = LLVMBuildMul(builder, element, handle_size, "");
+	LLVMValueRef byte_offset = LLVMBuildAdd(builder, base_bytes, element_bytes, "");
+
+	LLVMValueRef ubo = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
+	LLVMValueRef loaded = ac_build_buffer_load(&ctx->ac, ubo, 1, NULL, byte_offset,
+						   NULL, 0, 0, true, true);
+
+	return LLVMBuildBitCast(builder, loaded, ctx->ac.i32, "");
+}
+
+/* Load a texture/sampler/image descriptor through the ABI callback.
+ *
+ * Works out the descriptor set, the base index, a constant index, an
+ * optional dynamic index and whether the access is bindless, then forwards
+ * all of them to abi->load_sampler_desc().
+ *
+ * deref_instr: deref chain leading to the variable, or NULL when the
+ *              instruction carries no deref.
+ * desc_type:   kind of descriptor to load.
+ * instr:       the originating instruction; only inspected when
+ *              deref_instr is NULL.
+ * image/write: forwarded to the ABI callback; "image" also selects how
+ *              instr is interpreted when deref_instr is NULL.
+ */
+static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
+ nir_deref_instr *deref_instr,
+ enum ac_descriptor_type desc_type,
+ const nir_instr *instr,
+ bool image, bool write)
+{
+ LLVMValueRef index = NULL;
+ unsigned constant_index = 0;
+ unsigned descriptor_set;
+ unsigned base_index;
+ bool bindless = false;
+
+ if (!deref_instr) {
+ /* No deref: everything has to come from the instruction itself. */
+ descriptor_set = 0;
+ if (image) {
+ /* Image intrinsic without a deref: source 0 is used as the bindless index. */
+ nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
+ base_index = 0;
+ bindless = true;
+ index = get_src(ctx, img_instr->src[0]);
+ } else {
+ nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
+ int sampSrcIdx = nir_tex_instr_src_index(tex_instr,
+ nir_tex_src_sampler_handle);
+ if (sampSrcIdx != -1) {
+ /* A sampler handle source is present: use it as the bindless index. */
+ base_index = 0;
+ bindless = true;
+ index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
+ } else {
+ /* Otherwise fall back to the sampler index stored in the instruction. */
+ assert(tex_instr && !image);
+ base_index = tex_instr->sampler_index;
+ }
+ }
+ } else {
+ /* Walk the deref chain up to the variable, accumulating constant
+ * array/struct offsets in constant_index and non-constant array
+ * indices in index. */
+ while(deref_instr->deref_type != nir_deref_type_var) {
+ if (deref_instr->deref_type == nir_deref_type_array) {
+ /* Each array index is scaled by glsl_get_aoa_size() of the
+ * element type, treating a size of 0 as 1. */
+ unsigned array_size = glsl_get_aoa_size(deref_instr->type);
+ if (!array_size)
+ array_size = 1;
+
+ if (nir_src_is_const(deref_instr->arr.index)) {
+ constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
+ } else {
+ LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
+
+ indirect = LLVMBuildMul(ctx->ac.builder, indirect,
+ LLVMConstInt(ctx->ac.i32, array_size, false), "");
+
+ if (!index)
+ index = indirect;
+ else
+ index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
+ }
+
+ deref_instr = nir_src_as_deref(deref_instr->parent);
+ } else if (deref_instr->deref_type == nir_deref_type_struct) {
+ /* The member offset is looked up in the parent's type, so step to
+ * the parent before querying it. */
+ unsigned sidx = deref_instr->strct.index;
+ deref_instr = nir_src_as_deref(deref_instr->parent);
+ constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
+ } else {
+ unreachable("Unsupported deref type");
+ }
+ }
+ descriptor_set = deref_instr->var->data.descriptor_set;
+
+ if (deref_instr->var->data.bindless) {
+ /* For now just assert on unhandled variable types */
+ assert(deref_instr->var->data.mode == nir_var_uniform);
+
+ base_index = deref_instr->var->data.driver_location;
+ bindless = true;
+
+ /* Replace the accumulated index by the value read back through
+ * get_bindless_index_from_uniform(). */
+ index = index ? index : ctx->ac.i32_0;
+ index = get_bindless_index_from_uniform(ctx, base_index,
+ constant_index, index);
+ } else
+ base_index = deref_instr->var->data.binding;
+ }
+
+ return ctx->abi->load_sampler_desc(ctx->abi,
+ descriptor_set,
+ base_index,
+ constant_index, index,
+ desc_type, image, write, bindless);
+}
+
+/* Turn anisotropic filtering off when BASE_LEVEL == LAST_LEVEL.
+ *
+ * GFX6-GFX7:
+ *   The shader has to disable anisotropic filtering by hand in that case.
+ *   The driver stores in img7 a mask that clears MAX_ANISO_RATIO when
+ *   BASE_LEVEL == LAST_LEVEL, and the shader applies it:
+ *     s_and_b32 samp0, samp0, img7
+ *
+ * GFX8:
+ *   The ANISO_OVERRIDE sampler field enables this fix in TA, so the
+ *   sampler is returned unchanged for chip_class >= GFX8.
+ */
+static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
+					   LLVMValueRef res, LLVMValueRef samp)
+{
+	if (ctx->ac.chip_class < GFX8) {
+		LLVMBuilderRef b = ctx->ac.builder;
+		LLVMValueRef dword7 = LLVMConstInt(ctx->ac.i32, 7, 0);
+		LLVMValueRef dword0 = LLVMConstInt(ctx->ac.i32, 0, 0);
+
+		/* samp[0] &= res[7] */
+		LLVMValueRef aniso_mask = LLVMBuildExtractElement(b, res, dword7, "");
+		LLVMValueRef word = LLVMBuildExtractElement(b, samp, dword0, "");
+		word = LLVMBuildAnd(b, word, aniso_mask, "");
+		samp = LLVMBuildInsertElement(b, samp, word, dword0, "");
+	}
+
+	return samp;
+}
+
+/* Load the descriptors a texture instruction needs.
+ *
+ * *res_ptr always receives the resource descriptor. *samp_ptr is written
+ * only when samp_ptr is non-NULL, and *fmask_ptr only when fmask_ptr is
+ * non-NULL and the operation is txf_ms or samples_identical.
+ */
+static void tex_fetch_ptrs(struct ac_nir_context *ctx,
+			   nir_tex_instr *instr,
+			   LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
+			   LLVMValueRef *fmask_ptr)
+{
+	nir_deref_instr *tex_deref = NULL;
+	nir_deref_instr *samp_deref = NULL;
+	int plane = -1;
+	const bool uses_fmask = instr->op == nir_texop_txf_ms ||
+				instr->op == nir_texop_samples_identical;
+
+	/* Pick out the sources that decide which descriptors get loaded. */
+	for (unsigned i = 0; i < instr->num_srcs; i++) {
+		if (instr->src[i].src_type == nir_tex_src_texture_deref)
+			tex_deref = nir_src_as_deref(instr->src[i].src);
+		else if (instr->src[i].src_type == nir_tex_src_sampler_deref)
+			samp_deref = nir_src_as_deref(instr->src[i].src);
+		else if (instr->src[i].src_type == nir_tex_src_plane)
+			plane = nir_src_as_int(instr->src[i].src);
+	}
+
+	/* Without a dedicated sampler deref, reuse the texture deref. */
+	if (!samp_deref)
+		samp_deref = tex_deref;
+
+	enum ac_descriptor_type main_descriptor;
+	if (plane >= 0) {
+		assert(instr->op != nir_texop_txf_ms &&
+		       instr->op != nir_texop_samples_identical);
+		assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
+
+		main_descriptor = AC_DESC_PLANE_0 + plane;
+	} else if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+		main_descriptor = AC_DESC_BUFFER;
+	} else {
+		main_descriptor = AC_DESC_IMAGE;
+	}
+
+	*res_ptr = get_sampler_desc(ctx, tex_deref, main_descriptor,
+				    &instr->instr, false, false);
+
+	if (samp_ptr) {
+		LLVMValueRef sampler = get_sampler_desc(ctx, samp_deref, AC_DESC_SAMPLER,
+							&instr->instr, false, false);
+		if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
+			sampler = sici_fix_sampler_aniso(ctx, *res_ptr, sampler);
+		*samp_ptr = sampler;
+	}
+
+	if (fmask_ptr && uses_fmask)
+		*fmask_ptr = get_sampler_desc(ctx, tex_deref, AC_DESC_FMASK,
+					      &instr->instr, false, false);
+}
+
+/* Round a coordinate: convert it to float, round it with ac_build_round()
+ * and convert the result back to an integer-typed value. */
+static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
+				      LLVMValueRef coord)
+{
+	LLVMValueRef as_float = ac_to_float(ctx, coord);
+	LLVMValueRef rounded = ac_build_round(ctx, as_float);
+
+	return ac_to_integer(ctx, rounded);
+}
+
+/* Translate a NIR texture instruction into LLVM IR.
+ *
+ * Collects the instruction's sources into an ac_image_args, applies the
+ * offset/derivative/coordinate fixups below, emits the operation through
+ * build_tex_intrinsic() and stores the result, converted with
+ * ac_to_integer(), in ctx->ssa_defs. Buffer size queries and
+ * texture_samples are answered without calling build_tex_intrinsic().
+ */
+static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
+{
+ LLVMValueRef result = NULL;
+ struct ac_image_args args = { 0 };
+ LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
+ LLVMValueRef ddx = NULL, ddy = NULL;
+ unsigned offset_src = 0;
+
+ /* Load the resource and sampler descriptors, plus FMASK when the op needs it. */
+ tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr);
+
+ /* Gather the per-source operands; sources not listed are ignored here. */
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_coord: {
+ LLVMValueRef coord = get_src(ctx, instr->src[i].src);
+ for (unsigned chan = 0; chan < instr->coord_components; ++chan)
+ args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
+ break;
+ }
+ case nir_tex_src_projector:
+ break;
+ case nir_tex_src_comparator:
+ if (instr->is_shadow) {
+ args.compare = get_src(ctx, instr->src[i].src);
+ args.compare = ac_to_float(&ctx->ac, args.compare);
+ }
+ break;
+ case nir_tex_src_offset:
+ args.offset = get_src(ctx, instr->src[i].src);
+ /* Remember the source slot: the txf path below re-reads it as constants. */
+ offset_src = i;
+ break;
+ case nir_tex_src_bias:
+ if (instr->op == nir_texop_txb)
+ args.bias = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_lod: {
+ /* A constant LOD of 0 sets level_zero instead of passing an explicit LOD. */
+ if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
+ args.level_zero = true;
+ else
+ args.lod = get_src(ctx, instr->src[i].src);
+ break;
+ }
+ case nir_tex_src_ms_index:
+ sample_index = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_ms_mcs:
+ break;
+ case nir_tex_src_ddx:
+ ddx = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_ddy:
+ ddy = get_src(ctx, instr->src[i].src);
+ break;
+ case nir_tex_src_texture_offset:
+ case nir_tex_src_sampler_offset:
+ case nir_tex_src_plane:
+ default:
+ break;
+ }
+ }
+
+ /* Size query on a buffer texture: answered by get_buffer_size(). */
+ if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+ result = get_buffer_size(ctx, args.resource, true);
+ goto write_result;
+ }
+
+ /* Sample count query, computed from dword 3 of the resource descriptor:
+ * when bits 29..31 are all set the result is 1 << bits[19:16],
+ * otherwise it is 1. */
+ if (instr->op == nir_texop_texture_samples) {
+ LLVMValueRef res, samples, is_msaa;
+ res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
+ samples = LLVMBuildExtractElement(ctx->ac.builder, res,
+ LLVMConstInt(ctx->ac.i32, 3, false), "");
+ is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
+ LLVMConstInt(ctx->ac.i32, 28, false), "");
+ is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
+ LLVMConstInt(ctx->ac.i32, 0xe, false), "");
+ is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
+ LLVMConstInt(ctx->ac.i32, 0xe, false), "");
+
+ samples = LLVMBuildLShr(ctx->ac.builder, samples,
+ LLVMConstInt(ctx->ac.i32, 16, false), "");
+ samples = LLVMBuildAnd(ctx->ac.builder, samples,
+ LLVMConstInt(ctx->ac.i32, 0xf, false), "");
+ samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
+ samples, "");
+ samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
+ ctx->ac.i32_1, "");
+ result = samples;
+ goto write_result;
+ }
+
+ /* For everything except txf/txf_ms, pack up to three offsets into one
+ * dword: each is masked to 6 bits and channel N is shifted left by N * 8. */
+ if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
+ LLVMValueRef offset[3], pack;
+ for (unsigned chan = 0; chan < 3; ++chan)
+ offset[chan] = ctx->ac.i32_0;
+
+ unsigned num_components = ac_get_llvm_num_components(args.offset);
+ for (unsigned chan = 0; chan < num_components; chan++) {
+ offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
+ offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
+ LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
+ if (chan)
+ offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
+ LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
+ }
+ pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
+ pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
+ args.offset = pack;
+ }
+
+ /* Section 8.23.1 (Depth Texture Comparison Mode) of the
+ * OpenGL 4.5 spec says:
+ *
+ * "If the texture's internal format indicates a fixed-point
+ * depth texture, then D_t and D_ref are clamped to the
+ * range [0, 1]; otherwise no clamping is performed."
+ *
+ * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
+ * an explicitly clamped 32-bit float format.
+ */
+ if (args.compare &&
+ ctx->ac.chip_class >= GFX8 &&
+ ctx->ac.chip_class <= GFX9 &&
+ ctx->abi->clamp_shadow_reference) {
+ LLVMValueRef upgraded, clamped;
+
+ /* Bit 29 of sampler dword 3 selects whether the clamped value is used. */
+ upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
+ LLVMConstInt(ctx->ac.i32, 3, false), "");
+ upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
+ LLVMConstInt(ctx->ac.i32, 29, false), "");
+ upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
+ clamped = ac_build_clamp(&ctx->ac, args.compare);
+ args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped,
+ args.compare, "");
+ }
+
+ /* pack derivatives */
+ /* args.derivs holds the ddx channels first, followed by the ddy channels
+ * starting at num_dest_deriv_channels. */
+ if (ddx || ddy) {
+ int num_src_deriv_channels, num_dest_deriv_channels;
+ switch (instr->sampler_dim) {
+ case GLSL_SAMPLER_DIM_3D:
+ case GLSL_SAMPLER_DIM_CUBE:
+ num_src_deriv_channels = 3;
+ num_dest_deriv_channels = 3;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ default:
+ num_src_deriv_channels = 2;
+ num_dest_deriv_channels = 2;
+ break;
+ case GLSL_SAMPLER_DIM_1D:
+ num_src_deriv_channels = 1;
+ /* On GFX9 a second, zero-filled derivative channel is emitted for 1D. */
+ if (ctx->ac.chip_class == GFX9) {
+ num_dest_deriv_channels = 2;
+ } else {
+ num_dest_deriv_channels = 1;
+ }
+ break;
+ }
+
+ for (unsigned i = 0; i < num_src_deriv_channels; i++) {
+ args.derivs[i] = ac_to_float(&ctx->ac,
+ ac_llvm_extract_elem(&ctx->ac, ddx, i));
+ args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
+ ac_llvm_extract_elem(&ctx->ac, ddy, i));
+ }
+ for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
+ args.derivs[i] = ctx->ac.f32_0;
+ args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
+ }
+ }
+
+ /* Cube maps: convert the coordinates to float and hand them, together
+ * with the derivatives, to ac_prepare_cube_coords(). */
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
+ for (unsigned chan = 0; chan < instr->coord_components; chan++)
+ args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
+ if (instr->coord_components == 3)
+ args.coords[3] = LLVMGetUndef(ctx->ac.f32);
+ ac_prepare_cube_coords(&ctx->ac,
+ instr->op == nir_texop_txd, instr->is_array,
+ instr->op == nir_texop_lod, args.coords, args.derivs);
+ }
+
+ /* Texture coordinates fixups */
+ /* The array layer coordinate is rounded for non-fetch operations. */
+ if (instr->coord_components > 1 &&
+ instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+ instr->is_array &&
+ instr->op != nir_texop_txf) {
+ args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
+ }
+
+ if (instr->coord_components > 2 &&
+ (instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
+ instr->is_array &&
+ instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
+ args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
+ }
+
+ /* GFX9, 1D textures: insert a filler second coordinate (integer 0 for
+ * txf, 0.5 otherwise) and move the array layer to slot 2. */
+ if (ctx->ac.chip_class == GFX9 &&
+ instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+ instr->op != nir_texop_lod) {
+ LLVMValueRef filler;
+ if (instr->op == nir_texop_txf)
+ filler = ctx->ac.i32_0;
+ else
+ filler = LLVMConstReal(ctx->ac.f32, 0.5);
+
+ if (instr->is_array)
+ args.coords[2] = args.coords[1];
+ args.coords[1] = filler;
+ }
+
+ /* Pack sample index */
+ if (instr->op == nir_texop_txf_ms && sample_index)
+ args.coords[instr->coord_components] = sample_index;
+
+ /* samples_identical: fetch from the FMASK descriptor with the same
+ * coordinates and report whether element 0 of the result equals zero. */
+ if (instr->op == nir_texop_samples_identical) {
+ struct ac_image_args txf_args = { 0 };
+ memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
+
+ txf_args.dmask = 0xf;
+ txf_args.resource = fmask_ptr;
+ txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
+ result = build_tex_intrinsic(ctx, instr, &txf_args);
+
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
+ goto write_result;
+ }
+
+ /* Multisampled accesses other than txs: remap the sample index through
+ * adjust_sample_index_using_fmask(). */
+ if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
+ instr->op != nir_texop_txs) {
+ unsigned sample_chan = instr->is_array ? 3 : 2;
+ args.coords[sample_chan] = adjust_sample_index_using_fmask(
+ &ctx->ac, args.coords[0], args.coords[1],
+ instr->is_array ? args.coords[2] : NULL,
+ args.coords[sample_chan], fmask_ptr);
+ }
+
+ /* txf/txf_ms: the offset source is read as constants via
+ * nir_src_comp_as_uint() and added straight onto the coordinates;
+ * the packed offset argument is then dropped. */
+ if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
+ int num_offsets = instr->src[offset_src].src.ssa->num_components;
+ num_offsets = MIN2(num_offsets, instr->coord_components);
+ for (unsigned i = 0; i < num_offsets; ++i) {
+ args.coords[i] = LLVMBuildAdd(
+ ctx->ac.builder, args.coords[i],
+ LLVMConstInt(ctx->ac.i32, nir_src_comp_as_uint(instr->src[offset_src].src, i), false), "");
+ }
+ args.offset = NULL;
+ }
+
+ /* DMASK was repurposed for GATHER4. 4 components are always
+ * returned and DMASK works like a swizzle - it selects
+ * the component to fetch. The only valid DMASK values are
+ * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ * (red,red,red,red) etc.) The ISA document doesn't mention
+ * this.
+ */
+ args.dmask = 0xf;
+ if (instr->op == nir_texop_tg4) {
+ if (instr->is_shadow)
+ args.dmask = 1;
+ else
+ args.dmask = 1 << instr->component;
+ }
+
+ if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
+ args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
+ args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
+ }
+ result = build_tex_intrinsic(ctx, instr, &args);
+
+ /* Post-process the raw result depending on the operation:
+ * - query_levels: take element 3;
+ * - new-style shadow sampling: take element 0;
+ * - txs on cube arrays: divide element 2 by 6;
+ * - txs on GFX9 1D arrays: copy element 2 into element 1;
+ * - otherwise: trim to the destination's component count. */
+ if (instr->op == nir_texop_query_levels)
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
+ else if (instr->is_shadow && instr->is_new_style_shadow &&
+ instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
+ instr->op != nir_texop_tg4)
+ result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
+ else if (instr->op == nir_texop_txs &&
+ instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
+ instr->is_array) {
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
+ LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
+ z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
+ result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
+ } else if (ctx->ac.chip_class == GFX9 &&
+ instr->op == nir_texop_txs &&
+ instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+ instr->is_array) {
+ LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
+ result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
+ ctx->ac.i32_1, "");
+ } else if (instr->dest.ssa.num_components != 4)
+ result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
+
+write_result:
+ /* Record the value for later get_src() lookups, always as an integer type. */
+ if (result) {
+ assert(instr->dest.is_ssa);
+ result = ac_to_integer(&ctx->ac, result);
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+ }
+}
+
+
+/* Create an LLVM phi for a NIR phi. The phi has no incoming edges yet;
+ * it is recorded in ctx->phis so that visit_post_phi() can add them. */
+static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
+{
+	LLVMValueRef phi = LLVMBuildPhi(ctx->ac.builder,
+					get_def_type(ctx, &instr->dest.ssa), "");
+
+	ctx->ssa_defs[instr->dest.ssa.index] = phi;
+	_mesa_hash_table_insert(ctx->phis, instr, phi);
+}
+
+/* Add one incoming (value, block) pair to llvm_phi for every source of
+ * the NIR phi. */
+static void visit_post_phi(struct ac_nir_context *ctx,
+			   nir_phi_instr *instr,
+			   LLVMValueRef llvm_phi)
+{
+	nir_foreach_phi_src(src, instr) {
+		LLVMBasicBlockRef pred_block = get_block(ctx, src->pred);
+		LLVMValueRef incoming = get_src(ctx, src->src);
+
+		LLVMAddIncoming(llvm_phi, &incoming, &pred_block, 1);
+	}
+}
+
+/* Fill in the incoming edges of every phi recorded in ctx->phis. */
+static void phi_post_pass(struct ac_nir_context *ctx)
+{
+	hash_table_foreach(ctx->phis, entry) {
+		nir_phi_instr *nir_phi = (nir_phi_instr*)entry->key;
+		LLVMValueRef llvm_phi = (LLVMValueRef)entry->data;
+
+		visit_post_phi(ctx, nir_phi, llvm_phi);
+	}
+}
+
+
+/* Define an undef SSA value: an undef integer of the definition's bit
+ * size, or a vector of such integers when it has several components. */
+static void visit_ssa_undef(struct ac_nir_context *ctx,
+			    const nir_ssa_undef_instr *instr)
+{
+	LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
+
+	if (instr->def.num_components != 1)
+		type = LLVMVectorType(type, instr->def.num_components);
+
+	ctx->ssa_defs[instr->def.index] = LLVMGetUndef(type);
+}
+
+/* Emit a NIR break or continue. Any other jump type is reported on
+ * stderr and aborts the process. */
+static void visit_jump(struct ac_llvm_context *ctx,
+		       const nir_jump_instr *instr)
+{
+	if (instr->type == nir_jump_break) {
+		ac_build_break(ctx);
+		return;
+	}
+
+	if (instr->type == nir_jump_continue) {
+		ac_build_continue(ctx);
+		return;
+	}
+
+	fprintf(stderr, "Unknown NIR jump instr: ");
+	nir_print_instr(&instr->instr, stderr);
+	fprintf(stderr, "\n");
+	abort();
+}
+
+/* Map a GLSL base type to the LLVM scalar type used for it. Booleans and
+ * subroutines use i32, like 32-bit integers. Any base type not listed
+ * is unreachable. */
+static LLVMTypeRef
+glsl_base_to_llvm_type(struct ac_llvm_context *ac,
+		       enum glsl_base_type type)
+{
+	switch (type) {
+	/* Integer-like types, by width. */
+	case GLSL_TYPE_UINT8:
+	case GLSL_TYPE_INT8:
+		return ac->i8;
+	case GLSL_TYPE_UINT16:
+	case GLSL_TYPE_INT16:
+		return ac->i16;
+	case GLSL_TYPE_UINT:
+	case GLSL_TYPE_INT:
+	case GLSL_TYPE_BOOL:
+	case GLSL_TYPE_SUBROUTINE:
+		return ac->i32;
+	case GLSL_TYPE_UINT64:
+	case GLSL_TYPE_INT64:
+		return ac->i64;
+
+	/* Floating-point types, by width. */
+	case GLSL_TYPE_FLOAT16:
+		return ac->f16;
+	case GLSL_TYPE_FLOAT:
+		return ac->f32;
+	case GLSL_TYPE_DOUBLE:
+		return ac->f64;
+
+	default:
+		unreachable("unknown GLSL type");
+	}
+}
+
+/* Build the LLVM type corresponding to a GLSL type.
+ *
+ * Scalars map through glsl_base_to_llvm_type(), vectors become LLVM
+ * vectors, matrices become arrays of their column type, arrays become
+ * LLVM arrays and structs/interfaces become non-packed LLVM structs whose
+ * members are converted recursively.
+ */
+static LLVMTypeRef
+glsl_to_llvm_type(struct ac_llvm_context *ac,
+		  const struct glsl_type *type)
+{
+	if (glsl_type_is_scalar(type)) {
+		return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
+	}
+
+	if (glsl_type_is_vector(type)) {
+		return LLVMVectorType(
+		   glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
+		   glsl_get_vector_elements(type));
+	}
+
+	if (glsl_type_is_matrix(type)) {
+		return LLVMArrayType(
+		   glsl_to_llvm_type(ac, glsl_get_column_type(type)),
+		   glsl_get_matrix_columns(type));
+	}
+
+	if (glsl_type_is_array(type)) {
+		return LLVMArrayType(
+		   glsl_to_llvm_type(ac, glsl_get_array_element(type)),
+		   glsl_get_length(type));
+	}
+
+	assert(glsl_type_is_struct_or_ifc(type));
+
+	/* Query the member count once instead of on every loop iteration. */
+	const unsigned num_members = glsl_get_length(type);
+
+	/* A variable-length array must have a size greater than zero, so
+	 * reserve one slot even for a type without members. The count passed
+	 * to LLVM below is still the real member count. */
+	LLVMTypeRef member_types[num_members ? num_members : 1];
+
+	for (unsigned i = 0; i < num_members; i++) {
+		member_types[i] =
+			glsl_to_llvm_type(ac,
+					  glsl_get_struct_field(type, i));
+	}
+
+	return LLVMStructTypeInContext(ac->context, member_types,
+				       num_members, false);
+}
+
+/* Turn a NIR deref instruction into an LLVM pointer value.
+ *
+ * Only derefs in shared or global memory are handled here; for every other
+ * mode the function returns without defining anything. The resulting
+ * pointer is stored in ctx->ssa_defs under the deref's SSA index.
+ */
+static void visit_deref(struct ac_nir_context *ctx,
+ nir_deref_instr *instr)
+{
+ if (instr->mode != nir_var_mem_shared &&
+ instr->mode != nir_var_mem_global)
+ return;
+
+ LLVMValueRef result = NULL;
+ switch(instr->deref_type) {
+ case nir_deref_type_var: {
+ /* The variable's LLVM value is looked up in ctx->vars. */
+ struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
+ result = entry->data;
+ break;
+ }
+ case nir_deref_type_struct:
+ if (instr->mode == nir_var_mem_global) {
+ /* Global memory: advance the parent pointer by the field offset
+ * reported by glsl_get_struct_field_offset(). */
+ nir_deref_instr *parent = nir_deref_instr_parent(instr);
+ uint64_t offset = glsl_get_struct_field_offset(parent->type,
+ instr->strct.index);
+ result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
+ LLVMConstInt(ctx->ac.i32, offset, 0));
+ } else {
+ /* Shared memory: index the parent by member number. */
+ result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
+ LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
+ }
+ break;
+ case nir_deref_type_array:
+ if (instr->mode == nir_var_mem_global) {
+ nir_deref_instr *parent = nir_deref_instr_parent(instr);
+ unsigned stride = glsl_get_explicit_stride(parent->type);
+
+ /* Row-major matrices, and vectors without an explicit stride, are
+ * stepped by the scalar size instead. */
+ if ((glsl_type_is_matrix(parent->type) &&
+ glsl_matrix_type_is_row_major(parent->type)) ||
+ (glsl_type_is_vector(parent->type) && stride == 0))
+ stride = type_scalar_size_bytes(parent->type);
+
+ assert(stride > 0);
+ /* The index is zero-extended to 64 bits before it is scaled by the stride. */
+ LLVMValueRef index = get_src(ctx, instr->arr.index);
+ if (LLVMTypeOf(index) != ctx->ac.i64)
+ index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
+
+ LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
+
+ result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
+ } else {
+ /* Shared memory: index the parent by element number. */
+ result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
+ get_src(ctx, instr->arr.index));
+ }
+ break;
+ case nir_deref_type_ptr_as_array:
+ if (instr->mode == nir_var_mem_global) {
+ /* Same scaling as the array case, with the stride taken from
+ * nir_deref_instr_ptr_as_array_stride(). */
+ unsigned stride = nir_deref_instr_ptr_as_array_stride(instr);
+
+ LLVMValueRef index = get_src(ctx, instr->arr.index);
+ if (LLVMTypeOf(index) != ctx->ac.i64)
+ index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
+
+ LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
+
+ result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
+ } else {
+ /* Shared memory: the index is applied directly to the parent pointer. */
+ result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
+ get_src(ctx, instr->arr.index));
+ }
+ break;
+ case nir_deref_type_cast: {
+ result = get_src(ctx, instr->parent);
+
+ /* We can't use the structs from LLVM because the shader
+ * specifies its own offsets. */
+ /* Hence global pointers point at i8; shared pointers use the LLVM type
+ * derived from the GLSL type. */
+ LLVMTypeRef pointee_type = ctx->ac.i8;
+ if (instr->mode == nir_var_mem_shared)
+ pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
+
+ unsigned address_space;
+
+ switch(instr->mode) {
+ case nir_var_mem_shared:
+ address_space = AC_ADDR_SPACE_LDS;
+ break;
+ case nir_var_mem_global:
+ address_space = AC_ADDR_SPACE_GLOBAL;
+ break;
+ default:
+ unreachable("Unhandled address space");
+ }
+
+ LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
+
+ /* Convert the parent value to the target pointer type: a bitcast when
+ * the parent is a vector, an int-to-pointer conversion otherwise. */
+ if (LLVMTypeOf(result) != type) {
+ if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
+ result = LLVMBuildBitCast(ctx->ac.builder, result,
+ type, "");
+ } else {
+ result = LLVMBuildIntToPtr(ctx->ac.builder, result,
+ type, "");
+ }
+ }
+ break;
+ }
+ default:
+ unreachable("Unhandled deref_instr deref type");
+ }
+
+ ctx->ssa_defs[instr->dest.ssa.index] = result;
+}
+
+static void visit_cf_list(struct ac_nir_context *ctx,
+			  struct exec_list *list);
+
+/* Translate every instruction of one NIR block by dispatching on the
+ * instruction type. Once the block is done, the LLVM basic block the builder
+ * currently inserts into is recorded in ctx->defs, keyed by the NIR block.
+ */
+static void visit_block(struct ac_nir_context *ctx, nir_block *block)
+{
+	nir_foreach_instr(cur, block) {
+		switch (cur->type) {
+		case nir_instr_type_deref:
+			visit_deref(ctx, nir_instr_as_deref(cur));
+			break;
+		case nir_instr_type_jump:
+			/* Jumps only need the LLVM context, not the NIR one. */
+			visit_jump(&ctx->ac, nir_instr_as_jump(cur));
+			break;
+		case nir_instr_type_ssa_undef:
+			visit_ssa_undef(ctx, nir_instr_as_ssa_undef(cur));
+			break;
+		case nir_instr_type_phi:
+			visit_phi(ctx, nir_instr_as_phi(cur));
+			break;
+		case nir_instr_type_tex:
+			visit_tex(ctx, nir_instr_as_tex(cur));
+			break;
+		case nir_instr_type_intrinsic:
+			visit_intrinsic(ctx, nir_instr_as_intrinsic(cur));
+			break;
+		case nir_instr_type_load_const:
+			visit_load_const(ctx, nir_instr_as_load_const(cur));
+			break;
+		case nir_instr_type_alu:
+			visit_alu(ctx, nir_instr_as_alu(cur));
+			break;
+		default:
+			/* Unsupported instruction: print it and give up. */
+			fprintf(stderr, "Unknown NIR instr type: ");
+			nir_print_instr(cur, stderr);
+			fprintf(stderr, "\n");
+			abort();
+		}
+	}
+
+	LLVMBasicBlockRef end_bb = LLVMGetInsertBlock(ctx->ac.builder);
+	_mesa_hash_table_insert(ctx->defs, block, end_bb);
+}
+
+/* Emit an if/else construct for a NIR if statement.
+ *
+ * The index of the first block of the then-list labels the uif/endif pair;
+ * the index of the first block of the else-list labels the else part, which
+ * is only emitted when the else-list is not empty.
+ */
+static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
+{
+	LLVMValueRef cond = get_src(ctx, if_stmt->condition);
+	nir_block *then_head =
+		(nir_block *) exec_list_get_head(&if_stmt->then_list);
+
+	ac_build_uif(&ctx->ac, cond, then_head->index);
+	visit_cf_list(ctx, &if_stmt->then_list);
+
+	const bool has_else = !exec_list_is_empty(&if_stmt->else_list);
+	if (has_else) {
+		nir_block *else_head =
+			(nir_block *) exec_list_get_head(&if_stmt->else_list);
+
+		ac_build_else(&ctx->ac, else_head->index);
+		visit_cf_list(ctx, &if_stmt->else_list);
+	}
+
+	ac_build_endif(&ctx->ac, then_head->index);
+}
+
+/* Emit a loop: open it, translate the body, close it. The index of the
+ * first block of the loop body is used as the label for both ends.
+ */
+static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
+{
+	struct exec_list *body = &loop->body;
+	nir_block *body_head = (nir_block *) exec_list_get_head(body);
+
+	ac_build_bgnloop(&ctx->ac, body_head->index);
+	visit_cf_list(ctx, body);
+	ac_build_endloop(&ctx->ac, body_head->index);
+}
+
+/* Walk a list of control-flow nodes in order and translate each one with
+ * the visitor matching its kind (block, if or loop). Any other node type
+ * trips an assertion.
+ */
+static void visit_cf_list(struct ac_nir_context *ctx,
+			  struct exec_list *list)
+{
+	foreach_list_typed(nir_cf_node, cf, node, list) {
+		if (cf->type == nir_cf_node_block)
+			visit_block(ctx, nir_cf_node_as_block(cf));
+		else if (cf->type == nir_cf_node_if)
+			visit_if(ctx, nir_cf_node_as_if(cf));
+		else if (cf->type == nir_cf_node_loop)
+			visit_loop(ctx, nir_cf_node_as_loop(cf));
+		else
+			assert(0);
+	}
+}
+
+/* Allocate storage for one shader output variable.
+ *
+ * For every attribute slot of the variable, four allocas (one per channel)
+ * are created and stored in abi->outputs, starting at the slot derived from
+ * the variable's driver_location. Nothing is allocated for the tess control
+ * stage. The element type is f16 for 16-bit variables and f32 otherwise.
+ */
+void
+ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
+			     struct ac_shader_abi *abi,
+			     struct nir_shader *nir,
+			     struct nir_variable *variable,
+			     gl_shader_stage stage)
+{
+	unsigned first_slot = variable->data.driver_location / 4;
+	unsigned num_slots = glsl_count_attribute_slots(variable->type, false);
+
+	/* Tess control outputs go through their own load/store paths. */
+	if (stage == MESA_SHADER_TESS_CTRL)
+		return;
+
+	switch (stage) {
+	case MESA_SHADER_VERTEX:
+	case MESA_SHADER_TESS_EVAL:
+	case MESA_SHADER_GEOMETRY: {
+		int idx = variable->data.location + variable->data.index;
+		if (idx == VARYING_SLOT_CLIP_DIST0) {
+			/* Clip and cull distances share this variable: more
+			 * than four values take two slots, otherwise one.
+			 */
+			int length = nir->info.clip_distance_array_size +
+				     nir->info.cull_distance_array_size;
+			num_slots = length > 4 ? 2 : 1;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	LLVMTypeRef elem_type =
+		glsl_type_is_16bit(glsl_without_array(variable->type)) ?
+		ctx->f16 : ctx->f32;
+
+	for (unsigned slot = 0; slot < num_slots; ++slot) {
+		for (unsigned chan = 0; chan < 4; chan++) {
+			abi->outputs[ac_llvm_reg_index_soa(first_slot + slot, chan)] =
+				ac_build_alloca_undef(ctx, elem_type, "");
+		}
+	}
+}
+
+/* Assign driver locations to the function's local variables and create
+ * their storage.
+ *
+ * Every local gets driver_location = 4 * (number of slots assigned so far)
+ * and location_frac = 0. ctx->locals then receives four f32 allocas named
+ * "temp" per slot. If the array allocation fails the function returns with
+ * ctx->locals left NULL.
+ */
+static void
+setup_locals(struct ac_nir_context *ctx,
+	     struct nir_function *func)
+{
+	ctx->num_locals = 0;
+	nir_foreach_variable(local, &func->impl->locals) {
+		unsigned slots = glsl_count_attribute_slots(local->type, false);
+
+		local->data.driver_location = ctx->num_locals * 4;
+		local->data.location_frac = 0;
+		ctx->num_locals += slots;
+	}
+
+	ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
+	if (ctx->locals == NULL)
+		return;
+
+	for (int slot = 0; slot < ctx->num_locals; slot++) {
+		for (int chan = 0; chan < 4; chan++) {
+			ctx->locals[slot * 4 + chan] =
+				ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
+		}
+	}
+}
+
+/* Create the scratch area as an alloca of scratch_size i8 elements named
+ * "scratch". Shaders with a scratch size of zero get none.
+ */
+static void
+setup_scratch(struct ac_nir_context *ctx,
+	      struct nir_shader *shader)
+{
+	if (shader->scratch_size != 0) {
+		LLVMTypeRef scratch_type =
+			LLVMArrayType(ctx->ac.i8, shader->scratch_size);
+
+		ctx->scratch = ac_build_alloca_undef(&ctx->ac, scratch_type,
+						     "scratch");
+	}
+}
+
+/* Emit the shader's constant data as a hidden, constant LLVM global named
+ * "const_data" and remember it in ctx->constant_data. Does nothing when the
+ * shader has no constant data.
+ */
+static void
+setup_constant_data(struct ac_nir_context *ctx,
+		    struct nir_shader *shader)
+{
+	if (shader->constant_data == NULL)
+		return;
+
+	/* We want to put the constant data in the CONST address space so that
+	 * we can use scalar loads. However, LLVM versions before 10 put these
+	 * variables in the same section as the code, which is unacceptable
+	 * for RadeonSI as it needs to relocate all the data sections after
+	 * the code sections. See https://reviews.llvm.org/D65813.
+	 */
+	unsigned address_space =
+		LLVM_VERSION_MAJOR >= 10 ? AC_ADDR_SPACE_CONST : AC_ADDR_SPACE_GLOBAL;
+
+	LLVMTypeRef array_type =
+		LLVMArrayType(ctx->ac.i8, shader->constant_data_size);
+	LLVMValueRef initializer =
+		LLVMConstStringInContext(ctx->ac.context,
+					 shader->constant_data,
+					 shader->constant_data_size,
+					 true);
+
+	LLVMValueRef const_global =
+		LLVMAddGlobalInAddressSpace(ctx->ac.module, array_type,
+					    "const_data", address_space);
+
+	LLVMSetInitializer(const_global, initializer);
+	LLVMSetGlobalConstant(const_global, true);
+	LLVMSetVisibility(const_global, LLVMHiddenVisibility);
+
+	ctx->constant_data = const_global;
+}
+
+/* Create one LLVM global in the LDS address space for every shared variable
+ * of the shader and register it in ctx->vars, keyed by the NIR variable.
+ * Unnamed variables get an empty name.
+ */
+static void
+setup_shared(struct ac_nir_context *ctx,
+	     struct nir_shader *nir)
+{
+	nir_foreach_variable(var, &nir->shared) {
+		const char *name = var->name ? var->name : "";
+		LLVMTypeRef type = glsl_to_llvm_type(&ctx->ac, var->type);
+		LLVMValueRef lds_global =
+			LLVMAddGlobalInAddressSpace(ctx->ac.module, type,
+						    name, AC_ADDR_SPACE_LDS);
+
+		_mesa_hash_table_insert(ctx->vars, var, lds_global);
+	}
+}
+
+/* Translate a NIR shader into LLVM IR using the given LLVM context and ABI.
+ *
+ * A private ac_nir_context is built around a copy of *ac. Output storage is
+ * declared for every output variable, then locals, scratch and constant data
+ * (plus shared variables for compute stages) are set up and the body of the
+ * first function in nir->functions is visited. For non-compute stages
+ * abi->emit_outputs is called afterwards. All temporary tables and arrays are
+ * released before returning.
+ */
+void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
+ struct nir_shader *nir)
+{
+ struct ac_nir_context ctx = {};
+ struct nir_function *func;
+
+ /* Work on a copy of the caller's LLVM context. */
+ ctx.ac = *ac;
+ ctx.abi = abi;
+
+ ctx.stage = nir->info.stage;
+ ctx.info = &nir->info;
+
+ /* The function the builder is currently positioned in. */
+ ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
+
+ nir_foreach_variable(variable, &nir->outputs)
+ ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
+ ctx.stage);
+
+ /* Pointer-keyed tables: defs is filled by visit_block(), vars by
+ * setup_shared(). */
+ ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+ ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ /* Only the head of the function list is translated. */
+ func = (struct nir_function *)exec_list_get_head(&nir->functions);
+
+ /* One slot per SSA definition, indexed by the SSA index.
+ * NOTE(review): the calloc result is not checked before use. */
+ nir_index_ssa_defs(func->impl);
+ ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
+
+ setup_locals(&ctx, func);
+ setup_scratch(&ctx, nir);
+ setup_constant_data(&ctx, nir);
+
+ if (gl_shader_stage_is_compute(nir->info.stage))
+ setup_shared(&ctx, nir);
+
+ visit_cf_list(&ctx, &func->impl->body);
+ phi_post_pass(&ctx);
+
+ if (!gl_shader_stage_is_compute(nir->info.stage))
+ ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS,
+ ctx.abi->outputs);
+
+ free(ctx.locals);
+ free(ctx.ssa_defs);
+ ralloc_free(ctx.defs);
+ ralloc_free(ctx.phis);
+ ralloc_free(ctx.vars);
+}
+
+/* Lower indirect derefs for the variable modes selected below.
+ *
+ * Large function temporaries are moved to scratch first; the remaining
+ * indirect derefs of the selected modes are then lowered with
+ * nir_lower_indirect_derefs(). Which modes are selected depends on the
+ * shader stage and on the chip class.
+ */
+void
+ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
+{
+	/* Lower large variables to scratch first so that we won't bloat the
+	 * shader by generating large if ladders for them. We later lower
+	 * scratch to alloca's, assuming LLVM won't generate VGPR indexing.
+	 */
+	NIR_PASS_V(nir, nir_lower_vars_to_scratch,
+		   nir_var_function_temp,
+		   256,
+		   glsl_get_natural_size_align_bytes);
+
+	/* While it would be nice not to have this flag, we are constrained
+	 * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
+	 */
+	const bool broken_vgpr_indexing = chip_class == GFX9;
+
+	const bool is_tcs = nir->info.stage == MESA_SHADER_TESS_CTRL;
+	const bool is_tes = nir->info.stage == MESA_SHADER_TESS_EVAL;
+	const bool is_gs = nir->info.stage == MESA_SHADER_GEOMETRY;
+
+	/* TODO: We shouldn't need to do this, however LLVM isn't currently
+	 * smart enough to handle indirects without causing excess spilling
+	 * causing the gpu to hang.
+	 *
+	 * See the following thread for more details of the problem:
+	 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+	 */
+	nir_variable_mode indirect_mask = nir_var_function_temp;
+
+	/* TODO: Indirect indexing of GS inputs is unimplemented.
+	 *
+	 * TCS and TES load inputs directly from LDS or offchip memory, so
+	 * indirect indexing is trivial.
+	 */
+	if (is_gs || (broken_vgpr_indexing && !is_tcs && !is_tes))
+		indirect_mask |= nir_var_shader_in;
+
+	if (broken_vgpr_indexing && !is_tcs)
+		indirect_mask |= nir_var_shader_out;
+
+	nir_lower_indirect_derefs(nir, indirect_mask);
+}
+
+/* Return the tess factor channels written by one intrinsic, as a bit mask.
+ *
+ * Bits for VARYING_SLOT_TESS_LEVEL_INNER start at bit 0 and bits for
+ * VARYING_SLOT_TESS_LEVEL_OUTER are shifted up by 4. The result is 0 for
+ * anything that is not a store_deref to a shader output at one of those two
+ * locations.
+ */
+static unsigned
+get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
+{
+ if (intrin->intrinsic != nir_intrinsic_store_deref)
+ return 0;
+
+ nir_variable *var =
+ nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));
+
+ if (var->data.mode != nir_var_shader_out)
+ return 0;
+
+ unsigned writemask = 0;
+ const int location = var->data.location;
+ unsigned first_component = var->data.location_frac;
+ /* NOTE(review): the component count is read from intrin->dest although
+ * the intrinsic is a store_deref - confirm that a store carries a
+ * meaningful destination here. */
+ unsigned num_comps = intrin->dest.ssa.num_components;
+
+ /* Mask of (num_comps + 1) low bits, moved to the first written
+ * component. */
+ if (location == VARYING_SLOT_TESS_LEVEL_INNER)
+ writemask = ((1 << (num_comps + 1)) - 1) << first_component;
+ else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4;
+
+ return writemask;
+}
+
+/* Recursively scan one control-flow node of a tess control shader and
+ * accumulate which tess factor channels it writes.
+ *
+ * \param cf_node                   node to scan (block, if or loop)
+ * \param upper_block_tf_writemask  in/out: channels written by the code
+ *                                  enclosing cf_node
+ * \param cond_block_tf_writemask   in/out: channels written inside ifs and
+ *                                  loops
+ * \param tessfactors_are_def_in_all_invocs
+ *                                  in/out: cleared as soon as a code segment
+ *                                  is found where a conditionally written
+ *                                  channel is not also written by the
+ *                                  enclosing code, or when a barrier shows
+ *                                  up in nested control flow
+ * \param is_nested_cf              true when cf_node sits inside an if or a
+ *                                  loop
+ */
+static void
+scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
+	       unsigned *cond_block_tf_writemask,
+	       bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
+{
+	switch (cf_node->type) {
+	case nir_cf_node_block: {
+		nir_block *block = nir_cf_node_as_block(cf_node);
+		nir_foreach_instr(instr, block) {
+			if (instr->type != nir_instr_type_intrinsic)
+				continue;
+
+			nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+			if (intrin->intrinsic == nir_intrinsic_barrier) {
+
+				/* If we find a barrier in nested control flow put this in the
+				 * too hard basket. In GLSL this is not possible but it is in
+				 * SPIR-V.
+				 */
+				if (is_nested_cf) {
+					*tessfactors_are_def_in_all_invocs = false;
+					return;
+				}
+
+				/* The following case must be prevented:
+				 *    gl_TessLevelInner = ...;
+				 *    barrier();
+				 *    if (gl_InvocationID == 1)
+				 *       gl_TessLevelInner = ...;
+				 *
+				 * If you consider disjoint code segments separated by barriers, each
+				 * such segment that writes tess factor channels should write the same
+				 * channels in all codepaths within that segment.
+				 */
+				/* Test the masks themselves, not the pointers: the
+				 * pointers are never NULL, so checking them would make
+				 * this condition always true.
+				 */
+				if (*upper_block_tf_writemask || *cond_block_tf_writemask) {
+					/* Accumulate the result: */
+					*tessfactors_are_def_in_all_invocs &=
+						!(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));
+
+					/* Analyze the next code segment from scratch. */
+					*upper_block_tf_writemask = 0;
+					*cond_block_tf_writemask = 0;
+				}
+			} else {
+				*upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
+			}
+		}
+
+		break;
+	}
+	case nir_cf_node_if: {
+		unsigned then_tessfactor_writemask = 0;
+		unsigned else_tessfactor_writemask = 0;
+
+		nir_if *if_stmt = nir_cf_node_as_if(cf_node);
+		foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) {
+			scan_tess_ctrl(nested_node, &then_tessfactor_writemask,
+				       cond_block_tf_writemask,
+				       tessfactors_are_def_in_all_invocs, true);
+		}
+
+		foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) {
+			scan_tess_ctrl(nested_node, &else_tessfactor_writemask,
+				       cond_block_tf_writemask,
+				       tessfactors_are_def_in_all_invocs, true);
+		}
+
+		if (then_tessfactor_writemask || else_tessfactor_writemask) {
+			/* If both statements write the same tess factor channels,
+			 * we can say that the upper block writes them too.
+			 */
+			*upper_block_tf_writemask |= then_tessfactor_writemask &
+				else_tessfactor_writemask;
+			*cond_block_tf_writemask |= then_tessfactor_writemask |
+				else_tessfactor_writemask;
+		}
+
+		break;
+	}
+	case nir_cf_node_loop: {
+		/* Everything written inside a loop is accounted as a
+		 * conditional write: the conditional mask is passed for both
+		 * mask arguments.
+		 */
+		nir_loop *loop = nir_cf_node_as_loop(cf_node);
+		foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) {
+			scan_tess_ctrl(nested_node, cond_block_tf_writemask,
+				       cond_block_tf_writemask,
+				       tessfactors_are_def_in_all_invocs, true);
+		}
+
+		break;
+	}
+	default:
+		unreachable("unknown cf node type");
+	}
+}
+
+/* Report whether every invocation of a tess control shader defines the tess
+ * factors.
+ *
+ * The pass works as follows:
+ * If all codepaths write tess factors, we can say that all invocations
+ * define tess factors. Each tess factor channel is tracked separately, and
+ * the code is analyzed in segments separated by barriers.
+ */
+bool
+ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
+{
+	assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
+
+	/* Channels written by the main block / by conditional blocks. */
+	unsigned uncond_mask = 0;
+	unsigned cond_mask = 0;
+
+	/* Starts out true and accumulates the verdicts of all segments. If
+	 * tess factors aren't written at all, it's a shader bug and we don't
+	 * care if this stays true.
+	 */
+	bool all_defined = true;
+
+	nir_foreach_function(function, nir) {
+		if (!function->impl)
+			continue;
+
+		foreach_list_typed(nir_cf_node, cf, node, &function->impl->body) {
+			scan_tess_ctrl(cf, &uncond_mask, &cond_mask,
+				       &all_defined, false);
+		}
+	}
+
+	/* Verdict for the last code segment separated by a barrier: a channel
+	 * written only conditionally is not defined in all invocations.
+	 */
+	if ((cond_mask & ~uncond_mask) != 0)
+		all_defined = false;
+
+	return all_defined;
+}
--- /dev/null
+/*
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef AC_NIR_TO_LLVM_H
+#define AC_NIR_TO_LLVM_H
+
+#include <stdbool.h>
+#include "llvm-c/Core.h"
+#include "llvm-c/TargetMachine.h"
+#include "amd_family.h"
+#include "compiler/shader_enums.h"
+
+struct nir_shader;
+struct nir_variable;
+struct ac_llvm_context;
+struct ac_shader_abi;
+
+/* Interpolation locations */
+#define INTERP_CENTER 0
+#define INTERP_CENTROID 1
+#define INTERP_SAMPLE 2
+
+/* Flat index of channel `chan` of register `index`, four channels per
+ * register.
+ */
+static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
+{
+	const unsigned channels_per_reg = 4;
+
+	return index * channels_per_reg + chan;
+}
+
+/* Move large function temporaries to scratch and lower indirect derefs for
+ * the variable modes chosen from the shader stage and the chip class. */
+void ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
+
+/* Scan a tess control shader and report whether the tess factors are
+ * defined in all invocations. Asserts that the stage is tess control. */
+bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
+
+/* Translate the NIR shader into LLVM IR using the given LLVM context and
+ * shader ABI. */
+void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
+ struct nir_shader *nir);
+
+/* Allocate per-channel storage in abi->outputs for one output variable.
+ * Does nothing for the tess control stage. */
+void
+ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
+ struct ac_shader_abi *abi,
+ struct nir_shader *nir,
+ struct nir_variable *variable,
+ gl_shader_stage stage);
+
+void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage);
+
+#endif /* AC_NIR_TO_LLVM_H */
--- /dev/null
+/*
+ * Copyright 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef AC_SHADER_ABI_H
+#define AC_SHADER_ABI_H
+
+#include <llvm-c/Core.h>
+
+#include "compiler/shader_enums.h"
+
+struct nir_variable;
+
+#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
+
+#define AC_MAX_INLINE_PUSH_CONSTS 8
+
+/* Kind of descriptor requested from ac_shader_abi::load_sampler_desc. */
+enum ac_descriptor_type {
+ AC_DESC_IMAGE,
+ AC_DESC_FMASK,
+ AC_DESC_SAMPLER,
+ AC_DESC_BUFFER,
+ AC_DESC_PLANE_0,
+ AC_DESC_PLANE_1,
+ AC_DESC_PLANE_2,
+};
+
+/* Document the shader ABI during compilation. This is what allows radeonsi and
+ * radv to share a compiler backend.
+ *
+ * The struct holds LLVM values describing shader inputs plus a set of
+ * callbacks; each callback receives the ABI struct itself as its first
+ * argument.
+ */
+struct ac_shader_abi {
+ LLVMValueRef base_vertex;
+ LLVMValueRef start_instance;
+ LLVMValueRef draw_id;
+ LLVMValueRef vertex_id;
+ LLVMValueRef instance_id;
+ LLVMValueRef tcs_patch_id;
+ LLVMValueRef tcs_rel_ids;
+ LLVMValueRef tes_patch_id;
+ LLVMValueRef gs_prim_id;
+ LLVMValueRef gs_invocation_id;
+
+ /* PS */
+ LLVMValueRef frag_pos[4];
+ LLVMValueRef front_face;
+ LLVMValueRef ancillary;
+ LLVMValueRef sample_coverage;
+ LLVMValueRef prim_mask;
+ LLVMValueRef color0;
+ LLVMValueRef color1;
+ LLVMValueRef user_data;
+ LLVMValueRef persp_sample;
+ LLVMValueRef persp_center;
+ LLVMValueRef persp_centroid;
+ LLVMValueRef linear_sample;
+ LLVMValueRef linear_center;
+ LLVMValueRef linear_centroid;
+
+ /* CS */
+ LLVMValueRef local_invocation_ids;
+ LLVMValueRef num_work_groups;
+ LLVMValueRef workgroup_ids[3];
+ LLVMValueRef tg_size;
+
+ /* Vulkan only */
+ LLVMValueRef push_constants;
+ LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
+ unsigned num_inline_push_consts;
+ unsigned base_inline_push_consts;
+ LLVMValueRef view_index;
+
+ /* Output storage, four channels per output slot; entries are filled
+ * in by ac_handle_shader_output_decl() using ac_llvm_reg_index_soa(). */
+ LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
+
+ /* For VS and PS: pre-loaded shader inputs.
+ *
+ * Currently only used for NIR shaders; indexed by variables'
+ * driver_location.
+ */
+ LLVMValueRef *inputs;
+
+ /* Varying -> attribute number mapping. Also NIR-only */
+ unsigned fs_input_attr_indices[MAX_VARYING];
+
+ /* Called by ac_nir_translate() for non-compute stages once the
+ * shader body has been translated, with AC_LLVM_MAX_OUTPUTS and the
+ * outputs array above. */
+ void (*emit_outputs)(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs);
+
+ void (*emit_vertex)(struct ac_shader_abi *abi,
+ unsigned stream,
+ LLVMValueRef *addrs);
+
+ void (*emit_primitive)(struct ac_shader_abi *abi,
+ unsigned stream);
+
+ void (*emit_kill)(struct ac_shader_abi *abi, LLVMValueRef visible);
+
+ LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ unsigned vertex_index,
+ unsigned const_index,
+ LLVMTypeRef type);
+
+ LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi,
+ LLVMTypeRef type,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ bool is_patch,
+ bool is_compact,
+ bool load_inputs);
+
+ void (*store_tcs_outputs)(struct ac_shader_abi *abi,
+ const struct nir_variable *var,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ LLVMValueRef src,
+ unsigned writemask);
+
+ LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi,
+ unsigned varying_id,
+ bool load_default_state);
+
+
+ LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
+
+ /**
+ * Load the descriptor for the given buffer.
+ *
+ * \param buffer the buffer as presented in NIR: this is the descriptor
+ * in Vulkan, and the buffer index in OpenGL/Gallium
+ * \param write whether buffer contents will be written
+ */
+ LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
+ LLVMValueRef buffer, bool write);
+
+ /**
+ * Load a descriptor associated to a sampler.
+ *
+ * \param descriptor_set the descriptor set index (only for Vulkan)
+ * \param base_index the base index of the sampler variable
+ * \param constant_index constant part of an array index (or 0, if the
+ * sampler variable is not an array)
+ * \param index non-constant part of an array index (may be NULL)
+ * \param desc_type the type of descriptor to load
+ * \param image whether the descriptor is loaded for an image operation
+ */
+ LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi,
+ unsigned descriptor_set,
+ unsigned base_index,
+ unsigned constant_index,
+ LLVMValueRef index,
+ enum ac_descriptor_type desc_type,
+ bool image, bool write,
+ bool bindless);
+
+ /**
+ * Load a Vulkan-specific resource.
+ *
+ * \param index resource index
+ * \param desc_set descriptor set
+ * \param binding descriptor set binding
+ */
+ LLVMValueRef (*load_resource)(struct ac_shader_abi *abi,
+ LLVMValueRef index,
+ unsigned desc_set,
+ unsigned binding);
+
+ LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi,
+ LLVMValueRef sample_id);
+
+ LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
+
+ LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
+
+ /* Whether to clamp the shadow reference value to [0,1] on GFX8. Radeonsi
+ * currently uses it due to promoting D16 to D32, but radv needs it off. */
+ bool clamp_shadow_reference;
+ bool interp_at_sample_force_center;
+
+ /* Whether bounds checks are required */
+ bool robust_buffer_access;
+};
+
+#endif /* AC_SHADER_ABI_H */
--- /dev/null
+# Copyright © 2019 Valve Corporation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Sources and headers compiled into the libamd_common_llvm static library
+# defined below.
+amd_common_llvm_files = files(
+ 'ac_llvm_build.c',
+ 'ac_llvm_build.h',
+ 'ac_llvm_cull.c',
+ 'ac_llvm_cull.h',
+ 'ac_llvm_helper.cpp',
+ 'ac_llvm_util.c',
+ 'ac_llvm_util.h',
+ 'ac_nir_to_llvm.c',
+ 'ac_nir_to_llvm.h',
+ 'ac_shader_abi.h',
+)
+
+# Static library 'amd_common_llvm', built from amd_common_llvm_files and
+# linked against libamd_common. It depends on LLVM (dep_llvm) and is compiled
+# with the C and C++ visibility arguments.
+libamd_common_llvm = static_library(
+ 'amd_common_llvm',
+ [amd_common_llvm_files],
+ include_directories : [
+ inc_common, inc_compiler, inc_mesa, inc_mapi, inc_amd, inc_amd_common
+ ],
+ link_with: [
+ libamd_common
+ ],
+ dependencies : [
+ dep_llvm, dep_thread, dep_elf, dep_libdrm_amdgpu, dep_valgrind,
+ idep_nir_headers, idep_amdgfxregs_h,
+ ],
+ c_args : [c_vis_args],
+ cpp_args : [cpp_vis_args],
+)
+
subdir('addrlib')
subdir('common')
+subdir('llvm')
if with_amd_vk
subdir('compiler')
subdir('vulkan')
'vulkan_radeon',
[libradv_files, radv_entrypoints, radv_extensions_c, amd_vk_format_table_c, sha1_h, xmlpool_options_h, radv_gfx10_format_table_h],
include_directories : [
- inc_common, inc_amd, inc_amd_common, inc_compiler, inc_util, inc_vulkan_wsi,
+ inc_common, inc_amd, inc_amd_common, inc_amd_common_llvm, inc_compiler, inc_util, inc_vulkan_wsi,
],
link_with : [
- libamd_common, libamdgpu_addrlib, libvulkan_wsi,
+ libamd_common, libamd_common_llvm, libamdgpu_addrlib, libvulkan_wsi,
],
dependencies : [
dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m,
LOCAL_SRC_FILES := $(C_SOURCES) $(CXX_SOURCES)
-LOCAL_C_INCLUDES += $(MESA_TOP)/src/amd/common
+LOCAL_C_INCLUDES += \
+ $(MESA_TOP)/src/amd/common \
+ $(MESA_TOP)/src/amd/llvm
LOCAL_SHARED_LIBRARIES := libdrm_radeon
LOCAL_MODULE := libmesa_pipe_r600
LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/amd/common \
+ $(MESA_TOP)/src/amd/llvm \
$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_amd_common,,)/common \
$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir
'radeonsi',
[files_libradeonsi, si_driinfo_h, sid_tables_h, gfx10_format_table_h],
include_directories : [
- inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common,
+ inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_amd_common, inc_amd_common_llvm,
inc_gallium_drivers,
],
c_args : ['-Wstrict-overflow=0', c_vis_args],
compile_args : '-DGALLIUM_RADEONSI',
sources : si_driinfo_h,
link_with : [
- libradeonsi, libradeonwinsys, libamdgpuwinsys, libamd_common,
+ libradeonsi, libradeonwinsys, libamdgpuwinsys, libamd_common, libamd_common_llvm
],
dependencies : idep_nir,
)
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include "amd/common/ac_llvm_util.h"
-#include "amd/common/sid.h"
+#include "ac_llvm_util.h"
+#include "sid.h"
#ifndef AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS
#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E
),
include_directories : [
inc_amd, inc_gallium, inc_gallium_aux, inc_include, inc_src,
+ inc_amd_common, inc_amd_common_llvm,
],
c_args : [c_vis_args],
cpp_args : [cpp_vis_args],
inc_gallium = include_directories('gallium/include')
inc_gallium_aux = include_directories('gallium/auxiliary')
inc_amd_common = include_directories('amd/common')
+inc_amd_common_llvm = include_directories('amd/llvm')
libglsl_util = static_library(
'glsl_util',