Users are encouraged to switch to LLVM 7.0, released in September 2018.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
LLVM_REQUIRED_GALLIUM=3.3.0
LLVM_REQUIRED_OPENCL=3.9.0
LLVM_REQUIRED_R600=3.9.0
-LLVM_REQUIRED_RADEONSI=6.0.0
-LLVM_REQUIRED_RADV=6.0.0
+LLVM_REQUIRED_RADEONSI=7.0.0
+LLVM_REQUIRED_RADV=7.0.0
LLVM_REQUIRED_SWR=6.0.0
dnl Check for progs
endif
if with_amd_vk or with_gallium_radeonsi
- _llvm_version = '>= 6.0.0'
+ _llvm_version = '>= 7.0.0'
elif with_gallium_swr
_llvm_version = '>= 6.0.0'
elif with_gallium_opencl or with_gallium_r600
ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
- ctx->intptr = HAVE_32BIT_POINTERS ? ctx->i32 : ctx->i64;
+ ctx->intptr = ctx->i32;
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
int idx,
LLVMValueRef val)
{
+ unsigned tl_lanes[4], trbl_lanes[4];
LLVMValueRef tl, trbl, args[2];
LLVMValueRef result;
- if (HAVE_LLVM >= 0x0700) {
- unsigned tl_lanes[4], trbl_lanes[4];
-
- for (unsigned i = 0; i < 4; ++i) {
- tl_lanes[i] = i & mask;
- trbl_lanes[i] = (i & mask) + idx;
- }
-
- tl = ac_build_quad_swizzle(ctx, val,
- tl_lanes[0], tl_lanes[1],
- tl_lanes[2], tl_lanes[3]);
- trbl = ac_build_quad_swizzle(ctx, val,
- trbl_lanes[0], trbl_lanes[1],
- trbl_lanes[2], trbl_lanes[3]);
- } else if (ctx->chip_class >= VI) {
- LLVMValueRef thread_id, tl_tid, trbl_tid;
- thread_id = ac_get_thread_id(ctx);
-
- tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
- LLVMConstInt(ctx->i32, mask, false), "");
-
- trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
- LLVMConstInt(ctx->i32, idx, false), "");
-
- args[0] = LLVMBuildMul(ctx->builder, tl_tid,
- LLVMConstInt(ctx->i32, 4, false), "");
- args[1] = val;
- tl = ac_build_intrinsic(ctx,
- "llvm.amdgcn.ds.bpermute", ctx->i32,
- args, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
-
- args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
- LLVMConstInt(ctx->i32, 4, false), "");
- trbl = ac_build_intrinsic(ctx,
- "llvm.amdgcn.ds.bpermute", ctx->i32,
- args, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
- } else {
- uint32_t masks[2] = {};
-
- switch (mask) {
- case AC_TID_MASK_TOP_LEFT:
- masks[0] = 0x8000;
- if (idx == 1)
- masks[1] = 0x8055;
- else
- masks[1] = 0x80aa;
-
- break;
- case AC_TID_MASK_TOP:
- masks[0] = 0x8044;
- masks[1] = 0x80ee;
- break;
- case AC_TID_MASK_LEFT:
- masks[0] = 0x80a0;
- masks[1] = 0x80f5;
- break;
- default:
- assert(0);
- }
-
- args[0] = val;
- args[1] = LLVMConstInt(ctx->i32, masks[0], false);
-
- tl = ac_build_intrinsic(ctx,
- "llvm.amdgcn.ds.swizzle", ctx->i32,
- args, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
-
- args[1] = LLVMConstInt(ctx->i32, masks[1], false);
- trbl = ac_build_intrinsic(ctx,
- "llvm.amdgcn.ds.swizzle", ctx->i32,
- args, 2,
- AC_FUNC_ATTR_READNONE |
- AC_FUNC_ATTR_CONVERGENT);
+ for (unsigned i = 0; i < 4; ++i) {
+ tl_lanes[i] = i & mask;
+ trbl_lanes[i] = (i & mask) + idx;
}
+ tl = ac_build_quad_swizzle(ctx, val,
+ tl_lanes[0], tl_lanes[1],
+ tl_lanes[2], tl_lanes[3]);
+ trbl = ac_build_quad_swizzle(ctx, val,
+ trbl_lanes[0], trbl_lanes[1],
+ trbl_lanes[2], trbl_lanes[3]);
+
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
- if (HAVE_LLVM >= 0x0700) {
- result = ac_build_intrinsic(ctx,
- "llvm.amdgcn.wqm.f32", ctx->f32,
- &result, 1, 0);
- }
+ result = ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32,
+ &result, 1, 0);
return result;
}
unreachable("bad atomic op");
}
-/* LLVM 6 and older */
-static LLVMValueRef ac_build_image_opcode_llvm6(struct ac_llvm_context *ctx,
- struct ac_image_args *a)
-{
- LLVMValueRef args[16];
- LLVMTypeRef retty = ctx->v4f32;
- const char *name = NULL;
- const char *atomic_subop = "";
- char intr_name[128], coords_type[64];
-
- bool sample = a->opcode == ac_image_sample ||
- a->opcode == ac_image_gather4 ||
- a->opcode == ac_image_get_lod;
- bool atomic = a->opcode == ac_image_atomic ||
- a->opcode == ac_image_atomic_cmpswap;
- bool da = a->dim == ac_image_cube ||
- a->dim == ac_image_1darray ||
- a->dim == ac_image_2darray ||
- a->dim == ac_image_2darraymsaa;
- if (a->opcode == ac_image_get_lod)
- da = false;
-
- unsigned num_coords =
- a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0;
- LLVMValueRef addr;
- unsigned num_addr = 0;
-
- if (a->opcode == ac_image_get_lod) {
- switch (a->dim) {
- case ac_image_1darray:
- num_coords = 1;
- break;
- case ac_image_2darray:
- case ac_image_cube:
- num_coords = 2;
- break;
- default:
- break;
- }
- }
-
- if (a->offset)
- args[num_addr++] = ac_to_integer(ctx, a->offset);
- if (a->bias)
- args[num_addr++] = ac_to_integer(ctx, a->bias);
- if (a->compare)
- args[num_addr++] = ac_to_integer(ctx, a->compare);
- if (a->derivs[0]) {
- unsigned num_derivs = ac_num_derivs(a->dim);
- for (unsigned i = 0; i < num_derivs; ++i)
- args[num_addr++] = ac_to_integer(ctx, a->derivs[i]);
- }
- for (unsigned i = 0; i < num_coords; ++i)
- args[num_addr++] = ac_to_integer(ctx, a->coords[i]);
- if (a->lod)
- args[num_addr++] = ac_to_integer(ctx, a->lod);
-
- unsigned pad_goal = util_next_power_of_two(num_addr);
- while (num_addr < pad_goal)
- args[num_addr++] = LLVMGetUndef(ctx->i32);
-
- addr = ac_build_gather_values(ctx, args, num_addr);
-
- unsigned num_args = 0;
- if (atomic || a->opcode == ac_image_store || a->opcode == ac_image_store_mip) {
- args[num_args++] = a->data[0];
- if (a->opcode == ac_image_atomic_cmpswap)
- args[num_args++] = a->data[1];
- }
-
- unsigned coords_arg = num_args;
- if (sample)
- args[num_args++] = ac_to_float(ctx, addr);
- else
- args[num_args++] = ac_to_integer(ctx, addr);
-
- args[num_args++] = a->resource;
- if (sample)
- args[num_args++] = a->sampler;
- if (!atomic) {
- args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
- if (sample)
- args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
- args[num_args++] = a->cache_policy & ac_glc ? ctx->i1true : ctx->i1false;
- args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
- args[num_args++] = ctx->i1false; /* lwe */
- args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
- } else {
- args[num_args++] = ctx->i1false; /* r128 */
- args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
- args[num_args++] = a->cache_policy & ac_slc ? ctx->i1true : ctx->i1false;
- }
-
- switch (a->opcode) {
- case ac_image_sample:
- name = "llvm.amdgcn.image.sample";
- break;
- case ac_image_gather4:
- name = "llvm.amdgcn.image.gather4";
- break;
- case ac_image_load:
- name = "llvm.amdgcn.image.load";
- break;
- case ac_image_load_mip:
- name = "llvm.amdgcn.image.load.mip";
- break;
- case ac_image_store:
- name = "llvm.amdgcn.image.store";
- retty = ctx->voidt;
- break;
- case ac_image_store_mip:
- name = "llvm.amdgcn.image.store.mip";
- retty = ctx->voidt;
- break;
- case ac_image_atomic:
- case ac_image_atomic_cmpswap:
- name = "llvm.amdgcn.image.atomic.";
- retty = ctx->i32;
- if (a->opcode == ac_image_atomic_cmpswap) {
- atomic_subop = "cmpswap";
- } else {
- atomic_subop = get_atomic_name(a->atomic);
- }
- break;
- case ac_image_get_lod:
- name = "llvm.amdgcn.image.getlod";
- break;
- case ac_image_get_resinfo:
- name = "llvm.amdgcn.image.getresinfo";
- break;
- default:
- unreachable("invalid image opcode");
- }
-
- ac_build_type_name_for_intr(LLVMTypeOf(args[coords_arg]), coords_type,
- sizeof(coords_type));
-
- if (atomic) {
- snprintf(intr_name, sizeof(intr_name), "llvm.amdgcn.image.atomic.%s.%s",
- atomic_subop, coords_type);
- } else {
- bool lod_suffix =
- a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
-
- snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
- name,
- a->compare ? ".c" : "",
- a->bias ? ".b" :
- lod_suffix ? ".l" :
- a->derivs[0] ? ".d" :
- a->level_zero ? ".lz" : "",
- a->offset ? ".o" : "",
- coords_type);
- }
-
- LLVMValueRef result =
- ac_build_intrinsic(ctx, intr_name, retty, args, num_args,
- a->attributes);
- if (!sample && retty == ctx->v4f32) {
- result = LLVMBuildBitCast(ctx->builder, result,
- ctx->v4i32, "");
- }
- return result;
-}
-
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a)
{
(a->level_zero ? 1 : 0) +
(a->derivs[0] ? 1 : 0) <= 1);
- if (HAVE_LLVM < 0x0700)
- return ac_build_image_opcode_llvm6(ctx, a);
-
if (a->opcode == ac_image_get_lod) {
switch (dim) {
case ac_image_1darray:
LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
{
- if (!HAVE_32BIT_POINTERS)
- return ac_array_in_const_addr_space(elem_type);
-
return LLVMPointerType(LLVMArrayType(elem_type, 0),
AC_ADDR_SPACE_CONST_32BIT);
}
extern "C" {
#endif
-#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)
-
enum {
- AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */
+ AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
AC_ADDR_SPACE_GLOBAL = 1,
- AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5,
+ AC_ADDR_SPACE_GDS = 2,
AC_ADDR_SPACE_LDS = 3,
- AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */
+ AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
};
#include <llvm/Transforms/IPO.h>
#include <llvm/IR/LegacyPassManager.h>
-#if HAVE_LLVM < 0x0700
-#include "llvm/Support/raw_ostream.h"
-#endif
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
{
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
-#if HAVE_LLVM >= 0x0700
nullptr,
-#endif
llvm::TargetMachine::CGFT_ObjectFile)) {
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
delete p;
void ac_enable_global_isel(LLVMTargetMachineRef tm)
{
-#if HAVE_LLVM >= 0x0700
reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
-#endif
}
#include <llvm-c/Support.h>
#include <llvm-c/Transforms/IPO.h>
#include <llvm-c/Transforms/Scalar.h>
-#if HAVE_LLVM >= 0x0700
#include <llvm-c/Transforms/Utils.h>
-#endif
#include "c11/threads.h"
#include "gallivm/lp_bld_misc.h"
#include "util/u_math.h"
case CHIP_RAVEN:
return "gfx902";
case CHIP_VEGA12:
- return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
+ return "gfx904";
case CHIP_VEGA20:
- return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902";
+ return "gfx906";
case CHIP_RAVEN2:
return "gfx902"; /* TODO: use gfx909 when it's available */
default:
bool
ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
- bool okay_to_leak_target_library_info,
enum radeon_family family,
enum ac_target_machine_options tm_options)
{
goto fail;
}
- if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) {
- compiler->target_library_info =
- ac_create_target_library_info(triple);
- if (!compiler->target_library_info)
- goto fail;
- }
+ compiler->target_library_info =
+ ac_create_target_library_info(triple);
+ if (!compiler->target_library_info)
+ goto fail;
compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
tm_options & AC_TM_CHECK_IR);
{
if (compiler->passmgr)
LLVMDisposePassManager(compiler->passmgr);
-#if HAVE_LLVM >= 0x0700
- /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. */
if (compiler->target_library_info)
ac_dispose_target_library_info(compiler->target_library_info);
-#endif
if (compiler->low_opt_tm)
LLVMDisposeTargetMachine(compiler->low_opt_tm);
if (compiler->tm)
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
- bool okay_to_leak_target_library_info,
enum radeon_family family,
enum ac_target_machine_options tm_options);
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
{
LLVMValueRef result;
- if (HAVE_LLVM < 0x0700) {
- LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
- result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
- result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
- } else {
- /* FIXME: LLVM 7 returns incorrect result when count is 0.
- * https://bugs.freedesktop.org/show_bug.cgi?id=107276
- */
- LLVMValueRef zero = ctx->i32_0;
- LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
- LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, "");
+ /* FIXME: LLVM 7+ returns incorrect result when count is 0.
+ * https://bugs.freedesktop.org/show_bug.cgi?id=107276
+ */
+ LLVMValueRef zero = ctx->i32_0;
+ LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
+ LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, "");
- result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
- result = LLVMBuildSelect(ctx->builder, icond1, srcs[0], result, "");
- result = LLVMBuildSelect(ctx->builder, icond2, zero, result, "");
- }
+ result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
+ result = LLVMBuildSelect(ctx->builder, icond1, srcs[0], result, "");
+ result = LLVMBuildSelect(ctx->builder, icond2, zero, result, "");
return result;
}
if (loc->sgpr_idx == -1)
return;
- assert(loc->num_sgprs == (HAVE_32BIT_POINTERS ? 1 : 2));
+ assert(loc->num_sgprs == 1);
assert(!loc->indirect);
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
struct radv_userdata_info *loc = &locs->descriptor_sets[start];
unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
- radv_emit_shader_pointer_head(cs, sh_offset, count,
- HAVE_32BIT_POINTERS);
+ radv_emit_shader_pointer_head(cs, sh_offset, count, true);
for (int i = 0; i < count; i++) {
struct radv_descriptor_set *set =
descriptors_state->sets[start + i];
- radv_emit_shader_pointer_body(device, cs, set->va,
- HAVE_32BIT_POINTERS);
+ radv_emit_shader_pointer_body(device, cs, set->va, true);
}
}
}
{
struct radv_descriptor_state *descriptors_state =
radv_get_descriptors_state(cmd_buffer, bind_point);
- uint8_t ptr_size = HAVE_32BIT_POINTERS ? 1 : 2;
- uint32_t size = MAX_SETS * 4 * ptr_size;
+ uint32_t size = MAX_SETS * 4;
uint32_t offset;
void *ptr;
return;
for (unsigned i = 0; i < MAX_SETS; i++) {
- uint32_t *uptr = ((uint32_t *)ptr) + i * ptr_size;
+ uint32_t *uptr = ((uint32_t *)ptr) + i;
uint64_t set_va = 0;
struct radv_descriptor_set *set = descriptors_state->sets[i];
if (descriptors_state->valid & (1u << i))
set_va = set->va;
uptr[0] = set_va & 0xffffffff;
- if (ptr_size == 2)
- uptr[1] = set_va >> 32;
}
uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
.shaderCullDistance = true,
.shaderFloat64 = true,
.shaderInt64 = true,
- .shaderInt16 = pdevice->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x700,
+ .shaderInt16 = pdevice->rad_info.chip_class >= GFX9,
.sparseBinding = true,
.variableMultisampleRate = true,
.inheritedQueries = true,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
VkPhysicalDevice16BitStorageFeatures *features =
(VkPhysicalDevice16BitStorageFeatures*)ext;
- bool enabled = HAVE_LLVM >= 0x0700 && pdevice->rad_info.chip_class >= VI;
+ bool enabled = pdevice->rad_info.chip_class >= VI;
features->storageBuffer16BitAccess = enabled;
features->uniformAndStorageBuffer16BitAccess = enabled;
features->storagePushConstant16 = enabled;
# and dEQP-VK.api.info.device fail due to the duplicated strings.
EXTENSIONS = [
Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
- Extension('VK_KHR_16bit_storage', 1, 'HAVE_LLVM >= 0x0700'),
+ Extension('VK_KHR_16bit_storage', 1, True),
Extension('VK_KHR_bind_memory2', 1, True),
Extension('VK_KHR_create_renderpass2', 1, True),
Extension('VK_KHR_dedicated_allocation', 1, True),
bool init(void)
{
if (!ac_init_llvm_compiler(&llvm_info,
- true,
family,
tm_options))
return false;
}
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
- bool okay_to_leak_target_library_info,
bool thread_compiler,
enum radeon_family family,
enum ac_target_machine_options tm_options)
}
if (!ac_init_llvm_compiler(info,
- okay_to_leak_target_library_info,
family,
tm_options))
return false;
#include <llvm-c/Core.h>
#include <llvm-c/TargetMachine.h>
#include <llvm-c/Transforms/Scalar.h>
-#if HAVE_LLVM >= 0x0700
#include <llvm-c/Transforms/Utils.h>
-#endif
#include "sid.h"
#include "gfx9d.h"
static void
set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
{
- bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
- idx != AC_UD_SCRATCH_RING_OFFSETS;
+ bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
}
struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
assert(ud_info);
- set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect);
+ set_loc(ud_info, sgpr_idx, 1, indirect);
if (!indirect)
locs->descriptor_sets_enabled |= 1 << idx;
uint8_t count = 0;
if (ctx->shader_info->info.vs.has_vertex_buffers)
- count += HAVE_32BIT_POINTERS ? 1 : 2;
+ count++;
count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
return count;
user_sgpr_count++;
if (ctx->shader_info->info.loads_push_constants)
- user_sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
+ user_sgpr_count++;
uint32_t available_sgprs = ctx->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
uint32_t num_desc_set =
util_bitcount(ctx->shader_info->info.desc_set_used_mask);
- if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) < num_desc_set) {
+ if (remaining_sgprs < num_desc_set) {
user_sgpr_info->indirect_all_descriptor_sets = true;
}
}
struct radeon_cmdbuf *cs,
uint32_t sh_offset, uint64_t va, bool global)
{
- bool use_32bit_pointers = HAVE_32BIT_POINTERS && !global;
+ bool use_32bit_pointers = !global;
radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
radv_init_llvm_once();
- radv_init_llvm_compiler(&ac_llvm, false,
+ radv_init_llvm_compiler(&ac_llvm,
thread_compiler,
chip_family, tm_options);
if (gs_copy_shader) {
#endif
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
- bool okay_to_leak_target_library_info,
bool thread_compiler,
enum radeon_family family,
enum ac_target_machine_options tm_options);
unsigned sh_offset,
unsigned pointer_count)
{
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (HAVE_32BIT_POINTERS ? 1 : 2), 0));
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count, 0));
radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
}
{
radeon_emit(cs, va);
- if (HAVE_32BIT_POINTERS)
- assert(va == 0 || (va >> 32) == sscreen->info.address32_hi);
- else
- radeon_emit(cs, va >> 32);
+ assert(va == 0 || (va >> 32) == sscreen->info.address32_hi);
}
static void si_emit_shader_pointer(struct si_context *sctx,
}
}
-static void si_emit_disjoint_shader_pointers(struct si_context *sctx,
- unsigned pointer_mask,
- unsigned sh_base)
-{
- if (!sh_base)
- return;
-
- struct radeon_cmdbuf *cs = sctx->gfx_cs;
- unsigned mask = sctx->shader_pointers_dirty & pointer_mask;
-
- while (mask) {
- struct si_descriptors *descs = &sctx->descriptors[u_bit_scan(&mask)];
- unsigned sh_offset = sh_base + descs->shader_userdata_offset;
-
- si_emit_shader_pointer_head(cs, sh_offset, 1);
- si_emit_shader_pointer_body(sctx->screen, cs, descs->gpu_address);
- }
-}
-
static void si_emit_global_shader_pointers(struct si_context *sctx,
struct si_descriptors *descs)
{
sh_base[PIPE_SHADER_TESS_EVAL]);
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
sh_base[PIPE_SHADER_FRAGMENT]);
- if (HAVE_32BIT_POINTERS || sctx->chip_class <= VI) {
- si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
- sh_base[PIPE_SHADER_TESS_CTRL]);
- si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
- sh_base[PIPE_SHADER_GEOMETRY]);
- } else {
- si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
- sh_base[PIPE_SHADER_TESS_CTRL]);
- si_emit_disjoint_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
- sh_base[PIPE_SHADER_GEOMETRY]);
- }
+ si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
+ sh_base[PIPE_SHADER_TESS_CTRL]);
+ si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
+ sh_base[PIPE_SHADER_GEOMETRY]);
sctx->shader_pointers_dirty &=
~u_bit_consecutive(SI_DESCS_RW_BUFFERS, SI_DESCS_FIRST_COMPUTE);
{
int i;
-#if !HAVE_32BIT_POINTERS
- STATIC_ASSERT(GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES % 2 == 0);
-#endif
-
for (i = 0; i < SI_NUM_SHADERS; i++) {
bool is_2nd = sctx->chip_class >= GFX9 &&
(i == PIPE_SHADER_TESS_CTRL ||
desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);
if (is_2nd) {
-#if HAVE_32BIT_POINTERS
if (i == PIPE_SHADER_TESS_CTRL) {
rel_dw_offset = (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS -
R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
rel_dw_offset = (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS -
R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
}
-#else
- rel_dw_offset = GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES;
-#endif
} else {
rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES;
}
!sscreen->llvm_has_working_vgpr_indexing)
return 0;
- /* Doing indirect indexing on GFX9 with LLVM 6.0 hangs.
- * This means we don't support INTERP instructions with
- * indirect indexing on inputs.
- */
- if (shader == PIPE_SHADER_FRAGMENT &&
- !sscreen->llvm_has_working_vgpr_indexing &&
- HAVE_LLVM < 0x0700)
- return 0;
-
/* TCS and TES load inputs directly from LDS or offchip
* memory, so indirect indexing is always supported.
* PS has to support indirect indexing, because we can't
(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
ac_init_llvm_once();
- ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options);
+ ac_init_llvm_compiler(compiler, sscreen->info.family, tm_options);
compiler->passes = ac_create_llvm_passes(compiler->tm);
if (compiler->low_opt_tm)
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
LLVMValueRef desc0, desc1;
- if (HAVE_32BIT_POINTERS) {
- desc0 = ptr;
- desc1 = LLVMConstInt(ctx->i32,
- S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
- } else {
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->v2i32, "");
- desc0 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_0, "");
- desc1 = LLVMBuildExtractElement(ctx->ac.builder, ptr, ctx->i32_1, "");
- /* Mask out all bits except BASE_ADDRESS_HI. */
- desc1 = LLVMBuildAnd(ctx->ac.builder, desc1,
- LLVMConstInt(ctx->i32, ~C_008F04_BASE_ADDRESS_HI, 0), "");
- }
+ desc0 = ptr;
+ desc1 = LLVMConstInt(ctx->i32,
+ S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
LLVMValueRef desc_elems[] = {
desc0,
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef ptr, lo, hi;
- if (HAVE_32BIT_POINTERS) {
- ptr = LLVMGetParam(ctx->main_fn, param);
- ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
- return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
- }
-
ptr = LLVMGetParam(ctx->main_fn, param);
- ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i64, "");
- ptr = LLVMBuildBitCast(builder, ptr, ctx->v2i32, "");
- lo = LLVMBuildExtractElement(builder, ptr, ctx->i32_0, "");
- hi = LLVMBuildExtractElement(builder, ptr, ctx->i32_1, "");
- ret = LLVMBuildInsertValue(builder, ret, lo, return_index, "");
- return LLVMBuildInsertValue(builder, ret, hi, return_index + 1, "");
+ ptr = LLVMBuildPtrToInt(builder, ptr, ctx->i32, "");
+ return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
}
/* This only writes the tessellation factor levels. */
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
- if (HAVE_32BIT_POINTERS)
- ret = si_insert_input_ptr(ctx, ret, 1, 1);
+ ret = si_insert_input_ptr(ctx, ret, 1, 1);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_factor_offset, 4);
ret = si_insert_input_ret(ctx, ret, ctx->param_vs_state_bits,
8 + SI_SGPR_VS_STATE_BITS);
-#if !HAVE_32BIT_POINTERS
- ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
- 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
-#endif
-
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_out_lds_offsets,
LLVMValueRef ret = ctx->return_value;
ret = si_insert_input_ptr(ctx, ret, 0, 0);
- if (HAVE_32BIT_POINTERS)
- ret = si_insert_input_ptr(ctx, ret, 1, 1);
+ ret = si_insert_input_ptr(ctx, ret, 1, 1);
ret = si_insert_input_ret(ctx, ret, ctx->param_gs2vs_offset, 2);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_wave_info, 3);
ret = si_insert_input_ret(ctx, ret, ctx->param_merged_scratch_offset, 5);
ctx->param_bindless_samplers_and_images,
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
-#if !HAVE_32BIT_POINTERS
- ret = si_insert_input_ptr(ctx, ret, ctx->param_vs_state_bits + 4,
- 8 + GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES);
-#endif
-
unsigned vgpr;
if (ctx->type == PIPE_SHADER_VERTEX)
vgpr = 8 + GFX9_VSGS_NUM_USER_SGPR;
case SI_SHADER_MERGED_VERTEX_TESSCTRL:
/* Merged stages have 8 system SGPRs at the beginning. */
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
- if (HAVE_32BIT_POINTERS) {
- declare_per_stage_desc_pointers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- } else {
- declare_const_and_shader_buffers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- }
+ declare_per_stage_desc_pointers(ctx, &fninfo,
+ ctx->type == PIPE_SHADER_TESS_CTRL);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->type == PIPE_SHADER_VERTEX);
declare_vs_specific_input_sgprs(ctx, &fninfo);
- if (!HAVE_32BIT_POINTERS) {
- declare_samplers_and_images(ctx, &fninfo,
- ctx->type == PIPE_SHADER_TESS_CTRL);
- }
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_offsets = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
- if (!HAVE_32BIT_POINTERS) /* Align to 2 dwords. */
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
ac_array_in_const32_addr_space(ctx->v4i32));
case SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY:
/* Merged stages have 8 system SGPRs at the beginning. */
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_GS */
- if (HAVE_32BIT_POINTERS) {
- declare_per_stage_desc_pointers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- } else {
- declare_const_and_shader_buffers(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- }
+ declare_per_stage_desc_pointers(ctx, &fninfo,
+ ctx->type == PIPE_SHADER_GEOMETRY);
ctx->param_gs2vs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_merged_wave_info = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tes_offchip_addr = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* Declare as many input SGPRs as the VS has. */
- if (!HAVE_32BIT_POINTERS)
- add_arg(&fninfo, ARG_SGPR, ctx->i32); /* unused */
}
- if (!HAVE_32BIT_POINTERS) {
- declare_samplers_and_images(ctx, &fninfo,
- ctx->type == PIPE_SHADER_GEOMETRY);
- }
if (ctx->type == PIPE_SHADER_VERTEX) {
ctx->param_vertex_buffers = add_arg(&fninfo, ARG_SGPR,
ac_array_in_const32_addr_space(ctx->v4i32));
LLVMValueRef ptr[2], list;
bool merged_shader = is_merged_shader(ctx);
- if (HAVE_32BIT_POINTERS) {
- ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
- list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
- ac_array_in_const32_addr_space(ctx->v4i32), "");
- return list;
- }
-
- /* Get the pointer to rw buffers. */
ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
- ptr[1] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS + 1);
- list = ac_build_gather_values(&ctx->ac, ptr, 2);
- list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
- list = LLVMBuildIntToPtr(ctx->ac.builder, list,
- ac_array_in_const_addr_space(ctx->v4i32), "");
+ list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
+ ac_array_in_const32_addr_space(ctx->v4i32), "");
return list;
}
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
- if (!HAVE_32BIT_POINTERS)
- add_arg(&fninfo, ARG_SGPR, ctx->ac.intptr);
ctx->param_tcs_offchip_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32);
ctx->param_tcs_out_lds_layout = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* SGPR user data indices */
enum {
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
-#if !HAVE_32BIT_POINTERS
- SI_SGPR_RW_BUFFERS_HI,
-#endif
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
-#if !HAVE_32BIT_POINTERS
- SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI,
-#endif
SI_SGPR_CONST_AND_SHADER_BUFFERS, /* or just a constant buffer 0 pointer */
-#if !HAVE_32BIT_POINTERS
- SI_SGPR_CONST_AND_SHADER_BUFFERS_HI,
-#endif
SI_SGPR_SAMPLERS_AND_IMAGES,
-#if !HAVE_32BIT_POINTERS
- SI_SGPR_SAMPLERS_AND_IMAGES_HI,
-#endif
SI_NUM_RESOURCE_SGPRS,
/* API VS, TES without GS, GS copy shader */
GFX6_TCS_NUM_USER_SGPR,
/* GFX9: Merged shaders. */
-#if HAVE_32BIT_POINTERS
/* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO (SGPR0). */
/* 2ND_SAMPLERS_AND_IMAGES is set in USER_DATA_ADDR_HI (SGPR1). */
GFX9_MERGED_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
-#else
- /* 2ND_CONST_AND_SHADER_BUFFERS is set in USER_DATA_ADDR_LO/HI (SGPR[0:1]). */
- GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES = SI_VS_NUM_USER_SGPR,
- GFX9_SGPR_2ND_SAMPLERS_AND_IMAGES_HI,
- GFX9_MERGED_NUM_USER_SGPR,
-#endif
/* GFX9: Merged LS-HS (VS-TCS) only. */
GFX9_SGPR_TCS_OFFCHIP_LAYOUT = GFX9_MERGED_NUM_USER_SGPR,
GFX9_SGPR_TCS_OUT_OFFSETS,
GFX9_SGPR_TCS_OUT_LAYOUT,
-#if !HAVE_32BIT_POINTERS
- GFX9_SGPR_align_for_vb_pointer,
-#endif
GFX9_TCS_NUM_USER_SGPR,
/* GS limits */
GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
-#if HAVE_32BIT_POINTERS
GFX9_VSGS_NUM_USER_SGPR = SI_VS_NUM_USER_SGPR,
GFX9_TESGS_NUM_USER_SGPR = SI_TES_NUM_USER_SGPR,
-#else
- GFX9_VSGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
- GFX9_TESGS_NUM_USER_SGPR = GFX9_MERGED_NUM_USER_SGPR,
-#endif
SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS,
/* PS only */
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- if (HAVE_LLVM < 0x0700) {
- LLVMValueRef bfe_sm5 =
- ac_build_bfe(&ctx->ac, emit_data->args[0],
- emit_data->args[1], emit_data->args[2],
- emit_data->info->opcode == TGSI_OPCODE_IBFE);
-
- /* Correct for GLSL semantics. */
- LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
- LLVMConstInt(ctx->i32, 32, 0), "");
- emit_data->output[emit_data->chan] =
- LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
- } else {
- /* FIXME: LLVM 7 returns incorrect result when count is 0.
- * https://bugs.freedesktop.org/show_bug.cgi?id=107276
- */
- LLVMValueRef zero = ctx->i32_0;
- LLVMValueRef bfe_sm5 =
- ac_build_bfe(&ctx->ac, emit_data->args[0],
- emit_data->args[1], emit_data->args[2],
- emit_data->info->opcode == TGSI_OPCODE_IBFE);
-
- /* Correct for GLSL semantics. */
- LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
- LLVMConstInt(ctx->i32, 32, 0), "");
- LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
- zero, "");
- bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
- emit_data->output[emit_data->chan] =
- LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
- }
+ /* FIXME: LLVM 7+ returns incorrect result when count is 0.
+ * https://bugs.freedesktop.org/show_bug.cgi?id=107276
+ */
+ LLVMValueRef zero = ctx->i32_0;
+ LLVMValueRef bfe_sm5 =
+ ac_build_bfe(&ctx->ac, emit_data->args[0],
+ emit_data->args[1], emit_data->args[2],
+ emit_data->info->opcode == TGSI_OPCODE_IBFE);
+
+ /* Correct for GLSL semantics. */
+ LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2],
+ LLVMConstInt(ctx->i32, 32, 0), "");
+ LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2],
+ zero, "");
+ bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, "");
+ emit_data->output[emit_data->chan] =
+ LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, "");
}
/* this is ffs in C */
static unsigned si_get_num_vs_user_sgprs(unsigned num_always_on_user_sgprs)
{
/* Add the pointer to VBO descriptors. */
- if (HAVE_32BIT_POINTERS) {
- return num_always_on_user_sgprs + 1;
- } else {
- assert(num_always_on_user_sgprs % 2 == 0);
- return num_always_on_user_sgprs + 2;
- }
+ return num_always_on_user_sgprs + 1;
}
static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)