From 420fe1e7f9ef56177c8f45e98e057488a2b57646 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 3 Jan 2020 23:15:27 -0500 Subject: [PATCH] radeonsi: remove TGSI Acked-by: Pierre-Eric Pelloux-Prayer --- src/gallium/drivers/radeonsi/Makefile.sources | 5 +- src/gallium/drivers/radeonsi/meson.build | 5 +- src/gallium/drivers/radeonsi/si_compute.c | 31 +- .../drivers/radeonsi/si_debug_options.h | 1 - src/gallium/drivers/radeonsi/si_get.c | 27 +- src/gallium/drivers/radeonsi/si_pipe.c | 6 +- src/gallium/drivers/radeonsi/si_shader.c | 1384 +----------- src/gallium/drivers/radeonsi/si_shader.h | 9 +- .../drivers/radeonsi/si_shader_internal.h | 104 +- src/gallium/drivers/radeonsi/si_shader_llvm.c | 239 +++ .../drivers/radeonsi/si_shader_llvm_build.c | 219 ++ .../drivers/radeonsi/si_shader_tgsi_alu.c | 834 -------- .../drivers/radeonsi/si_shader_tgsi_mem.c | 1852 ----------------- .../drivers/radeonsi/si_shader_tgsi_setup.c | 1165 ----------- .../drivers/radeonsi/si_state_shaders.c | 57 +- src/util/00-mesa-defaults.conf | 6 - 16 files changed, 598 insertions(+), 5346 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/si_shader_llvm.c create mode 100644 src/gallium/drivers/radeonsi/si_shader_llvm_build.c delete mode 100644 src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c delete mode 100644 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c delete mode 100644 src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources index 886aaf6fa34..5d658b744d0 100644 --- a/src/gallium/drivers/radeonsi/Makefile.sources +++ b/src/gallium/drivers/radeonsi/Makefile.sources @@ -35,10 +35,9 @@ C_SOURCES := \ si_shader.c \ si_shader.h \ si_shader_internal.h \ + si_shader_llvm.c \ + si_shader_llvm_build.c \ si_shader_nir.c \ - si_shader_tgsi_alu.c \ - si_shader_tgsi_mem.c \ - si_shader_tgsi_setup.c \ si_shaderlib_tgsi.c \ si_state.c \ si_state_binning.c \ diff 
--git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index d2d3dd684b0..a0bd10f6ac9 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -50,10 +50,9 @@ files_libradeonsi = files( 'si_shader.c', 'si_shader.h', 'si_shader_internal.h', + 'si_shader_llvm.c', + 'si_shader_llvm_build.c', 'si_shader_nir.c', - 'si_shader_tgsi_alu.c', - 'si_shader_tgsi_mem.c', - 'si_shader_tgsi_setup.c', 'si_shaderlib_tgsi.c', 'si_state.c', 'si_state.h', diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 7abea1927cd..f264b880d29 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -24,7 +24,6 @@ */ #include "nir/tgsi_to_nir.h" -#include "tgsi/tgsi_parse.h" #include "util/u_async_debug.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" @@ -124,13 +123,8 @@ static void si_create_compute_state_async(void *job, int thread_index) if (!compiler->passes) si_init_compiler(sscreen, compiler); - if (program->ir_type == PIPE_SHADER_IR_TGSI) { - tgsi_scan_shader(sel->tokens, &sel->info); - } else { - assert(program->ir_type == PIPE_SHADER_IR_NIR); - - si_nir_scan_shader(sel->nir, &sel->info); - } + assert(program->ir_type == PIPE_SHADER_IR_NIR); + si_nir_scan_shader(sel->nir, &sel->info); /* Store the declared LDS size into tgsi_shader_info for the shader * cache to include it. 
@@ -167,9 +161,6 @@ static void si_create_compute_state_async(void *job, int thread_index) if (!si_shader_create(sscreen, compiler, &program->shader, debug)) { program->shader.compilation_failed = true; - - if (program->ir_type == PIPE_SHADER_IR_TGSI) - FREE(sel->tokens); return; } @@ -209,8 +200,6 @@ static void si_create_compute_state_async(void *job, int thread_index) simple_mtx_unlock(&sscreen->shader_cache_mutex); } - FREE(sel->tokens); - sel->tokens = NULL; ralloc_free(sel->nir); sel->nir = NULL; } @@ -234,16 +223,9 @@ static void *si_create_compute_state( program->input_size = cso->req_input_mem; if (cso->ir_type != PIPE_SHADER_IR_NATIVE) { - if (sscreen->options.enable_nir && - cso->ir_type == PIPE_SHADER_IR_TGSI) { + if (cso->ir_type == PIPE_SHADER_IR_TGSI) { program->ir_type = PIPE_SHADER_IR_NIR; sel->nir = tgsi_to_nir(cso->prog, ctx->screen); - } else if (cso->ir_type == PIPE_SHADER_IR_TGSI) { - sel->tokens = tgsi_dup_tokens(cso->prog); - if (!sel->tokens) { - FREE(program); - return NULL; - } } else { assert(cso->ir_type == PIPE_SHADER_IR_NIR); sel->nir = (struct nir_shader *) cso->prog; @@ -719,8 +701,8 @@ static bool si_upload_compute_input(struct si_context *sctx, return true; } -static void si_setup_tgsi_user_data(struct si_context *sctx, - const struct pipe_grid_info *info) +static void si_setup_nir_user_data(struct si_context *sctx, + const struct pipe_grid_info *info) { struct si_compute *program = sctx->cs_shader_state.program; struct si_shader_selector *sel = &program->sel; @@ -944,7 +926,7 @@ static void si_launch_grid( } if (program->ir_type != PIPE_SHADER_IR_NATIVE) - si_setup_tgsi_user_data(sctx, info); + si_setup_nir_user_data(sctx, info); si_emit_dispatch_packets(sctx, info); @@ -977,7 +959,6 @@ void si_destroy_compute(struct si_compute *program) FREE(program->global_buffers); si_shader_destroy(&program->shader); - FREE(program->sel.tokens); ralloc_free(program->sel.nir); FREE(program); } diff --git 
a/src/gallium/drivers/radeonsi/si_debug_options.h b/src/gallium/drivers/radeonsi/si_debug_options.h index 7ba835acf84..9a0dd0c9f78 100644 --- a/src/gallium/drivers/radeonsi/si_debug_options.h +++ b/src/gallium/drivers/radeonsi/si_debug_options.h @@ -1,5 +1,4 @@ OPT_BOOL(clear_db_cache_before_clear, false, "Clear DB cache before fast depth clear") -OPT_BOOL(enable_nir, true, "Enable NIR") OPT_BOOL(aux_debug, false, "Generate ddebug_dumps for the auxiliary context") OPT_BOOL(sync_compile, false, "Always compile synchronously (will cause stalls)") OPT_BOOL(dump_shader_binary, false, "Dump shader binary as part of ddebug_dumps") diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index c34c8649bcf..1adbafda53a 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -159,6 +159,9 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_IMAGE_LOAD_FORMATTED: case PIPE_CAP_PREFER_COMPUTE_FOR_MULTIMEDIA: case PIPE_CAP_TGSI_DIV: + case PIPE_CAP_PACKED_UNIFORMS: + case PIPE_CAP_SHADER_SAMPLES_IDENTICAL: + case PIPE_CAP_GL_SPIRV: return 1; case PIPE_CAP_QUERY_SO_OVERFLOW: @@ -195,7 +198,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: if (!sscreen->info.has_indirect_compute_dispatch) return 420; - return sscreen->options.enable_nir ? 460 : 450; + return 460; case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET: /* Optimal number for good TexSubImage performance on Polaris10. */ @@ -214,15 +217,6 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return sscreen->info.has_sparse_vm_mappings ? 
RADEON_SPARSE_PAGE_SIZE : 0; - case PIPE_CAP_PACKED_UNIFORMS: - case PIPE_CAP_SHADER_SAMPLES_IDENTICAL: - case PIPE_CAP_GL_SPIRV: - return sscreen->options.enable_nir; - - case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF: - if (sscreen->options.enable_nir) - return 0; - return 1; /* Unsupported features. */ case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY: @@ -246,6 +240,7 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE: case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS: case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS: + case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF: return 0; case PIPE_CAP_FENCE_SIGNAL: @@ -395,14 +390,14 @@ static int si_get_shader_param(struct pipe_screen* pscreen, int ir = 1 << PIPE_SHADER_IR_NATIVE; if (sscreen->info.has_indirect_compute_dispatch) - ir |= 1 << PIPE_SHADER_IR_TGSI; + ir |= 1 << PIPE_SHADER_IR_NIR; return ir; } case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: { uint64_t max_const_buffer_size; - pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_TGSI, + pscreen->get_compute_param(pscreen, PIPE_SHADER_IR_NIR, PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, &max_const_buffer_size); return MIN2(max_const_buffer_size, INT_MAX); @@ -444,13 +439,9 @@ static int si_get_shader_param(struct pipe_screen* pscreen, case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: return SI_NUM_IMAGES; case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT: - if (sscreen->options.enable_nir) - return 0; - return 32; + return 0; case PIPE_SHADER_CAP_PREFERRED_IR: - if (sscreen->options.enable_nir) - return PIPE_SHADER_IR_NIR; - return PIPE_SHADER_IR_TGSI; + return PIPE_SHADER_IR_NIR; case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD: return 4; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 2e3232d1cf0..755c768fb0b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -660,7 +660,7 @@ static struct pipe_context 
*si_create_context(struct pipe_screen *screen, } uint64_t max_threads_per_block; - screen->get_compute_param(screen, PIPE_SHADER_IR_TGSI, + screen->get_compute_param(screen, PIPE_SHADER_IR_NIR, PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK, &max_threads_per_block); @@ -910,10 +910,6 @@ static void si_disk_cache_create(struct si_screen *sscreen) /* These flags affect shader compilation. */ #define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL)) uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS; - /* Reserve left-most bit for tgsi/nir selector */ - assert(!(shader_debug_flags & (1u << 31))); - shader_debug_flags |= (uint32_t) - ((sscreen->options.enable_nir & 0x1) << 31); /* Add the high bits of 32-bit addresses, which affects * how 32-bit addresses are expanded to 64 bits. diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index e6678e026cd..65a070b4570 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -25,14 +25,9 @@ #include #include "util/u_memory.h" -#include "util/u_string.h" -#include "tgsi/tgsi_build.h" #include "tgsi/tgsi_strings.h" -#include "tgsi/tgsi_util.h" -#include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_from_mesa.h" -#include "ac_binary.h" #include "ac_exp_param.h" #include "ac_shader_util.h" #include "ac_rtld.h" @@ -50,15 +45,7 @@ static const char scratch_rsrc_dword0_symbol[] = static const char scratch_rsrc_dword1_symbol[] = "SCRATCH_RSRC_DWORD1"; -static void si_init_shader_ctx(struct si_shader_context *ctx, - struct si_screen *sscreen, - struct ac_llvm_compiler *compiler, - unsigned wave_size, - bool nir); - -static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data); +static void si_llvm_emit_barrier(struct si_shader_context *ctx); static void si_dump_shader_key(const struct si_shader *shader, FILE *f); @@ -596,15 +583,6 @@ void si_llvm_load_input_vs( 
out[i] = ac_to_float(&ctx->ac, fetches[i]); } -static void declare_input_vs( - struct si_shader_context *ctx, - unsigned input_index, - const struct tgsi_full_declaration *decl, - LLVMValueRef out[4]) -{ - si_llvm_load_input_vs(ctx, input_index, out); -} - LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle) { @@ -626,53 +604,6 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, } } -/** - * Return the value of tgsi_ind_register for indexing. - * This is the indirect index with the constant offset added to it. - */ -LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx, - const struct tgsi_ind_register *ind, - unsigned addr_mul, - int rel_index) -{ - LLVMValueRef result; - - if (ind->File == TGSI_FILE_ADDRESS) { - result = ctx->addrs[ind->Index][ind->Swizzle]; - result = LLVMBuildLoad(ctx->ac.builder, result, ""); - } else { - struct tgsi_full_src_register src = {}; - - src.Register.File = ind->File; - src.Register.Index = ind->Index; - - /* Set the second index to 0 for constants. */ - if (ind->File == TGSI_FILE_CONSTANT) - src.Register.Dimension = 1; - - result = ctx->bld_base.emit_fetch_funcs[ind->File](&ctx->bld_base, &src, - TGSI_TYPE_SIGNED, - ind->Swizzle); - result = ac_to_integer(&ctx->ac, result); - } - - return ac_build_imad(&ctx->ac, result, LLVMConstInt(ctx->i32, addr_mul, 0), - LLVMConstInt(ctx->i32, rel_index, 0)); -} - -/** - * Like si_get_indirect_index, but restricts the return value to a (possibly - * undefined) value inside [0..num). 
- */ -LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx, - const struct tgsi_ind_register *ind, - int rel_index, unsigned num) -{ - LLVMValueRef result = si_get_indirect_index(ctx, ind, 1, rel_index); - - return si_llvm_bound_index(ctx, result, num); -} - static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context *ctx, LLVMValueRef vertex_dw_stride, LLVMValueRef base_addr, @@ -701,78 +632,6 @@ static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context LLVMConstInt(ctx->i32, param * 4, 0), ""); } -/** - * Calculate a dword address given an input or output register and a stride. - */ -static LLVMValueRef get_dw_address(struct si_shader_context *ctx, - const struct tgsi_full_dst_register *dst, - const struct tgsi_full_src_register *src, - LLVMValueRef vertex_dw_stride, - LLVMValueRef base_addr) -{ - struct tgsi_shader_info *info = &ctx->shader->selector->info; - ubyte *name, *index, *array_first; - int input_index; - struct tgsi_full_dst_register reg; - LLVMValueRef vertex_index = NULL; - LLVMValueRef ind_index = NULL; - - /* Set the register description. The address computation is the same - * for sources and destinations. */ - if (src) { - reg.Register.File = src->Register.File; - reg.Register.Index = src->Register.Index; - reg.Register.Indirect = src->Register.Indirect; - reg.Register.Dimension = src->Register.Dimension; - reg.Indirect = src->Indirect; - reg.Dimension = src->Dimension; - reg.DimIndirect = src->DimIndirect; - } else - reg = *dst; - - /* If the register is 2-dimensional (e.g. an array of vertices - * in a primitive), calculate the base address of the vertex. */ - if (reg.Register.Dimension) { - if (reg.Dimension.Indirect) - vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect, - 1, reg.Dimension.Index); - else - vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0); - } - - /* Get information about the register.
*/ - if (reg.Register.File == TGSI_FILE_INPUT) { - name = info->input_semantic_name; - index = info->input_semantic_index; - array_first = info->input_array_first; - } else if (reg.Register.File == TGSI_FILE_OUTPUT) { - name = info->output_semantic_name; - index = info->output_semantic_index; - array_first = info->output_array_first; - } else { - assert(0); - return NULL; - } - - if (reg.Register.Indirect) { - /* Add the relative address of the element. */ - if (reg.Indirect.ArrayID) - input_index = array_first[reg.Indirect.ArrayID]; - else - input_index = reg.Register.Index; - - ind_index = si_get_indirect_index(ctx, &reg.Indirect, - 1, reg.Register.Index - input_index); - } else { - input_index = reg.Register.Index; - } - - return get_dw_address_from_generic_indices(ctx, vertex_dw_stride, - base_addr, vertex_index, - ind_index, name[input_index], - index[input_index]); -} - /* The offchip buffer layout for TCS->TES is * * - attribute 0 of patch 0 vertex 0 @@ -854,65 +713,24 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices( vertex_index, param_index); } -static LLVMValueRef get_tcs_tes_buffer_address_from_reg( - struct si_shader_context *ctx, - const struct tgsi_full_dst_register *dst, - const struct tgsi_full_src_register *src) +static LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx, + LLVMTypeRef type, + LLVMValueRef val1, + LLVMValueRef val2) { - struct tgsi_shader_info *info = &ctx->shader->selector->info; - ubyte *name, *index, *array_first; - struct tgsi_full_src_register reg; - LLVMValueRef vertex_index = NULL; - LLVMValueRef param_index = NULL; - unsigned param_base; - - reg = src ? *src : tgsi_full_src_register_from_dst(dst); - - if (reg.Register.Dimension) { - if (reg.Dimension.Indirect) - vertex_index = si_get_indirect_index(ctx, &reg.DimIndirect, - 1, reg.Dimension.Index); - else - vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0); - } - - /* Get information about the register.
*/ - if (reg.Register.File == TGSI_FILE_INPUT) { - name = info->input_semantic_name; - index = info->input_semantic_index; - array_first = info->input_array_first; - } else if (reg.Register.File == TGSI_FILE_OUTPUT) { - name = info->output_semantic_name; - index = info->output_semantic_index; - array_first = info->output_array_first; - } else { - assert(0); - return NULL; - } - - if (reg.Register.Indirect) { - if (reg.Indirect.ArrayID) - param_base = array_first[reg.Indirect.ArrayID]; - else - param_base = reg.Register.Index; - - param_index = si_get_indirect_index(ctx, &reg.Indirect, - 1, reg.Register.Index - param_base); - } else { - param_base = reg.Register.Index; - } - - return get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, - param_index, name[param_base], - index[param_base]); + LLVMValueRef values[2] = { + ac_to_integer(&ctx->ac, val1), + ac_to_integer(&ctx->ac, val2), + }; + LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2); + return LLVMBuildBitCast(ctx->ac.builder, result, type, ""); } -static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base, +static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle, LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base, bool can_speculate) { - struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef value, value2; LLVMTypeRef vec_type = LLVMVectorType(type, 4); @@ -938,7 +756,7 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base, value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4 + 4, ac_glc, can_speculate, false); - return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); + return si_build_gather_64bit(ctx, type, value, value2); } /** @@ -948,30 +766,28 @@ static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base, * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4 * \param dw_addr address in dwords */ -static
LLVMValueRef lshs_lds_load(struct lp_build_tgsi_context *bld_base, - LLVMTypeRef type, unsigned swizzle, - LLVMValueRef dw_addr) +static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, + LLVMTypeRef type, unsigned swizzle, + LLVMValueRef dw_addr) { - struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef value; if (swizzle == ~0) { - LLVMValueRef values[TGSI_NUM_CHANNELS]; + LLVMValueRef values[4]; - for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) - values[chan] = lshs_lds_load(bld_base, type, chan, dw_addr); + for (unsigned chan = 0; chan < 4; chan++) + values[chan] = lshs_lds_load(ctx, type, chan, dw_addr); - return ac_build_gather_values(&ctx->ac, values, - TGSI_NUM_CHANNELS); + return ac_build_gather_values(&ctx->ac, values, 4); } /* Split 64-bit loads. */ if (llvm_type_is_64bit(ctx, type)) { LLVMValueRef lo, hi; - lo = lshs_lds_load(bld_base, ctx->i32, swizzle, dw_addr); - hi = lshs_lds_load(bld_base, ctx->i32, swizzle + 1, dw_addr); - return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi); + lo = lshs_lds_load(ctx, ctx->i32, swizzle, dw_addr); + hi = lshs_lds_load(ctx, ctx->i32, swizzle + 1, dw_addr); + return si_build_gather_64bit(ctx, type, lo, hi); } dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, @@ -1049,21 +865,6 @@ static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, return ac_build_gather_values(&ctx->ac, desc, 4); } -static LLVMValueRef fetch_input_tcs( - struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, unsigned swizzle_in) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef dw_addr, stride; - unsigned swizzle = swizzle_in & 0xffff; - stride = get_tcs_in_vertex_dw_stride(ctx); - dw_addr = get_tcs_in_current_patch_offset(ctx); - dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr); - - return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr); -} - static LLVMValueRef 
si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type, LLVMValueRef vertex_index, @@ -1079,7 +880,6 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, { struct si_shader_context *ctx = si_shader_context_from_abi(abi); struct tgsi_shader_info *info = &ctx->shader->selector->info; - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; LLVMValueRef dw_addr, stride; ubyte name, index; @@ -1125,49 +925,12 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, offset *= 2; offset += component; - value[i + component] = lshs_lds_load(bld_base, type, offset, dw_addr); + value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr); } return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); } -static LLVMValueRef fetch_output_tcs( - struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, unsigned swizzle_in) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef dw_addr, stride; - unsigned swizzle = (swizzle_in & 0xffff); - - if (reg->Register.Dimension) { - stride = get_tcs_out_vertex_dw_stride(ctx); - dw_addr = get_tcs_out_current_patch_offset(ctx); - dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr); - } else { - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr); - } - - return lshs_lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr); -} - -static LLVMValueRef fetch_input_tes( - struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, unsigned swizzle_in) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef base, addr; - unsigned swizzle = (swizzle_in & 0xffff); - - base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); - addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg); - - return buffer_load(bld_base, 
tgsi2llvmtype(bld_base, type), swizzle, - ctx->tess_offchip_ring, base, addr, true); -} - LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type, LLVMValueRef vertex_index, @@ -1226,110 +989,13 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, } offset += component; - value[i + component] = buffer_load(&ctx->bld_base, type, offset, + value[i + component] = buffer_load(ctx, type, offset, ctx->tess_offchip_ring, base, addr, true); } return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); } -static void store_output_tcs(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info, - unsigned index, - LLVMValueRef dst[4]) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_dst_register *reg = &inst->Dst[index]; - const struct tgsi_shader_info *sh_info = &ctx->shader->selector->info; - unsigned chan_index; - LLVMValueRef dw_addr, stride; - LLVMValueRef buffer, base, buf_addr; - LLVMValueRef values[4]; - bool skip_lds_store; - bool is_tess_factor = false, is_tess_inner = false; - - /* Only handle per-patch and per-vertex outputs here. - * Vectors will be lowered to scalars and this function will be called again. 
- */ - if (reg->Register.File != TGSI_FILE_OUTPUT || - (dst[0] && LLVMGetTypeKind(LLVMTypeOf(dst[0])) == LLVMVectorTypeKind)) { - si_llvm_emit_store(bld_base, inst, info, index, dst); - return; - } - - if (reg->Register.Dimension) { - stride = get_tcs_out_vertex_dw_stride(ctx); - dw_addr = get_tcs_out_current_patch_offset(ctx); - dw_addr = get_dw_address(ctx, reg, NULL, stride, dw_addr); - skip_lds_store = !sh_info->reads_pervertex_outputs; - } else { - dw_addr = get_tcs_out_current_patch_data_offset(ctx); - dw_addr = get_dw_address(ctx, reg, NULL, NULL, dw_addr); - skip_lds_store = !sh_info->reads_perpatch_outputs; - - if (!reg->Register.Indirect) { - int name = sh_info->output_semantic_name[reg->Register.Index]; - - /* Always write tess factors into LDS for the TCS epilog. */ - if (name == TGSI_SEMANTIC_TESSINNER || - name == TGSI_SEMANTIC_TESSOUTER) { - /* The epilog doesn't read LDS if invocation 0 defines tess factors. */ - skip_lds_store = !sh_info->reads_tessfactor_outputs && - ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs; - is_tess_factor = true; - is_tess_inner = name == TGSI_SEMANTIC_TESSINNER; - } - } - } - - buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); - - base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); - buf_addr = get_tcs_tes_buffer_address_from_reg(ctx, reg, NULL); - - uint32_t writemask = reg->Register.WriteMask; - while (writemask) { - chan_index = u_bit_scan(&writemask); - LLVMValueRef value = dst[chan_index]; - - if (inst->Instruction.Saturate) - value = ac_build_clamp(&ctx->ac, value); - - /* Skip LDS stores if there is no LDS read of this output. 
*/ - if (!skip_lds_store) - lshs_lds_store(ctx, chan_index, dw_addr, value); - - value = ac_to_integer(&ctx->ac, value); - values[chan_index] = value; - - if (reg->Register.WriteMask != 0xF && !is_tess_factor) { - ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, - buf_addr, base, - 4 * chan_index, ac_glc); - } - - /* Write tess factors into VGPRs for the epilog. */ - if (is_tess_factor && - ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) { - if (!is_tess_inner) { - LLVMBuildStore(ctx->ac.builder, value, /* outer */ - ctx->invoc0_tess_factors[chan_index]); - } else if (chan_index < 2) { - LLVMBuildStore(ctx->ac.builder, value, /* inner */ - ctx->invoc0_tess_factors[4 + chan_index]); - } - } - } - - if (reg->Register.WriteMask == 0xF && !is_tess_factor) { - LLVMValueRef value = ac_build_gather_values(&ctx->ac, - values, 4); - ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr, - base, 0, ac_glc); - } -} - static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_variable *var, LLVMValueRef vertex_index, @@ -1452,14 +1118,13 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi, } } -LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, - unsigned input_index, - unsigned vtx_offset_param, - LLVMTypeRef type, - unsigned swizzle) +static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, + unsigned input_index, + unsigned vtx_offset_param, + LLVMTypeRef type, + unsigned swizzle) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; struct si_shader *shader = ctx->shader; LLVMValueRef vtx_offset, soffset; struct tgsi_shader_info *info = &shader->selector->info; @@ -1512,14 +1177,13 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, /* GFX6: input load from the ESGS ring in memory. 
*/ if (swizzle == ~0) { - LLVMValueRef values[TGSI_NUM_CHANNELS]; + LLVMValueRef values[4]; unsigned chan; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + for (chan = 0; chan < 4; chan++) { values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param, type, chan); } - return ac_build_gather_values(&ctx->ac, values, - TGSI_NUM_CHANNELS); + return ac_build_gather_values(&ctx->ac, values, 4); } /* Get the vertex offset parameter on GFX6. */ @@ -1540,7 +1204,7 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0, vtx_offset, soffset, 0, ac_glc, true, false); - return si_llvm_emit_fetch_64bit(bld_base, type, value, value2); + return si_build_gather_64bit(ctx, type, value, value2); } return LLVMBuildBitCast(ctx->ac.builder, value, type, ""); } @@ -1570,58 +1234,6 @@ static LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi, return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); } -static LLVMValueRef fetch_input_gs( - struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle_in) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - struct tgsi_shader_info *info = &ctx->shader->selector->info; - unsigned swizzle = swizzle_in & 0xffff; - - unsigned semantic_name = info->input_semantic_name[reg->Register.Index]; - if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID) - return si_get_primitive_id(ctx, swizzle); - - if (!reg->Register.Dimension) - return NULL; - - return si_llvm_load_input_gs(&ctx->abi, reg->Register.Index, - reg->Dimension.Index, - tgsi2llvmtype(bld_base, type), - swizzle); -} - -static int lookup_interp_param_index(unsigned interpolate, unsigned location) -{ - switch (interpolate) { - case TGSI_INTERPOLATE_CONSTANT: - return 0; - - case TGSI_INTERPOLATE_LINEAR: - if (location == TGSI_INTERPOLATE_LOC_SAMPLE) - return 
SI_PARAM_LINEAR_SAMPLE; - else if (location == TGSI_INTERPOLATE_LOC_CENTROID) - return SI_PARAM_LINEAR_CENTROID; - else - return SI_PARAM_LINEAR_CENTER; - break; - case TGSI_INTERPOLATE_COLOR: - case TGSI_INTERPOLATE_PERSPECTIVE: - if (location == TGSI_INTERPOLATE_LOC_SAMPLE) - return SI_PARAM_PERSP_SAMPLE; - else if (location == TGSI_INTERPOLATE_LOC_CENTROID) - return SI_PARAM_PERSP_CENTROID; - else - return SI_PARAM_PERSP_CENTER; - break; - default: - fprintf(stderr, "Warning: Unhandled interpolation mode.\n"); - return -1; - } -} - static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx, unsigned attr_index, unsigned chan, LLVMValueRef prim_mask, @@ -1654,9 +1266,8 @@ static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx, * @param face SI_PARAM_FRONT_FACE * @param result the return value (4 components) */ -static void interp_fs_input(struct si_shader_context *ctx, +static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index, - unsigned semantic_name, unsigned semantic_index, unsigned num_interp_inputs, unsigned colors_read_mask, @@ -1693,8 +1304,7 @@ static void interp_fs_input(struct si_shader_context *ctx, ctx->i32_1, ""); } - if (semantic_name == TGSI_SEMANTIC_COLOR && - ctx->shader->key.part.ps.prolog.color_two_side) { + if (ctx->shader->key.part.ps.prolog.color_two_side) { LLVMValueRef is_face_positive; /* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1", @@ -1707,7 +1317,7 @@ static void interp_fs_input(struct si_shader_context *ctx, is_face_positive = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, face, ctx->i32_0, ""); - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + for (chan = 0; chan < 4; chan++) { LLVMValueRef front, back; front = si_build_fs_interp(ctx, @@ -1723,14 +1333,8 @@ static void interp_fs_input(struct si_shader_context *ctx, back, ""); } - } else if (semantic_name == TGSI_SEMANTIC_FOG) { - result[0] = si_build_fs_interp(ctx, input_index, - 0, prim_mask, i, j); - result[1] = - 
result[2] = LLVMConstReal(ctx->f32, 0.0f); - result[3] = LLVMConstReal(ctx->f32, 1.0f); } else { - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + for (chan = 0; chan < 4; chan++) { result[chan] = si_build_fs_interp(ctx, input_index, chan, prim_mask, i, j); @@ -1738,60 +1342,6 @@ static void interp_fs_input(struct si_shader_context *ctx, } } -void si_llvm_load_input_fs( - struct si_shader_context *ctx, - unsigned input_index, - LLVMValueRef out[4]) -{ - struct si_shader *shader = ctx->shader; - struct tgsi_shader_info *info = &shader->selector->info; - LLVMValueRef main_fn = ctx->main_fn; - LLVMValueRef interp_param = NULL; - int interp_param_idx; - enum tgsi_semantic semantic_name = info->input_semantic_name[input_index]; - unsigned semantic_index = info->input_semantic_index[input_index]; - enum tgsi_interpolate_mode interp_mode = info->input_interpolate[input_index]; - enum tgsi_interpolate_loc interp_loc = info->input_interpolate_loc[input_index]; - - /* Get colors from input VGPRs (set by the prolog). */ - if (semantic_name == TGSI_SEMANTIC_COLOR) { - unsigned colors_read = shader->selector->info.colors_read; - unsigned mask = colors_read >> (semantic_index * 4); - unsigned offset = SI_PARAM_POS_FIXED_PT + 1 + - (semantic_index ? util_bitcount(colors_read & 0xf) : 0); - LLVMValueRef undef = LLVMGetUndef(ctx->f32); - - out[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef; - out[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef; - out[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef; - out[3] = mask & 0x8 ? 
LLVMGetParam(main_fn, offset++) : undef; - return; - } - - interp_param_idx = lookup_interp_param_index(interp_mode, interp_loc); - if (interp_param_idx == -1) - return; - else if (interp_param_idx) { - interp_param = LLVMGetParam(ctx->main_fn, interp_param_idx); - } - - interp_fs_input(ctx, input_index, semantic_name, - semantic_index, 0, /* this param is unused */ - shader->selector->info.colors_read, interp_param, - ac_get_arg(&ctx->ac, ctx->args.prim_mask), - LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE), - &out[0]); -} - -static void declare_input_fs( - struct si_shader_context *ctx, - unsigned input_index, - const struct tgsi_full_declaration *decl, - LLVMValueRef out[4]) -{ - si_llvm_load_input_fs(ctx, input_index, out); -} - LLVMValueRef si_get_sample_id(struct si_shader_context *ctx) { return si_unpack_param(ctx, ctx->args.ancillary, 8, 4); @@ -1913,7 +1463,7 @@ static LLVMValueRef load_tess_level(struct si_shader_context *ctx, addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL, LLVMConstInt(ctx->i32, param, 0)); - return buffer_load(&ctx->bld_base, ctx->f32, + return buffer_load(ctx, ctx->f32, ~0, ctx->tess_offchip_ring, base, addr, true); } @@ -1982,211 +1532,6 @@ static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi) unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN"); } -void si_load_system_value(struct si_shader_context *ctx, - unsigned index, - const struct tgsi_full_declaration *decl) -{ - LLVMValueRef value = 0; - - assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES); - - switch (decl->Semantic.Name) { - case TGSI_SEMANTIC_INSTANCEID: - value = ctx->abi.instance_id; - break; - - case TGSI_SEMANTIC_VERTEXID: - value = LLVMBuildAdd(ctx->ac.builder, - ctx->abi.vertex_id, - ac_get_arg(&ctx->ac, ctx->args.base_vertex), ""); - break; - - case TGSI_SEMANTIC_VERTEXID_NOBASE: - /* Unused. Clarify the meaning in indexed vs. non-indexed - * draws if this is ever used again. 
*/ - assert(false); - break; - - case TGSI_SEMANTIC_BASEVERTEX: - value = get_base_vertex(&ctx->abi); - break; - - case TGSI_SEMANTIC_BASEINSTANCE: - value = ac_get_arg(&ctx->ac, ctx->args.start_instance); - break; - - case TGSI_SEMANTIC_DRAWID: - value = ac_get_arg(&ctx->ac, ctx->args.draw_id); - break; - - case TGSI_SEMANTIC_INVOCATIONID: - if (ctx->type == PIPE_SHADER_TESS_CTRL) { - value = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5); - } else if (ctx->type == PIPE_SHADER_GEOMETRY) { - if (ctx->screen->info.chip_class >= GFX10) { - value = LLVMBuildAnd(ctx->ac.builder, - ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id), - LLVMConstInt(ctx->i32, 127, 0), ""); - } else { - value = ac_get_arg(&ctx->ac, ctx->args.gs_invocation_id); - } - } else { - assert(!"INVOCATIONID not implemented"); - } - break; - - case TGSI_SEMANTIC_POSITION: - { - LLVMValueRef pos[4] = { - LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT), - LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT), - LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT), - ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, - LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT)), - }; - value = ac_build_gather_values(&ctx->ac, pos, 4); - break; - } - - case TGSI_SEMANTIC_FACE: - value = ac_get_arg(&ctx->ac, ctx->args.front_face); - break; - - case TGSI_SEMANTIC_SAMPLEID: - value = si_get_sample_id(ctx); - break; - - case TGSI_SEMANTIC_SAMPLEPOS: { - LLVMValueRef pos[4] = { - LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT), - LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT), - LLVMConstReal(ctx->f32, 0), - LLVMConstReal(ctx->f32, 0) - }; - pos[0] = ac_build_fract(&ctx->ac, pos[0], 32); - pos[1] = ac_build_fract(&ctx->ac, pos[1], 32); - value = ac_build_gather_values(&ctx->ac, pos, 4); - break; - } - - case TGSI_SEMANTIC_SAMPLEMASK: - /* This can only occur with the OpenGL Core profile, which - * doesn't support smoothing. 
- */ - value = LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE); - break; - - case TGSI_SEMANTIC_TESSCOORD: - value = si_load_tess_coord(&ctx->abi); - break; - - case TGSI_SEMANTIC_VERTICESIN: - value = si_load_patch_vertices_in(&ctx->abi); - break; - - case TGSI_SEMANTIC_TESSINNER: - case TGSI_SEMANTIC_TESSOUTER: - value = load_tess_level(ctx, decl->Semantic.Name); - break; - - case TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL: - case TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL: - value = load_tess_level_default(ctx, decl->Semantic.Name); - break; - - case TGSI_SEMANTIC_PRIMID: - value = si_get_primitive_id(ctx, 0); - break; - - case TGSI_SEMANTIC_GRID_SIZE: - value = ac_get_arg(&ctx->ac, ctx->args.num_work_groups); - break; - - case TGSI_SEMANTIC_BLOCK_SIZE: - value = get_block_size(&ctx->abi); - break; - - case TGSI_SEMANTIC_BLOCK_ID: - { - LLVMValueRef values[3]; - - for (int i = 0; i < 3; i++) { - values[i] = ctx->i32_0; - if (ctx->args.workgroup_ids[i].used) { - values[i] = ac_get_arg(&ctx->ac, ctx->args.workgroup_ids[i]); - } - } - value = ac_build_gather_values(&ctx->ac, values, 3); - break; - } - - case TGSI_SEMANTIC_THREAD_ID: - value = ac_get_arg(&ctx->ac, ctx->args.local_invocation_ids); - break; - - case TGSI_SEMANTIC_HELPER_INVOCATION: - value = ac_build_load_helper_invocation(&ctx->ac); - break; - - case TGSI_SEMANTIC_SUBGROUP_SIZE: - value = LLVMConstInt(ctx->i32, ctx->ac.wave_size, 0); - break; - - case TGSI_SEMANTIC_SUBGROUP_INVOCATION: - value = ac_get_thread_id(&ctx->ac); - break; - - case TGSI_SEMANTIC_SUBGROUP_EQ_MASK: - { - LLVMValueRef id = ac_get_thread_id(&ctx->ac); - if (ctx->ac.wave_size == 64) - id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); - value = LLVMBuildShl(ctx->ac.builder, - LLVMConstInt(ctx->ac.iN_wavemask, 1, 0), id, ""); - if (ctx->ac.wave_size == 32) - value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, ""); - value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, ""); - break; - } - - case 
TGSI_SEMANTIC_SUBGROUP_GE_MASK: - case TGSI_SEMANTIC_SUBGROUP_GT_MASK: - case TGSI_SEMANTIC_SUBGROUP_LE_MASK: - case TGSI_SEMANTIC_SUBGROUP_LT_MASK: - { - LLVMValueRef id = ac_get_thread_id(&ctx->ac); - if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_GT_MASK || - decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK) { - /* All bits set except LSB */ - value = LLVMConstInt(ctx->ac.iN_wavemask, -2, 0); - } else { - /* All bits set */ - value = LLVMConstInt(ctx->ac.iN_wavemask, -1, 0); - } - if (ctx->ac.wave_size == 64) - id = LLVMBuildZExt(ctx->ac.builder, id, ctx->i64, ""); - value = LLVMBuildShl(ctx->ac.builder, value, id, ""); - if (decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LE_MASK || - decl->Semantic.Name == TGSI_SEMANTIC_SUBGROUP_LT_MASK) - value = LLVMBuildNot(ctx->ac.builder, value, ""); - if (ctx->ac.wave_size == 32) - value = LLVMBuildZExt(ctx->ac.builder, value, ctx->i64, ""); - value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->v2i32, ""); - break; - } - - case TGSI_SEMANTIC_CS_USER_DATA_AMD: - value = ac_get_arg(&ctx->ac, ctx->cs_user_data); - break; - - default: - assert(!"unknown system value"); - return; - } - - ctx->system_values[index] = value; -} - void si_declare_compute_memory(struct si_shader_context *ctx) { struct si_shader_selector *sel = ctx->shader->selector; @@ -2206,15 +1551,6 @@ void si_declare_compute_memory(struct si_shader_context *ctx) ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, ""); } -void si_tgsi_declare_compute_memory(struct si_shader_context *ctx, - const struct tgsi_full_declaration *decl) -{ - assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED); - assert(decl->Range.First == decl->Range.Last); - - si_declare_compute_memory(ctx); -} - static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx) { LLVMValueRef ptr = @@ -2256,15 +1592,6 @@ static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *c return ac_build_gather_values(&ctx->ac, desc_elems, 4); } 
-static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i) -{ - LLVMValueRef list_ptr = ac_get_arg(&ctx->ac, - ctx->const_and_shader_buffers); - - return ac_build_load_to_sgpr(&ctx->ac, list_ptr, - LLVMConstInt(ctx->i32, si_get_constbuf_slot(i), 0)); -} - static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); @@ -2299,72 +1626,6 @@ load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write) return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index); } -static LLVMValueRef fetch_constant( - struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle_in) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - struct si_shader_selector *sel = ctx->shader->selector; - const struct tgsi_ind_register *ireg = ®->Indirect; - unsigned buf, idx; - unsigned swizzle = swizzle_in & 0xffff; - - LLVMValueRef addr, bufp; - - if (swizzle_in == LP_CHAN_ALL) { - unsigned chan; - LLVMValueRef values[4]; - for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) - values[chan] = fetch_constant(bld_base, reg, type, chan); - - return ac_build_gather_values(&ctx->ac, values, 4); - } - - /* Split 64-bit loads. */ - if (tgsi_type_is_64bit(type)) { - LLVMValueRef lo, hi; - - lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle); - hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, (swizzle_in >> 16)); - return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), - lo, hi); - } - - idx = reg->Register.Index * 4 + swizzle; - if (reg->Register.Indirect) { - addr = si_get_indirect_index(ctx, ireg, 16, idx * 4); - } else { - addr = LLVMConstInt(ctx->i32, idx * 4, 0); - } - - /* Fast path when user data SGPRs point to constant buffer 0 directly. 
*/ - if (sel->info.const_buffers_declared == 1 && - sel->info.shader_buffers_declared == 0) { - LLVMValueRef desc = load_const_buffer_desc_fast_path(ctx); - LLVMValueRef result = buffer_load_const(ctx, desc, addr); - return bitcast(bld_base, type, result); - } - - assert(reg->Register.Dimension); - buf = reg->Dimension.Index; - - if (reg->Dimension.Indirect) { - LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers); - LLVMValueRef index; - index = si_get_bounded_indirect_index(ctx, ®->DimIndirect, - reg->Dimension.Index, - ctx->num_const_buffers); - index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), ""); - bufp = ac_build_load_to_sgpr(&ctx->ac, ptr, index); - } else - bufp = load_const_buffer_desc(ctx, buf); - - return bitcast(bld_base, type, buffer_load_const(ctx, bufp, addr)); -} - /* Initialize arguments for the shader export intrinsic */ static void si_llvm_init_export_args(struct si_shader_context *ctx, LLVMValueRef *values, @@ -2495,11 +1756,8 @@ static void si_llvm_init_export_args(struct si_shader_context *ctx, } } -static void si_alpha_test(struct lp_build_tgsi_context *bld_base, - LLVMValueRef alpha) +static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha) { - struct si_shader_context *ctx = si_shader_context(bld_base); - if (ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_NEVER) { static LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = { [PIPE_FUNC_LESS] = LLVMRealOLT, @@ -2522,11 +1780,10 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, } } -static LLVMValueRef si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base, +static LLVMValueRef si_scale_alpha_by_sample_mask(struct si_shader_context *ctx, LLVMValueRef alpha, unsigned samplemask_param) { - struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef coverage; /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */ @@ -2569,8 +1826,8 @@ static 
void si_llvm_emit_clipvertex(struct si_shader_context *ctx, args->out[3] = LLVMConstReal(ctx->f32, 0.0f); /* Compute dot products of position and user clip plane vectors */ - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) { + for (chan = 0; chan < 4; chan++) { + for (const_chan = 0; const_chan < 4; const_chan++) { LLVMValueRef addr = LLVMConstInt(ctx->i32, ((reg_index * 4 + chan) * 4 + const_chan) * 4, 0); @@ -3030,9 +2287,8 @@ void si_llvm_export_vs(struct si_shader_context *ctx, * Forward all outputs from the vertex shader to the TES. This is only used * for the fixed function TCS. */ -static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) +static void si_copy_tcs_inputs(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context(bld_base); LLVMValueRef invocation_id, buffer, buffer_offset; LLVMValueRef lds_vertex_stride, lds_base; uint64_t inputs; @@ -3059,21 +2315,20 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base) invocation_id, LLVMConstInt(ctx->i32, i, 0)); - LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr); + LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr); ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, buffer_offset, 0, ac_glc); } } -static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, +static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef rel_patch_id, LLVMValueRef invocation_id, LLVMValueRef tcs_out_current_patch_data_offset, LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2]) { - struct si_shader_context *ctx = si_shader_context(bld_base); struct si_shader *shader = ctx->shader; unsigned tess_inner_index, tess_outer_index; LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; @@ -3082,7 +2337,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, /* Add a barrier 
before loading tess factors from LDS. */ if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) - si_llvm_emit_barrier(NULL, bld_base, NULL); + si_llvm_emit_barrier(ctx); /* Do this only for invocation 0, because the tess levels are per-patch, * not per-vertex. @@ -3144,11 +2399,11 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base, for (i = 0; i < outer_comps; i++) { outer[i] = out[i] = - lshs_lds_load(bld_base, ctx->ac.i32, i, lds_outer); + lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer); } for (i = 0; i < inner_comps; i++) { inner[i] = out[outer_comps+i] = - lshs_lds_load(bld_base, ctx->ac.i32, i, lds_inner); + lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner); } } @@ -3279,11 +2534,10 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, LLVMValueRef *addrs) { struct si_shader_context *ctx = si_shader_context_from_abi(abi); - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset; - si_copy_tcs_inputs(bld_base); + si_copy_tcs_inputs(ctx); rel_patch_id = get_rel_patch_id(ctx); invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5); @@ -3595,12 +2849,6 @@ static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi, emit_gs_epilogue(ctx); } -static void si_tgsi_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_gs_epilogue(ctx); -} - static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs) @@ -3677,24 +2925,15 @@ static void si_llvm_emit_prim_discard_cs_epilogue(struct ac_shader_abi *abi, ctx->return_value = ret; } -static void si_tgsi_emit_epilogue(struct lp_build_tgsi_context *bld_base) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - ctx->abi.emit_outputs(&ctx->abi, RADEON_LLVM_MAX_OUTPUTS, - &ctx->outputs[0][0]); -} - struct si_ps_exports { unsigned num; struct 
ac_export_args args[10]; }; -static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, +static void si_export_mrt_z(struct si_shader_context *ctx, LLVMValueRef depth, LLVMValueRef stencil, LLVMValueRef samplemask, struct si_ps_exports *exp) { - struct si_shader_context *ctx = si_shader_context(bld_base); struct ac_export_args args; ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args); @@ -3702,12 +2941,11 @@ static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base, memcpy(&exp->args[exp->num++], &args, sizeof(args)); } -static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, +static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *color, unsigned index, unsigned samplemask_param, bool is_last, struct si_ps_exports *exp) { - struct si_shader_context *ctx = si_shader_context(bld_base); int i; /* Clamp color */ @@ -3722,11 +2960,11 @@ static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base, /* Alpha test */ if (index == 0 && ctx->shader->key.part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS) - si_alpha_test(bld_base, color[3]); + si_alpha_test(ctx, color[3]); /* Line & polygon smoothing */ if (ctx->shader->key.part.ps.epilog.poly_line_smoothing) - color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3], + color[3] = si_scale_alpha_by_sample_mask(ctx, color[3], samplemask_param); /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. 
*/ @@ -3873,345 +3111,6 @@ static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi, ctx->return_value = ret; } -static void membar_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef src0 = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0); - unsigned flags = LLVMConstIntGetZExtValue(src0); - unsigned wait_flags = 0; - - if (flags & TGSI_MEMBAR_THREAD_GROUP) - wait_flags |= AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE; - - if (flags & (TGSI_MEMBAR_ATOMIC_BUFFER | - TGSI_MEMBAR_SHADER_BUFFER | - TGSI_MEMBAR_SHADER_IMAGE)) - wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE; - - if (flags & TGSI_MEMBAR_SHARED) - wait_flags |= AC_WAIT_LGKM; - - ac_build_waitcnt(&ctx->ac, wait_flags); -} - -static void clock_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef tmp = ac_build_shader_clock(&ctx->ac); - - emit_data->output[0] = - LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_0, ""); - emit_data->output[1] = - LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_1, ""); -} - -static void si_llvm_emit_ddxy( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - unsigned opcode = emit_data->info->opcode; - LLVMValueRef val; - int idx; - unsigned mask; - - if (opcode == TGSI_OPCODE_DDX_FINE) - mask = AC_TID_MASK_LEFT; - else if (opcode == TGSI_OPCODE_DDY_FINE) - mask = AC_TID_MASK_TOP; - else - mask = AC_TID_MASK_TOP_LEFT; - - /* for DDX we want to next X pixel, DDY next Y pixel. */ - idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 
1 : 2; - - val = ac_to_integer(&ctx->ac, emit_data->args[0]); - val = ac_build_ddxy(&ctx->ac, mask, idx, val); - emit_data->output[emit_data->chan] = val; -} - -static void build_interp_intrinsic(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - struct si_shader *shader = ctx->shader; - const struct tgsi_shader_info *info = &shader->selector->info; - LLVMValueRef interp_param; - const struct tgsi_full_instruction *inst = emit_data->inst; - const struct tgsi_full_src_register *input = &inst->Src[0]; - int input_base, input_array_size; - int chan; - int i; - LLVMValueRef prim_mask = ac_get_arg(&ctx->ac, ctx->args.prim_mask); - LLVMValueRef array_idx, offset_x = NULL, offset_y = NULL; - int interp_param_idx; - unsigned interp; - unsigned location; - - if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) { - /* offset is in second src, first two channels */ - offset_x = lp_build_emit_fetch(bld_base, emit_data->inst, 1, - TGSI_CHAN_X); - offset_y = lp_build_emit_fetch(bld_base, emit_data->inst, 1, - TGSI_CHAN_Y); - } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { - LLVMValueRef sample_position; - LLVMValueRef sample_id; - LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f); - - /* fetch sample ID, then fetch its sample position, - * and place into first two channels. - */ - sample_id = lp_build_emit_fetch(bld_base, - emit_data->inst, 1, TGSI_CHAN_X); - sample_id = ac_to_integer(&ctx->ac, sample_id); - - /* Section 8.13.2 (Interpolation Functions) of the OpenGL Shading - * Language 4.50 spec says about interpolateAtSample: - * - * "Returns the value of the input interpolant variable at - * the location of sample number sample. If multisample - * buffers are not available, the input variable will be - * evaluated at the center of the pixel. 
If sample sample - * does not exist, the position used to interpolate the - * input variable is undefined." - * - * This means that sample_id values outside of the valid are - * in fact valid input, and the usual mechanism for loading the - * sample position doesn't work. - */ - if (ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center) { - LLVMValueRef center[4] = { - LLVMConstReal(ctx->f32, 0.5), - LLVMConstReal(ctx->f32, 0.5), - ctx->ac.f32_0, - ctx->ac.f32_0, - }; - - sample_position = ac_build_gather_values(&ctx->ac, center, 4); - } else { - sample_position = load_sample_position(&ctx->abi, sample_id); - } - - offset_x = LLVMBuildExtractElement(ctx->ac.builder, sample_position, - ctx->i32_0, ""); - - offset_x = LLVMBuildFSub(ctx->ac.builder, offset_x, halfval, ""); - offset_y = LLVMBuildExtractElement(ctx->ac.builder, sample_position, - ctx->i32_1, ""); - offset_y = LLVMBuildFSub(ctx->ac.builder, offset_y, halfval, ""); - } - - assert(input->Register.File == TGSI_FILE_INPUT); - - if (input->Register.Indirect) { - unsigned array_id = input->Indirect.ArrayID; - - if (array_id) { - input_base = info->input_array_first[array_id]; - input_array_size = info->input_array_last[array_id] - input_base + 1; - } else { - input_base = inst->Src[0].Register.Index; - input_array_size = info->num_inputs - input_base; - } - - array_idx = si_get_indirect_index(ctx, &input->Indirect, - 1, input->Register.Index - input_base); - } else { - input_base = inst->Src[0].Register.Index; - input_array_size = 1; - array_idx = ctx->i32_0; - } - - interp = shader->selector->info.input_interpolate[input_base]; - - if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || - inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) - location = TGSI_INTERPOLATE_LOC_CENTER; - else - location = TGSI_INTERPOLATE_LOC_CENTROID; - - interp_param_idx = lookup_interp_param_index(interp, location); - if (interp_param_idx == -1) - return; - else if (interp_param_idx) - interp_param = 
LLVMGetParam(ctx->main_fn, interp_param_idx); - else - interp_param = NULL; - - if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || - inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { - LLVMValueRef ij_out[2]; - LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param); - - /* - * take the I then J parameters, and the DDX/Y for it, and - * calculate the IJ inputs for the interpolator. - * temp1 = ddx * offset/sample.x + I; - * interp_param.I = ddy * offset/sample.y + temp1; - * temp1 = ddx * offset/sample.x + J; - * interp_param.J = ddy * offset/sample.y + temp1; - */ - for (i = 0; i < 2; i++) { - LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, 0); - LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, 0); - LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, - ddxy_out, ix_ll, ""); - LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, - ddxy_out, iy_ll, ""); - LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, - interp_param, ix_ll, ""); - LLVMValueRef temp; - - interp_el = ac_to_float(&ctx->ac, interp_el); - - temp = ac_build_fmad(&ctx->ac, ddx_el, offset_x, interp_el); - ij_out[i] = ac_build_fmad(&ctx->ac, ddy_el, offset_y, temp); - } - interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2); - } - - if (interp_param) - interp_param = ac_to_float(&ctx->ac, interp_param); - - for (chan = 0; chan < 4; chan++) { - LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, input_array_size)); - unsigned schan = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan); - - for (unsigned idx = 0; idx < input_array_size; ++idx) { - LLVMValueRef v, i = NULL, j = NULL; - - if (interp_param) { - i = LLVMBuildExtractElement( - ctx->ac.builder, interp_param, ctx->i32_0, ""); - j = LLVMBuildExtractElement( - ctx->ac.builder, interp_param, ctx->i32_1, ""); - } - v = si_build_fs_interp(ctx, input_base + idx, schan, - prim_mask, i, j); - - gather = LLVMBuildInsertElement(ctx->ac.builder, - gather, v, 
LLVMConstInt(ctx->i32, idx, false), ""); - } - - emit_data->output[chan] = LLVMBuildExtractElement( - ctx->ac.builder, gather, array_idx, ""); - } -} - -static void vote_all_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, emit_data->args[0]); - emit_data->output[emit_data->chan] = - LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, ""); -} - -static void vote_any_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, emit_data->args[0]); - emit_data->output[emit_data->chan] = - LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, ""); -} - -static void vote_eq_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, emit_data->args[0]); - emit_data->output[emit_data->chan] = - LLVMBuildSExt(ctx->ac.builder, tmp, ctx->i32, ""); -} - -static void ballot_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef tmp; - - tmp = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); - tmp = ac_build_ballot(&ctx->ac, tmp); - - emit_data->output[0] = LLVMBuildTrunc(builder, tmp, ctx->i32, ""); - - if (ctx->ac.wave_size == 32) { - emit_data->output[1] = ctx->i32_0; - } else { - tmp = LLVMBuildLShr(builder, tmp, LLVMConstInt(ctx->i64, 32, 0), ""); - emit_data->output[1] = LLVMBuildTrunc(builder, tmp, 
ctx->i32, ""); - } -} - -static void read_lane_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_READ_INVOC) { - emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst, - 0, emit_data->src_chan); - - /* Always read the source invocation (= lane) from the X channel. */ - emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, - 1, TGSI_CHAN_X); - emit_data->arg_count = 2; - } - - /* We currently have no other way to prevent LLVM from lifting the icmp - * calls to a dominating basic block. - */ - ac_build_optimization_barrier(&ctx->ac, &emit_data->args[0]); - - for (unsigned i = 0; i < emit_data->arg_count; ++i) - emit_data->args[i] = ac_to_integer(&ctx->ac, emit_data->args[i]); - - emit_data->output[emit_data->chan] = - ac_build_intrinsic(&ctx->ac, action->intr_name, - ctx->i32, emit_data->args, emit_data->arg_count, - AC_FUNC_ATTR_READNONE | - AC_FUNC_ATTR_CONVERGENT); -} - -static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - struct tgsi_src_register src0 = emit_data->inst->Src[0].Register; - LLVMValueRef imm; - unsigned stream; - - assert(src0.File == TGSI_FILE_IMMEDIATE); - - imm = ctx->imms[src0.Index * TGSI_NUM_CHANNELS + src0.SwizzleX]; - stream = LLVMConstIntGetZExtValue(imm) & 0x3; - return stream; -} - /* Emit one vertex from the geometry shader */ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, @@ -4296,18 +3195,6 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, ac_build_endif(&ctx->ac, 6505); } -/* Emit one vertex from the geometry shader */ -static void si_tgsi_emit_vertex( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context 
*bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - unsigned stream = si_llvm_get_stream(bld_base, emit_data); - - si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]); -} - /* Cut one primitive from the geometry shader */ static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream) @@ -4324,23 +3211,8 @@ static void si_llvm_emit_primitive(struct ac_shader_abi *abi, si_get_gs_wave_id(ctx)); } -/* Cut one primitive from the geometry shader */ -static void si_tgsi_emit_primitive( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - si_llvm_emit_primitive(&ctx->abi, si_llvm_get_stream(bld_base, emit_data)); -} - -static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) +static void si_llvm_emit_barrier(struct si_shader_context *ctx) { - struct si_shader_context *ctx = si_shader_context(bld_base); - /* GFX6 only (thanks to a hw bug workaround): * The real barrier instruction isn’t needed, because an entire patch * always fits into a single wave. 
@@ -5654,9 +4526,9 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, shader->selector = gs_selector; shader->is_gs_copy_shader = true; - si_init_shader_ctx(&ctx, sscreen, compiler, - si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false), - false); + si_llvm_context_init(&ctx, sscreen, compiler, + si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false), + 64); ctx.shader = shader; ctx.type = PIPE_SHADER_VERTEX; @@ -5917,47 +4789,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f) } } -static void si_init_shader_ctx(struct si_shader_context *ctx, - struct si_screen *sscreen, - struct ac_llvm_compiler *compiler, - unsigned wave_size, - bool nir) -{ - struct lp_build_tgsi_context *bld_base; - - si_llvm_context_init(ctx, sscreen, compiler, wave_size, - nir ? 64 : wave_size); - - bld_base = &ctx->bld_base; - bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; - - bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID].emit = build_interp_intrinsic; - bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE].emit = build_interp_intrinsic; - bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET].emit = build_interp_intrinsic; - - bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit; - - bld_base->op_actions[TGSI_OPCODE_CLOCK].emit = clock_emit; - - bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy; - bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy; - bld_base->op_actions[TGSI_OPCODE_DDX_FINE].emit = si_llvm_emit_ddxy; - bld_base->op_actions[TGSI_OPCODE_DDY_FINE].emit = si_llvm_emit_ddxy; - - bld_base->op_actions[TGSI_OPCODE_VOTE_ALL].emit = vote_all_emit; - bld_base->op_actions[TGSI_OPCODE_VOTE_ANY].emit = vote_any_emit; - bld_base->op_actions[TGSI_OPCODE_VOTE_EQ].emit = vote_eq_emit; - bld_base->op_actions[TGSI_OPCODE_BALLOT].emit = ballot_emit; - bld_base->op_actions[TGSI_OPCODE_READ_FIRST].intr_name = "llvm.amdgcn.readfirstlane"; - bld_base->op_actions[TGSI_OPCODE_READ_FIRST].emit = read_lane_emit; - 
bld_base->op_actions[TGSI_OPCODE_READ_INVOC].intr_name = "llvm.amdgcn.readlane"; - bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit; - - bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex; - bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_tgsi_emit_primitive; - bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier; -} - static void si_optimize_vs_outputs(struct si_shader_context *ctx) { struct si_shader *shader = ctx->shader; @@ -6014,17 +4845,34 @@ LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx) si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), ""); } +static void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + LLVMBuilderRef builder = ctx->ac.builder; + + if (ctx->shader->selector->force_correct_derivs_after_kill) { + /* Kill immediately while maintaining WQM. */ + ac_build_kill_if_false(&ctx->ac, + ac_build_wqm_vote(&ctx->ac, visible)); + + LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, ""); + mask = LLVMBuildAnd(builder, mask, visible, ""); + LLVMBuildStore(builder, mask, ctx->postponed_kill); + return; + } + + ac_build_kill_if_false(&ctx->ac, visible); +} + static bool si_compile_tgsi_main(struct si_shader_context *ctx, struct nir_shader *nir, bool free_nir) { struct si_shader *shader = ctx->shader; struct si_shader_selector *sel = shader->selector; - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; // TODO clean all this up! 
switch (ctx->type) { case PIPE_SHADER_VERTEX: - ctx->load_input = declare_input_vs; if (shader->key.as_ls) ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue; else if (shader->key.as_es) @@ -6035,22 +4883,16 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue; else ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; - bld_base->emit_epilogue = si_tgsi_emit_epilogue; ctx->abi.load_base_vertex = get_base_vertex; break; case PIPE_SHADER_TESS_CTRL: - bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs; ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings; ctx->abi.load_tess_level = si_load_tess_level; - bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs; - bld_base->emit_store = store_output_tcs; ctx->abi.store_tcs_outputs = si_nir_store_output_tcs; ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue; ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in; - bld_base->emit_epilogue = si_tgsi_emit_epilogue; break; case PIPE_SHADER_TESS_EVAL: - bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes; ctx->abi.load_tess_varyings = si_nir_load_input_tes; ctx->abi.load_tess_coord = si_load_tess_coord; ctx->abi.load_tess_level = si_load_tess_level; @@ -6061,20 +4903,15 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue; else ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; - bld_base->emit_epilogue = si_tgsi_emit_epilogue; break; case PIPE_SHADER_GEOMETRY: - bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs; ctx->abi.load_inputs = si_nir_load_input_gs; ctx->abi.emit_vertex = si_llvm_emit_vertex; ctx->abi.emit_primitive = si_llvm_emit_primitive; ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue; - bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue; break; case PIPE_SHADER_FRAGMENT: - ctx->load_input = declare_input_fs; ctx->abi.emit_outputs = si_llvm_return_fs_outputs; - bld_base->emit_epilogue = 
si_tgsi_emit_epilogue; ctx->abi.load_sample_position = load_sample_position; ctx->abi.load_sample_mask_in = load_sample_mask_in; ctx->abi.emit_fbfetch = si_nir_emit_fbfetch; @@ -6229,7 +5066,7 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, * and contains a barrier, it will wait there and then * reach s_endpgm. */ - si_llvm_emit_barrier(NULL, bld_base, NULL); + si_llvm_emit_barrier(ctx); } } } @@ -6241,19 +5078,12 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx, ctx->postponed_kill); } - if (sel->tokens) { - if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { - fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); - return false; - } - } else { - bool success = si_nir_build_llvm(ctx, nir); - if (free_nir) - ralloc_free(nir); - if (!success) { - fprintf(stderr, "Failed to translate shader from NIR to LLVM\n"); - return false; - } + bool success = si_nir_build_llvm(ctx, nir); + if (free_nir) + ralloc_free(nir); + if (!success) { + fprintf(stderr, "Failed to translate shader from NIR to LLVM\n"); + return false; } si_llvm_build_ret(ctx, ctx->return_value); @@ -6899,10 +5729,10 @@ static struct nir_shader *get_nir_shader(struct si_shader_selector *sel, return NULL; } -int si_compile_tgsi_shader(struct si_screen *sscreen, - struct ac_llvm_compiler *compiler, - struct si_shader *shader, - struct pipe_debug_callback *debug) +int si_compile_shader(struct si_screen *sscreen, + struct ac_llvm_compiler *compiler, + struct si_shader *shader, + struct pipe_debug_callback *debug) { struct si_shader_selector *sel = shader->selector; struct si_shader_context ctx; @@ -6914,16 +5744,12 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, * conversion fails. 
*/ if (si_can_dump_shader(sscreen, sel->type) && !(sscreen->debug_flags & DBG(NO_TGSI))) { - if (sel->tokens) - tgsi_dump(sel->tokens, 0); - else - nir_print_shader(nir, stderr); + nir_print_shader(nir, stderr); si_dump_streamout(&sel->so); } - si_init_shader_ctx(&ctx, sscreen, compiler, si_get_shader_wave_size(shader), - nir != NULL); - si_llvm_context_set_ir(&ctx, shader, nir); + si_llvm_context_init(&ctx, sscreen, compiler, si_get_shader_wave_size(shader), 64); + si_llvm_context_set_ir(&ctx, shader); memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(shader->info.vs_output_param_offset)); @@ -6982,7 +5808,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, shader_ls.key.mono = shader->key.mono; shader_ls.key.opt = shader->key.opt; shader_ls.is_monolithic = true; - si_llvm_context_set_ir(&ctx, &shader_ls, nir); + si_llvm_context_set_ir(&ctx, &shader_ls); if (!si_compile_tgsi_main(&ctx, nir, free_nir)) { si_llvm_dispose(&ctx); @@ -7050,7 +5876,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen, shader_es.key.mono = shader->key.mono; shader_es.key.opt = shader->key.opt; shader_es.is_monolithic = true; - si_llvm_context_set_ir(&ctx, &shader_es, nir); + si_llvm_context_set_ir(&ctx, &shader_es); if (!si_compile_tgsi_main(&ctx, nir, free_nir)) { si_llvm_dispose(&ctx); @@ -7269,10 +6095,10 @@ si_get_shader_part(struct si_screen *sscreen, } struct si_shader_context ctx; - si_init_shader_ctx(&ctx, sscreen, compiler, - si_get_wave_size(sscreen, type, shader.key.as_ngg, - shader.key.as_es), - false); + si_llvm_context_init(&ctx, sscreen, compiler, + si_get_wave_size(sscreen, type, shader.key.as_ngg, + shader.key.as_es), + 64); ctx.shader = &shader; ctx.type = type; @@ -7540,8 +6366,6 @@ static bool si_shader_select_vs_parts(struct si_screen *sscreen, static void si_build_tcs_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; - memset(&ctx->args, 0, 
sizeof(ctx->args)); if (ctx->screen->info.chip_class >= GFX9) { @@ -7608,7 +6432,7 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx, for (unsigned i = 0; i < 6; i++) invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]); - si_write_tess_factors(bld_base, + si_write_tess_factors(ctx, ac_get_arg(&ctx->ac, rel_patch_id), ac_get_arg(&ctx->ac, invocation_id), ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset), @@ -7914,9 +6738,8 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx, face = ac_to_integer(&ctx->ac, face); } - interp_fs_input(ctx, - key->ps_prolog.color_attr_index[i], - TGSI_SEMANTIC_COLOR, i, + interp_fs_color(ctx, + key->ps_prolog.color_attr_index[i], i, key->ps_prolog.num_interp_inputs, key->ps_prolog.colors_read, interp_ij, prim_mask, face, color); @@ -7990,7 +6813,6 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx, static void si_build_ps_epilog_function(struct si_shader_context *ctx, union si_shader_part_key *key) { - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL; int i; struct si_ps_exports exp = {}; @@ -8060,7 +6882,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, for (i = 0; i < 4; i++) color[i] = LLVMGetParam(ctx->main_fn, vgpr++); - si_export_mrt_color(bld_base, color, mrt, + si_export_mrt_color(ctx, color, mrt, ctx->args.arg_count - 1, mrt == last_color_export, &exp); } @@ -8074,7 +6896,7 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx, samplemask = LLVMGetParam(ctx->main_fn, vgpr++); if (depth || stencil || samplemask) - si_export_mrt_z(bld_base, depth, stencil, samplemask, &exp); + si_export_mrt_z(ctx, depth, stencil, samplemask, &exp); else if (last_color_export == -1) ac_build_export_null(&ctx->ac); @@ -8240,7 +7062,7 @@ bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compil /* Monolithic shader (compiled as a 
whole, has many variants, * may take a long time to compile). */ - r = si_compile_tgsi_shader(sscreen, compiler, shader, debug); + r = si_compile_shader(sscreen, compiler, shader, debug); if (r) return false; } else { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d9a199bfa3c..30dbe1c6a6e 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -326,7 +326,6 @@ struct si_shader_selector { struct si_shader *gs_copy_shader; - struct tgsi_token *tokens; struct nir_shader *nir; void *nir_binary; unsigned nir_size; @@ -730,10 +729,10 @@ si_generate_gs_copy_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, struct si_shader_selector *gs_selector, struct pipe_debug_callback *debug); -int si_compile_tgsi_shader(struct si_screen *sscreen, - struct ac_llvm_compiler *compiler, - struct si_shader *shader, - struct pipe_debug_callback *debug); +int si_compile_shader(struct si_screen *sscreen, + struct ac_llvm_compiler *compiler, + struct si_shader *shader, + struct pipe_debug_callback *debug); bool si_shader_create(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, struct si_shader *shader, struct pipe_debug_callback *debug); diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index a9b40f41b4c..1ec74a84a69 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -26,10 +26,6 @@ #define SI_SHADER_PRIVATE_H #include "si_shader.h" -#include "gallivm/lp_bld_flow.h" -#include "gallivm/lp_bld_init.h" -#include "gallivm/lp_bld_tgsi.h" -#include "tgsi/tgsi_parse.h" #include "ac_shader_abi.h" #include @@ -37,12 +33,7 @@ struct pipe_debug_callback; -#define RADEON_LLVM_MAX_INPUT_SLOTS 32 #define RADEON_LLVM_MAX_INPUTS 32 * 4 -#define RADEON_LLVM_MAX_OUTPUTS 32 * 4 - -#define RADEON_LLVM_MAX_SYSTEM_VALUES 11 -#define 
RADEON_LLVM_MAX_ADDRS 16 struct si_shader_output_values { LLVMValueRef values[4]; @@ -52,8 +43,6 @@ struct si_shader_output_values { }; struct si_shader_context { - struct lp_build_tgsi_context bld_base; - struct gallivm_state gallivm; struct ac_llvm_context ac; struct si_shader *shader; struct si_screen *screen; @@ -69,42 +58,11 @@ struct si_shader_context { struct ac_shader_args args; struct ac_shader_abi abi; - /** This function is responsible for initilizing the inputs array and will be - * called once for each input declared in the TGSI shader. - */ - void (*load_input)(struct si_shader_context *, - unsigned input_index, - const struct tgsi_full_declaration *decl, - LLVMValueRef out[4]); - - /** This array contains the input values for the shader. Typically these - * values will be in the form of a target intrinsic that will inform the - * backend how to load the actual inputs to the shader. - */ - struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS]; LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS]; - LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS]; - LLVMValueRef addrs[RADEON_LLVM_MAX_ADDRS][TGSI_NUM_CHANNELS]; - - /** This pointer is used to contain the temporary values. - * The amount of temporary used in tgsi can't be bound to a max value and - * thus we must allocate this array at runtime. 
- */ - LLVMValueRef *temps; - unsigned temps_count; - LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES]; - - LLVMValueRef *imms; - unsigned imms_num; LLVMBasicBlockRef merged_wrap_if_entry_block; int merged_wrap_if_label; - struct tgsi_array_info *temp_arrays; - LLVMValueRef *temp_array_allocas; - - LLVMValueRef undef_alloca; - LLVMValueRef main_fn; LLVMTypeRef return_type; @@ -233,12 +191,6 @@ struct si_shader_context { LLVMValueRef i1true; }; -static inline struct si_shader_context * -si_shader_context(struct lp_build_tgsi_context *bld_base) -{ - return (struct si_shader_context*)bld_base; -} - static inline struct si_shader_context * si_shader_context_from_abi(struct ac_shader_abi *abi) { @@ -255,12 +207,6 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary, struct pipe_debug_callback *debug, bool less_optimized, unsigned wave_size); -LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type); - -LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type, LLVMValueRef value); - LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValueRef index, unsigned num); @@ -271,8 +217,7 @@ void si_llvm_context_init(struct si_shader_context *ctx, unsigned wave_size, unsigned ballot_mask_bits); void si_llvm_context_set_ir(struct si_shader_context *ctx, - struct si_shader *shader, - struct nir_shader *nir); + struct si_shader *shader); void si_llvm_create_func(struct si_shader_context *ctx, const char *name, @@ -282,18 +227,6 @@ void si_llvm_dispose(struct si_shader_context *ctx); void si_llvm_optimize_module(struct si_shader_context *ctx); -LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, - LLVMTypeRef type, - LLVMValueRef ptr, - LLVMValueRef ptr2); - -LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle); - -void 
si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible); - LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type, LLVMValueRef vertex_index, @@ -306,34 +239,10 @@ LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, bool is_patch, bool is_compact, bool load_input); - -LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, - unsigned input_index, - unsigned vtx_offset_param, - LLVMTypeRef type, - unsigned swizzle); - LLVMValueRef si_nir_lookup_interp_param(struct ac_shader_abi *abi, enum glsl_interp_mode interp, unsigned location); - -void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info, - unsigned index, - LLVMValueRef dst[4]); - -LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx, - const struct tgsi_ind_register *ind, - unsigned addr_mul, int rel_index); -LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx, - const struct tgsi_ind_register *ind, - int rel_index, unsigned num); LLVMValueRef si_get_sample_id(struct si_shader_context *ctx); - -void si_shader_context_init_alu(struct si_shader_context *ctx); -void si_shader_context_init_mem(struct si_shader_context *ctx); - LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, LLVMValueRef list, LLVMValueRef index, enum ac_descriptor_type type); @@ -342,14 +251,7 @@ LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, enum ac_descriptor_type desc_type, bool uses_store, bool bindless); LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi); - -void si_load_system_value(struct si_shader_context *ctx, - unsigned index, - const struct tgsi_full_declaration *decl); void si_declare_compute_memory(struct si_shader_context *ctx); -void si_tgsi_declare_compute_memory(struct si_shader_context *ctx, - const struct tgsi_full_declaration *decl); - LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle); 
void si_llvm_export_vs(struct si_shader_context *ctx, @@ -365,10 +267,6 @@ void si_llvm_load_input_vs( struct si_shader_context *ctx, unsigned input_index, LLVMValueRef out[4]); -void si_llvm_load_input_fs( - struct si_shader_context *ctx, - unsigned input_index, - LLVMValueRef out[4]); bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c new file mode 100644 index 00000000000..64ceaf7ed34 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -0,0 +1,239 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "si_shader_internal.h" +#include "si_pipe.h" +#include "ac_llvm_util.h" +#include "util/u_memory.h" + +struct si_llvm_diagnostics { + struct pipe_debug_callback *debug; + unsigned retval; +}; + +static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) +{ + struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context; + LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); + const char *severity_str = NULL; + + switch (severity) { + case LLVMDSError: + severity_str = "error"; + break; + case LLVMDSWarning: + severity_str = "warning"; + break; + case LLVMDSRemark: + case LLVMDSNote: + default: + return; + } + + char *description = LLVMGetDiagInfoDescription(di); + + pipe_debug_message(diag->debug, SHADER_INFO, + "LLVM diagnostic (%s): %s", severity_str, description); + + if (severity == LLVMDSError) { + diag->retval = 1; + fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description); + } + + LLVMDisposeMessage(description); +} + +/** + * Compile an LLVM module to machine code. + * + * @returns 0 for success, 1 for failure + */ +unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary, + struct ac_llvm_compiler *compiler, + struct pipe_debug_callback *debug, + bool less_optimized, unsigned wave_size) +{ + struct ac_compiler_passes *passes = compiler->passes; + + if (wave_size == 32) + passes = compiler->passes_wave32; + else if (less_optimized && compiler->low_opt_passes) + passes = compiler->low_opt_passes; + + struct si_llvm_diagnostics diag; + LLVMContextRef llvm_ctx; + + diag.debug = debug; + diag.retval = 0; + + /* Setup Diagnostic Handler*/ + llvm_ctx = LLVMGetModuleContext(M); + + LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag); + + /* Compile IR. 
*/ + if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer, + &binary->elf_size)) + diag.retval = 1; + + if (diag.retval != 0) + pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed"); + return diag.retval; +} + +void si_shader_binary_clean(struct si_shader_binary *binary) +{ + free((void *)binary->elf_buffer); + binary->elf_buffer = NULL; + + free(binary->llvm_ir_string); + binary->llvm_ir_string = NULL; +} + +void si_llvm_context_init(struct si_shader_context *ctx, + struct si_screen *sscreen, + struct ac_llvm_compiler *compiler, + unsigned wave_size, + unsigned ballot_mask_bits) +{ + /* Initialize the shader context: + * Zero the whole struct, record the screen and compiler, and set up + * the ac_llvm_context together with the LLVM types and constants + * that are cached in ctx. + */ + memset(ctx, 0, sizeof(*ctx)); + ctx->screen = sscreen; + ctx->compiler = compiler; + + ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class, + sscreen->info.family, + AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH, + wave_size, ballot_mask_bits); + + ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context); + ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context); + ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context); + ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context); + ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context); + ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128); + ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context); + ctx->v2i32 = LLVMVectorType(ctx->i32, 2); + ctx->v4i32 = LLVMVectorType(ctx->i32, 4); + ctx->v4f32 = LLVMVectorType(ctx->f32, 4); + ctx->v8i32 = LLVMVectorType(ctx->i32, 8); + + ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0); + ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0); + ctx->i1false = LLVMConstInt(ctx->i1, 0, 0); + ctx->i1true = LLVMConstInt(ctx->i1, 1, 0); +} + +/* Set the context to a certain shader. Can be called repeatedly + * to change the shader.
*/ +void si_llvm_context_set_ir(struct si_shader_context *ctx, + struct si_shader *shader) +{ + struct si_shader_selector *sel = shader->selector; + const struct tgsi_shader_info *info = &sel->info; + + ctx->shader = shader; + ctx->type = sel->type; + + ctx->num_const_buffers = util_last_bit(info->const_buffers_declared); + ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared); + + ctx->num_samplers = util_last_bit(info->samplers_declared); + ctx->num_images = util_last_bit(info->images_declared); +} + +void si_llvm_create_func(struct si_shader_context *ctx, + const char *name, + LLVMTypeRef *return_types, unsigned num_return_elems) +{ + LLVMTypeRef ret_type; + enum ac_llvm_calling_convention call_conv; + enum pipe_shader_type real_shader_type; + + if (num_return_elems) + ret_type = LLVMStructTypeInContext(ctx->ac.context, + return_types, + num_return_elems, true); + else + ret_type = ctx->voidt; + + real_shader_type = ctx->type; + + /* LS is merged into HS (TCS), and ES is merged into GS. 
*/ + if (ctx->screen->info.chip_class >= GFX9) { + if (ctx->shader->key.as_ls) + real_shader_type = PIPE_SHADER_TESS_CTRL; + else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg) + real_shader_type = PIPE_SHADER_GEOMETRY; + } + + switch (real_shader_type) { + case PIPE_SHADER_VERTEX: + case PIPE_SHADER_TESS_EVAL: + call_conv = AC_LLVM_AMDGPU_VS; + break; + case PIPE_SHADER_TESS_CTRL: + call_conv = AC_LLVM_AMDGPU_HS; + break; + case PIPE_SHADER_GEOMETRY: + call_conv = AC_LLVM_AMDGPU_GS; + break; + case PIPE_SHADER_FRAGMENT: + call_conv = AC_LLVM_AMDGPU_PS; + break; + case PIPE_SHADER_COMPUTE: + call_conv = AC_LLVM_AMDGPU_CS; + break; + default: + unreachable("Unhandle shader type"); + } + + /* Setup the function */ + ctx->return_type = ret_type; + ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name, + ret_type, ctx->ac.module); +} + +void si_llvm_optimize_module(struct si_shader_context *ctx) +{ + /* Dump LLVM IR before any optimization passes */ + if (ctx->screen->debug_flags & DBG(PREOPT_IR) && + si_can_dump_shader(ctx->screen, ctx->type)) + LLVMDumpModule(ctx->ac.module); + + /* Run the pass */ + LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module); + LLVMDisposeBuilder(ctx->ac.builder); +} + +void si_llvm_dispose(struct si_shader_context *ctx) +{ + LLVMDisposeModule(ctx->ac.module); + LLVMContextDispose(ctx->ac.context); + ac_llvm_context_dispose(&ctx->ac); +} diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_build.c b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c new file mode 100644 index 00000000000..e3625214258 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_build.c @@ -0,0 +1,219 @@ +/* + * Copyright 2017 Advanced Micro Devices, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include "si_shader_internal.h" +#include "si_pipe.h" +#include "sid.h" +#include "ac_llvm_util.h" + +/** + * Return a value that is equal to the given i32 \p index if it lies in [0,num) + * or an undefined value in the same interval otherwise. + */ +LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, + LLVMValueRef index, + unsigned num) +{ + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0); + LLVMValueRef cc; + + if (util_is_power_of_two_or_zero(num)) { + index = LLVMBuildAnd(builder, index, c_max, ""); + } else { + /* In theory, this MAX pattern should result in code that is + * as good as the bit-wise AND above. + * + * In practice, LLVM generates worse code (at the time of + * writing), because its value tracking is not strong enough. 
+ */ + cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); + index = LLVMBuildSelect(builder, cc, index, c_max, ""); + } + + return index; +} + +/** + * Given a 256-bit resource descriptor, force the DCC enable bit to off. + * + * At least on Tonga, executing image stores on images with DCC enabled and + * non-trivial can eventually lead to lockups. This can occur when an + * application binds an image as read-only but then uses a shader that writes + * to it. The OpenGL spec allows almost arbitrarily bad behavior (including + * program termination) in this case, but it doesn't cost much to be a bit + * nicer: disabling DCC in the shader still leads to undefined results but + * avoids the lockup. + */ +static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, + LLVMValueRef rsrc) +{ + if (ctx->screen->info.chip_class <= GFX7) { + return rsrc; + } else { + LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); + LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0); + LLVMValueRef tmp; + + tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, ""); + tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, ""); + return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, ""); + } +} + +/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should + * adjust "index" to point to FMASK. 
*/ +LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, + LLVMValueRef list, LLVMValueRef index, + enum ac_descriptor_type desc_type, + bool uses_store, bool bindless) +{ + LLVMBuilderRef builder = ctx->ac.builder; + LLVMValueRef rsrc; + + if (desc_type == AC_DESC_BUFFER) { + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), + ctx->i32_1); + list = LLVMBuildPointerCast(builder, list, + ac_array_in_const32_addr_space(ctx->v4i32), ""); + } else { + assert(desc_type == AC_DESC_IMAGE || + desc_type == AC_DESC_FMASK); + } + + if (bindless) + rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index); + else + rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index); + + if (desc_type == AC_DESC_IMAGE && uses_store) + rsrc = force_dcc_off(ctx, rsrc); + return rsrc; +} + +/** + * Load an image view, fmask view, or sampler state descriptor. + */ +LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, + LLVMValueRef list, LLVMValueRef index, + enum ac_descriptor_type type) +{ + LLVMBuilderRef builder = ctx->ac.builder; + + switch (type) { + case AC_DESC_IMAGE: + /* The image is at [0:7]. */ + index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); + break; + case AC_DESC_BUFFER: + /* The buffer is in [4:7]. */ + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), + ctx->i32_1); + list = LLVMBuildPointerCast(builder, list, + ac_array_in_const32_addr_space(ctx->v4i32), ""); + break; + case AC_DESC_FMASK: + /* The FMASK is at [8:15]. */ + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), + ctx->i32_1); + break; + case AC_DESC_SAMPLER: + /* The sampler state is at [12:15].
*/ + index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), + LLVMConstInt(ctx->i32, 3, 0)); + list = LLVMBuildPointerCast(builder, list, + ac_array_in_const32_addr_space(ctx->v4i32), ""); + break; + case AC_DESC_PLANE_0: + case AC_DESC_PLANE_1: + case AC_DESC_PLANE_2: + /* Only used for the multiplane image support for Vulkan. Should + * never be reached in radeonsi. + */ + unreachable("Plane descriptor requested in radeonsi."); + } + + return ac_build_load_to_sgpr(&ctx->ac, list, index); +} + +LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi) +{ + struct si_shader_context *ctx = si_shader_context_from_abi(abi); + struct ac_image_args args = {}; + LLVMValueRef ptr, image, fmask; + + /* Ignore src0, because KHR_blend_func_extended disallows multiple render + * targets. + */ + + /* Load the image descriptor. */ + STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); + ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers); + ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr, + ac_array_in_const32_addr_space(ctx->v8i32), ""); + image = ac_build_load_to_sgpr(&ctx->ac, ptr, + LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); + + unsigned chan = 0; + + args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16); + + if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) + args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16); + + /* Get the current render target layer index. 
*/ + if (ctx->shader->key.mono.u.ps.fbfetch_layered) + args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11); + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa) + args.coords[chan++] = si_get_sample_id(ctx); + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa && + !(ctx->screen->debug_flags & DBG(NO_FMASK))) { + fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, + LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); + + ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, + ctx->shader->key.mono.u.ps.fbfetch_layered); + } + + args.opcode = ac_image_load; + args.resource = image; + args.dmask = 0xf; + args.attributes = AC_FUNC_ATTR_READNONE; + + if (ctx->shader->key.mono.u.ps.fbfetch_msaa) + args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? + ac_image_2darraymsaa : ac_image_2dmsaa; + else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D) + args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? + ac_image_1darray : ac_image_1d; + else + args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? + ac_image_2darray : ac_image_2d; + + return ac_build_image_opcode(&ctx->ac, &args); +} diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c deleted file mode 100644 index 4be410ec331..00000000000 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c +++ /dev/null @@ -1,834 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "si_shader_internal.h" -#include "si_pipe.h" -#include "ac_llvm_util.h" - -void si_llvm_emit_kill(struct ac_shader_abi *abi, LLVMValueRef visible) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - LLVMBuilderRef builder = ctx->ac.builder; - - if (ctx->shader->selector->force_correct_derivs_after_kill) { - /* Kill immediately while maintaining WQM. 
*/ - ac_build_kill_if_false(&ctx->ac, - ac_build_wqm_vote(&ctx->ac, visible)); - - LLVMValueRef mask = LLVMBuildLoad(builder, ctx->postponed_kill, ""); - mask = LLVMBuildAnd(builder, mask, visible, ""); - LLVMBuildStore(builder, mask, ctx->postponed_kill); - return; - } - - ac_build_kill_if_false(&ctx->ac, visible); -} - -static void kil_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef visible; - - if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL_IF) { - const struct tgsi_full_instruction *inst = emit_data->inst; - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - unsigned i; - LLVMValueRef conds[TGSI_NUM_CHANNELS]; - - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i); - /* UGE because NaN shouldn't get killed */ - conds[i] = LLVMBuildFCmp(builder, LLVMRealUGE, value, - ctx->ac.f32_0, ""); - } - - /* And the conditions together */ - for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) { - conds[i - 1] = LLVMBuildAnd(builder, conds[i], conds[i - 1], ""); - } - visible = conds[0]; - } else { - assert(emit_data->inst->Instruction.Opcode == TGSI_OPCODE_KILL); - visible = ctx->i1false; - } - - si_llvm_emit_kill(&ctx->abi, visible); -} - -static void emit_icmp(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - unsigned pred; - struct si_shader_context *ctx = si_shader_context(bld_base); - - switch (emit_data->inst->Instruction.Opcode) { - case TGSI_OPCODE_USEQ: - case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break; - case TGSI_OPCODE_USNE: - case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break; - case TGSI_OPCODE_USGE: - case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break; - case TGSI_OPCODE_USLT: - case TGSI_OPCODE_U64SLT: pred 
= LLVMIntULT; break; - case TGSI_OPCODE_ISGE: - case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break; - case TGSI_OPCODE_ISLT: - case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break; - default: - assert(!"unknown instruction"); - pred = 0; - break; - } - - LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, pred, - emit_data->args[0], emit_data->args[1],""); - - v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, ""); - - emit_data->output[emit_data->chan] = v; -} - -static void emit_ucmp(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef arg0 = ac_to_integer(&ctx->ac, emit_data->args[0]); - - LLVMValueRef v = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, arg0, - ctx->i32_0, ""); - - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, v, emit_data->args[1], emit_data->args[2], ""); -} - -static void emit_cmp(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef cond, *args = emit_data->args; - - cond = LLVMBuildFCmp(ctx->ac.builder, LLVMRealOLT, args[0], - ctx->ac.f32_0, ""); - - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond, args[1], args[2], ""); -} - -static void emit_set_cond(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMRealPredicate pred; - LLVMValueRef cond; - - /* Use ordered for everything but NE (which is usual for - * float comparisons) - */ - switch (emit_data->inst->Instruction.Opcode) { - case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break; - case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break; - case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break; - case TGSI_OPCODE_SLT: 
pred = LLVMRealOLT; break; - case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break; - case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break; - default: assert(!"unknown instruction"); pred = 0; break; - } - - cond = LLVMBuildFCmp(ctx->ac.builder, - pred, emit_data->args[0], emit_data->args[1], ""); - - emit_data->output[emit_data->chan] = LLVMBuildSelect(ctx->ac.builder, - cond, ctx->ac.f32_1, ctx->ac.f32_0, ""); -} - -static void emit_fcmp(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMRealPredicate pred; - - /* Use ordered for everything but NE (which is usual for - * float comparisons) - */ - switch (emit_data->inst->Instruction.Opcode) { - case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break; - case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break; - case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break; - case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break; - default: assert(!"unknown instruction"); pred = 0; break; - } - - LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred, - emit_data->args[0], emit_data->args[1],""); - - v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, ""); - - emit_data->output[emit_data->chan] = v; -} - -static void emit_dcmp(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMRealPredicate pred; - - /* Use ordered for everything but NE (which is usual for - * float comparisons) - */ - switch (emit_data->inst->Instruction.Opcode) { - case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break; - case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break; - case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break; - case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break; - default: assert(!"unknown instruction"); pred = 0; break; - } - - LLVMValueRef v = LLVMBuildFCmp(ctx->ac.builder, pred, - 
emit_data->args[0], emit_data->args[1],""); - - v = LLVMBuildSExtOrBitCast(ctx->ac.builder, v, ctx->i32, ""); - - emit_data->output[emit_data->chan] = v; -} - -static void emit_not(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef v = ac_to_integer(&ctx->ac, emit_data->args[0]); - emit_data->output[emit_data->chan] = LLVMBuildNot(ctx->ac.builder, v, ""); -} - -static void emit_arl(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef floor_index = - ac_build_intrinsic(&ctx->ac, "llvm.floor.f32", ctx->f32, - &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE); - emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder, - floor_index, ctx->i32, ""); -} - -static void emit_and(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildAnd(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_or(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildOr(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_uadd(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildAdd(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void 
emit_udiv(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildUDiv(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_idiv(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildSDiv(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_mod(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildSRem(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_umod(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildURem(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_shl(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildShl(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_ushr(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildLShr(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} 
-static void emit_ishr(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildAShr(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_xor(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildXor(ctx->ac.builder, - emit_data->args[0], emit_data->args[1], ""); -} - -static void emit_ssg(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - LLVMValueRef val; - - if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) { - val = ac_build_isign(&ctx->ac, emit_data->args[0], 64); - } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) { - val = ac_build_isign(&ctx->ac, emit_data->args[0], 32); - } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_DSSG) { - val = ac_build_fsign(&ctx->ac, emit_data->args[0], 64); - } else { - val = ac_build_fsign(&ctx->ac, emit_data->args[0], 32); - } - - emit_data->output[emit_data->chan] = val; -} - -static void emit_ineg(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildNeg(ctx->ac.builder, - emit_data->args[0], ""); -} - -static void emit_dneg(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = 
LLVMBuildFNeg(ctx->ac.builder, - emit_data->args[0], ""); -} - -static void emit_frac(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - unsigned bitsize; - - if (emit_data->info->opcode == TGSI_OPCODE_FRC) - bitsize = 32; - else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC) - bitsize = 64; - else { - assert(0); - return; - } - - emit_data->output[emit_data->chan] = - ac_build_fract(&ctx->ac, emit_data->args[0], bitsize); -} - -static void emit_f2i(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildFPToSI(ctx->ac.builder, - emit_data->args[0], ctx->i32, ""); -} - -static void emit_f2u(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildFPToUI(ctx->ac.builder, - emit_data->args[0], ctx->i32, ""); -} - -static void emit_i2f(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildSIToFP(ctx->ac.builder, - emit_data->args[0], ctx->f32, ""); -} - -static void emit_u2f(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = LLVMBuildUIToFP(ctx->ac.builder, - emit_data->args[0], ctx->f32, ""); -} - -static void -build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context 
*bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = - ac_build_intrinsic(&ctx->ac, action->intr_name, - emit_data->dst_type, emit_data->args, - emit_data->arg_count, AC_FUNC_ATTR_READNONE); -} - -static void emit_bfi(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef bfi_args[3]; - LLVMValueRef bfi_sm5; - LLVMValueRef cond; - - // Calculate the bitmask: (((1 << src3) - 1) << src2 - bfi_args[0] = LLVMBuildShl(builder, - LLVMBuildSub(builder, - LLVMBuildShl(builder, - ctx->i32_1, - emit_data->args[3], ""), - ctx->i32_1, ""), - emit_data->args[2], ""); - - bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1], - emit_data->args[2], ""); - - bfi_args[2] = emit_data->args[0]; - - /* Calculate: - * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2) - * Use the right-hand side, which the LLVM backend can convert to V_BFI. - */ - bfi_sm5 = - LLVMBuildXor(builder, bfi_args[2], - LLVMBuildAnd(builder, bfi_args[0], - LLVMBuildXor(builder, bfi_args[1], bfi_args[2], - ""), ""), ""); - - /* Since shifts of >= 32 bits are undefined in LLVM IR, the backend - * uses the convenient V_BFI lowering for the above, which follows SM5 - * and disagrees with GLSL semantics when bits (src3) is 32. - */ - cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3], - LLVMConstInt(ctx->i32, 32, 0), ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, ""); -} - -static void emit_bfe(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - /* FIXME: LLVM 7 returns incorrect result when count is 0. 
- * https://bugs.freedesktop.org/show_bug.cgi?id=107276 - */ - LLVMValueRef zero = ctx->i32_0; - LLVMValueRef bfe_sm5 = - ac_build_bfe(&ctx->ac, emit_data->args[0], - emit_data->args[1], emit_data->args[2], - emit_data->info->opcode == TGSI_OPCODE_IBFE); - - /* Correct for GLSL semantics. */ - LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntUGE, emit_data->args[2], - LLVMConstInt(ctx->i32, 32, 0), ""); - LLVMValueRef cond2 = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, emit_data->args[2], - zero, ""); - bfe_sm5 = LLVMBuildSelect(ctx->ac.builder, cond, emit_data->args[0], bfe_sm5, ""); - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, cond2, zero, bfe_sm5, ""); -} - -/* this is ffs in C */ -static void emit_lsb(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - emit_data->output[emit_data->chan] = ac_find_lsb(&ctx->ac, emit_data->dst_type, emit_data->args[0]); -} - -/* Find the last bit set. */ -static void emit_umsb(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - emit_data->output[emit_data->chan] = - ac_build_umsb(&ctx->ac, emit_data->args[0], emit_data->dst_type); -} - -/* Find the last bit opposite of the sign bit. 
*/ -static void emit_imsb(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - emit_data->output[emit_data->chan] = - ac_build_imsb(&ctx->ac, emit_data->args[0], - emit_data->dst_type); -} - -static void emit_iabs(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - emit_data->output[emit_data->chan] = - ac_build_imax(&ctx->ac, emit_data->args[0], - LLVMBuildNeg(ctx->ac.builder, emit_data->args[0], "")); -} - -static void emit_minmax_int(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMIntPredicate op; - - switch (emit_data->info->opcode) { - default: - assert(0); - case TGSI_OPCODE_IMAX: - case TGSI_OPCODE_I64MAX: - op = LLVMIntSGT; - break; - case TGSI_OPCODE_IMIN: - case TGSI_OPCODE_I64MIN: - op = LLVMIntSLT; - break; - case TGSI_OPCODE_UMAX: - case TGSI_OPCODE_U64MAX: - op = LLVMIntUGT; - break; - case TGSI_OPCODE_UMIN: - case TGSI_OPCODE_U64MIN: - op = LLVMIntULT; - break; - } - - emit_data->output[emit_data->chan] = - LLVMBuildSelect(ctx->ac.builder, - LLVMBuildICmp(ctx->ac.builder, op, emit_data->args[0], - emit_data->args[1], ""), - emit_data->args[0], - emit_data->args[1], ""); -} - -static void emit_pk2h(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef v[] = { - lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X), - lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_Y), - }; - - - /* From the GLSL 4.50 spec: - * "The rounding mode cannot be set and is 
undefined." - * - * v_cvt_pkrtz_f16 rounds to zero, but it's fastest. - */ - emit_data->output[emit_data->chan] = - LLVMBuildBitCast(ctx->ac.builder, ac_build_cvt_pkrtz_f16(&ctx->ac, v), - ctx->i32, ""); -} - -static void emit_up2h(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMTypeRef i16; - LLVMValueRef const16, input, val; - unsigned i; - - i16 = LLVMInt16TypeInContext(ctx->ac.context); - const16 = LLVMConstInt(ctx->i32, 16, 0); - input = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); - - for (i = 0; i < 2; i++) { - val = i == 1 ? LLVMBuildLShr(ctx->ac.builder, input, const16, "") : input; - val = LLVMBuildTrunc(ctx->ac.builder, val, i16, ""); - val = ac_to_float(&ctx->ac, val); - emit_data->output[i] = LLVMBuildFPExt(ctx->ac.builder, val, ctx->f32, ""); - } -} - -static void emit_fdiv(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - emit_data->output[emit_data->chan] = - ac_build_fdiv(&ctx->ac, emit_data->args[0], emit_data->args[1]); -} - -/* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in - * the target machine. f64 needs global unsafe math flags to get rsq. 
*/ -static void emit_rsq(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - LLVMValueRef sqrt = - ac_build_intrinsic(&ctx->ac, "llvm.sqrt.f32", ctx->f32, - &emit_data->args[0], 1, AC_FUNC_ATTR_READNONE); - - emit_data->output[emit_data->chan] = - ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, sqrt); -} - -static void dfracexp_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef in = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); - - emit_data->output[emit_data->chan] = - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64", - ctx->ac.f64, &in, 1, 0); - emit_data->output1[emit_data->chan] = - ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.exp.i32.f64", - ctx->ac.i32, &in, 1, 0); -} - -void si_shader_context_init_alu(struct si_shader_context *ctx) -{ - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; - - lp_set_default_actions(bld_base); - - bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; - bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl; - bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi; - bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_BREV].intr_name = "llvm.bitreverse.i32"; - bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32"; - bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp; - bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32"; - bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64"; - 
bld_base->op_actions[TGSI_OPCODE_DCEIL].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DCEIL].intr_name = "llvm.ceil.f64"; - bld_base->op_actions[TGSI_OPCODE_DFLR].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DFLR].intr_name = "llvm.floor.f64"; - bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64"; - bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac; - bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv; - bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg; - bld_base->op_actions[TGSI_OPCODE_DROUND].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DROUND].intr_name = "llvm.rint.f64"; - bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp; - bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp; - bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp; - bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp; - bld_base->op_actions[TGSI_OPCODE_DSSG].emit = emit_ssg; - bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.amdgcn.rsq.f64"; - bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64"; - bld_base->op_actions[TGSI_OPCODE_DTRUNC].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DTRUNC].intr_name = "llvm.trunc.f64"; - bld_base->op_actions[TGSI_OPCODE_DFRACEXP].emit = dfracexp_emit; - bld_base->op_actions[TGSI_OPCODE_DLDEXP].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_DLDEXP].intr_name = "llvm.amdgcn.ldexp.f64"; - bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32"; - bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem; - 
bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32"; - - /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ - if (ctx->screen->info.chip_class >= GFX10) { - bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32"; - } else { - bld_base->op_actions[TGSI_OPCODE_FMA].emit = - bld_base->op_actions[TGSI_OPCODE_MAD].emit; - } - - bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac; - bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; - bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u; - bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp; - bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp; - bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp; - bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp; - bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs; - bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe; - bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; - bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int; - bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int; - bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb; - bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg; - bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr; - bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg; - bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f; - bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit; - bld_base->op_actions[TGSI_OPCODE_KILL].emit = kil_emit; - bld_base->op_actions[TGSI_OPCODE_LDEXP].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_LDEXP].intr_name = "llvm.amdgcn.ldexp.f32"; - bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb; - bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem; - 
bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32"; - bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32"; - bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32"; - bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod; - bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb; - bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; - bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or; - bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h; - bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32"; - bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32"; - bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32"; - bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq; - bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond; - bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond; - bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl; - bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond; - bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond; - bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond; - bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond; - bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32"; - bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem; - bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32"; - bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg; - bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem; - 
bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32"; - bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd; - bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe; - bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv; - bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int; - bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int; - bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod; - bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr; - bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; - bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; - bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp; - bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h; - - bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int; - bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int; - bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int; - bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int; - bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs; - bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg; - bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg; - - bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp; - bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp; - - bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd; - bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl; - bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr; - 
bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr; - - bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod; - bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod; - bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv; - bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv; -} diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c deleted file mode 100644 index 21b861b8244..00000000000 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ /dev/null @@ -1,1852 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include - -#include "si_shader_internal.h" -#include "si_pipe.h" -#include "sid.h" -#include "tgsi/tgsi_build.h" -#include "tgsi/tgsi_util.h" -#include "ac_llvm_util.h" - -static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data, - LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, - LLVMValueRef *fmask_ptr); - -/** - * Given a v8i32 resource descriptor for a buffer, extract the size of the - * buffer in number of elements and return it as an i32. - */ -static LLVMValueRef get_buffer_size( - struct lp_build_tgsi_context *bld_base, - LLVMValueRef descriptor) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef size = - LLVMBuildExtractElement(builder, descriptor, - LLVMConstInt(ctx->i32, 2, 0), ""); - - if (ctx->screen->info.chip_class == GFX8) { - /* On GFX8, the descriptor contains the size in bytes, - * but TXQ must return the size in elements. - * The stride is always non-zero for resources using TXQ. 
- */ - LLVMValueRef stride = - LLVMBuildExtractElement(builder, descriptor, - ctx->i32_1, ""); - stride = LLVMBuildLShr(builder, stride, - LLVMConstInt(ctx->i32, 16, 0), ""); - stride = LLVMBuildAnd(builder, stride, - LLVMConstInt(ctx->i32, 0x3FFF, 0), ""); - - size = LLVMBuildUDiv(builder, size, stride, ""); - } - - return size; -} - -static LLVMValueRef -shader_buffer_fetch_rsrc(struct si_shader_context *ctx, - const struct tgsi_full_src_register *reg, - bool ubo) -{ - LLVMValueRef index; - - if (!reg->Register.Indirect) { - index = LLVMConstInt(ctx->i32, reg->Register.Index, false); - } else { - index = si_get_indirect_index(ctx, &reg->Indirect, - 1, reg->Register.Index); - } - - if (ubo) - return ctx->abi.load_ubo(&ctx->abi, index); - else - return ctx->abi.load_ssbo(&ctx->abi, index, false); -} - -static enum ac_image_dim -ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target) -{ - switch (target) { - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_SHADOW1D: - if (screen->info.chip_class == GFX9) - return ac_image_2d; - return ac_image_1d; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_SHADOWRECT: - return ac_image_2d; - case TGSI_TEXTURE_3D: - return ac_image_3d; - case TGSI_TEXTURE_CUBE: - case TGSI_TEXTURE_SHADOWCUBE: - case TGSI_TEXTURE_CUBE_ARRAY: - case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - return ac_image_cube; - case TGSI_TEXTURE_1D_ARRAY: - case TGSI_TEXTURE_SHADOW1D_ARRAY: - if (screen->info.chip_class == GFX9) - return ac_image_2darray; - return ac_image_1darray; - case TGSI_TEXTURE_2D_ARRAY: - case TGSI_TEXTURE_SHADOW2D_ARRAY: - return ac_image_2darray; - case TGSI_TEXTURE_2D_MSAA: - return ac_image_2dmsaa; - case TGSI_TEXTURE_2D_ARRAY_MSAA: - return ac_image_2darraymsaa; - default: - unreachable("unhandled texture type"); - } -} - -static enum ac_image_dim -ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target) -{ - enum ac_image_dim dim = - 
ac_texture_dim_from_tgsi_target(screen, target); - - /* Match the resource type set in the descriptor. */ - if (dim == ac_image_cube || - (screen->info.chip_class <= GFX8 && dim == ac_image_3d)) - dim = ac_image_2darray; - else if (target == TGSI_TEXTURE_2D && screen->info.chip_class == GFX9) { - /* When a single layer of a 3D texture is bound, the shader - * will refer to a 2D target, but the descriptor has a 3D type. - * Since the HW ignores BASE_ARRAY in this case, we need to - * send 3 coordinates. This doesn't hurt when the underlying - * texture is non-3D. - */ - dim = ac_image_3d; - } - - return dim; -} - -/** - * Given a 256-bit resource descriptor, force the DCC enable bit to off. - * - * At least on Tonga, executing image stores on images with DCC enabled and - * non-trivial can eventually lead to lockups. This can occur when an - * application binds an image as read-only but then uses a shader that writes - * to it. The OpenGL spec allows almost arbitrarily bad behavior (including - * program termination) in this case, but it doesn't cost much to be a bit - * nicer: disabling DCC in the shader still leads to undefined results but - * avoids the lockup. - */ -static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, - LLVMValueRef rsrc) -{ - if (ctx->screen->info.chip_class <= GFX7) { - return rsrc; - } else { - LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0); - LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0); - LLVMValueRef tmp; - - tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, ""); - tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, ""); - return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, ""); - } -} - -/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should - * adjust "index" to point to FMASK. 
*/ -LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, - LLVMValueRef list, LLVMValueRef index, - enum ac_descriptor_type desc_type, - bool uses_store, bool bindless) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef rsrc; - - if (desc_type == AC_DESC_BUFFER) { - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), - ctx->i32_1); - list = LLVMBuildPointerCast(builder, list, - ac_array_in_const32_addr_space(ctx->v4i32), ""); - } else { - assert(desc_type == AC_DESC_IMAGE || - desc_type == AC_DESC_FMASK); - } - - if (bindless) - rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index); - else - rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index); - - if (desc_type == AC_DESC_IMAGE && uses_store) - rsrc = force_dcc_off(ctx, rsrc); - return rsrc; -} - -/** - * Load the resource descriptor for \p image. - */ -static void -image_fetch_rsrc( - struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *image, - bool fmask, bool is_store, unsigned target, - LLVMValueRef *rsrc) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - bool bindless = image->Register.File != TGSI_FILE_IMAGE; - LLVMValueRef rsrc_ptr, index; - - if (bindless) { - /* Bindless descriptors are accessible from a different pair of - * user SGPR indices. - */ - rsrc_ptr = ac_get_arg(&ctx->ac, - ctx->bindless_samplers_and_images); - index = lp_build_emit_fetch_src(bld_base, image, TGSI_TYPE_UNSIGNED, 0); - - /* Bindless image descriptors use 16-dword slots. */ - index = LLVMBuildMul(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, 2, 0), ""); - /* FMASK is right after the image. 
*/ - if (fmask) - index = LLVMBuildAdd(ctx->ac.builder, index, ctx->i32_1, ""); - } else { - rsrc_ptr = ac_get_arg(&ctx->ac, ctx->samplers_and_images); - - if (!image->Register.Indirect) { - index = LLVMConstInt(ctx->i32, image->Register.Index, 0); - } else { - /* From the GL_ARB_shader_image_load_store extension spec: - * - * If a shader performs an image load, store, or atomic - * operation using an image variable declared as an array, - * and if the index used to select an individual element is - * negative or greater than or equal to the size of the - * array, the results of the operation are undefined but may - * not lead to termination. - */ - index = si_get_bounded_indirect_index(ctx, &image->Indirect, - image->Register.Index, - ctx->num_images); - } - /* FMASKs are separate from images. */ - if (fmask) { - index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), ""); - } - index = LLVMBuildSub(ctx->ac.builder, - LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0), - index, ""); - } - - *rsrc = si_load_image_desc(ctx, rsrc_ptr, index, - fmask ? AC_DESC_FMASK : - target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE, - is_store, bindless); -} - -static void image_fetch_coords( - struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_instruction *inst, - unsigned src, LLVMValueRef desc, - LLVMValueRef *coords) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - unsigned target = inst->Memory.Texture; - unsigned num_coords = tgsi_util_get_texture_coord_dim(target); - LLVMValueRef tmp; - int chan; - - for (chan = 0; chan < num_coords; ++chan) { - tmp = lp_build_emit_fetch(bld_base, inst, src, chan); - tmp = ac_to_integer(&ctx->ac, tmp); - coords[chan] = tmp; - } - - if (target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - /* Need the sample index as well. 
*/ - tmp = lp_build_emit_fetch(bld_base, inst, src, TGSI_SWIZZLE_W); - coords[chan] = ac_to_integer(&ctx->ac, tmp); - } - - if (ctx->screen->info.chip_class == GFX9) { - /* 1D textures are allocated and used as 2D on GFX9. */ - if (target == TGSI_TEXTURE_1D) { - coords[1] = ctx->i32_0; - } else if (target == TGSI_TEXTURE_1D_ARRAY) { - coords[2] = coords[1]; - coords[1] = ctx->i32_0; - } else if (target == TGSI_TEXTURE_2D) { - /* The hw can't bind a slice of a 3D image as a 2D - * image, because it ignores BASE_ARRAY if the target - * is 3D. The workaround is to read BASE_ARRAY and set - * it as the 3rd address operand for all 2D images. - */ - LLVMValueRef first_layer, const5, mask; - - const5 = LLVMConstInt(ctx->i32, 5, 0); - mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0); - first_layer = LLVMBuildExtractElement(builder, desc, const5, ""); - first_layer = LLVMBuildAnd(builder, first_layer, mask, ""); - - coords[2] = first_layer; - } - } -} - -static unsigned get_cache_policy(struct si_shader_context *ctx, - const struct tgsi_full_instruction *inst, - bool atomic, bool may_store_unaligned, - bool writeonly_memory) -{ - unsigned cache_policy = 0; - - if (!atomic && - /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. - * All store opcodes not aligned to a dword are affected. - * The only way to get unaligned stores in radeonsi is through - * shader images. */ - ((may_store_unaligned && ctx->screen->info.chip_class == GFX6) || - /* If this is write-only, don't keep data in L1 to prevent - * evicting L1 cache lines that may be needed by other - * instructions. 
*/ - writeonly_memory || - inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))) { - cache_policy |= ac_glc; - } - - if (inst->Memory.Qualifier & TGSI_MEMORY_STREAM_CACHE_POLICY) - cache_policy |= ac_slc; - - return cache_policy; -} - -static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx, - const struct tgsi_full_instruction *inst, - LLVMTypeRef type, int arg) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef offset, ptr; - int addr_space; - - offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0); - offset = ac_to_integer(&ctx->ac, offset); - - ptr = ctx->ac.lds; - ptr = LLVMBuildGEP(builder, ptr, &offset, 1, ""); - addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)); - ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), ""); - - return ptr; -} - -static void load_emit_memory( - struct si_shader_context *ctx, - struct lp_build_emit_data *emit_data) -{ - const struct tgsi_full_instruction *inst = emit_data->inst; - unsigned writemask = inst->Dst[0].Register.WriteMask; - LLVMValueRef channels[4], ptr, derived_ptr, index; - int chan; - - ptr = get_memory_ptr(ctx, inst, ctx->f32, 1); - - for (chan = 0; chan < 4; ++chan) { - if (!(writemask & (1 << chan))) { - channels[chan] = LLVMGetUndef(ctx->f32); - continue; - } - - index = LLVMConstInt(ctx->i32, chan, 0); - derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, ""); - channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, ""); - } - emit_data->output[emit_data->chan] = ac_build_gather_values(&ctx->ac, channels, 4); -} - -/** - * Return true if the memory accessed by a LOAD or STORE instruction is - * read-only or write-only, respectively. - * - * \param shader_buffers_reverse_access_mask - * For LOAD, set this to (store | atomic) slot usage in the shader. - * For STORE, set this to (load | atomic) slot usage in the shader. - * \param images_reverse_access_mask Same as above, but for images. 
- * \param bindless_buffer_reverse_access_mask Same as above, but for bindless image buffers. - * \param bindless_image_reverse_access_mask Same as above, but for bindless images. - */ -static bool is_oneway_access_only(const struct tgsi_full_instruction *inst, - const struct tgsi_shader_info *info, - unsigned shader_buffers_reverse_access_mask, - unsigned images_reverse_access_mask, - bool bindless_buffer_reverse_access_mask, - bool bindless_image_reverse_access_mask) -{ - enum tgsi_file_type resource_file; - unsigned resource_index; - bool resource_indirect; - - if (inst->Instruction.Opcode == TGSI_OPCODE_STORE) { - resource_file = inst->Dst[0].Register.File; - resource_index = inst->Dst[0].Register.Index; - resource_indirect = inst->Dst[0].Register.Indirect; - } else { - resource_file = inst->Src[0].Register.File; - resource_index = inst->Src[0].Register.Index; - resource_indirect = inst->Src[0].Register.Indirect; - } - - assert(resource_file == TGSI_FILE_BUFFER || - resource_file == TGSI_FILE_IMAGE || - /* bindless image */ - resource_file == TGSI_FILE_INPUT || - resource_file == TGSI_FILE_OUTPUT || - resource_file == TGSI_FILE_CONSTANT || - resource_file == TGSI_FILE_TEMPORARY || - resource_file == TGSI_FILE_IMMEDIATE); - - assert(resource_file != TGSI_FILE_BUFFER || - inst->Memory.Texture == TGSI_TEXTURE_BUFFER); - - bool bindless = resource_file != TGSI_FILE_BUFFER && - resource_file != TGSI_FILE_IMAGE; - - /* RESTRICT means NOALIAS. - * If there are no writes, we can assume the accessed memory is read-only. - * If there are no reads, we can assume the accessed memory is write-only. 
- */ - if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT && !bindless) { - unsigned reverse_access_mask; - - if (resource_file == TGSI_FILE_BUFFER) { - reverse_access_mask = shader_buffers_reverse_access_mask; - } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { - reverse_access_mask = info->images_buffers & - images_reverse_access_mask; - } else { - reverse_access_mask = ~info->images_buffers & - images_reverse_access_mask; - } - - if (resource_indirect) { - if (!reverse_access_mask) - return true; - } else { - if (!(reverse_access_mask & - (1u << resource_index))) - return true; - } - } - - /* If there are no buffer writes (for both shader buffers & image - * buffers), it implies that buffer memory is read-only. - * If there are no buffer reads (for both shader buffers & image - * buffers), it implies that buffer memory is write-only. - * - * Same for the case when there are no writes/reads for non-buffer - * images. - */ - if (resource_file == TGSI_FILE_BUFFER || - inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { - if (!shader_buffers_reverse_access_mask && - !(info->images_buffers & images_reverse_access_mask) && - !bindless_buffer_reverse_access_mask) - return true; - } else { - if (!(~info->images_buffers & images_reverse_access_mask) && - !bindless_image_reverse_access_mask) - return true; - } - return false; -} - -static void load_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction * inst = emit_data->inst; - const struct tgsi_shader_info *info = &ctx->shader->selector->info; - bool can_speculate = false; - LLVMValueRef vindex = ctx->i32_0; - LLVMValueRef voffset = ctx->i32_0; - struct ac_image_args args = {}; - - if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) { - load_emit_memory(ctx, emit_data); - return; - } - - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER 
|| - inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) { - bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF; - args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo); - voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); - } else { - unsigned target = inst->Memory.Texture; - - image_fetch_rsrc(bld_base, &inst->Src[0], false, false, target, &args.resource); - image_fetch_coords(bld_base, inst, 1, args.resource, args.coords); - - if ((inst->Memory.Texture == TGSI_TEXTURE_2D_MSAA || - inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA) && - !(ctx->screen->debug_flags & DBG(NO_FMASK))) { - LLVMValueRef fmask; - - image_fetch_rsrc(bld_base, &inst->Src[0], true, false, target, &fmask); - ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, - inst->Memory.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA); - } - vindex = args.coords[0]; /* for buffers only */ - } - - if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) { - emit_data->output[emit_data->chan] = - ac_build_buffer_load(&ctx->ac, args.resource, - util_last_bit(inst->Dst[0].Register.WriteMask), - NULL, voffset, NULL, 0, 0, true, true); - return; - } - - if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE); - - can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) && - is_oneway_access_only(inst, info, - info->shader_buffers_store | - info->shader_buffers_atomic, - info->images_store | - info->images_atomic, - info->uses_bindless_buffer_store | - info->uses_bindless_buffer_atomic, - info->uses_bindless_image_store | - info->uses_bindless_image_atomic); - args.cache_policy = get_cache_policy(ctx, inst, false, false, false); - - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { - /* Don't use SMEM for shader buffer loads, because LLVM doesn't - * select SMEM for SI.load.const with a non-constant offset, and - * constant offsets practically don't exist with shader buffers. 
- * - * Also, SI.load.const doesn't use inst_offset when it's lowered - * to VMEM, so we just end up with more VALU instructions in the end - * and no benefit. - * - * TODO: Remove this line once LLVM can select SMEM with a non-constant - * offset, and can derive inst_offset when VMEM is selected. - * After that, si_memory_barrier should invalidate sL1 for shader - * buffers. - */ - emit_data->output[emit_data->chan] = - ac_build_buffer_load(&ctx->ac, args.resource, - util_last_bit(inst->Dst[0].Register.WriteMask), - NULL, voffset, NULL, 0, - args.cache_policy, can_speculate, false); - return; - } - - if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { - unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask); - LLVMValueRef result = - ac_build_buffer_load_format(&ctx->ac, - args.resource, - vindex, - ctx->i32_0, - num_channels, - args.cache_policy, - can_speculate); - emit_data->output[emit_data->chan] = - ac_build_expand_to_vec4(&ctx->ac, result, num_channels); - } else { - args.opcode = ac_image_load; - args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); - args.attributes = ac_get_load_intr_attribs(can_speculate); - args.dmask = 0xf; - - emit_data->output[emit_data->chan] = - ac_build_image_opcode(&ctx->ac, &args); - } -} - -static void store_emit_buffer(struct si_shader_context *ctx, - LLVMValueRef resource, - unsigned writemask, - LLVMValueRef value, - LLVMValueRef voffset, - unsigned cache_policy, - bool writeonly_memory) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef base_data = value; - LLVMValueRef base_offset = voffset; - - while (writemask) { - int start, count; - LLVMValueRef data, voff; - - u_bit_scan_consecutive_range(&writemask, &start, &count); - - if (count == 3 && ac_has_vec3_support(ctx->ac.chip_class, false)) { - LLVMValueRef values[3] = { - LLVMBuildExtractElement(builder, base_data, - LLVMConstInt(ctx->i32, start, 0), ""), - LLVMBuildExtractElement(builder, base_data, - 
LLVMConstInt(ctx->i32, start + 1, 0), ""), - LLVMBuildExtractElement(builder, base_data, - LLVMConstInt(ctx->i32, start + 2, 0), ""), - }; - data = ac_build_gather_values(&ctx->ac, values, 3); - } else if (count >= 3) { - data = base_data; - } else if (count == 2) { - LLVMValueRef values[2] = { - LLVMBuildExtractElement(builder, base_data, - LLVMConstInt(ctx->i32, start, 0), ""), - LLVMBuildExtractElement(builder, base_data, - LLVMConstInt(ctx->i32, start + 1, 0), ""), - }; - - data = ac_build_gather_values(&ctx->ac, values, 2); - } else { - assert(count == 1); - data = LLVMBuildExtractElement( - builder, base_data, - LLVMConstInt(ctx->i32, start, 0), ""); - } - - voff = base_offset; - if (start != 0) { - voff = LLVMBuildAdd( - builder, voff, - LLVMConstInt(ctx->i32, start * 4, 0), ""); - } - - ac_build_buffer_store_dword(&ctx->ac, resource, data, count, - voff, ctx->i32_0, 0, cache_policy); - } -} - -static void store_emit_memory( - struct si_shader_context *ctx, - struct lp_build_emit_data *emit_data) -{ - const struct tgsi_full_instruction *inst = emit_data->inst; - LLVMBuilderRef builder = ctx->ac.builder; - unsigned writemask = inst->Dst[0].Register.WriteMask; - LLVMValueRef ptr, derived_ptr, data, index; - int chan; - - ptr = get_memory_ptr(ctx, inst, ctx->f32, 0); - - for (chan = 0; chan < 4; ++chan) { - if (!(writemask & (1 << chan))) { - continue; - } - data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan); - index = LLVMConstInt(ctx->i32, chan, 0); - derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, ""); - LLVMBuildStore(builder, data, derived_ptr); - } -} - -static void store_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction * inst = emit_data->inst; - const struct tgsi_shader_info *info = &ctx->shader->selector->info; - struct tgsi_full_src_register resource_reg = 
- tgsi_full_src_register_from_dst(&inst->Dst[0]); - unsigned target = inst->Memory.Texture; - - if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) { - store_emit_memory(ctx, emit_data); - return; - } - - bool writeonly_memory = is_oneway_access_only(inst, info, - info->shader_buffers_load | - info->shader_buffers_atomic, - info->images_load | - info->images_atomic, - info->uses_bindless_buffer_load | - info->uses_bindless_buffer_atomic, - info->uses_bindless_image_load | - info->uses_bindless_image_atomic); - LLVMValueRef chans[4]; - LLVMValueRef vindex = ctx->i32_0; - LLVMValueRef voffset = ctx->i32_0; - struct ac_image_args args = {}; - - for (unsigned chan = 0; chan < 4; ++chan) - chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan); - - if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { - args.resource = shader_buffer_fetch_rsrc(ctx, &resource_reg, false); - voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 0, 0)); - } else { - image_fetch_rsrc(bld_base, &resource_reg, false, true, target, &args.resource); - image_fetch_coords(bld_base, inst, 0, args.resource, args.coords); - vindex = args.coords[0]; /* for buffers only */ - } - - if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) - ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE); - - bool is_image = inst->Dst[0].Register.File != TGSI_FILE_BUFFER; - args.cache_policy = get_cache_policy(ctx, inst, - false, /* atomic */ - is_image, /* may_store_unaligned */ - writeonly_memory); - - if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { - store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask, - ac_build_gather_values(&ctx->ac, chans, 4), - voffset, args.cache_policy, writeonly_memory); - return; - } - - if (target == TGSI_TEXTURE_BUFFER) { - unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask); - - ac_build_buffer_store_format(&ctx->ac, args.resource, - ac_build_gather_values(&ctx->ac, chans, num_channels), - vindex, ctx->i32_0 /* voffset 
*/, - num_channels, - args.cache_policy); - } else { - args.opcode = ac_image_store; - args.data[0] = ac_build_gather_values(&ctx->ac, chans, 4); - args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); - args.attributes = AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY; - args.dmask = 0xf; - - emit_data->output[emit_data->chan] = - ac_build_image_opcode(&ctx->ac, &args); - } -} - -static void atomic_emit_memory(struct si_shader_context *ctx, - struct lp_build_emit_data *emit_data) { - LLVMBuilderRef builder = ctx->ac.builder; - const struct tgsi_full_instruction * inst = emit_data->inst; - LLVMValueRef ptr, result, arg; - const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup"; - - ptr = get_memory_ptr(ctx, inst, ctx->i32, 1); - - arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0); - arg = ac_to_integer(&ctx->ac, arg); - - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { - LLVMValueRef new_data; - new_data = lp_build_emit_fetch(&ctx->bld_base, - inst, 3, 0); - - new_data = ac_to_integer(&ctx->ac, new_data); - - result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, arg, new_data, - sync_scope); - result = LLVMBuildExtractValue(builder, result, 0, ""); - } else { - LLVMAtomicRMWBinOp op; - - switch(inst->Instruction.Opcode) { - case TGSI_OPCODE_ATOMUADD: - op = LLVMAtomicRMWBinOpAdd; - break; - case TGSI_OPCODE_ATOMXCHG: - op = LLVMAtomicRMWBinOpXchg; - break; - case TGSI_OPCODE_ATOMAND: - op = LLVMAtomicRMWBinOpAnd; - break; - case TGSI_OPCODE_ATOMOR: - op = LLVMAtomicRMWBinOpOr; - break; - case TGSI_OPCODE_ATOMXOR: - op = LLVMAtomicRMWBinOpXor; - break; - case TGSI_OPCODE_ATOMUMIN: - op = LLVMAtomicRMWBinOpUMin; - break; - case TGSI_OPCODE_ATOMUMAX: - op = LLVMAtomicRMWBinOpUMax; - break; - case TGSI_OPCODE_ATOMIMIN: - op = LLVMAtomicRMWBinOpMin; - break; - case TGSI_OPCODE_ATOMIMAX: - op = LLVMAtomicRMWBinOpMax; - break; - default: - unreachable("unknown atomic opcode"); - } - - result = ac_build_atomic_rmw(&ctx->ac, op, 
ptr, arg, sync_scope); - } - emit_data->output[emit_data->chan] = - LLVMBuildBitCast(builder, result, ctx->f32, ""); -} - -static void atomic_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction * inst = emit_data->inst; - struct ac_image_args args = {}; - unsigned num_data = 0; - LLVMValueRef vindex = ctx->i32_0; - LLVMValueRef voffset = ctx->i32_0; - - if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) { - atomic_emit_memory(ctx, emit_data); - return; - } - - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { - /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order - * of arguments, which is reversed relative to TGSI (and GLSL) - */ - args.data[num_data++] = - ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 3, 0)); - } - - args.data[num_data++] = - ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 2, 0)); - - args.cache_policy = get_cache_policy(ctx, inst, true, false, false); - - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { - args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false); - voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); - } else { - image_fetch_rsrc(bld_base, &inst->Src[0], false, true, - inst->Memory.Texture, &args.resource); - image_fetch_coords(bld_base, inst, 1, args.resource, args.coords); - vindex = args.coords[0]; /* for buffers only */ - } - - if (inst->Src[0].Register.File != TGSI_FILE_BUFFER && - inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { - LLVMValueRef buf_args[7]; - unsigned num_args = 0; - - buf_args[num_args++] = args.data[0]; - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) - buf_args[num_args++] = args.data[1]; - - buf_args[num_args++] = args.resource; - buf_args[num_args++] = vindex; - buf_args[num_args++] = voffset; - buf_args[num_args++] = ctx->i32_0; 
/* soffset */ - buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0); - - char intrinsic_name[64]; - snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name); - emit_data->output[emit_data->chan] = - ac_to_float(&ctx->ac, - ac_build_intrinsic(&ctx->ac, intrinsic_name, - ctx->i32, buf_args, num_args, 0)); - return; - } - - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { - LLVMValueRef buf_args[7]; - unsigned num_args = 0; - - buf_args[num_args++] = args.data[0]; - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) - buf_args[num_args++] = args.data[1]; - - buf_args[num_args++] = args.resource; - buf_args[num_args++] = vindex; - buf_args[num_args++] = voffset; - buf_args[num_args++] = args.cache_policy & ac_slc ? ctx->i1true : ctx->i1false; - - char intrinsic_name[40]; - snprintf(intrinsic_name, sizeof(intrinsic_name), - "llvm.amdgcn.buffer.atomic.%s", action->intr_name); - emit_data->output[emit_data->chan] = - ac_to_float(&ctx->ac, - ac_build_intrinsic(&ctx->ac, intrinsic_name, - ctx->i32, buf_args, num_args, 0)); - } else { - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { - args.opcode = ac_image_atomic_cmpswap; - } else { - args.opcode = ac_image_atomic; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break; - case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break; - case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break; - case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break; - case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break; - case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break; - case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break; - case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break; - case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break; - case TGSI_OPCODE_ATOMINC_WRAP: - args.atomic = ac_atomic_inc_wrap; - break; - case 
TGSI_OPCODE_ATOMDEC_WRAP: - args.atomic = ac_atomic_dec_wrap; - break; - default: unreachable("unhandled image atomic"); - } - } - - args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); - emit_data->output[emit_data->chan] = - ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args)); - } -} - -static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, - unsigned target, LLVMValueRef out) -{ - LLVMBuilderRef builder = ctx->ac.builder; - - /* 1D textures are allocated and used as 2D on GFX9. */ - if (ctx->screen->info.chip_class == GFX9 && - (target == TGSI_TEXTURE_1D_ARRAY || - target == TGSI_TEXTURE_SHADOW1D_ARRAY)) { - LLVMValueRef layers = - LLVMBuildExtractElement(builder, out, - LLVMConstInt(ctx->i32, 2, 0), ""); - out = LLVMBuildInsertElement(builder, out, layers, - ctx->i32_1, ""); - } - - /* Divide the number of layers by 6 to get the number of cubes. */ - if (target == TGSI_TEXTURE_CUBE_ARRAY || - target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { - LLVMValueRef imm2 = LLVMConstInt(ctx->i32, 2, 0); - - LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, ""); - z = LLVMBuildSDiv(builder, z, LLVMConstInt(ctx->i32, 6, 0), ""); - - out = LLVMBuildInsertElement(builder, out, z, imm2, ""); - } - return out; -} - -static void resq_emit( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - const struct tgsi_full_instruction *inst = emit_data->inst; - const struct tgsi_full_src_register *reg = - &inst->Src[inst->Instruction.Opcode == TGSI_OPCODE_TXQ ? 
1 : 0]; - - if (reg->Register.File == TGSI_FILE_BUFFER) { - LLVMValueRef rsrc = shader_buffer_fetch_rsrc(ctx, reg, false); - - emit_data->output[emit_data->chan] = - LLVMBuildExtractElement(builder, rsrc, - LLVMConstInt(ctx->i32, 2, 0), ""); - return; - } - - if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && - inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { - LLVMValueRef rsrc; - - tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL); - /* Read the size from the buffer descriptor directly. */ - emit_data->output[emit_data->chan] = - get_buffer_size(bld_base, rsrc); - return; - } - - if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ && - inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { - LLVMValueRef rsrc; - - image_fetch_rsrc(bld_base, reg, false, false, inst->Memory.Texture, &rsrc); - emit_data->output[emit_data->chan] = - get_buffer_size(bld_base, rsrc); - return; - } - - unsigned target; - - if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { - target = inst->Texture.Texture; - } else { - if (inst->Memory.Texture == TGSI_TEXTURE_3D) - target = TGSI_TEXTURE_2D_ARRAY; - else - target = inst->Memory.Texture; - } - - struct ac_image_args args = {}; - args.opcode = ac_image_get_resinfo; - args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); - args.dmask = 0xf; - args.attributes = AC_FUNC_ATTR_READNONE; - - if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { - tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL); - args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); - } else { - image_fetch_rsrc(bld_base, reg, false, false, target, &args.resource); - args.lod = ctx->i32_0; - } - - emit_data->output[emit_data->chan] = - fix_resinfo(ctx, target, ac_build_image_opcode(&ctx->ac, &args)); - - if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ && - (target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA)) { - LLVMValueRef samples = - ac_build_image_get_sample_count(&ctx->ac, args.resource); - - 
emit_data->output[emit_data->chan] = - LLVMBuildInsertElement(ctx->ac.builder, - emit_data->output[emit_data->chan], - samples, - LLVMConstInt(ctx->i32, 3, 0), ""); - } -} - -/** - * Load an image view, fmask view. or sampler state descriptor. - */ -LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, - LLVMValueRef list, LLVMValueRef index, - enum ac_descriptor_type type) -{ - LLVMBuilderRef builder = ctx->ac.builder; - - switch (type) { - case AC_DESC_IMAGE: - /* The image is at [0:7]. */ - index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); - break; - case AC_DESC_BUFFER: - /* The buffer is in [4:7]. */ - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), - ctx->i32_1); - list = LLVMBuildPointerCast(builder, list, - ac_array_in_const32_addr_space(ctx->v4i32), ""); - break; - case AC_DESC_FMASK: - /* The FMASK is at [8:15]. */ - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0), - ctx->i32_1); - break; - case AC_DESC_SAMPLER: - /* The sampler state is at [12:15]. */ - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0), - LLVMConstInt(ctx->i32, 3, 0)); - list = LLVMBuildPointerCast(builder, list, - ac_array_in_const32_addr_space(ctx->v4i32), ""); - break; - case AC_DESC_PLANE_0: - case AC_DESC_PLANE_1: - case AC_DESC_PLANE_2: - /* Only used for the multiplane image support for Vulkan. Should - * never be reached in radeonsi. - */ - unreachable("Plane descriptor requested in radeonsi."); - } - - return ac_build_load_to_sgpr(&ctx->ac, list, index); -} - -/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. - * - * GFX6-GFX7: - * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic - * filtering manually. The driver sets img7 to a mask clearing - * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do: - * s_and_b32 samp0, samp0, img7 - * - * GFX8: - * The ANISO_OVERRIDE sampler field enables this fix in TA. 
- */ -static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx, - LLVMValueRef res, LLVMValueRef samp) -{ - LLVMValueRef img7, samp0; - - if (ctx->screen->info.chip_class >= GFX8) - return samp; - - img7 = LLVMBuildExtractElement(ctx->ac.builder, res, - LLVMConstInt(ctx->i32, 7, 0), ""); - samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp, - ctx->i32_0, ""); - samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, ""); - return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0, - ctx->i32_0, ""); -} - -static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data, - LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, - LLVMValueRef *fmask_ptr) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images); - const struct tgsi_full_instruction *inst = emit_data->inst; - const struct tgsi_full_src_register *reg; - unsigned target = inst->Texture.Texture; - unsigned sampler_src; - LLVMValueRef index; - - sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1; - reg = &emit_data->inst->Src[sampler_src]; - - if (reg->Register.Indirect) { - index = si_get_bounded_indirect_index(ctx, - &reg->Indirect, - reg->Register.Index, - ctx->num_samplers); - index = LLVMBuildAdd(ctx->ac.builder, index, - LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), ""); - } else { - index = LLVMConstInt(ctx->i32, - si_get_sampler_slot(reg->Register.Index), 0); - } - - if (reg->Register.File != TGSI_FILE_SAMPLER) { - /* Bindless descriptors are accessible from a different pair of - * user SGPR indices.
- */ - list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images); - index = lp_build_emit_fetch_src(bld_base, reg, - TGSI_TYPE_UNSIGNED, 0); - - /* Since bindless handle arithmetic can contain an unsigned integer - * wraparound and si_load_sampler_desc assumes there isn't any, - * use GEP without "inbounds" (inside ac_build_pointer_add) - * to prevent incorrect code generation and hangs. - */ - index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->i32, 2, 0), ""); - list = ac_build_pointer_add(&ctx->ac, list, index); - index = ctx->i32_0; - } - - if (target == TGSI_TEXTURE_BUFFER) - *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_BUFFER); - else - *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_IMAGE); - - if (samp_ptr) - *samp_ptr = NULL; - if (fmask_ptr) - *fmask_ptr = NULL; - - if (target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - if (fmask_ptr) - *fmask_ptr = si_load_sampler_desc(ctx, list, index, - AC_DESC_FMASK); - } else if (target != TGSI_TEXTURE_BUFFER) { - if (samp_ptr) { - *samp_ptr = si_load_sampler_desc(ctx, list, index, - AC_DESC_SAMPLER); - *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr); - } - } -} - -/* Gather4 should follow the same rules as bilinear filtering, but the hardware - * incorrectly forces nearest filtering if the texture format is integer. - * The only effect it has on Gather4, which always returns 4 texels for - * bilinear filtering, is that the final coordinates are off by 0.5 of - * the texel size. - * - * The workaround is to subtract 0.5 from the unnormalized coordinates, - * or (0.5 / size) from the normalized coordinates. - * - * However, cube textures with 8_8_8_8 data formats require a different - * workaround of overriding the num format to USCALED/SSCALED. This would lose - * precision in 32-bit data formats, so it needs to be applied dynamically at - * runtime. 
In this case, return an i1 value that indicates whether the - * descriptor was overridden (and hence a fixup of the sampler result is needed). - */ -static LLVMValueRef -si_lower_gather4_integer(struct si_shader_context *ctx, - struct ac_image_args *args, - unsigned target, - enum tgsi_return_type return_type) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef wa_8888 = NULL; - LLVMValueRef half_texel[2]; - - assert(return_type == TGSI_RETURN_TYPE_SINT || - return_type == TGSI_RETURN_TYPE_UINT); - - if (target == TGSI_TEXTURE_CUBE || - target == TGSI_TEXTURE_CUBE_ARRAY) { - LLVMValueRef formats; - LLVMValueRef data_format; - LLVMValueRef wa_formats; - - formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, ""); - - data_format = LLVMBuildLShr(builder, formats, - LLVMConstInt(ctx->i32, 20, false), ""); - data_format = LLVMBuildAnd(builder, data_format, - LLVMConstInt(ctx->i32, (1u << 6) - 1, false), ""); - wa_8888 = LLVMBuildICmp( - builder, LLVMIntEQ, data_format, - LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), - ""); - - uint32_t wa_num_format = - return_type == TGSI_RETURN_TYPE_UINT ? - S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) : - S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED); - wa_formats = LLVMBuildAnd(builder, formats, - LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), - ""); - wa_formats = LLVMBuildOr(builder, wa_formats, - LLVMConstInt(ctx->i32, wa_num_format, false), ""); - - formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, ""); - args->resource = LLVMBuildInsertElement( - builder, args->resource, formats, ctx->i32_1, ""); - } - - if (target == TGSI_TEXTURE_RECT || - target == TGSI_TEXTURE_SHADOWRECT) { - assert(!wa_8888); - half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); - } else { - struct ac_image_args resinfo = {}; - struct lp_build_if_state if_ctx; - - if (wa_8888) { - /* Skip the texture size query entirely if we don't need it. 
*/ - lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, "")); - } - - /* Query the texture size. */ - resinfo.opcode = ac_image_get_resinfo; - resinfo.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); - resinfo.resource = args->resource; - resinfo.sampler = args->sampler; - resinfo.lod = ctx->ac.i32_0; - resinfo.dmask = 0xf; - resinfo.attributes = AC_FUNC_ATTR_READNONE; - - LLVMValueRef texsize = - fix_resinfo(ctx, target, - ac_build_image_opcode(&ctx->ac, &resinfo)); - - /* Compute -0.5 / size. */ - for (unsigned c = 0; c < 2; c++) { - half_texel[c] = - LLVMBuildExtractElement(builder, texsize, - LLVMConstInt(ctx->i32, c, 0), ""); - half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, ""); - half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, half_texel[c]); - half_texel[c] = LLVMBuildFMul(builder, half_texel[c], - LLVMConstReal(ctx->f32, -0.5), ""); - } - - if (wa_8888) { - lp_build_endif(&if_ctx); - - LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block }; - - for (unsigned c = 0; c < 2; c++) { - LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 }; - half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2, - values, bb); - } - } - } - - for (unsigned c = 0; c < 2; c++) { - LLVMValueRef tmp; - tmp = ac_to_float(&ctx->ac, args->coords[c]); - tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], ""); - args->coords[c] = ac_to_integer(&ctx->ac, tmp); - } - - return wa_8888; -} - -/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the - * result after the gather operation. 
- */ -static LLVMValueRef -si_fix_gather4_integer_result(struct si_shader_context *ctx, - LLVMValueRef result, - enum tgsi_return_type return_type, - LLVMValueRef wa) -{ - LLVMBuilderRef builder = ctx->ac.builder; - - assert(return_type == TGSI_RETURN_TYPE_SINT || - return_type == TGSI_RETURN_TYPE_UINT); - - for (unsigned chan = 0; chan < 4; ++chan) { - LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false); - LLVMValueRef value; - LLVMValueRef wa_value; - - value = LLVMBuildExtractElement(builder, result, chanv, ""); - - if (return_type == TGSI_RETURN_TYPE_UINT) - wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, ""); - else - wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, ""); - wa_value = ac_to_float(&ctx->ac, wa_value); - value = LLVMBuildSelect(builder, wa, wa_value, value, ""); - - result = LLVMBuildInsertElement(builder, result, value, chanv, ""); - } - - return result; -} - -static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_instruction *inst = emit_data->inst; - unsigned opcode = inst->Instruction.Opcode; - unsigned target = inst->Texture.Texture; - struct ac_image_args args = {}; - int ref_pos = tgsi_util_get_shadow_ref_src_index(target); - unsigned chan; - bool has_offset = inst->Texture.NumOffsets > 0; - LLVMValueRef fmask_ptr = NULL; - - tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr); - - if (target == TGSI_TEXTURE_BUFFER) { - LLVMValueRef vindex = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); - unsigned num_channels = - util_last_bit(inst->Dst[0].Register.WriteMask); - LLVMValueRef result = - ac_build_buffer_load_format(&ctx->ac, - args.resource, - vindex, - ctx->i32_0, - num_channels, 0, true); - emit_data->output[emit_data->chan] = - ac_build_expand_to_vec4(&ctx->ac, result, num_channels); - return; - } - - 
/* Fetch and project texture coordinates */ - args.coords[3] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_W); - for (chan = 0; chan < 3; chan++) { - args.coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan); - if (opcode == TGSI_OPCODE_TXP) - args.coords[chan] = ac_build_fdiv(&ctx->ac, - args.coords[chan], args.coords[3]); - } - - if (opcode == TGSI_OPCODE_TXP) - args.coords[3] = ctx->ac.f32_1; - - /* Pack offsets. */ - if (has_offset && - opcode != TGSI_OPCODE_TXF && - opcode != TGSI_OPCODE_TXF_LZ) { - /* The offsets are six-bit signed integers packed like this: - * X=[5:0], Y=[13:8], and Z=[21:16]. - */ - LLVMValueRef offset[3], pack; - - assert(inst->Texture.NumOffsets == 1); - - for (chan = 0; chan < 3; chan++) { - offset[chan] = lp_build_emit_fetch_texoffset(bld_base, inst, 0, chan); - offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], - LLVMConstInt(ctx->i32, 0x3f, 0), ""); - if (chan) - offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan], - LLVMConstInt(ctx->i32, chan*8, 0), ""); - } - - pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); - pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - args.offset = pack; - } - - /* Pack LOD bias value */ - if (opcode == TGSI_OPCODE_TXB) - args.bias = args.coords[3]; - if (opcode == TGSI_OPCODE_TXB2) - args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); - - /* Pack depth comparison value */ - if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) { - LLVMValueRef z; - - if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { - z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); - } else { - assert(ref_pos >= 0); - z = args.coords[ref_pos]; - } - - /* Section 8.23.1 (Depth Texture Comparison Mode) of the - * OpenGL 4.5 spec says: - * - * "If the texture’s internal format indicates a fixed-point - * depth texture, then D_t and D_ref are clamped to the - * range [0, 1]; otherwise no clamping is performed." 
- * - * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT, - * so the depth comparison value isn't clamped for Z16 and - * Z24 anymore. Do it manually here for GFX8-9; GFX10 has - * an explicitly clamped 32-bit float format. - */ - if (ctx->screen->info.chip_class >= GFX8 && - ctx->screen->info.chip_class <= GFX9) { - LLVMValueRef upgraded; - LLVMValueRef clamped; - upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, - LLVMConstInt(ctx->i32, 3, false), ""); - upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, - LLVMConstInt(ctx->i32, 29, false), ""); - upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, ""); - clamped = ac_build_clamp(&ctx->ac, z); - z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, ""); - } - - args.compare = z; - } - - /* Pack user derivatives */ - if (opcode == TGSI_OPCODE_TXD) { - int param, num_src_deriv_channels, num_dst_deriv_channels; - - switch (target) { - case TGSI_TEXTURE_3D: - num_src_deriv_channels = 3; - num_dst_deriv_channels = 3; - break; - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_2D_ARRAY: - case TGSI_TEXTURE_SHADOW2D_ARRAY: - num_src_deriv_channels = 2; - num_dst_deriv_channels = 2; - break; - case TGSI_TEXTURE_CUBE: - case TGSI_TEXTURE_SHADOWCUBE: - case TGSI_TEXTURE_CUBE_ARRAY: - case TGSI_TEXTURE_SHADOWCUBE_ARRAY: - /* Cube derivatives will be converted to 2D. */ - num_src_deriv_channels = 3; - num_dst_deriv_channels = 3; - break; - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_SHADOW1D: - case TGSI_TEXTURE_1D_ARRAY: - case TGSI_TEXTURE_SHADOW1D_ARRAY: - num_src_deriv_channels = 1; - - /* 1D textures are allocated and used as 2D on GFX9. 
*/ - if (ctx->screen->info.chip_class == GFX9) { - num_dst_deriv_channels = 2; - } else { - num_dst_deriv_channels = 1; - } - break; - default: - unreachable("invalid target"); - } - - for (param = 0; param < 2; param++) { - for (chan = 0; chan < num_src_deriv_channels; chan++) - args.derivs[param * num_dst_deriv_channels + chan] = - lp_build_emit_fetch(bld_base, inst, param+1, chan); - - /* Fill in the rest with zeros. */ - for (chan = num_src_deriv_channels; - chan < num_dst_deriv_channels; chan++) - args.derivs[param * num_dst_deriv_channels + chan] = - ctx->ac.f32_0; - } - } - - if (target == TGSI_TEXTURE_CUBE || - target == TGSI_TEXTURE_CUBE_ARRAY || - target == TGSI_TEXTURE_SHADOWCUBE || - target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { - ac_prepare_cube_coords(&ctx->ac, - opcode == TGSI_OPCODE_TXD, - target == TGSI_TEXTURE_CUBE_ARRAY || - target == TGSI_TEXTURE_SHADOWCUBE_ARRAY, - opcode == TGSI_OPCODE_LODQ, - args.coords, args.derivs); - } else if (tgsi_is_array_sampler(target) && - opcode != TGSI_OPCODE_TXF && - opcode != TGSI_OPCODE_TXF_LZ && - ctx->screen->info.chip_class <= GFX8) { - unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; - args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]); - } - - /* 1D textures are allocated and used as 2D on GFX9. */ - if (ctx->screen->info.chip_class == GFX9) { - LLVMValueRef filler; - - /* Use 0.5, so that we don't sample the border color. 
*/ - if (opcode == TGSI_OPCODE_TXF || - opcode == TGSI_OPCODE_TXF_LZ) - filler = ctx->i32_0; - else - filler = LLVMConstReal(ctx->f32, 0.5); - - if (target == TGSI_TEXTURE_1D || - target == TGSI_TEXTURE_SHADOW1D) { - args.coords[1] = filler; - } else if (target == TGSI_TEXTURE_1D_ARRAY || - target == TGSI_TEXTURE_SHADOW1D_ARRAY) { - args.coords[2] = args.coords[1]; - args.coords[1] = filler; - } - } - - /* Pack LOD or sample index */ - if (opcode == TGSI_OPCODE_TXL) - args.lod = args.coords[3]; - else if (opcode == TGSI_OPCODE_TXL2) - args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); - else if (opcode == TGSI_OPCODE_TXF) { - if (target == TGSI_TEXTURE_2D_MSAA) { - /* No LOD, but move sample index into the right place. */ - args.coords[2] = args.coords[3]; - } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) { - args.lod = args.coords[3]; - } - } - - if ((target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA) && - !(ctx->screen->debug_flags & DBG(NO_FMASK))) { - ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords, - target == TGSI_TEXTURE_2D_ARRAY_MSAA); - } - - if (opcode == TGSI_OPCODE_TXF || - opcode == TGSI_OPCODE_TXF_LZ) { - /* add tex offsets */ - if (inst->Texture.NumOffsets) { - const struct tgsi_texture_offset *off = inst->TexOffsets; - - assert(inst->Texture.NumOffsets == 1); - - switch (target) { - case TGSI_TEXTURE_3D: - args.coords[2] = - LLVMBuildAdd(ctx->ac.builder, args.coords[2], - ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ], ""); - /* fall through */ - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_SHADOW2D: - case TGSI_TEXTURE_RECT: - case TGSI_TEXTURE_SHADOWRECT: - case TGSI_TEXTURE_2D_ARRAY: - case TGSI_TEXTURE_SHADOW2D_ARRAY: - args.coords[1] = - LLVMBuildAdd(ctx->ac.builder, args.coords[1], - ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY], ""); - /* fall through */ - case TGSI_TEXTURE_1D: - case TGSI_TEXTURE_SHADOW1D: - case TGSI_TEXTURE_1D_ARRAY: - case TGSI_TEXTURE_SHADOW1D_ARRAY: - 
args.coords[0] = - LLVMBuildAdd(ctx->ac.builder, args.coords[0], - ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX], ""); - break; - /* texture offsets do not apply to other texture targets */ - } - } - } - - if (opcode == TGSI_OPCODE_TG4) { - unsigned gather_comp = 0; - - /* DMASK was repurposed for GATHER4. 4 components are always - * returned and DMASK works like a swizzle - it selects - * the component to fetch. The only valid DMASK values are - * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns - * (red,red,red,red) etc.) The ISA document doesn't mention - * this. - */ - - /* Get the component index from src1.x for Gather4. */ - if (!tgsi_is_shadow_target(target)) { - LLVMValueRef comp_imm; - struct tgsi_src_register src1 = inst->Src[1].Register; - - assert(src1.File == TGSI_FILE_IMMEDIATE); - - comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX]; - gather_comp = LLVMConstIntGetZExtValue(comp_imm); - gather_comp = CLAMP(gather_comp, 0, 3); - } - - args.dmask = 1 << gather_comp; - } else { - args.dmask = 0xf; - } - - args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); - args.unorm = target == TGSI_TEXTURE_RECT || - target == TGSI_TEXTURE_SHADOWRECT; - args.opcode = ac_image_sample; - - switch (opcode) { - case TGSI_OPCODE_TXF: - case TGSI_OPCODE_TXF_LZ: - args.opcode = opcode == TGSI_OPCODE_TXF_LZ || - target == TGSI_TEXTURE_2D_MSAA || - target == TGSI_TEXTURE_2D_ARRAY_MSAA ? 
- ac_image_load : ac_image_load_mip; - break; - case TGSI_OPCODE_LODQ: - args.opcode = ac_image_get_lod; - break; - case TGSI_OPCODE_TEX: - case TGSI_OPCODE_TEX2: - case TGSI_OPCODE_TXP: - if (ctx->type != PIPE_SHADER_FRAGMENT) - args.level_zero = true; - break; - case TGSI_OPCODE_TEX_LZ: - args.level_zero = true; - break; - case TGSI_OPCODE_TXB: - case TGSI_OPCODE_TXB2: - assert(ctx->type == PIPE_SHADER_FRAGMENT); - break; - case TGSI_OPCODE_TXL: - case TGSI_OPCODE_TXL2: - break; - case TGSI_OPCODE_TXD: - break; - case TGSI_OPCODE_TG4: - args.opcode = ac_image_gather4; - args.level_zero = true; - break; - default: - assert(0); - return; - } - - /* The hardware needs special lowering for Gather4 with integer formats. */ - LLVMValueRef gather4_int_result_workaround = NULL; - - if (ctx->screen->info.chip_class <= GFX8 && - opcode == TGSI_OPCODE_TG4) { - assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN); - - if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT || - inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) { - gather4_int_result_workaround = - si_lower_gather4_integer(ctx, &args, target, - inst->Texture.ReturnType); - } - } - - args.attributes = AC_FUNC_ATTR_READNONE; - LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args); - - if (gather4_int_result_workaround) { - result = si_fix_gather4_integer_result(ctx, result, - inst->Texture.ReturnType, - gather4_int_result_workaround); - } - - emit_data->output[emit_data->chan] = result; -} - -static void si_llvm_emit_txqs( - const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef rsrc; - - tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL); - - rsrc = LLVMBuildBitCast(ctx->ac.builder, rsrc, ctx->v8i32, ""); - emit_data->output[emit_data->chan] = - ac_build_image_get_sample_count(&ctx->ac, rsrc); -} - -static LLVMValueRef 
si_llvm_emit_fbfetch(struct si_shader_context *ctx) -{ - struct ac_image_args args = {}; - LLVMValueRef ptr, image, fmask; - - /* Ignore src0, because KHR_blend_func_extended disallows multiple render - * targets. - */ - - /* Load the image descriptor. */ - STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0); - ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers); - ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr, - ac_array_in_const32_addr_space(ctx->v8i32), ""); - image = ac_build_load_to_sgpr(&ctx->ac, ptr, - LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); - - unsigned chan = 0; - - args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16); - - if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) - args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16); - - /* Get the current render target layer index. */ - if (ctx->shader->key.mono.u.ps.fbfetch_layered) - args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11); - - if (ctx->shader->key.mono.u.ps.fbfetch_msaa) - args.coords[chan++] = si_get_sample_id(ctx); - - if (ctx->shader->key.mono.u.ps.fbfetch_msaa && - !(ctx->screen->debug_flags & DBG(NO_FMASK))) { - fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, - LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); - - ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, - ctx->shader->key.mono.u.ps.fbfetch_layered); - } - - args.opcode = ac_image_load; - args.resource = image; - args.dmask = 0xf; - args.attributes = AC_FUNC_ATTR_READNONE; - - if (ctx->shader->key.mono.u.ps.fbfetch_msaa) - args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? - ac_image_2darraymsaa : ac_image_2dmsaa; - else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D) - args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? - ac_image_1darray : ac_image_1d; - else - args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? 
- ac_image_2darray : ac_image_2d; - - return ac_build_image_opcode(&ctx->ac, &args); -} - -static void si_tgsi_emit_fbfetch(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - emit_data->output[emit_data->chan] = si_llvm_emit_fbfetch(ctx); -} - -LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi) -{ - struct si_shader_context *ctx = si_shader_context_from_abi(abi); - - return si_llvm_emit_fbfetch(ctx); -} - -/** - * Setup actions for TGSI memory opcode, including texture opcodes. - */ -void si_shader_context_init_mem(struct si_shader_context *ctx) -{ - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; - - bld_base->op_actions[TGSI_OPCODE_TEX].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TEX_LZ].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TEX2].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXB].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXB2].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXD].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXF].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXF_LZ].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXL].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXL2].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXP].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXQ].emit = resq_emit; - bld_base->op_actions[TGSI_OPCODE_TG4].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_LODQ].emit = build_tex_intrinsic; - bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs; - - bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_tgsi_emit_fbfetch; - - bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit; - bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit; - 
bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit; - - bld_base->op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add"; - bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap"; - bld_base->op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap"; - bld_base->op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and"; - bld_base->op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or"; - bld_base->op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor"; - bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin"; - bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax"; - bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin"; - bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax"; - bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMINC_WRAP].intr_name = "inc"; - bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].emit = atomic_emit; - bld_base->op_actions[TGSI_OPCODE_ATOMDEC_WRAP].intr_name = "dec"; -} diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c deleted file mode 100644 index 1443432d593..00000000000 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c +++ /dev/null @@ -1,1165 +0,0 @@ -/* - * Copyright 2016 Advanced Micro Devices, Inc. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "si_shader_internal.h" -#include "si_pipe.h" -#include "ac_llvm_util.h" -#include "util/u_memory.h" - -struct si_llvm_diagnostics { - struct pipe_debug_callback *debug; - unsigned retval; -}; - -static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context) -{ - struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context; - LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di); - const char *severity_str = NULL; - - switch (severity) { - case LLVMDSError: - severity_str = "error"; - break; - case LLVMDSWarning: - severity_str = "warning"; - break; - case LLVMDSRemark: - case LLVMDSNote: - default: - return; - } - - char *description = LLVMGetDiagInfoDescription(di); - - pipe_debug_message(diag->debug, SHADER_INFO, - "LLVM diagnostic (%s): %s", severity_str, description); - - if (severity == LLVMDSError) { - diag->retval = 1; - fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description); - } - - LLVMDisposeMessage(description); -} - -/** - * Compile an LLVM module to machine code. - * - * @returns 0 for success, 1 for failure - */ -unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary, - struct ac_llvm_compiler *compiler, - struct pipe_debug_callback *debug, - bool less_optimized, unsigned wave_size) -{ - struct ac_compiler_passes *passes = compiler->passes; - - if (wave_size == 32) - passes = compiler->passes_wave32; - else if (less_optimized && compiler->low_opt_passes) - passes = compiler->low_opt_passes; - - struct si_llvm_diagnostics diag; - LLVMContextRef llvm_ctx; - - diag.debug = debug; - diag.retval = 0; - - /* Setup Diagnostic Handler*/ - llvm_ctx = LLVMGetModuleContext(M); - - LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag); - - /* Compile IR. 
*/ - if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer, - &binary->elf_size)) - diag.retval = 1; - - if (diag.retval != 0) - pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed"); - return diag.retval; -} - -void si_shader_binary_clean(struct si_shader_binary *binary) -{ - free((void *)binary->elf_buffer); - binary->elf_buffer = NULL; - - free(binary->llvm_ir_string); - binary->llvm_ir_string = NULL; -} - -LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - switch (type) { - case TGSI_TYPE_UNSIGNED: - case TGSI_TYPE_SIGNED: - return ctx->ac.i32; - case TGSI_TYPE_UNSIGNED64: - case TGSI_TYPE_SIGNED64: - return ctx->ac.i64; - case TGSI_TYPE_DOUBLE: - return ctx->ac.f64; - case TGSI_TYPE_UNTYPED: - case TGSI_TYPE_FLOAT: - return ctx->ac.f32; - default: break; - } - return 0; -} - -LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base, - enum tgsi_opcode_type type, LLVMValueRef value) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type); - - if (dst_type) - return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, ""); - else - return value; -} - -/** - * Return a value that is equal to the given i32 \p index if it lies in [0,num) - * or an undefined value in the same interval otherwise. - */ -LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, - LLVMValueRef index, - unsigned num) -{ - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0); - LLVMValueRef cc; - - if (util_is_power_of_two_or_zero(num)) { - index = LLVMBuildAnd(builder, index, c_max, ""); - } else { - /* In theory, this MAX pattern should result in code that is - * as good as the bit-wise AND above. - * - * In practice, LLVM generates worse code (at the time of - * writing), because its value tracking is not strong enough. 
- */ - cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, ""); - index = LLVMBuildSelect(builder, cc, index, c_max, ""); - } - - return index; -} - -static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base, - LLVMValueRef value, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef swizzles[4]; - - swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0); - swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0); - swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0); - swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0); - - return LLVMBuildShuffleVector(ctx->ac.builder, - value, - LLVMGetUndef(LLVMTypeOf(value)), - LLVMConstVector(swizzles, 4), ""); -} - -/** - * Return the description of the array covering the given temporary register - * index. - */ -static unsigned -get_temp_array_id(struct lp_build_tgsi_context *bld_base, - unsigned reg_index, - const struct tgsi_ind_register *reg) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY]; - unsigned i; - - if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays) - return reg->ArrayID; - - for (i = 0; i < num_arrays; i++) { - const struct tgsi_array_info *array = &ctx->temp_arrays[i]; - - if (reg_index >= array->range.First && reg_index <= array->range.Last) - return i + 1; - } - - return 0; -} - -static struct tgsi_declaration_range -get_array_range(struct lp_build_tgsi_context *bld_base, - unsigned File, unsigned reg_index, - const struct tgsi_ind_register *reg) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - struct tgsi_declaration_range range; - - if (File == TGSI_FILE_TEMPORARY) { - unsigned array_id = get_temp_array_id(bld_base, reg_index, reg); - if (array_id) - return ctx->temp_arrays[array_id - 1].range; - } - - range.First = 0; - range.Last = bld_base->info->file_max[File]; - 
return range; -} - -/** - * For indirect registers, construct a pointer directly to the requested - * element using getelementptr if possible. - * - * Returns NULL if the insertelement/extractelement fallback for array access - * must be used. - */ -static LLVMValueRef -get_pointer_into_array(struct si_shader_context *ctx, - unsigned file, - unsigned swizzle, - unsigned reg_index, - const struct tgsi_ind_register *reg_indirect) -{ - unsigned array_id; - struct tgsi_array_info *array; - LLVMValueRef idxs[2]; - LLVMValueRef index; - LLVMValueRef alloca; - - if (file != TGSI_FILE_TEMPORARY) - return NULL; - - array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect); - if (!array_id) - return NULL; - - alloca = ctx->temp_array_allocas[array_id - 1]; - if (!alloca) - return NULL; - - array = &ctx->temp_arrays[array_id - 1]; - - if (!(array->writemask & (1 << swizzle))) - return ctx->undef_alloca; - - index = si_get_indirect_index(ctx, reg_indirect, 1, - reg_index - ctx->temp_arrays[array_id - 1].range.First); - - /* Ensure that the index is within a valid range, to guard against - * VM faults and overwriting critical data (e.g. spilled resource - * descriptors). - * - * TODO It should be possible to avoid the additional instructions - * if LLVM is changed so that it guarantuees: - * 1. the scratch space descriptor isolates the current wave (this - * could even save the scratch offset SGPR at the cost of an - * additional SALU instruction) - * 2. 
the memory for allocas must be allocated at the _end_ of the - * scratch space (after spilled registers) - */ - index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1); - - index = ac_build_imad(&ctx->ac, index, - LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0), - LLVMConstInt(ctx->i32, - util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0)); - idxs[0] = ctx->i32_0; - idxs[1] = index; - return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, ""); -} - -LLVMValueRef -si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base, - LLVMTypeRef type, - LLVMValueRef ptr, - LLVMValueRef ptr2) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMValueRef values[2] = { - ac_to_integer(&ctx->ac, ptr), - ac_to_integer(&ctx->ac, ptr2), - }; - LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2); - return LLVMBuildBitCast(ctx->ac.builder, result, type, ""); -} - -static LLVMValueRef -emit_array_fetch(struct lp_build_tgsi_context *bld_base, - unsigned File, enum tgsi_opcode_type type, - struct tgsi_declaration_range range, - unsigned swizzle_in) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - unsigned i, size = range.Last - range.First + 1; - LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size); - LLVMValueRef result = LLVMGetUndef(vec); - unsigned swizzle = swizzle_in; - struct tgsi_full_src_register tmp_reg = {}; - tmp_reg.Register.File = File; - if (tgsi_type_is_64bit(type)) - swizzle |= (swizzle_in + 1) << 16; - - for (i = 0; i < size; ++i) { - tmp_reg.Register.Index = i + range.First; - - LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle); - result = LLVMBuildInsertElement(ctx->ac.builder, result, temp, - LLVMConstInt(ctx->i32, i, 0), "array_vector"); - } - return result; -} - -static LLVMValueRef -load_value_from_array(struct lp_build_tgsi_context *bld_base, - unsigned file, - enum tgsi_opcode_type type, - unsigned swizzle, - unsigned 
reg_index, - const struct tgsi_ind_register *reg_indirect) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef ptr; - - ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect); - if (ptr) { - LLVMValueRef val = LLVMBuildLoad(builder, ptr, ""); - if (tgsi_type_is_64bit(type)) { - LLVMValueRef ptr_hi, val_hi; - ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, ""); - val_hi = LLVMBuildLoad(builder, ptr_hi, ""); - val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), - val, val_hi); - } - - return val; - } else { - struct tgsi_declaration_range range = - get_array_range(bld_base, file, reg_index, reg_indirect); - LLVMValueRef index = - si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First); - LLVMValueRef array = - emit_array_fetch(bld_base, file, type, range, swizzle); - return LLVMBuildExtractElement(builder, array, index, ""); - } -} - -static void -store_value_to_array(struct lp_build_tgsi_context *bld_base, - LLVMValueRef value, - unsigned file, - unsigned chan_index, - unsigned reg_index, - const struct tgsi_ind_register *reg_indirect) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef ptr; - - ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect); - if (ptr) { - LLVMBuildStore(builder, value, ptr); - } else { - unsigned i, size; - struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect); - LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First); - LLVMValueRef array = - emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index); - LLVMValueRef temp_ptr; - - array = LLVMBuildInsertElement(builder, array, value, index, ""); - - size = range.Last - range.First + 1; - for (i = 0; i < size; ++i) { - switch(file) { - case TGSI_FILE_OUTPUT: - temp_ptr = ctx->outputs[i + 
range.First][chan_index]; - break; - - case TGSI_FILE_TEMPORARY: - if (range.First + i >= ctx->temps_count) - continue; - temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index]; - break; - - default: - continue; - } - value = LLVMBuildExtractElement(builder, array, - LLVMConstInt(ctx->i32, i, 0), ""); - LLVMBuildStore(builder, value, temp_ptr); - } - } -} - -/* If this is true, preload FS inputs at the beginning of shaders. Otherwise, - * reload them at each use. This must be true if the shader is using - * derivatives and KILL, because KILL can leave the WQM and then a lazy - * input load isn't in the WQM anymore. - */ -static bool si_preload_fs_inputs(struct si_shader_context *ctx) -{ - struct si_shader_selector *sel = ctx->shader->selector; - - return sel->info.uses_derivatives && - sel->info.uses_kill; -} - -static LLVMValueRef -get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index, - unsigned chan) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - - assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]); - return ctx->outputs[index][chan]; -} - -LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle_in) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef result = NULL, ptr, ptr2; - unsigned swizzle = swizzle_in & 0xffff; - - if (swizzle_in == ~0) { - LLVMValueRef values[TGSI_NUM_CHANNELS]; - unsigned chan; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan); - } - return ac_build_gather_values(&ctx->ac, values, - TGSI_NUM_CHANNELS); - } - - if (reg->Register.Indirect) { - LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type, - swizzle, reg->Register.Index, ®->Indirect); - return bitcast(bld_base, type, load); - } - - 
switch(reg->Register.File) { - case TGSI_FILE_IMMEDIATE: { - LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type); - if (tgsi_type_is_64bit(type)) { - result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2)); - result = LLVMConstInsertElement(result, - ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], - ctx->i32_0); - result = LLVMConstInsertElement(result, - ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)], - ctx->i32_1); - return LLVMConstBitCast(result, ctype); - } else { - return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype); - } - } - - case TGSI_FILE_INPUT: { - unsigned index = reg->Register.Index; - LLVMValueRef input[4]; - - /* I don't think doing this for vertex shaders is beneficial. - * For those, we want to make sure the VMEM loads are executed - * only once. Fragment shaders don't care much, because - * v_interp instructions are much cheaper than VMEM loads. - */ - if (!si_preload_fs_inputs(ctx) && - ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT) - ctx->load_input(ctx, index, &ctx->input_decls[index], input); - else - memcpy(input, &ctx->inputs[index * 4], sizeof(input)); - - result = input[swizzle]; - - if (tgsi_type_is_64bit(type)) { - ptr = result; - ptr2 = input[swizzle_in >> 16]; - return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), - ptr, ptr2); - } - break; - } - - case TGSI_FILE_TEMPORARY: - if (reg->Register.Index >= ctx->temps_count) - return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); - ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle]; - if (tgsi_type_is_64bit(type)) { - ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)]; - return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), - LLVMBuildLoad(builder, ptr, ""), - LLVMBuildLoad(builder, ptr2, "")); - } - result = LLVMBuildLoad(builder, ptr, ""); - break; - - case TGSI_FILE_OUTPUT: - ptr = get_output_ptr(bld_base, 
reg->Register.Index, swizzle); - if (tgsi_type_is_64bit(type)) { - ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16)); - return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), - LLVMBuildLoad(builder, ptr, ""), - LLVMBuildLoad(builder, ptr2, "")); - } - result = LLVMBuildLoad(builder, ptr, ""); - break; - - default: - return LLVMGetUndef(tgsi2llvmtype(bld_base, type)); - } - - return bitcast(bld_base, type, result); -} - -static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_src_register *reg, - enum tgsi_opcode_type type, - unsigned swizzle_in) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef cval = ctx->system_values[reg->Register.Index]; - unsigned swizzle = swizzle_in & 0xffff; - - if (tgsi_type_is_64bit(type)) { - LLVMValueRef lo, hi; - - assert(swizzle == 0 || swizzle == 2); - - lo = LLVMBuildExtractElement( - builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), ""); - hi = LLVMBuildExtractElement( - builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), ""); - - return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type), - lo, hi); - } - - if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) { - cval = LLVMBuildExtractElement( - builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), ""); - } else { - assert(swizzle == 0); - } - - return bitcast(bld_base, type, cval); -} - -static void emit_declaration(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_declaration *decl) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - LLVMBuilderRef builder = ctx->ac.builder; - unsigned first, last, i; - switch(decl->Declaration.File) { - case TGSI_FILE_ADDRESS: - { - unsigned idx; - for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { - unsigned chan; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - ctx->addrs[idx][chan] = 
ac_build_alloca_undef( - &ctx->ac, ctx->i32, ""); - } - } - break; - } - - case TGSI_FILE_TEMPORARY: - { - char name[18] = ""; - LLVMValueRef array_alloca = NULL; - unsigned decl_size; - unsigned writemask = decl->Declaration.UsageMask; - first = decl->Range.First; - last = decl->Range.Last; - decl_size = 4 * ((last - first) + 1); - - if (decl->Declaration.Array) { - unsigned id = decl->Array.ArrayID - 1; - unsigned array_size; - - writemask &= ctx->temp_arrays[id].writemask; - ctx->temp_arrays[id].writemask = writemask; - array_size = ((last - first) + 1) * util_bitcount(writemask); - - /* If the array has more than 16 elements, store it - * in memory using an alloca that spans the entire - * array. - * - * Otherwise, store each array element individually. - * We will then generate vectors (per-channel, up to - * <16 x float> if the usagemask is a single bit) for - * indirect addressing. - * - * Note that 16 is the number of vector elements that - * LLVM will store in a register, so theoretically an - * array with up to 4 * 16 = 64 elements could be - * handled this way, but whether that's a good idea - * depends on VGPR register pressure elsewhere. - * - * FIXME: We shouldn't need to have the non-alloca - * code path for arrays. LLVM should be smart enough to - * promote allocas into registers when profitable. 
- */ - if (array_size > 16 || - !ctx->screen->llvm_has_working_vgpr_indexing) { - array_alloca = ac_build_alloca_undef(&ctx->ac, - LLVMArrayType(ctx->f32, - array_size), "array"); - ctx->temp_array_allocas[id] = array_alloca; - } - } - - if (!ctx->temps_count) { - ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1; - ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef)); - } - if (!array_alloca) { - for (i = 0; i < decl_size; ++i) { -#ifndef NDEBUG - snprintf(name, sizeof(name), "TEMP%d.%c", - first + i / 4, "xyzw"[i % 4]); -#endif - ctx->temps[first * TGSI_NUM_CHANNELS + i] = - ac_build_alloca_undef(&ctx->ac, - ctx->f32, - name); - } - } else { - LLVMValueRef idxs[2] = { - ctx->i32_0, - NULL - }; - unsigned j = 0; - - if (writemask != TGSI_WRITEMASK_XYZW && - !ctx->undef_alloca) { - /* Create a dummy alloca. We use it so that we - * have a pointer that is safe to load from if - * a shader ever reads from a channel that - * it never writes to. 
- */ - ctx->undef_alloca = ac_build_alloca_undef( - &ctx->ac, ctx->f32, "undef"); - } - - for (i = 0; i < decl_size; ++i) { - LLVMValueRef ptr; - if (writemask & (1 << (i % 4))) { -#ifndef NDEBUG - snprintf(name, sizeof(name), "TEMP%d.%c", - first + i / 4, "xyzw"[i % 4]); -#endif - idxs[1] = LLVMConstInt(ctx->i32, j, 0); - ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name); - j++; - } else { - ptr = ctx->undef_alloca; - } - ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr; - } - } - break; - } - case TGSI_FILE_INPUT: - { - unsigned idx; - for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { - if (ctx->load_input && - ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) { - ctx->input_decls[idx] = *decl; - ctx->input_decls[idx].Range.First = idx; - ctx->input_decls[idx].Range.Last = idx; - ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First; - - if (si_preload_fs_inputs(ctx) || - bld_base->info->processor != PIPE_SHADER_FRAGMENT) - ctx->load_input(ctx, idx, &ctx->input_decls[idx], - &ctx->inputs[idx * 4]); - } - } - } - break; - - case TGSI_FILE_SYSTEM_VALUE: - { - unsigned idx; - for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { - si_load_system_value(ctx, idx, decl); - } - } - break; - - case TGSI_FILE_OUTPUT: - { - char name[16] = ""; - unsigned idx; - for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { - unsigned chan; - assert(idx < RADEON_LLVM_MAX_OUTPUTS); - if (ctx->outputs[idx][0]) - continue; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { -#ifndef NDEBUG - snprintf(name, sizeof(name), "OUT%d.%c", - idx, "xyzw"[chan % 4]); -#endif - ctx->outputs[idx][chan] = ac_build_alloca_undef( - &ctx->ac, ctx->f32, name); - } - } - break; - } - - case TGSI_FILE_MEMORY: - si_tgsi_declare_compute_memory(ctx, decl); - break; - - default: - break; - } -} - -void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_instruction *inst, - const struct tgsi_opcode_info *info, - 
unsigned index, - LLVMValueRef dst[4]) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - const struct tgsi_full_dst_register *reg = &inst->Dst[index]; - LLVMBuilderRef builder = ctx->ac.builder; - LLVMValueRef temp_ptr, temp_ptr2 = NULL; - bool is_vec_store = false; - enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index); - - if (dst[0]) { - LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); - is_vec_store = (k == LLVMVectorTypeKind); - } - - if (is_vec_store) { - LLVMValueRef values[4] = {}; - uint32_t writemask = reg->Register.WriteMask; - while (writemask) { - unsigned chan = u_bit_scan(&writemask); - LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0); - values[chan] = LLVMBuildExtractElement(ctx->ac.builder, - dst[0], index, ""); - } - bld_base->emit_store(bld_base, inst, info, index, values); - return; - } - - uint32_t writemask = reg->Register.WriteMask; - while (writemask) { - unsigned chan_index = u_bit_scan(&writemask); - LLVMValueRef value = dst[chan_index]; - - if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3)) - continue; - if (inst->Instruction.Saturate) - value = ac_build_clamp(&ctx->ac, value); - - if (reg->Register.File == TGSI_FILE_ADDRESS) { - temp_ptr = ctx->addrs[reg->Register.Index][chan_index]; - LLVMBuildStore(builder, value, temp_ptr); - continue; - } - - if (!tgsi_type_is_64bit(dtype)) - value = ac_to_float(&ctx->ac, value); - - if (reg->Register.Indirect) { - unsigned file = reg->Register.File; - unsigned reg_index = reg->Register.Index; - store_value_to_array(bld_base, value, file, chan_index, - reg_index, ®->Indirect); - } else { - switch(reg->Register.File) { - case TGSI_FILE_OUTPUT: - temp_ptr = ctx->outputs[reg->Register.Index][chan_index]; - if (tgsi_type_is_64bit(dtype)) - temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1]; - break; - - case TGSI_FILE_TEMPORARY: - { - if (reg->Register.Index >= ctx->temps_count) - continue; - - temp_ptr = 
ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index]; - if (tgsi_type_is_64bit(dtype)) - temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1]; - - break; - } - default: - return; - } - if (!tgsi_type_is_64bit(dtype)) - LLVMBuildStore(builder, value, temp_ptr); - else { - LLVMValueRef ptr = LLVMBuildBitCast(builder, value, - LLVMVectorType(ctx->i32, 2), ""); - LLVMValueRef val2; - value = LLVMBuildExtractElement(builder, ptr, - ctx->i32_0, ""); - val2 = LLVMBuildExtractElement(builder, ptr, - ctx->i32_1, ""); - - LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr); - LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2); - } - } - } -} - -static int get_line(int pc) -{ - /* Subtract 1 so that the number shown is that of the corresponding - * opcode in the TGSI dump, e.g. an if block has the same suffix as - * the instruction number of the corresponding TGSI IF. - */ - return pc - 1; -} - -static void bgnloop_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc)); -} - -static void brk_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - ac_build_break(&ctx->ac); -} - -static void cont_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - ac_build_continue(&ctx->ac); -} - -static void else_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - ac_build_else(&ctx->ac, 
get_line(bld_base->pc)); -} - -static void endif_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - ac_build_endif(&ctx->ac, get_line(bld_base->pc)); -} - -static void endloop_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - ac_build_endloop(&ctx->ac, get_line(bld_base->pc)); -} - -static void if_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc)); -} - -static void uif_emit(const struct lp_build_tgsi_action *action, - struct lp_build_tgsi_context *bld_base, - struct lp_build_emit_data *emit_data) -{ - struct si_shader_context *ctx = si_shader_context(bld_base); - ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc)); -} - -static void emit_immediate(struct lp_build_tgsi_context *bld_base, - const struct tgsi_full_immediate *imm) -{ - unsigned i; - struct si_shader_context *ctx = si_shader_context(bld_base); - - for (i = 0; i < 4; ++i) { - ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] = - LLVMConstInt(ctx->i32, imm->u[i].Uint, false ); - } - - ctx->imms_num++; -} - -void si_llvm_context_init(struct si_shader_context *ctx, - struct si_screen *sscreen, - struct ac_llvm_compiler *compiler, - unsigned wave_size, - unsigned ballot_mask_bits) -{ - struct lp_type type; - - /* Initialize the gallivm object: - * We are only using the module, context, and builder fields of this struct. - * This should be enough for us to be able to pass our gallivm struct to the - * helper functions in the gallivm module. 
- */ - memset(ctx, 0, sizeof(*ctx)); - ctx->screen = sscreen; - ctx->compiler = compiler; - - ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class, - sscreen->info.family, - AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH, - wave_size, ballot_mask_bits); - - ctx->gallivm.context = ctx->ac.context; - ctx->gallivm.module = ctx->ac.module; - ctx->gallivm.builder = ctx->ac.builder; - - struct lp_build_tgsi_context *bld_base = &ctx->bld_base; - - type.floating = true; - type.fixed = false; - type.sign = true; - type.norm = false; - type.width = 32; - type.length = 1; - - lp_build_context_init(&bld_base->base, &ctx->gallivm, type); - lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type)); - lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type)); - type.width *= 2; - lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type); - lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type)); - lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type)); - - bld_base->soa = 1; - bld_base->emit_swizzle = emit_swizzle; - bld_base->emit_declaration = emit_declaration; - bld_base->emit_immediate = emit_immediate; - - bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; - bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit; - bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; - bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit; - bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit; - bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; - bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; - bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; - - si_shader_context_init_alu(ctx); - si_shader_context_init_mem(ctx); - - ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context); - ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context); - ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context); - ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context); - 
ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context); - ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128); - ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context); - ctx->v2i32 = LLVMVectorType(ctx->i32, 2); - ctx->v4i32 = LLVMVectorType(ctx->i32, 4); - ctx->v4f32 = LLVMVectorType(ctx->f32, 4); - ctx->v8i32 = LLVMVectorType(ctx->i32, 8); - - ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0); - ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0); - ctx->i1false = LLVMConstInt(ctx->i1, 0, 0); - ctx->i1true = LLVMConstInt(ctx->i1, 1, 0); -} - -/* Set the context to a certain TGSI shader. Can be called repeatedly - * to change the shader. */ -void si_llvm_context_set_ir(struct si_shader_context *ctx, - struct si_shader *shader, - struct nir_shader *nir) -{ - struct si_shader_selector *sel = shader->selector; - const struct tgsi_shader_info *info = &sel->info; - - ctx->shader = shader; - ctx->type = sel->type; - ctx->bld_base.info = info; - - /* Clean up the old contents. */ - FREE(ctx->temp_arrays); - ctx->temp_arrays = NULL; - FREE(ctx->temp_array_allocas); - ctx->temp_array_allocas = NULL; - - FREE(ctx->imms); - ctx->imms = NULL; - ctx->imms_num = 0; - - FREE(ctx->temps); - ctx->temps = NULL; - ctx->temps_count = 0; - - ctx->num_const_buffers = util_last_bit(info->const_buffers_declared); - ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared); - - ctx->num_samplers = util_last_bit(info->samplers_declared); - ctx->num_images = util_last_bit(info->images_declared); - - if (nir) - return; - - if (info->array_max[TGSI_FILE_TEMPORARY] > 0) { - int size = info->array_max[TGSI_FILE_TEMPORARY]; - - ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0])); - ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0])); - - tgsi_scan_arrays(sel->tokens, TGSI_FILE_TEMPORARY, size, - ctx->temp_arrays); - } - if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) { - int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1; - ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * 
sizeof(LLVMValueRef)); - } - - /* Re-set these to start with a clean slate. */ - ctx->bld_base.num_instructions = 0; - ctx->bld_base.pc = 0; - memset(ctx->input_decls, 0, sizeof(ctx->input_decls)); - memset(ctx->inputs, 0, sizeof(ctx->inputs)); - memset(ctx->outputs, 0, sizeof(ctx->outputs)); - - ctx->bld_base.emit_store = si_llvm_emit_store; - ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch; - ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch; - ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch; - ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch; - ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value; -} - -void si_llvm_create_func(struct si_shader_context *ctx, - const char *name, - LLVMTypeRef *return_types, unsigned num_return_elems) -{ - LLVMTypeRef ret_type; - enum ac_llvm_calling_convention call_conv; - enum pipe_shader_type real_shader_type; - - if (num_return_elems) - ret_type = LLVMStructTypeInContext(ctx->ac.context, - return_types, - num_return_elems, true); - else - ret_type = ctx->voidt; - - real_shader_type = ctx->type; - - /* LS is merged into HS (TCS), and ES is merged into GS. 
*/ - if (ctx->screen->info.chip_class >= GFX9) { - if (ctx->shader->key.as_ls) - real_shader_type = PIPE_SHADER_TESS_CTRL; - else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg) - real_shader_type = PIPE_SHADER_GEOMETRY; - } - - switch (real_shader_type) { - case PIPE_SHADER_VERTEX: - case PIPE_SHADER_TESS_EVAL: - call_conv = AC_LLVM_AMDGPU_VS; - break; - case PIPE_SHADER_TESS_CTRL: - call_conv = AC_LLVM_AMDGPU_HS; - break; - case PIPE_SHADER_GEOMETRY: - call_conv = AC_LLVM_AMDGPU_GS; - break; - case PIPE_SHADER_FRAGMENT: - call_conv = AC_LLVM_AMDGPU_PS; - break; - case PIPE_SHADER_COMPUTE: - call_conv = AC_LLVM_AMDGPU_CS; - break; - default: - unreachable("Unhandle shader type"); - } - - /* Setup the function */ - ctx->return_type = ret_type; - ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name, - ret_type, ctx->gallivm.module); -} - -void si_llvm_optimize_module(struct si_shader_context *ctx) -{ - /* Dump LLVM IR before any optimization passes */ - if (ctx->screen->debug_flags & DBG(PREOPT_IR) && - si_can_dump_shader(ctx->screen, ctx->type)) - LLVMDumpModule(ctx->gallivm.module); - - /* Run the pass */ - LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module); - LLVMDisposeBuilder(ctx->ac.builder); -} - -void si_llvm_dispose(struct si_shader_context *ctx) -{ - LLVMDisposeModule(ctx->gallivm.module); - LLVMContextDispose(ctx->gallivm.context); - FREE(ctx->temp_arrays); - ctx->temp_arrays = NULL; - FREE(ctx->temp_array_allocas); - ctx->temp_array_allocas = NULL; - FREE(ctx->temps); - ctx->temps = NULL; - ctx->temps_count = 0; - FREE(ctx->imms); - ctx->imms = NULL; - ctx->imms_num = 0; - ac_llvm_context_dispose(&ctx->ac); -} diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 941a397525e..bf4a22de13b 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -27,7 +27,6 @@ #include "compiler/nir/nir_serialize.h" 
#include "nir/tgsi_to_nir.h" -#include "tgsi/tgsi_parse.h" #include "util/hash_table.h" #include "util/crc32.h" #include "util/u_async_debug.h" @@ -51,11 +50,7 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es, unsigned ir_size; void *ir_binary; - if (sel->tokens) { - ir_binary = sel->tokens; - ir_size = tgsi_num_tokens(sel->tokens) * - sizeof(struct tgsi_token); - } else if (sel->nir_binary) { + if (sel->nir_binary) { ir_binary = sel->nir_binary; ir_size = sel->nir_size; } else { @@ -2153,7 +2148,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, main_part->key.as_ngg = key->as_ngg; main_part->is_monolithic = false; - if (si_compile_tgsi_shader(sscreen, compiler_state->compiler, + if (si_compile_shader(sscreen, compiler_state->compiler, main_part, &compiler_state->debug) != 0) { FREE(main_part); return false; @@ -2516,7 +2511,7 @@ static void si_init_shader_selector_async(void *job, int thread_index) sel->type == PIPE_SHADER_GEOMETRY)) shader->key.as_ngg = 1; - if (sel->tokens || sel->nir) { + if (sel->nir) { si_get_ir_cache_key(sel, shader->key.as_ngg, shader->key.as_es, ir_sha1_cache_key); } @@ -2531,7 +2526,7 @@ static void si_init_shader_selector_async(void *job, int thread_index) simple_mtx_unlock(&sscreen->shader_cache_mutex); /* Compile the shader if it hasn't been loaded from the cache. 
*/ - if (si_compile_tgsi_shader(sscreen, compiler, shader, + if (si_compile_shader(sscreen, compiler, shader, debug) != 0) { FREE(shader); fprintf(stderr, "radeonsi: can't compile a main shader part\n"); @@ -2695,44 +2690,17 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->so = state->stream_output; - if (state->type == PIPE_SHADER_IR_TGSI && - !sscreen->options.enable_nir) { - sel->tokens = tgsi_dup_tokens(state->tokens); - if (!sel->tokens) { - FREE(sel); - return NULL; - } - - tgsi_scan_shader(state->tokens, &sel->info); - tgsi_scan_tess_ctrl(state->tokens, &sel->info, &sel->tcs_info); - - /* Fixup for TGSI: Set which opcode uses which (i,j) pair. */ - if (sel->info.uses_persp_opcode_interp_centroid) - sel->info.uses_persp_centroid = true; - - if (sel->info.uses_linear_opcode_interp_centroid) - sel->info.uses_linear_centroid = true; - - if (sel->info.uses_persp_opcode_interp_offset || - sel->info.uses_persp_opcode_interp_sample) - sel->info.uses_persp_center = true; - - if (sel->info.uses_linear_opcode_interp_offset || - sel->info.uses_linear_opcode_interp_sample) - sel->info.uses_linear_center = true; + if (state->type == PIPE_SHADER_IR_TGSI) { + sel->nir = tgsi_to_nir(state->tokens, ctx->screen); } else { - if (state->type == PIPE_SHADER_IR_TGSI) { - sel->nir = tgsi_to_nir(state->tokens, ctx->screen); - } else { - assert(state->type == PIPE_SHADER_IR_NIR); - sel->nir = state->ir.nir; - } - - si_nir_scan_shader(sel->nir, &sel->info); - si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info); - si_nir_adjust_driver_locations(sel->nir); + assert(state->type == PIPE_SHADER_IR_NIR); + sel->nir = state->ir.nir; } + si_nir_scan_shader(sel->nir, &sel->info); + si_nir_scan_tess_ctrl(sel->nir, &sel->tcs_info); + si_nir_adjust_driver_locations(sel->nir); + sel->type = sel->info.processor; p_atomic_inc(&sscreen->num_shaders_created); si_get_active_slot_masks(&sel->info, @@ -3304,7 +3272,6 @@ void si_destroy_shader_selector(struct si_context *sctx, 
util_queue_fence_destroy(&sel->ready); simple_mtx_destroy(&sel->mutex); - free(sel->tokens); ralloc_free(sel->nir); free(sel->nir_binary); free(sel); diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index ffec8770823..e5f333942b7 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -552,12 +552,6 @@ TODO: document the other workarounds. - - - - -- 2.30.2