* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Tom Stellard <thomas.stellard@amd.com>
- * Michel Dänzer <michel.daenzer@amd.com>
- * Christian König <christian.koenig@amd.com>
*/
#include "gallivm/lp_bld_const.h"
#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_exp_param.h"
+#include "ac_shader_util.h"
#include "si_shader_internal.h"
#include "si_pipe.h"
#include "sid.h"
*/
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 14
-enum {
- CONST_ADDR_SPACE = 2,
- LOCAL_ADDR_SPACE = 3,
-};
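+/* Return true if the given LLVM type is a 64-bit scalar (i64 or f64). */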
+static bool llvm_type_is_64bit(struct si_shader_context *ctx,
+ LLVMTypeRef type)
+{
+ if (type == ctx->ac.i64 || type == ctx->ac.f64)
+ return true;
+
+ return false;
+}
static bool is_merged_shader(struct si_shader *shader)
{
- if (shader->selector->screen->b.chip_class <= VI)
+ if (shader->selector->screen->info.chip_class <= VI)
return false;
return shader->key.as_ls ||
/**
* Get the value of a shader input parameter and extract a bitfield.
*/
-static LLVMValueRef unpack_param(struct si_shader_context *ctx,
- unsigned param, unsigned rshift,
- unsigned bitwidth)
+static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx,
+ LLVMValueRef value, unsigned rshift,
+ unsigned bitwidth)
{
- LLVMValueRef value = LLVMGetParam(ctx->main_fn,
- param);
-
if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
value = ac_to_integer(&ctx->ac, value);
return value;
}
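+/* As above, but fetch the value from a parameter of the main function first. */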
+static LLVMValueRef unpack_param(struct si_shader_context *ctx,
+ unsigned param, unsigned rshift,
+ unsigned bitwidth)
+{
+ LLVMValueRef value = LLVMGetParam(ctx->main_fn, param);
+
+ return unpack_llvm_param(ctx, value, rshift, bitwidth);
+}
+
static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
{
switch (ctx->type) {
case PIPE_SHADER_TESS_CTRL:
- return unpack_param(ctx, ctx->param_tcs_rel_ids, 0, 8);
+ return unpack_llvm_param(ctx, ctx->abi.tcs_rel_ids, 0, 8);
case PIPE_SHADER_TESS_EVAL:
return LLVMGetParam(ctx->main_fn,
return LLVMConstInt(ctx->i32, stride * 4, 0);
case PIPE_SHADER_TESS_CTRL:
- if (ctx->screen->b.chip_class >= GFX9 &&
+ if (ctx->screen->info.chip_class >= GFX9 &&
ctx->shader->is_monolithic) {
stride = util_last_bit64(ctx->shader->key.part.tcs.ls->outputs_written);
return LLVMConstInt(ctx->i32, stride * 4, 0);
input[i] = ac_build_buffer_load_format(&ctx->ac, t_list,
vertex_index, voffset,
- true);
+ 4, true);
}
/* Break up the vec4 into individual components */
return LLVMGetParam(ctx->main_fn,
ctx->param_vs_prim_id);
case PIPE_SHADER_TESS_CTRL:
- return LLVMGetParam(ctx->main_fn,
- ctx->param_tcs_patch_id);
+ return ctx->abi.tcs_patch_id;
case PIPE_SHADER_TESS_EVAL:
- return LLVMGetParam(ctx->main_fn,
- ctx->param_tes_patch_id);
+ return ctx->abi.tes_patch_id;
case PIPE_SHADER_GEOMETRY:
- return LLVMGetParam(ctx->main_fn,
- ctx->param_gs_prim_id);
+ return ctx->abi.gs_prim_id;
default:
assert(0);
return ctx->i32_0;
return si_llvm_bound_index(ctx, result, num);
}
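+/* Compute the LDS dword address of a generic TCS/TES input or output.
+ * vertex_dw_stride and param_index may be NULL when there is no vertex
+ * dimension or indirect index. Shared by the TGSI and NIR paths.
+ */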
+static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context *ctx,
+ LLVMValueRef vertex_dw_stride,
+ LLVMValueRef base_addr,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned input_index,
+ ubyte *name,
+ ubyte *index,
+ bool is_patch)
+{
+ if (vertex_dw_stride) {
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+ LLVMBuildMul(ctx->ac.builder, vertex_index,
+ vertex_dw_stride, ""), "");
+ }
+
+ if (param_index) {
+ base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
+ LLVMBuildMul(ctx->ac.builder, param_index,
+ LLVMConstInt(ctx->i32, 4, 0), ""), "");
+ }
+
+ int param = is_patch ?
+ si_shader_io_get_unique_index_patch(name[input_index],
+ index[input_index]) :
+ si_shader_io_get_unique_index(name[input_index],
+ index[input_index]);
+
+ /* Add the base address of the element. */
+ return LLVMBuildAdd(ctx->ac.builder, base_addr,
+ LLVMConstInt(ctx->i32, param * 4, 0), "");
+}
/**
* Calculate a dword address given an input or output register and a stride.
{
struct tgsi_shader_info *info = &ctx->shader->selector->info;
ubyte *name, *index, *array_first;
- int first, param;
+ int input_index;
struct tgsi_full_dst_register reg;
+ LLVMValueRef vertex_index = NULL;
+ LLVMValueRef ind_index = NULL;
/* Set the register description. The address computation is the same
* for sources and destinations. */
/* If the register is 2-dimensional (e.g. an array of vertices
* in a primitive), calculate the base address of the vertex. */
if (reg.Register.Dimension) {
- LLVMValueRef index;
-
if (reg.Dimension.Indirect)
- index = si_get_indirect_index(ctx, ®.DimIndirect,
+ vertex_index = si_get_indirect_index(ctx, ®.DimIndirect,
1, reg.Dimension.Index);
else
- index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
-
- base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
- LLVMBuildMul(ctx->ac.builder, index,
- vertex_dw_stride, ""), "");
+ vertex_index = LLVMConstInt(ctx->i32, reg.Dimension.Index, 0);
}
/* Get information about the register. */
if (reg.Register.Indirect) {
/* Add the relative address of the element. */
- LLVMValueRef ind_index;
-
if (reg.Indirect.ArrayID)
- first = array_first[reg.Indirect.ArrayID];
+ input_index = array_first[reg.Indirect.ArrayID];
else
- first = reg.Register.Index;
+ input_index = reg.Register.Index;
ind_index = si_get_indirect_index(ctx, ®.Indirect,
- 1, reg.Register.Index - first);
-
- base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
- LLVMBuildMul(ctx->ac.builder, ind_index,
- LLVMConstInt(ctx->i32, 4, 0), ""), "");
-
- param = reg.Register.Dimension ?
- si_shader_io_get_unique_index(name[first], index[first]) :
- si_shader_io_get_unique_index_patch(name[first], index[first]);
+ 1, reg.Register.Index - input_index);
} else {
- param = reg.Register.Dimension ?
- si_shader_io_get_unique_index(name[reg.Register.Index],
- index[reg.Register.Index]) :
- si_shader_io_get_unique_index_patch(name[reg.Register.Index],
- index[reg.Register.Index]);
+ input_index = reg.Register.Index;
}
- /* Add the base address of the element. */
- return LLVMBuildAdd(ctx->ac.builder, base_addr,
- LLVMConstInt(ctx->i32, param * 4, 0), "");
+ return get_dw_address_from_generic_indices(ctx, vertex_dw_stride,
+ base_addr, vertex_index,
+ ind_index, input_index,
+ name, index,
+ !reg.Register.Dimension);
}
/* The offchip buffer layout for TCS->TES is
return base_addr;
}
+/* This is a generic helper that can be shared by the NIR and TGSI backends */
+static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
+ struct si_shader_context *ctx,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned param_base,
+ ubyte *name,
+ ubyte *index,
+ bool is_patch)
+{
+ unsigned param_index_base;
+
+ param_index_base = is_patch ?
+ si_shader_io_get_unique_index_patch(name[param_base], index[param_base]) :
+ si_shader_io_get_unique_index(name[param_base], index[param_base]);
+
+ if (param_index) {
+ param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
+ LLVMConstInt(ctx->i32, param_index_base, 0),
+ "");
+ } else {
+ param_index = LLVMConstInt(ctx->i32, param_index_base, 0);
+ }
+
+ return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx),
+ vertex_index, param_index);
+}
+
static LLVMValueRef get_tcs_tes_buffer_address_from_reg(
struct si_shader_context *ctx,
const struct tgsi_full_dst_register *dst,
struct tgsi_full_src_register reg;
LLVMValueRef vertex_index = NULL;
LLVMValueRef param_index = NULL;
- unsigned param_index_base, param_base;
+ unsigned param_base;
reg = src ? *src : tgsi_full_src_register_from_dst(dst);
} else {
param_base = reg.Register.Index;
- param_index = ctx->i32_0;
}
- param_index_base = reg.Register.Dimension ?
- si_shader_io_get_unique_index(name[param_base], index[param_base]) :
- si_shader_io_get_unique_index_patch(name[param_base], index[param_base]);
-
- param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
- LLVMConstInt(ctx->i32, param_index_base, 0),
- "");
-
- return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx),
- vertex_index, param_index);
+ return get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
+ param_index, param_base,
+ name, index, !reg.Register.Dimension);
}
static LLVMValueRef buffer_load(struct lp_build_tgsi_context *bld_base,
- enum tgsi_opcode_type type, unsigned swizzle,
+ LLVMTypeRef type, unsigned swizzle,
LLVMValueRef buffer, LLVMValueRef offset,
LLVMValueRef base, bool can_speculate)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef value, value2;
- LLVMTypeRef llvm_type = tgsi2llvmtype(bld_base, type);
- LLVMTypeRef vec_type = LLVMVectorType(llvm_type, 4);
+ LLVMTypeRef vec_type = LLVMVectorType(type, 4);
if (swizzle == ~0) {
value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, "");
}
- if (!tgsi_type_is_64bit(type)) {
+ if (!llvm_type_is_64bit(ctx, type)) {
value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset,
0, 1, 0, can_speculate, false);
* \param dw_addr address in dwords
*/
static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
- enum tgsi_opcode_type type, unsigned swizzle,
+ LLVMTypeRef type, unsigned swizzle,
LLVMValueRef dw_addr)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
TGSI_NUM_CHANNELS);
}
+ /* Split 64-bit loads. */
+ if (llvm_type_is_64bit(ctx, type)) {
+ LLVMValueRef lo, hi;
+
+ lo = lds_load(bld_base, ctx->i32, swizzle, dw_addr);
+ hi = lds_load(bld_base, ctx->i32, swizzle + 1, dw_addr);
+ return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+ }
+
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
LLVMConstInt(ctx->i32, swizzle, 0));
- value = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
- if (tgsi_type_is_64bit(type)) {
- LLVMValueRef value2;
- dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
- ctx->i32_1);
- value2 = ac_build_load(&ctx->ac, ctx->lds, dw_addr);
- return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
- }
+ value = ac_lds_load(&ctx->ac, dw_addr);
- return bitcast(bld_base, type, value);
+ return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
}
/**
* \param dw_addr address in dwords
* \param value value to store
*/
-static void lds_store(struct lp_build_tgsi_context *bld_base,
+static void lds_store(struct si_shader_context *ctx,
unsigned dw_offset_imm, LLVMValueRef dw_addr,
LLVMValueRef value)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
- dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
+ dw_addr = lp_build_add(&ctx->bld_base.uint_bld, dw_addr,
LLVMConstInt(ctx->i32, dw_offset_imm, 0));
- value = ac_to_integer(&ctx->ac, value);
- ac_build_indexed_store(&ctx->ac, ctx->lds,
- dw_addr, value);
+ ac_lds_store(&ctx->ac, dw_addr, value);
}
static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,
dw_addr = get_tcs_in_current_patch_offset(ctx);
dw_addr = get_dw_address(ctx, NULL, reg, stride, dw_addr);
- return lds_load(bld_base, type, swizzle, dw_addr);
+ return lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
+}
+
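+/* Load a TCS input or output varying from LDS (NIR path). */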
+static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ bool is_patch,
+ bool is_compact,
+ bool load_input)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
+ LLVMValueRef dw_addr, stride;
+
+ driver_location = driver_location / 4;
+
+ if (load_input) {
+ stride = get_tcs_in_vertex_dw_stride(ctx);
+ dw_addr = get_tcs_in_current_patch_offset(ctx);
+ } else {
+ if (is_patch) {
+ stride = NULL;
+ dw_addr = get_tcs_out_current_patch_data_offset(ctx);
+ } else {
+ stride = get_tcs_out_vertex_dw_stride(ctx);
+ dw_addr = get_tcs_out_current_patch_offset(ctx);
+ }
+ }
+
+ if (param_index) {
+ /* Add the constant index to the indirect index */
+ param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
+ LLVMConstInt(ctx->i32, const_index, 0), "");
+ } else {
+ param_index = LLVMConstInt(ctx->i32, const_index, 0);
+ }
+
+ dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
+ vertex_index, param_index,
+ driver_location,
+ info->input_semantic_name,
+ info->input_semantic_index,
+ is_patch);
+
+ LLVMValueRef value[4];
+ for (unsigned i = 0; i < num_components + component; i++) {
+ value[i] = lds_load(bld_base, ctx->i32, i, dw_addr);
+ }
+
+ return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
static LLVMValueRef fetch_output_tcs(
dw_addr = get_dw_address(ctx, NULL, reg, NULL, dw_addr);
}
- return lds_load(bld_base, type, swizzle, dw_addr);
+ return lds_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle, dw_addr);
}
static LLVMValueRef fetch_input_tes(
base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
addr = get_tcs_tes_buffer_address_from_reg(ctx, NULL, reg);
- return buffer_load(bld_base, type, swizzle, buffer, base, addr, true);
+ return buffer_load(bld_base, tgsi2llvmtype(bld_base, type), swizzle,
+ buffer, base, addr, true);
+}
+
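+/* Load a TES input from the offchip TCS->TES buffer (NIR path). */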
+LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ unsigned location,
+ unsigned driver_location,
+ unsigned component,
+ unsigned num_components,
+ bool is_patch,
+ bool is_compact,
+ bool load_input)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ LLVMValueRef buffer, base, addr;
+
+ driver_location = driver_location / 4;
+
+ buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
+
+ base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
+
+ if (param_index) {
+ /* Add the constant index to the indirect index */
+ param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
+ LLVMConstInt(ctx->i32, const_index, 0), "");
+ } else {
+ param_index = LLVMConstInt(ctx->i32, const_index, 0);
+ }
+
+ addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
+ param_index, driver_location,
+ info->input_semantic_name,
+ info->input_semantic_index,
+ is_patch);
+
+ /* TODO: This will generate rather ordinary llvm code, although it
+ * should be easy for the optimiser to fix up. In future we might want
+ * to refactor buffer_load(), but for now this maximises code sharing
+ * between the NIR and TGSI backends.
+ */
+ LLVMValueRef value[4];
+ for (unsigned i = component; i < num_components + component; i++) {
+ value[i] = buffer_load(&ctx->bld_base, ctx->i32, i, buffer, base, addr, true);
+ }
+
+ return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
/* Skip LDS stores if there is no LDS read of this output. */
if (!skip_lds_store)
- lds_store(bld_base, chan_index, dw_addr, value);
+ lds_store(ctx, chan_index, dw_addr, value);
value = ac_to_integer(&ctx->ac, value);
values[chan_index] = value;
}
}
-static LLVMValueRef fetch_input_gs(
- struct lp_build_tgsi_context *bld_base,
- const struct tgsi_full_src_register *reg,
- enum tgsi_opcode_type type,
- unsigned swizzle)
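+/* Store a TCS output to LDS and the offchip buffer (NIR path); tess factors
+ * are also copied into VGPRs for the epilog when they are defined in all
+ * invocations. */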
+static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
+ LLVMValueRef vertex_index,
+ LLVMValueRef param_index,
+ unsigned const_index,
+ unsigned location,
+ unsigned driver_location,
+ LLVMValueRef src,
+ unsigned component,
+ bool is_patch,
+ bool is_compact,
+ unsigned writemask)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ LLVMValueRef dw_addr, stride;
+ LLVMValueRef buffer, base, addr;
+ LLVMValueRef values[4];
+ bool skip_lds_store;
+ bool is_tess_factor = false, is_tess_inner = false;
+
+ driver_location = driver_location / 4;
+
+ if (param_index) {
+ /* Add the constant index to the indirect index */
+ param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
+ LLVMConstInt(ctx->i32, const_index, 0), "");
+ } else {
+ if (const_index != 0)
+ param_index = LLVMConstInt(ctx->i32, const_index, 0);
+ }
+
+ if (!is_patch) {
+ stride = get_tcs_out_vertex_dw_stride(ctx);
+ dw_addr = get_tcs_out_current_patch_offset(ctx);
+ dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr,
+ vertex_index, param_index,
+ driver_location,
+ info->output_semantic_name,
+ info->output_semantic_index,
+ is_patch);
+
+ skip_lds_store = !info->reads_pervertex_outputs;
+ } else {
+ dw_addr = get_tcs_out_current_patch_data_offset(ctx);
+ dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr,
+ vertex_index, param_index,
+ driver_location,
+ info->output_semantic_name,
+ info->output_semantic_index,
+ is_patch);
+
+ skip_lds_store = !info->reads_perpatch_outputs;
+
+ if (!param_index) {
+ int name = info->output_semantic_name[driver_location];
+
+ /* Always write tess factors into LDS for the TCS epilog. */
+ if (name == TGSI_SEMANTIC_TESSINNER ||
+ name == TGSI_SEMANTIC_TESSOUTER) {
+ /* The epilog doesn't read LDS if invocation 0 defines tess factors. */
+ skip_lds_store = !info->reads_tessfactor_outputs &&
+ ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs;
+ is_tess_factor = true;
+ is_tess_inner = name == TGSI_SEMANTIC_TESSINNER;
+ }
+ }
+ }
+
+ buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
+
+ base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
+
+ addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index,
+ param_index, driver_location,
+ info->output_semantic_name,
+ info->output_semantic_index,
+ is_patch);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ if (!(writemask & (1 << chan)))
+ continue;
+ LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
+
+ /* Skip LDS stores if there is no LDS read of this output. */
+ if (!skip_lds_store)
+ ac_lds_store(&ctx->ac, dw_addr, value);
+
+ value = ac_to_integer(&ctx->ac, value);
+ values[chan] = value;
+
+ if (writemask != 0xF && !is_tess_factor) {
+ ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
+ addr, base,
+ 4 * chan, 1, 0, true, false);
+ }
+
+ /* Write tess factors into VGPRs for the epilog. */
+ if (is_tess_factor &&
+ ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {
+ if (!is_tess_inner) {
+ LLVMBuildStore(ctx->ac.builder, value, /* outer */
+ ctx->invoc0_tess_factors[chan]);
+ } else if (chan < 2) {
+ LLVMBuildStore(ctx->ac.builder, value, /* inner */
+ ctx->invoc0_tess_factors[4 + chan]);
+ }
+ }
+ }
+
+ if (writemask == 0xF && !is_tess_factor) {
+ LLVMValueRef value = lp_build_gather_values(&ctx->gallivm,
+ values, 4);
+ ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
+ base, 0, 1, 0, true, false);
+ }
+}
+
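+/* Load a GS input from the ESGS ring: LDS on GFX9, a buffer load on older
+ * chips. */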
+LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
+ unsigned input_index,
+ unsigned vtx_offset_param,
+ LLVMTypeRef type,
+ unsigned swizzle)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
struct si_shader *shader = ctx->shader;
struct lp_build_context *uint = &ctx->bld_base.uint_bld;
LLVMValueRef vtx_offset, soffset;
struct tgsi_shader_info *info = &shader->selector->info;
- unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
- unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
+ unsigned semantic_name = info->input_semantic_name[input_index];
+ unsigned semantic_index = info->input_semantic_index[input_index];
unsigned param;
LLVMValueRef value;
- if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
- return get_primitive_id(ctx, swizzle);
-
- if (!reg->Register.Dimension)
- return NULL;
-
param = si_shader_io_get_unique_index(semantic_name, semantic_index);
/* GFX9 has the ESGS ring in LDS. */
- if (ctx->screen->b.chip_class >= GFX9) {
- unsigned index = reg->Dimension.Index;
+ if (ctx->screen->info.chip_class >= GFX9) {
+ unsigned index = vtx_offset_param;
switch (index / 2) {
case 0:
LLVMValueRef values[TGSI_NUM_CHANNELS];
unsigned chan;
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
- values[chan] = fetch_input_gs(bld_base, reg, type, chan);
+ values[chan] = si_llvm_load_input_gs(abi, input_index, vtx_offset_param,
+ type, chan);
}
return lp_build_gather_values(&ctx->gallivm, values,
TGSI_NUM_CHANNELS);
}
/* Get the vertex offset parameter on GFX6. */
- unsigned vtx_offset_param = reg->Dimension.Index;
- if (vtx_offset_param < 2) {
- vtx_offset_param += ctx->param_gs_vtx0_offset;
- } else {
- assert(vtx_offset_param < 6);
- vtx_offset_param += ctx->param_gs_vtx2_offset - 2;
- }
- vtx_offset = lp_build_mul_imm(uint,
- LLVMGetParam(ctx->main_fn,
- vtx_offset_param),
- 4);
+ LLVMValueRef gs_vtx_offset = ctx->gs_vtx_offset[vtx_offset_param];
+
+ vtx_offset = lp_build_mul_imm(uint, gs_vtx_offset, 4);
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle) * 256, 0);
value = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1, ctx->i32_0,
vtx_offset, soffset, 0, 1, 0, true, false);
- if (tgsi_type_is_64bit(type)) {
+ if (llvm_type_is_64bit(ctx, type)) {
LLVMValueRef value2;
soffset = LLVMConstInt(ctx->i32, (param * 4 + swizzle + 1) * 256, 0);
value2 = ac_build_buffer_load(&ctx->ac, ctx->esgs_ring, 1,
ctx->i32_0, vtx_offset, soffset,
0, 1, 0, true, false);
- return si_llvm_emit_fetch_64bit(bld_base, type,
- value, value2);
+ return si_llvm_emit_fetch_64bit(bld_base, type, value, value2);
}
- return bitcast(bld_base, type, value);
+ return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
+}
+
+static LLVMValueRef fetch_input_gs(
+ struct lp_build_tgsi_context *bld_base,
+ const struct tgsi_full_src_register *reg,
+ enum tgsi_opcode_type type,
+ unsigned swizzle)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct tgsi_shader_info *info = &ctx->shader->selector->info;
+
+ unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
+ if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
+ return get_primitive_id(ctx, swizzle);
+
+ if (!reg->Register.Dimension)
+ return NULL;
+
+ return si_llvm_load_input_gs(&ctx->abi, reg->Register.Index,
+ reg->Dimension.Index,
+ tgsi2llvmtype(bld_base, type),
+ swizzle);
}
static int lookup_interp_param_index(unsigned interpolate, unsigned location)
return lp_build_gather_values(&ctx->gallivm, pos, 4);
}
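+/* Build the TESSCOORD vector (u, v, 0, 0); for triangles the third
+ * component becomes 1 - u - v. */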
+static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi,
+ LLVMTypeRef type,
+ unsigned num_components)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct lp_build_context *bld = &ctx->bld_base.base;
+
+ LLVMValueRef coord[4] = {
+ LLVMGetParam(ctx->main_fn, ctx->param_tes_u),
+ LLVMGetParam(ctx->main_fn, ctx->param_tes_v),
+ ctx->ac.f32_0,
+ ctx->ac.f32_0
+ };
+
+ /* For triangles, the vector should be (u, v, 1-u-v). */
+ if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
+ PIPE_PRIM_TRIANGLES)
+ coord[2] = lp_build_sub(bld, ctx->ac.f32_1,
+ lp_build_add(bld, coord[0], coord[1]));
+
+ return lp_build_gather_values(&ctx->gallivm, coord, 4);
+}
+
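+/* Load TESSINNER or TESSOUTER from the offchip buffer. */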
+static LLVMValueRef load_tess_level(struct si_shader_context *ctx,
+ unsigned semantic_name)
+{
+ LLVMValueRef buffer, base, addr;
+
+ int param = si_shader_io_get_unique_index_patch(semantic_name, 0);
+
+ buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
+
+ base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
+ addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
+ LLVMConstInt(ctx->i32, param, 0));
+
+ return buffer_load(&ctx->bld_base, ctx->f32,
+ ~0, buffer, base, addr, true);
+}
+
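+/* Map a NIR varying slot to the TGSI tess level semantic and load it. */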
+static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi,
+ unsigned varying_id)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ unsigned semantic_name;
+
+ switch (varying_id) {
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ semantic_name = TGSI_SEMANTIC_TESSINNER;
+ break;
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ semantic_name = TGSI_SEMANTIC_TESSOUTER;
+ break;
+ default:
+ unreachable("unknown tess level");
+ }
+
+ return load_tess_level(ctx, semantic_name);
+}
+
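+/* Return the number of vertices in the input patch (TGSI_SEMANTIC_VERTICESIN). */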
+static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ if (ctx->type == PIPE_SHADER_TESS_CTRL)
+ return unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
+ else if (ctx->type == PIPE_SHADER_TESS_EVAL)
+ return get_num_tcs_out_vertices(ctx);
+ else
+ unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
+}
+
void si_load_system_value(struct si_shader_context *ctx,
unsigned index,
const struct tgsi_full_declaration *decl)
{
- struct lp_build_context *bld = &ctx->bld_base.base;
LLVMValueRef value = 0;
assert(index < RADEON_LLVM_MAX_SYSTEM_VALUES);
case TGSI_SEMANTIC_INVOCATIONID:
if (ctx->type == PIPE_SHADER_TESS_CTRL)
- value = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
+ value = unpack_llvm_param(ctx, ctx->abi.tcs_rel_ids, 8, 5);
else if (ctx->type == PIPE_SHADER_GEOMETRY)
- value = LLVMGetParam(ctx->main_fn,
- ctx->param_gs_instance_id);
+ value = ctx->abi.gs_invocation_id;
else
assert(!"INVOCATIONID not implemented");
break;
break;
case TGSI_SEMANTIC_TESSCOORD:
- {
- LLVMValueRef coord[4] = {
- LLVMGetParam(ctx->main_fn, ctx->param_tes_u),
- LLVMGetParam(ctx->main_fn, ctx->param_tes_v),
- ctx->ac.f32_0,
- ctx->ac.f32_0
- };
-
- /* For triangles, the vector should be (u, v, 1-u-v). */
- if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] ==
- PIPE_PRIM_TRIANGLES)
- coord[2] = lp_build_sub(bld, ctx->ac.f32_1,
- lp_build_add(bld, coord[0], coord[1]));
-
- value = lp_build_gather_values(&ctx->gallivm, coord, 4);
+ value = si_load_tess_coord(&ctx->abi, NULL, 4);
break;
- }
case TGSI_SEMANTIC_VERTICESIN:
- if (ctx->type == PIPE_SHADER_TESS_CTRL)
- value = unpack_param(ctx, ctx->param_tcs_out_lds_layout, 26, 6);
- else if (ctx->type == PIPE_SHADER_TESS_EVAL)
- value = get_num_tcs_out_vertices(ctx);
- else
- assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
+ value = si_load_patch_vertices_in(&ctx->abi);
break;
case TGSI_SEMANTIC_TESSINNER:
case TGSI_SEMANTIC_TESSOUTER:
- {
- LLVMValueRef buffer, base, addr;
- int param = si_shader_io_get_unique_index_patch(decl->Semantic.Name, 0);
-
- buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
-
- base = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
- addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
- LLVMConstInt(ctx->i32, param, 0));
-
- value = buffer_load(&ctx->bld_base, TGSI_TYPE_FLOAT,
- ~0, buffer, base, addr, true);
-
+ value = load_tess_level(ctx, decl->Semantic.Name);
break;
- }
case TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI:
case TGSI_SEMANTIC_DEFAULT_TESSINNER_SI:
{
struct si_shader_selector *sel = ctx->shader->selector;
- LLVMTypeRef i8p = LLVMPointerType(ctx->i8, LOCAL_ADDR_SPACE);
+ LLVMTypeRef i8p = LLVMPointerType(ctx->i8, AC_LOCAL_ADDR_SPACE);
LLVMValueRef var;
assert(decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED);
assert(decl->Range.First == decl->Range.Last);
- assert(!ctx->shared_memory);
+ assert(!ctx->ac.lds);
var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
LLVMArrayType(ctx->i8, sel->local_size),
"compute_lds",
- LOCAL_ADDR_SPACE);
+ AC_LOCAL_ADDR_SPACE);
LLVMSetAlignment(var, 4);
- ctx->shared_memory = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
+ ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
}
static LLVMValueRef load_const_buffer_desc(struct si_shader_context *ctx, int i)
lo = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle);
hi = fetch_constant(bld_base, reg, TGSI_TYPE_UNSIGNED, swizzle + 1);
- return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
+ return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
+ lo, hi);
}
idx = reg->Register.Index * 4 + swizzle;
* code reducing SIMD wave occupancy from 8 to 2 in many cases.
*
* Using s_buffer_load_dword (x1) seems to be the best option right now.
+ *
+ * LLVM 5.0 on SI doesn't insert a required s_nop between SALU setting
+ * a descriptor and s_buffer_load_dword using it, so we can't expand
+ * the pointer into a full descriptor like below. We have to use
+ * s_load_dword instead. The only case when LLVM 5.0 would select
+ * s_buffer_load_dword (that we have to prevent) is when we use
+ * a literal offset where we don't need bounds checking.
*/
-#if 0 /* keep this codepath disabled */
- if (!reg->Register.Indirect) {
+ if (ctx->screen->info.chip_class == SI &&
+ HAVE_LLVM < 0x0600 &&
+ !reg->Register.Indirect) {
addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr);
return bitcast(bld_base, type, result);
}
-#endif
/* Do the bounds checking with a descriptor, because
* doing computation and manual bounds checking of 64-bit
}
/* Initialize arguments for the shader export intrinsic */
-static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
+static void si_llvm_init_export_args(struct si_shader_context *ctx,
LLVMValueRef *values,
unsigned target,
struct ac_export_args *args)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct lp_build_context *base = &bld_base->base;
+ LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef val[4];
unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR;
}
args->compr = false;
- args->out[0] = base->undef;
- args->out[1] = base->undef;
- args->out[2] = base->undef;
- args->out[3] = base->undef;
+ args->out[0] = f32undef;
+ args->out[1] = f32undef;
+ args->out[2] = f32undef;
+ args->out[3] = f32undef;
switch (spi_shader_col_format) {
case V_028714_SPI_SHADER_ZERO:
case V_028714_SPI_SHADER_SNORM16_ABGR:
for (chan = 0; chan < 4; chan++) {
/* Clamp between [-1, 1]. */
- val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MIN,
+ val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MIN,
values[chan],
LLVMConstReal(ctx->f32, 1));
- val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_MAX,
+ val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_MAX,
val[chan],
LLVMConstReal(ctx->f32, -1));
/* Convert to a signed integer in [-32767, 32767]. */
/* Clamp. */
for (chan = 0; chan < 4; chan++) {
val[chan] = ac_to_integer(&ctx->ac, values[chan]);
- val[chan] = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_UMIN,
+ val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base, TGSI_OPCODE_UMIN,
val[chan],
chan == 3 ? max_alpha : max_rgb);
}
/* Clamp. */
for (chan = 0; chan < 4; chan++) {
val[chan] = ac_to_integer(&ctx->ac, values[chan]);
- val[chan] = lp_build_emit_llvm_binary(bld_base,
+ val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base,
TGSI_OPCODE_IMIN,
val[chan], chan == 3 ? max_alpha : max_rgb);
- val[chan] = lp_build_emit_llvm_binary(bld_base,
+ val[chan] = lp_build_emit_llvm_binary(&ctx->bld_base,
TGSI_OPCODE_IMAX,
val[chan], chan == 3 ? min_alpha : min_rgb);
}
return LLVMBuildFMul(ctx->ac.builder, alpha, coverage, "");
}
-static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context *bld_base,
+static void si_llvm_emit_clipvertex(struct si_shader_context *ctx,
struct ac_export_args *pos, LLVMValueRef *out_elts)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct lp_build_context *base = &bld_base->base;
unsigned reg_index;
unsigned chan;
unsigned const_chan;
base_elt = buffer_load_const(ctx, const_resource,
addr);
args->out[chan] =
- lp_build_add(base, args->out[chan],
- lp_build_mul(base, base_elt,
+ lp_build_add(&ctx->bld_base.base, args->out[chan],
+ lp_build_mul(&ctx->bld_base.base, base_elt,
out_elts[const_chan]));
}
}
{
struct ac_export_args args;
- si_llvm_init_export_args(&ctx->bld_base, values,
+ si_llvm_init_export_args(ctx, values,
V_008DFC_SQ_EXP_PARAM + index, &args);
ac_build_export(&ctx->ac, &args);
}
}
/* Generate export instructions for hardware VS shader stage */
-static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
+static void si_llvm_export_vs(struct si_shader_context *ctx,
struct si_shader_output_values *outputs,
unsigned noutput)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
struct si_shader *shader = ctx->shader;
struct ac_export_args pos_args[4] = {};
LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
for (i = 0; i < noutput; i++) {
switch (outputs[i].semantic_name) {
case TGSI_SEMANTIC_POSITION:
- si_llvm_init_export_args(bld_base, outputs[i].values,
+ si_llvm_init_export_args(ctx, outputs[i].values,
V_008DFC_SQ_EXP_POS, &pos_args[0]);
break;
case TGSI_SEMANTIC_PSIZE:
case TGSI_SEMANTIC_CLIPDIST:
if (!shader->key.opt.clip_disable) {
unsigned index = 2 + outputs[i].semantic_index;
- si_llvm_init_export_args(bld_base, outputs[i].values,
+ si_llvm_init_export_args(ctx, outputs[i].values,
V_008DFC_SQ_EXP_POS + index,
&pos_args[index]);
}
break;
case TGSI_SEMANTIC_CLIPVERTEX:
if (!shader->key.opt.clip_disable) {
- si_llvm_emit_clipvertex(bld_base, pos_args,
+ si_llvm_emit_clipvertex(ctx, pos_args,
outputs[i].values);
}
break;
pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
}
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
/* GFX9 has the layer in out.z[10:0] and the viewport
* index in out.z[19:16].
*/
LLVMValueRef lds_vertex_stride, lds_vertex_offset, lds_base;
uint64_t inputs;
- invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
+ invocation_id = unpack_llvm_param(ctx, ctx->abi.tcs_rel_ids, 8, 5);
buffer = desc_from_addr_base64k(ctx, ctx->param_tcs_offchip_addr_base64k);
buffer_offset = LLVMGetParam(ctx->main_fn, ctx->param_tcs_offchip_offset);
invocation_id,
LLVMConstInt(ctx->i32, i, 0));
- LLVMValueRef value = lds_load(bld_base, TGSI_TYPE_SIGNED, ~0,
+ LLVMValueRef value = lds_load(bld_base, ctx->ac.i32, ~0,
lds_ptr);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
for (i = 0; i < outer_comps; i++) {
outer[i] = out[i] =
- lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_outer);
+ lds_load(bld_base, ctx->ac.i32, i, lds_outer);
}
for (i = 0; i < inner_comps; i++) {
inner[i] = out[outer_comps+i] =
- lds_load(bld_base, TGSI_TYPE_SIGNED, i, lds_inner);
+ lds_load(bld_base, ctx->ac.i32, i, lds_inner);
}
}
/* Store the dynamic HS control word. */
offset = 0;
- if (ctx->screen->b.chip_class <= VI) {
+ if (ctx->screen->info.chip_class <= VI) {
ac_build_buffer_store_dword(&ctx->ac, buffer,
LLVMConstInt(ctx->i32, 0x80000000, 0),
1, ctx->i32_0, tf_base,
}
/* This only writes the tessellation factor levels. */
-static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
si_copy_tcs_inputs(bld_base);
rel_patch_id = get_rel_patch_id(ctx);
- invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
+ invocation_id = unpack_llvm_param(ctx, ctx->abi.tcs_rel_ids, 8, 5);
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
LLVMBasicBlockRef blocks[2] = {
LLVMGetInsertBlock(builder),
ctx->merged_wrap_if_state.entry_block
LLVMValueRef ret = ctx->return_value;
unsigned vgpr;
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_layout,
8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
ret = si_insert_input_ret(ctx, ret, ctx->param_tcs_offchip_addr_base64k,
tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
/* Leave a hole corresponding to the two input VGPRs. This ensures that
- * the invocation_id output does not alias the param_tcs_rel_ids input,
+ * the invocation_id output does not alias the tcs_rel_ids input,
* which saves a V_MOV on gfx9.
*/
vgpr += 2;
8 + GFX9_SGPR_TCS_SAMPLERS_AND_IMAGES);
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
- ret = si_insert_input_ret_float(ctx, ret,
- ctx->param_tcs_patch_id, vgpr++);
- ret = si_insert_input_ret_float(ctx, ret,
- ctx->param_tcs_rel_ids, vgpr++);
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
+ ac_to_float(&ctx->ac, ctx->abi.tcs_patch_id),
+ vgpr++, "");
+ ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
+ ac_to_float(&ctx->ac, ctx->abi.tcs_rel_ids),
+ vgpr++, "");
ctx->return_value = ret;
}
ctx->return_value = ret;
}
-static void si_llvm_emit_ls_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader *shader = ctx->shader;
struct tgsi_shader_info *info = &shader->selector->info;
unsigned i, chan;
/* Write outputs to LDS. The next shader (TCS aka HS) will read
* its inputs from it. */
for (i = 0; i < info->num_outputs; i++) {
- LLVMValueRef *out_ptr = ctx->outputs[i];
unsigned name = info->output_semantic_name[i];
unsigned index = info->output_semantic_index[i];
LLVMConstInt(ctx->i32, param * 4, 0), "");
for (chan = 0; chan < 4; chan++) {
- lds_store(bld_base, chan, dw_addr,
- LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], ""));
+ if (!(info->output_usagemask[i] & (1 << chan)))
+ continue;
+
+ lds_store(ctx, chan, dw_addr,
+ LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""));
}
}
- if (ctx->screen->b.chip_class >= GFX9)
+ if (ctx->screen->info.chip_class >= GFX9)
si_set_ls_return_value_for_tcs(ctx);
}
-static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
+static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader *es = ctx->shader;
struct tgsi_shader_info *info = &es->selector->info;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
unsigned chan;
int i;
- if (ctx->screen->b.chip_class >= GFX9 && info->num_outputs) {
+ if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) {
unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
LLVMValueRef wave_idx = unpack_param(ctx, ctx->param_merged_wave_info, 24, 4);
}
for (i = 0; i < info->num_outputs; i++) {
- LLVMValueRef *out_ptr = ctx->outputs[i];
int param;
if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
info->output_semantic_index[i]);
for (chan = 0; chan < 4; chan++) {
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
out_val = ac_to_integer(&ctx->ac, out_val);
/* GFX9 has the ESGS ring in LDS. */
- if (ctx->screen->b.chip_class >= GFX9) {
- lds_store(bld_base, param * 4 + chan, lds_base, out_val);
+ if (ctx->screen->info.chip_class >= GFX9) {
+ lds_store(ctx, param * 4 + chan, lds_base, out_val);
continue;
}
}
}
- if (ctx->screen->b.chip_class >= GFX9)
+ if (ctx->screen->info.chip_class >= GFX9)
si_set_es_return_value_for_gs(ctx);
}
static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
{
- if (ctx->screen->b.chip_class >= GFX9)
+ if (ctx->screen->info.chip_class >= GFX9)
return unpack_param(ctx, ctx->param_merged_wave_info, 16, 8);
else
return LLVMGetParam(ctx->main_fn, ctx->param_gs_wave_id);
}
-static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
+static void emit_gs_epilogue(struct si_shader_context *ctx)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
-
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
si_get_gs_wave_id(ctx));
- if (ctx->screen->b.chip_class >= GFX9)
+ if (ctx->screen->info.chip_class >= GFX9)
lp_build_endif(&ctx->merged_wrap_if_state);
}
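+/* GS epilogue entry point for the ac_shader_abi interface. */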
+static void si_llvm_emit_gs_epilogue(struct ac_shader_abi *abi,
+ unsigned max_outputs,
+ LLVMValueRef *addrs)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct tgsi_shader_info UNUSED *info = &ctx->shader->selector->info;
+
+ assert(info->num_outputs <= max_outputs);
+
+ emit_gs_epilogue(ctx);
+}
+
+static void si_tgsi_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ emit_gs_epilogue(ctx);
+}
+
static void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi,
unsigned max_outputs,
LLVMValueRef *addrs)
i++;
}
- si_llvm_export_vs(&ctx->bld_base, outputs, i);
+ si_llvm_export_vs(ctx, outputs, i);
FREE(outputs);
}
struct ac_export_args args[10];
};
-unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
- bool writes_samplemask)
-{
- if (writes_z) {
- /* Z needs 32 bits. */
- if (writes_samplemask)
- return V_028710_SPI_SHADER_32_ABGR;
- else if (writes_stencil)
- return V_028710_SPI_SHADER_32_GR;
- else
- return V_028710_SPI_SHADER_32_R;
- } else if (writes_stencil || writes_samplemask) {
- /* Both stencil and sample mask need only 16 bits. */
- return V_028710_SPI_SHADER_UINT16_ABGR;
- } else {
- return V_028710_SPI_SHADER_ZERO;
- }
-}
-
static void si_export_mrt_z(struct lp_build_tgsi_context *bld_base,
LLVMValueRef depth, LLVMValueRef stencil,
LLVMValueRef samplemask, struct si_ps_exports *exp)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- struct lp_build_context *base = &bld_base->base;
struct ac_export_args args;
- unsigned mask = 0;
- unsigned format = si_get_spi_shader_z_format(depth != NULL,
- stencil != NULL,
- samplemask != NULL);
-
- assert(depth || stencil || samplemask);
-
- args.valid_mask = 1; /* whether the EXEC mask is valid */
- args.done = 1; /* DONE bit */
-
- /* Specify the target we are exporting */
- args.target = V_008DFC_SQ_EXP_MRTZ;
-
- args.compr = 0; /* COMP flag */
- args.out[0] = base->undef; /* R, depth */
- args.out[1] = base->undef; /* G, stencil test value[0:7], stencil op value[8:15] */
- args.out[2] = base->undef; /* B, sample mask */
- args.out[3] = base->undef; /* A, alpha to mask */
-
- if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
- assert(!depth);
- args.compr = 1; /* COMPR flag */
-
- if (stencil) {
- /* Stencil should be in X[23:16]. */
- stencil = ac_to_integer(&ctx->ac, stencil);
- stencil = LLVMBuildShl(ctx->ac.builder, stencil,
- LLVMConstInt(ctx->i32, 16, 0), "");
- args.out[0] = ac_to_float(&ctx->ac, stencil);
- mask |= 0x3;
- }
- if (samplemask) {
- /* SampleMask should be in Y[15:0]. */
- args.out[1] = samplemask;
- mask |= 0xc;
- }
- } else {
- if (depth) {
- args.out[0] = depth;
- mask |= 0x1;
- }
- if (stencil) {
- args.out[1] = stencil;
- mask |= 0x2;
- }
- if (samplemask) {
- args.out[2] = samplemask;
- mask |= 0x4;
- }
- }
-
- /* SI (except OLAND and HAINAN) has a bug that it only looks
- * at the X writemask component. */
- if (ctx->screen->b.chip_class == SI &&
- ctx->screen->b.family != CHIP_OLAND &&
- ctx->screen->b.family != CHIP_HAINAN)
- mask |= 0x1;
- /* Specify which components to enable */
- args.enabled_channels = mask;
+ ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
memcpy(&exp->args[exp->num++], &args, sizeof(args));
}
/* Get the export arguments, also find out what the last one is. */
for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) {
- si_llvm_init_export_args(bld_base, color,
+ si_llvm_init_export_args(ctx, color,
V_008DFC_SQ_EXP_MRT + c, &args[c]);
if (args[c].enabled_channels)
last = c;
struct ac_export_args args;
/* Export */
- si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
+ si_llvm_init_export_args(ctx, color, V_008DFC_SQ_EXP_MRT + index,
&args);
if (is_last) {
args.valid_mask = 1; /* whether the EXEC mask is valid */
ctx->return_value = ret;
}
-void si_emit_waitcnt(struct si_shader_context *ctx, unsigned simm16)
-{
- LLVMValueRef args[1] = {
- LLVMConstInt(ctx->i32, simm16, 0)
- };
- lp_build_intrinsic(ctx->ac.builder, "llvm.amdgcn.s.waitcnt",
- ctx->voidt, args, 1, 0);
-}
-
static void membar_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
waitcnt &= LGKM_CNT;
if (waitcnt != NOOP_WAITCNT)
- si_emit_waitcnt(ctx, waitcnt);
+ ac_build_waitcnt(&ctx->ac, waitcnt);
}
static void clock_emit(
LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->i32_1, "");
}
-LLVMTypeRef si_const_array(LLVMTypeRef elem_type, int num_elements)
-{
- return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
- CONST_ADDR_SPACE);
-}
-
static void si_llvm_emit_ddxy(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
}
/* Emit one vertex from the geometry shader */
-static void si_llvm_emit_vertex(
- const struct lp_build_tgsi_action *action,
- struct lp_build_tgsi_context *bld_base,
- struct lp_build_emit_data *emit_data)
+static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
+ unsigned stream,
+ LLVMValueRef *addrs)
{
- struct si_shader_context *ctx = si_shader_context(bld_base);
- struct lp_build_context *uint = &bld_base->uint_bld;
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
+ struct tgsi_shader_info *info = &ctx->shader->selector->info;
+ struct lp_build_context *uint = &ctx->bld_base.uint_bld;
struct si_shader *shader = ctx->shader;
- struct tgsi_shader_info *info = &shader->selector->info;
struct lp_build_if_state if_state;
LLVMValueRef soffset = LLVMGetParam(ctx->main_fn,
ctx->param_gs2vs_offset);
LLVMValueRef can_emit;
unsigned chan, offset;
int i;
- unsigned stream;
-
- stream = si_llvm_get_stream(bld_base, emit_data);
/* Write vertex attribute values to GSVS ring */
gs_next_vertex = LLVMBuildLoad(ctx->ac.builder,
offset = 0;
for (i = 0; i < info->num_outputs; i++) {
- LLVMValueRef *out_ptr = ctx->outputs[i];
-
for (chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)) ||
((info->output_streams[i] >> (2 * chan)) & 3) != stream)
continue;
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[chan], "");
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
LLVMValueRef voffset =
LLVMConstInt(ctx->i32, offset *
shader->selector->gs_max_out_vertices, 0);
lp_build_endif(&if_state);
}
-/* Cut one primitive from the geometry shader */
-static void si_llvm_emit_primitive(
+/* Emit one vertex from the geometry shader */
+static void si_tgsi_emit_vertex(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
- unsigned stream;
+ unsigned stream = si_llvm_get_stream(bld_base, emit_data);
+
+ si_llvm_emit_vertex(&ctx->abi, stream, ctx->outputs[0]);
+}
+
+/* Cut one primitive from the geometry shader */
+static void si_llvm_emit_primitive(struct ac_shader_abi *abi,
+ unsigned stream)
+{
+ struct si_shader_context *ctx = si_shader_context_from_abi(abi);
/* Signal primitive cut */
- stream = si_llvm_get_stream(bld_base, emit_data);
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
si_get_gs_wave_id(ctx));
}
+/* Cut one primitive from the geometry shader */
+static void si_tgsi_emit_primitive(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+
+ si_llvm_emit_primitive(&ctx->abi, si_llvm_get_stream(bld_base, emit_data));
+}
+
static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
* The real barrier instruction isn't needed, because an entire patch
* always fits into a single wave.
*/
- if (ctx->screen->b.chip_class == SI &&
+ if (ctx->screen->info.chip_class == SI &&
ctx->type == PIPE_SHADER_TESS_CTRL) {
- si_emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
+ ac_build_waitcnt(&ctx->ac, LGKM_CNT & VM_CNT);
return;
}
LLVMValueRef P = LLVMGetParam(ctx->main_fn, i);
/* The combination of:
- * - ByVal
+ * - noalias
* - dereferenceable
* - invariant.load
* allows the optimization passes to move loads and reduces
* SGPR spilling significantly.
*/
+ lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
+
if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind) {
- lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_BYVAL);
lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_NOALIAS);
ac_add_attr_dereferenceable(P, UINT64_MAX);
- } else
- lp_add_function_attr(ctx->main_fn, i + 1, LP_FUNC_ATTR_INREG);
+ }
}
for (i = 0; i < fninfo->num_params; ++i) {
"no-signed-zeros-fp-math",
"true");
- if (ctx->screen->b.debug_flags & DBG(UNSAFE_MATH)) {
+ if (ctx->screen->debug_flags & DBG(UNSAFE_MATH)) {
/* These were copied from some LLVM test. */
LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
"less-precise-fpmad",
}
}
-static void declare_lds_as_pointer(struct si_shader_context *ctx)
-{
- unsigned lds_size = ctx->screen->b.chip_class >= CIK ? 65536 : 32768;
- ctx->lds = LLVMBuildIntToPtr(ctx->ac.builder, ctx->i32_0,
- LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
- "lds");
-}
-
static unsigned si_get_max_workgroup_size(const struct si_shader *shader)
{
switch (shader->selector->type) {
case PIPE_SHADER_TESS_CTRL:
/* Return this so that LLVM doesn't remove s_barrier
* instructions on chips where we use s_barrier. */
- return shader->selector->screen->b.chip_class >= CIK ? 128 : 64;
+ return shader->selector->screen->info.chip_class >= CIK ? 128 : 64;
case PIPE_SHADER_GEOMETRY:
- return shader->selector->screen->b.chip_class >= GFX9 ? 128 : 64;
+ return shader->selector->screen->info.chip_class >= GFX9 ? 128 : 64;
case PIPE_SHADER_COMPUTE:
break; /* see below */
unsigned const_and_shader_buffers =
add_arg(fninfo, ARG_SGPR,
- si_const_array(const_shader_buf_type, 0));
+ ac_array_in_const_addr_space(const_shader_buf_type));
unsigned samplers_and_images =
add_arg(fninfo, ARG_SGPR,
- si_const_array(ctx->v8i32,
- SI_NUM_IMAGES + SI_NUM_SAMPLERS * 2));
+ ac_array_in_const_addr_space(ctx->v8i32));
if (assign_params) {
ctx->param_const_and_shader_buffers = const_and_shader_buffers;
struct si_function_info *fninfo)
{
ctx->param_rw_buffers = add_arg(fninfo, ARG_SGPR,
- si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS));
+ ac_array_in_const_addr_space(ctx->v4i32));
ctx->param_bindless_samplers_and_images = add_arg(fninfo, ARG_SGPR,
- si_const_array(ctx->v8i32, 0));
+ ac_array_in_const_addr_space(ctx->v8i32));
}
static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx,
struct si_function_info *fninfo)
{
ctx->param_vertex_buffers = add_arg(fninfo, ARG_SGPR,
- si_const_array(ctx->v4i32, SI_NUM_VERTEX_BUFFERS));
+ ac_array_in_const_addr_space(ctx->v4i32));
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.base_vertex);
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.start_instance);
add_arg_assign(fninfo, ARG_SGPR, ctx->i32, &ctx->abi.draw_id);
ctx->param_tes_u = add_arg(fninfo, ARG_VGPR, ctx->f32);
ctx->param_tes_v = add_arg(fninfo, ARG_VGPR, ctx->f32);
ctx->param_tes_rel_patch_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
- ctx->param_tes_patch_id = add_arg(fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tes_patch_id);
}
enum {
si_init_function_info(&fninfo);
/* Set MERGED shaders. */
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
if (shader->key.as_ls || type == PIPE_SHADER_TESS_CTRL)
type = SI_SHADER_MERGED_VERTEX_TESSCTRL; /* LS or HS */
else if (shader->key.as_es || type == PIPE_SHADER_GEOMETRY)
declare_vs_specific_input_sgprs(ctx, &fninfo);
if (shader->key.as_es) {
- assert(!shader->selector->nir);
ctx->param_es2gs_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
} else if (shader->key.as_ls) {
- assert(!shader->selector->nir);
/* no extra parameters */
} else {
if (shader->is_gs_copy_shader) {
ctx->param_tcs_factor_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* VGPRs */
- ctx->param_tcs_patch_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_tcs_rel_ids = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_rel_ids);
/* param_tcs_offchip_offset and param_tcs_factor_offset are
* placed after the user SGPRs.
ctx->type == PIPE_SHADER_TESS_CTRL);
/* VGPRs (first TCS, then VS) */
- ctx->param_tcs_patch_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_tcs_rel_ids = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_patch_id);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.tcs_rel_ids);
if (ctx->type == PIPE_SHADER_VERTEX) {
declare_vs_input_vgprs(ctx, &fninfo,
/* VGPRs (first GS, then VS/TES) */
ctx->param_gs_vtx01_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
ctx->param_gs_vtx23_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_prim_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_instance_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_prim_id);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_invocation_id);
ctx->param_gs_vtx45_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
if (ctx->type == PIPE_SHADER_VERTEX) {
ctx->param_gs_wave_id = add_arg(&fninfo, ARG_SGPR, ctx->i32);
/* VGPRs */
- ctx->param_gs_vtx0_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_vtx1_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_prim_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_vtx2_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_vtx3_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_vtx4_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_vtx5_offset = add_arg(&fninfo, ARG_VGPR, ctx->i32);
- ctx->param_gs_instance_id = add_arg(&fninfo, ARG_VGPR, ctx->i32);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->gs_vtx_offset[0]);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->gs_vtx_offset[1]);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_prim_id);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->gs_vtx_offset[2]);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->gs_vtx_offset[3]);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->gs_vtx_offset[4]);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->gs_vtx_offset[5]);
+ add_arg_assign(&fninfo, ARG_VGPR, ctx->i32, &ctx->abi.gs_invocation_id);
break;
case PIPE_SHADER_FRAGMENT:
if (shader->key.as_ls ||
ctx->type == PIPE_SHADER_TESS_CTRL ||
/* GFX9 has the ESGS ring buffer in LDS. */
- (ctx->screen->b.chip_class >= GFX9 &&
- (shader->key.as_es ||
- ctx->type == PIPE_SHADER_GEOMETRY)))
- declare_lds_as_pointer(ctx);
+ type == SI_SHADER_MERGED_VERTEX_OR_TESSEVAL_GEOMETRY)
+ ac_declare_lds_as_pointer(&ctx->ac);
}
/**
LLVMValueRef buf_ptr = LLVMGetParam(ctx->main_fn,
ctx->param_rw_buffers);
- if (ctx->screen->b.chip_class <= VI &&
+ if (ctx->screen->info.chip_class <= VI &&
(ctx->shader->key.as_es || ctx->type == PIPE_SHADER_GEOMETRY)) {
unsigned ring =
ctx->type == PIPE_SHADER_GEOMETRY ? SI_GS_RING_ESGS
r600_resource_reference(&shader->bo, NULL);
shader->bo = (struct r600_resource*)
- pipe_buffer_create(&sscreen->b.b, 0,
- PIPE_USAGE_IMMUTABLE,
- align(bo_size, SI_CPDMA_ALIGNMENT));
+ si_aligned_buffer_create(&sscreen->b,
+ sscreen->cpdma_prefetch_writes_memory ?
+ 0 : R600_RESOURCE_FLAG_READ_ONLY,
+ PIPE_USAGE_IMMUTABLE,
+ align(bo_size, SI_CPDMA_ALIGNMENT),
+ 256);
if (!shader->bo)
return -ENOMEM;
/* Upload. */
- ptr = sscreen->b.ws->buffer_map(shader->bo->buf, NULL,
+ ptr = sscreen->ws->buffer_map(shader->bo->buf, NULL,
PIPE_TRANSFER_READ_WRITE |
PIPE_TRANSFER_UNSYNCHRONIZED);
else if (mainb->rodata_size > 0)
memcpy(ptr, mainb->rodata, mainb->rodata_size);
- sscreen->b.ws->buffer_unmap(shader->bo->buf);
+ sscreen->ws->buffer_unmap(shader->bo->buf);
return 0;
}
const struct si_shader_config *conf = &shader->config;
unsigned num_inputs = shader->selector ? shader->selector->info.num_inputs : 0;
unsigned code_size = si_get_shader_binary_size(shader);
- unsigned lds_increment = sscreen->b.chip_class >= CIK ? 512 : 256;
+ unsigned lds_increment = sscreen->info.chip_class >= CIK ? 512 : 256;
unsigned lds_per_wave = 0;
unsigned max_simd_waves;
- switch (sscreen->b.family) {
+ switch (sscreen->info.family) {
/* These always have 8 waves: */
case CHIP_POLARIS10:
case CHIP_POLARIS11:
/* Compute the per-SIMD wave counts. */
if (conf->num_sgprs) {
- if (sscreen->b.chip_class >= VI)
+ if (sscreen->info.chip_class >= VI)
max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
else
max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
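/* Editor's note, a worked example of the clamp above: each SIMD can use
 * 16 KiB of LDS (64 KiB per CU shared by 4 SIMDs). For a fragment shader
 * with, say, lds_per_wave = 512 on CIK (lds_increment = 512):
 *
 *     max_simd_waves = MIN2(10, 16384 / 512) = 10   -- not LDS-limited
 *
 * while lds_per_wave = 4096 would cap occupancy at 16384 / 4096 = 4
 * waves per SIMD.
 */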
if (!check_debug_option ||
- si_can_dump_shader(&sscreen->b, processor)) {
+ si_can_dump_shader(sscreen, processor)) {
if (processor == PIPE_SHADER_FRAGMENT) {
fprintf(file, "*** SHADER CONFIG ***\n"
"SPI_PS_INPUT_ADDR = 0x%04x\n"
FILE *file, bool check_debug_option)
{
if (!check_debug_option ||
- si_can_dump_shader(&sscreen->b, processor))
+ si_can_dump_shader(sscreen, processor))
si_dump_shader_key(processor, shader, file);
if (!check_debug_option && shader->binary.llvm_ir_string) {
}
if (!check_debug_option ||
- (si_can_dump_shader(&sscreen->b, processor) &&
- !(sscreen->b.debug_flags & DBG(NO_ASM)))) {
+ (si_can_dump_shader(sscreen, processor) &&
+ !(sscreen->debug_flags & DBG(NO_ASM)))) {
fprintf(file, "\n%s:\n", si_get_shader_name(shader, processor));
if (shader->prolog)
const char *name)
{
int r = 0;
- unsigned count = p_atomic_inc_return(&sscreen->b.num_compilations);
+ unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
- if (si_can_dump_shader(&sscreen->b, processor)) {
+ if (si_can_dump_shader(sscreen, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
- if (!(sscreen->b.debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
+ if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
fprintf(stderr, "%s LLVM IR:\n\n", name);
ac_dump_module(mod);
fprintf(stderr, "\n");
return NULL;
}
+ /* We can leave the fence as permanently signaled because the GS copy
+ * shader only becomes visible globally after it has been compiled. */
+ util_queue_fence_init(&shader->ready);
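/* Editor's note: util_queue_fence_init() initializes the fence in the
 * signaled state, so waiters never block on it; that is what makes the
 * "permanently signaled" approach above work without an explicit
 * util_queue_fence_signal() call.
 */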
shader->selector = gs_selector;
shader->is_gs_copy_shader = true;
}
if (stream == 0)
- si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
+ si_llvm_export_vs(&ctx, outputs, gsinfo->num_outputs);
LLVMBuildBr(builder, end_bb);
}
debug, PIPE_SHADER_GEOMETRY,
"GS Copy Shader");
if (!r) {
- if (si_can_dump_shader(&sscreen->b, PIPE_SHADER_GEOMETRY))
+ if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
fprintf(stderr, "GS Copy Shader:\n");
si_shader_dump(sscreen, ctx.shader, debug,
PIPE_SHADER_GEOMETRY, stderr, true);
break;
case PIPE_SHADER_TESS_CTRL:
- if (shader->selector->screen->b.chip_class >= GFX9) {
+ if (shader->selector->screen->info.chip_class >= GFX9) {
si_dump_shader_key_vs(key, &key->part.tcs.ls_prolog,
"part.tcs.ls_prolog", f);
}
if (shader->is_gs_copy_shader)
break;
- if (shader->selector->screen->b.chip_class >= GFX9 &&
+ if (shader->selector->screen->info.chip_class >= GFX9 &&
key->part.gs.es->type == PIPE_SHADER_VERTEX) {
si_dump_shader_key_vs(key, &key->part.gs.vs_prolog,
"part.gs.vs_prolog", f);
bld_base->op_actions[TGSI_OPCODE_READ_INVOC].fetch_args = read_invoc_fetch_args;
bld_base->op_actions[TGSI_OPCODE_READ_INVOC].emit = read_lane_emit;
- bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
- bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
+ bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_tgsi_emit_vertex;
+ bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_tgsi_emit_primitive;
bld_base->op_actions[TGSI_OPCODE_BARRIER].emit = si_llvm_emit_barrier;
}
}
}
-static void si_init_exec_full_mask(struct si_shader_context *ctx)
-{
- LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
- lp_build_intrinsic(ctx->ac.builder,
- "llvm.amdgcn.init.exec", ctx->voidt,
- &full_mask, 1, LP_FUNC_ATTR_CONVERGENT);
-}
-
static void si_init_exec_from_input(struct si_shader_context *ctx,
unsigned param, unsigned bitoffset)
{
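	/* Editor's note: body elided by the diff context. This helper
	 * presumably wraps the llvm.amdgcn.init.exec.from.input intrinsic,
	 * which derives EXEC from a live-lane count carried in a user SGPR,
	 * roughly:
	 *
	 *     LLVMValueRef args[] = {
	 *             LLVMGetParam(ctx->main_fn, param),
	 *             LLVMConstInt(ctx->i32, bitoffset, 0),
	 *     };
	 *     lp_build_intrinsic(ctx->ac.builder,
	 *                        "llvm.amdgcn.init.exec.from.input",
	 *                        ctx->voidt, args, 2, LP_FUNC_ATTR_CONVERGENT);
	 *
	 * Merged GFX9 shaders need this because the second half of a merged
	 * shader must enable exactly the lanes the first half ran with.
	 */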
case PIPE_SHADER_VERTEX:
ctx->load_input = declare_input_vs;
if (shader->key.as_ls)
- bld_base->emit_epilogue = si_llvm_emit_ls_epilogue;
+ ctx->abi.emit_outputs = si_llvm_emit_ls_epilogue;
else if (shader->key.as_es)
- bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- else {
+ ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
+ else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
- bld_base->emit_epilogue = si_tgsi_emit_epilogue;
- }
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_TESS_CTRL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tcs;
+ ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = fetch_output_tcs;
bld_base->emit_store = store_output_tcs;
- bld_base->emit_epilogue = si_llvm_emit_tcs_epilogue;
+ ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
+ ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
+ ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_TESS_EVAL:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_tes;
+ ctx->abi.load_tess_varyings = si_nir_load_input_tes;
+ ctx->abi.load_tess_coord = si_load_tess_coord;
+ ctx->abi.load_tess_level = si_load_tess_level;
+ ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
if (shader->key.as_es)
- bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
- else {
+ ctx->abi.emit_outputs = si_llvm_emit_es_epilogue;
+ else
ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue;
- bld_base->emit_epilogue = si_tgsi_emit_epilogue;
- }
+ bld_base->emit_epilogue = si_tgsi_emit_epilogue;
break;
case PIPE_SHADER_GEOMETRY:
bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
- bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
+ ctx->abi.load_inputs = si_nir_load_input_gs;
+ ctx->abi.emit_vertex = si_llvm_emit_vertex;
+ ctx->abi.emit_primitive = si_llvm_emit_primitive;
+ ctx->abi.emit_outputs = si_llvm_emit_gs_epilogue;
+ bld_base->emit_epilogue = si_tgsi_emit_gs_epilogue;
break;
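/* Editor's note on the restructuring above: each stage's epilogue is now
 * reached through the shared ac_shader_abi callbacks (emit_outputs, plus
 * emit_vertex/emit_primitive for GS) rather than TGSI-specific bld_base
 * hooks; the TGSI path keeps only the thin si_tgsi_emit_epilogue /
 * si_tgsi_emit_gs_epilogue wrappers. That is what lets the NIR-oriented
 * callbacks (load_tess_varyings, load_inputs, store_tcs_outputs, ...) and
 * the TGSI path share one set of epilogues.
 */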
case PIPE_SHADER_FRAGMENT:
ctx->load_input = declare_input_fs;
* For monolithic merged shaders, the first shader is wrapped in an
* if-block together with its prolog in si_build_wrapper_function.
*/
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
if (!is_monolithic &&
sel->info.num_instructions > 1 && /* not empty shader */
(shader->key.as_es || shader->key.as_ls) &&
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
ctx->type == PIPE_SHADER_GEOMETRY) {
if (!is_monolithic)
- si_init_exec_full_mask(ctx);
+ ac_init_exec_full_mask(&ctx->ac);
/* The barrier must execute for all shaders in a
* threadgroup.
key->vs_prolog.num_input_sgprs = num_input_sgprs;
key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
key->vs_prolog.as_ls = shader_out->key.as_ls;
+ key->vs_prolog.as_es = shader_out->key.as_es;
if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
key->vs_prolog.as_ls = 1;
key->vs_prolog.num_merged_next_stage_vgprs = 2;
} else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
+ key->vs_prolog.as_es = 1;
key->vs_prolog.num_merged_next_stage_vgprs = 5;
}
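/* Editor's note: the prolog key must record as_es (like as_ls) because
 * compiled VS prologs are cached and reused by key; one built for a merged
 * ES feeding a GFX9 GS (5 merged next-stage VGPRs) cannot stand in for an
 * LS one (2 VGPRs). The shader-part compiler rebuilds shader.key.as_ls /
 * as_es from these bits (see the PIPE_SHADER_VERTEX case further down).
 */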
si_init_function_info(&fninfo);
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
num_sgprs = 8 + GFX9_GS_NUM_USER_SGPR;
num_vgprs = 5; /* ES inputs are not needed by GS */
} else {
* with registers here. The main shader part will set the correct EXEC
* mask.
*/
- if (ctx->screen->b.chip_class >= GFX9 && !key->gs_prolog.is_monolithic)
- si_init_exec_full_mask(ctx);
+ if (ctx->screen->info.chip_class >= GFX9 && !key->gs_prolog.is_monolithic)
+ ac_init_exec_full_mask(&ctx->ac);
/* Copy inputs to outputs. This should be a no-op, as the registers match,
* but it will prevent the compiler from overwriting them unintentionally.
LLVMValueRef vtx_in[6], vtx_out[6];
LLVMValueRef prim_id, rotate;
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
for (unsigned i = 0; i < 3; i++) {
vtx_in[i*2] = unpack_param(ctx, gfx9_vtx_params[i], 0, 16);
vtx_in[i*2+1] = unpack_param(ctx, gfx9_vtx_params[i], 16, 16);
vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, "");
}
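/* Editor's note: on GFX9 the six ES vertex offsets arrive packed as three
 * VGPRs with two 16-bit values each, hence the unpack_param(..., 0, 16) /
 * unpack_param(..., 16, 16) pairs above, i.e. roughly:
 *
 *     vtx_in[2*i]   = gfx9_vtx_params[i] & 0xffff;
 *     vtx_in[2*i+1] = gfx9_vtx_params[i] >> 16;
 *
 * and the rotated results are re-packed the same way below.
 */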
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
for (unsigned i = 0; i < 3; i++) {
LLVMValueRef hi, out;
si_get_max_workgroup_size(ctx->shader));
if (is_merged_shader(ctx->shader))
- si_init_exec_full_mask(ctx);
+ ac_init_exec_full_mask(&ctx->ac);
/* Record the arguments of the function as if they were outputs of
* a previous part.
param_size = ac_get_type_size(param_type) / 4;
is_sgpr = ac_is_sgpr_param(param);
- if (is_sgpr) {
-#if HAVE_LLVM < 0x0400
- LLVMRemoveAttribute(param, LLVMByValAttribute);
-#else
- unsigned kind_id = LLVMGetEnumAttributeKindForName("byval", 5);
- LLVMRemoveEnumAttributeAtIndex(parts[part], param_idx + 1, kind_id);
-#endif
+ if (is_sgpr)
lp_add_function_attr(parts[part], param_idx + 1, LP_FUNC_ATTR_INREG);
- }
assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
assert(is_sgpr || out_idx >= num_out_sgpr);
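/* Editor's note: LP_FUNC_ATTR_INREG maps to LLVM's "inreg" parameter
 * attribute, which the AMDGPU backend uses to assign an argument to SGPRs;
 * without it, the wrapper would pass values the parts expect as scalars in
 * VGPRs instead.
 */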
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
- if (si_can_dump_shader(&sscreen->b, sel->info.processor) &&
- !(sscreen->b.debug_flags & DBG(NO_TGSI))) {
+ if (si_can_dump_shader(sscreen, sel->info.processor) &&
+ !(sscreen->debug_flags & DBG(NO_TGSI))) {
if (sel->tokens)
tgsi_dump(sel->tokens, 0);
else
si_build_wrapper_function(&ctx, parts + !need_prolog,
1 + need_prolog, need_prolog, 0);
} else if (is_monolithic && ctx.type == PIPE_SHADER_TESS_CTRL) {
- if (sscreen->b.chip_class >= GFX9) {
+ if (sscreen->info.chip_class >= GFX9) {
struct si_shader_selector *ls = shader->key.part.tcs.ls;
LLVMValueRef parts[4];
bool vs_needs_prolog =
si_build_wrapper_function(&ctx, parts, 2, 0, 0);
}
} else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {
- if (ctx.screen->b.chip_class >= GFX9) {
+ if (ctx.screen->info.chip_class >= GFX9) {
struct si_shader_selector *es = shader->key.part.gs.es;
LLVMValueRef es_prolog = NULL;
LLVMValueRef es_main = NULL;
union si_shader_part_key vs_prolog_key;
si_get_vs_prolog_key(&es->info,
shader->info.num_input_sgprs,
- &shader->key.part.tcs.ls_prolog,
+ &shader->key.part.gs.vs_prolog,
shader, &vs_prolog_key);
vs_prolog_key.vs_prolog.is_monolithic = true;
si_build_vs_prolog_function(&ctx, &vs_prolog_key);
si_optimize_vs_outputs(&ctx);
if ((debug && debug->debug_message) ||
- si_can_dump_shader(&sscreen->b, ctx.type))
+ si_can_dump_shader(sscreen, ctx.type))
si_count_scratch_private_memory(&ctx);
/* Compile to bytecode. */
if (sel->type == PIPE_SHADER_COMPUTE) {
unsigned wave_size = 64;
unsigned max_vgprs = 256;
- unsigned max_sgprs = sscreen->b.chip_class >= VI ? 800 : 512;
+ unsigned max_sgprs = sscreen->info.chip_class >= VI ? 800 : 512;
unsigned max_sgprs_per_wave = 128;
unsigned max_block_threads = si_get_max_workgroup_size(shader);
unsigned min_waves_per_cu = DIV_ROUND_UP(max_block_threads, wave_size);
switch (type) {
case PIPE_SHADER_VERTEX:
+ shader.key.as_ls = key->vs_prolog.as_ls;
+ shader.key.as_es = key->vs_prolog.as_es;
break;
case PIPE_SHADER_TESS_CTRL:
assert(!prolog);
static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
{
LLVMValueRef ptr[2], list;
+ bool is_merged_shader =
+ ctx->screen->info.chip_class >= GFX9 &&
+ (ctx->type == PIPE_SHADER_TESS_CTRL ||
+ ctx->type == PIPE_SHADER_GEOMETRY ||
+ ctx->shader->key.as_ls || ctx->shader->key.as_es);
/* Get the pointer to rw buffers. */
- ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
- ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
+ ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
+ ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI);
list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
list = LLVMBuildIntToPtr(ctx->ac.builder, list,
- si_const_array(ctx->v4i32, SI_NUM_RW_BUFFERS), "");
+ ac_array_in_const_addr_space(ctx->v4i32), "");
return list;
}
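/* Editor's note: the sequence above is the usual recipe for turning two
 * 32-bit user SGPRs into a usable descriptor-array pointer:
 *
 *     (lo, hi)  --lp_build_gather_values-->  v2i32
 *     v2i32     --bitcast-->                 i64
 *     i64       --inttoptr-->                [v4i32] addrspace(CONST)*
 *
 * The "+ 8" presumably skips the SGPRs reserved at the start of merged
 * GFX9 shaders before the user SGPRs; prologs index the raw parameter
 * list because they are built without the main function's fninfo
 * bookkeeping.
 */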
si_init_exec_from_input(ctx, 3, 0);
if (key->vs_prolog.as_ls &&
- (ctx->screen->b.family == CHIP_VEGA10 ||
- ctx->screen->b.family == CHIP_RAVEN)) {
+ ctx->screen->has_ls_vgpr_init_bug) {
/* If there are no HS threads, SPI loads the LS VGPRs
* starting at VGPR 0. Shift them back to where they
* belong.
si_init_function_info(&fninfo);
- if (ctx->screen->b.chip_class >= GFX9) {
+ if (ctx->screen->info.chip_class >= GFX9) {
add_arg(&fninfo, ARG_SGPR, ctx->i64);
ctx->param_tcs_offchip_offset = add_arg(&fninfo, ARG_SGPR, ctx->i32);
add_arg(&fninfo, ARG_SGPR, ctx->i32); /* wave info */
/* Create the function. */
si_create_function(ctx, "tcs_epilog", NULL, 0, &fninfo,
- ctx->screen->b.chip_class >= CIK ? 128 : 64);
- declare_lds_as_pointer(ctx);
+ ctx->screen->info.chip_class >= CIK ? 128 : 64);
+ ac_declare_lds_as_pointer(&ctx->ac);
func = ctx->main_fn;
LLVMValueRef invoc0_tess_factors[6];
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- if (sscreen->b.chip_class >= GFX9) {
+ if (sscreen->info.chip_class >= GFX9) {
struct si_shader *ls_main_part =
shader->key.part.tcs.ls->main_shader_part_ls;
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- if (sscreen->b.chip_class >= GFX9) {
+ if (sscreen->info.chip_class >= GFX9) {
struct si_shader *es_main_part =
shader->key.part.gs.es->main_shader_part_es;
* Make sure we have at least 4k of LDS in use to avoid the bug.
* It applies to workgroup sizes of more than one wavefront.
*/
- if (sscreen->b.family == CHIP_BONAIRE ||
- sscreen->b.family == CHIP_KABINI ||
- sscreen->b.family == CHIP_MULLINS)
+ if (sscreen->info.family == CHIP_BONAIRE ||
+ sscreen->info.family == CHIP_KABINI ||
+ sscreen->info.family == CHIP_MULLINS)
*lds_size = MAX2(*lds_size, 8);
}
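/* Editor's note: assuming *lds_size here is in the 512-byte allocation
 * granularity these CIK parts use (cf. lds_increment above),
 * MAX2(*lds_size, 8) enforces 8 * 512 = 4 KiB, matching the "at least 4k
 * of LDS" comment.
 */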
r600_resource_reference(&shader->bo, NULL);
if (!shader->is_binary_shared)
- si_radeon_shader_binary_clean(&shader->binary);
+ ac_shader_binary_clean(&shader->binary);
free(shader->shader_log);
}