From 625dcbbc45665459737c9d028f268fd6782472f3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 23 Mar 2018 11:20:24 +0100 Subject: [PATCH] amd/common: pass address components individually to ac_build_image_intrinsic MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This is in preparation for the new image intrinsics. Acked-by: Marek Olšák --- src/amd/common/ac_llvm_build.c | 101 ++++- src/amd/common/ac_llvm_build.h | 14 +- src/amd/common/ac_nir_to_llvm.c | 365 ++++++------------ .../auxiliary/gallivm/lp_bld_tgsi_action.h | 2 +- .../drivers/radeonsi/si_shader_tgsi_mem.c | 222 ++++------- 5 files changed, 295 insertions(+), 409 deletions(-) diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c index 77b07989430..22aac7cbdb7 100644 --- a/src/amd/common/ac_llvm_build.c +++ b/src/amd/common/ac_llvm_build.c @@ -37,6 +37,7 @@ #include "util/bitscan.h" #include "util/macros.h" #include "util/u_atomic.h" +#include "util/u_math.h" #include "sid.h" #include "shader_enums.h" @@ -1445,14 +1446,61 @@ void ac_build_export_null(struct ac_llvm_context *ctx) ac_build_export(ctx, &args); } +static unsigned ac_num_coords(enum ac_image_dim dim) +{ + switch (dim) { + case ac_image_1d: + return 1; + case ac_image_2d: + case ac_image_1darray: + return 2; + case ac_image_3d: + case ac_image_cube: + case ac_image_2darray: + case ac_image_2dmsaa: + return 3; + case ac_image_2darraymsaa: + return 4; + default: + unreachable("ac_num_coords: bad dim"); + } +} + +static unsigned ac_num_derivs(enum ac_image_dim dim) +{ + switch (dim) { + case ac_image_1d: + case ac_image_1darray: + return 2; + case ac_image_2d: + case ac_image_2darray: + case ac_image_cube: + return 4; + case ac_image_3d: + return 6; + case ac_image_2dmsaa: + case ac_image_2darraymsaa: + default: + unreachable("derivatives not supported"); + } +} + LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a) { - LLVMValueRef args[11]; - unsigned num_args = 0; + LLVMValueRef args[16]; const char *name = NULL; char intr_name[128], type[64]; + assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || + !a->level_zero); + assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip) || + a->lod); + assert((a->bias ? 1 : 0) + + (a->lod ? 1 : 0) + + (a->level_zero ? 1 : 0) + + (a->derivs[0] ? 1 : 0) <= 1); + bool sample = a->opcode == ac_image_sample || a->opcode == ac_image_gather4 || a->opcode == ac_image_get_lod; @@ -1463,10 +1511,38 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, if (a->opcode == ac_image_get_lod) da = false; + unsigned num_coords = + a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0; + LLVMValueRef addr; + unsigned num_addr = 0; + + if (a->offset) + args[num_addr++] = ac_to_integer(ctx, a->offset); + if (a->bias) + args[num_addr++] = ac_to_integer(ctx, a->bias); + if (a->compare) + args[num_addr++] = ac_to_integer(ctx, a->compare); + if (a->derivs[0]) { + unsigned num_derivs = ac_num_derivs(a->dim); + for (unsigned i = 0; i < num_derivs; ++i) + args[num_addr++] = ac_to_integer(ctx, a->derivs[i]); + } + for (unsigned i = 0; i < num_coords; ++i) + args[num_addr++] = ac_to_integer(ctx, a->coords[i]); + if (a->lod) + args[num_addr++] = ac_to_integer(ctx, a->lod); + + unsigned pad_goal = util_next_power_of_two(num_addr); + while (num_addr < pad_goal) + args[num_addr++] = LLVMGetUndef(ctx->i32); + + addr = ac_build_gather_values(ctx, args, num_addr); + + unsigned num_args = 0; if (sample) - args[num_args++] = ac_to_float(ctx, a->addr); + args[num_args++] = ac_to_float(ctx, addr); else - args[num_args++] = a->addr; + args[num_args++] = ac_to_integer(ctx, addr); args[num_args++] = a->resource; if (sample) @@ -1505,12 +1581,15 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type, sizeof(type)); + bool lod_suffix = + a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4); + snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32", name, a->compare ? ".c" : "", a->bias ? ".b" : - a->lod ? ".l" : - a->deriv ? ".d" : + lod_suffix ? ".l" : + a->derivs[0] ? ".d" : a->level_zero ? ".lz" : "", a->offset ? ".o" : "", type); @@ -2481,12 +2560,10 @@ void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask, fmask_load.dmask = 0xf; fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d; - LLVMValueRef fmask_addr[4]; - memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3); - fmask_addr[3] = LLVMGetUndef(ac->i32); - - fmask_load.addr = ac_build_gather_values(ac, fmask_addr, - is_array_tex ? 4 : 2); + fmask_load.coords[0] = addr[0]; + fmask_load.coords[1] = addr[1]; + if (is_array_tex) + fmask_load.coords[2] = addr[2]; LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load); fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value, diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h index 328eddc9a77..1691a809381 100644 --- a/src/amd/common/ac_llvm_build.h +++ b/src/amd/common/ac_llvm_build.h @@ -331,18 +331,18 @@ enum ac_image_dim { struct ac_image_args { enum ac_image_opcode opcode; enum ac_image_dim dim; - bool level_zero; - bool bias; - bool lod; - bool deriv; - bool compare; - bool offset; LLVMValueRef resource; LLVMValueRef sampler; - LLVMValueRef addr; + LLVMValueRef offset; + LLVMValueRef bias; + LLVMValueRef compare; + LLVMValueRef derivs[6]; + LLVMValueRef coords[4]; + LLVMValueRef lod; // also used by ac_image_get_resinfo unsigned dmask; bool unorm; + bool level_zero; }; LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c index de3754d72be..a0e18379996 100644 --- a/src/amd/common/ac_nir_to_llvm.c +++ b/src/amd/common/ac_nir_to_llvm.c @@ -1152,12 +1152,9 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, const nir_tex_instr *instr) { enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type); - LLVMValueRef coord = args->addr; LLVMValueRef half_texel[2]; LLVMValueRef compare_cube_wa = NULL; LLVMValueRef result; - int c; - unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare; //TODO Rect { @@ -1166,11 +1163,11 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, txq_args.dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array); txq_args.opcode = ac_image_get_resinfo; txq_args.dmask = 0xf; - txq_args.addr = ctx->i32_0; + txq_args.lod = ctx->i32_0; txq_args.resource = args->resource; LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args); - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, false), ""); half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, ""); @@ -1180,19 +1177,14 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, } } - LLVMValueRef orig_coords = args->addr; + LLVMValueRef orig_coords[2] = { args->coords[0], args->coords[1] }; - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef tmp; - LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); - tmp = LLVMBuildExtractElement(ctx->builder, coord, index, ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, ""); - tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); - tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, ""); - coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, ""); + tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, ""); + args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], ""); } - /* * Apparantly cube has issue with integer types that the workaround doesn't solve, * so this tests if the format is 8_8_8_8 and an integer type do an alternate @@ -1236,16 +1228,18 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, ""); /* don't modify the coordinates for this case */ - coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, ""); + for (unsigned c = 0; c < 2; ++c) + args->coords[c] = LLVMBuildSelect( + ctx->builder, compare_cube_wa, + orig_coords[c], args->coords[c], ""); } - args->addr = coord; result = ac_build_image_opcode(ctx, args); if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { LLVMValueRef tmp, tmp2; /* if the cube workaround is in place, f2i the result. */ - for (c = 0; c < 4; c++) { + for (unsigned c = 0; c < 4; c++) { tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), ""); if (stype == GLSL_TYPE_UINT) tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, ""); @@ -1263,7 +1257,6 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr, - bool lod_is_zero, struct ac_image_args *args) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { @@ -1272,14 +1265,14 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, if (ctx->abi->gfx9_stride_size_workaround) { return ac_build_buffer_load_format_gfx9_safe(&ctx->ac, args->resource, - args->addr, + args->coords[0], ctx->ac.i32_0, util_last_bit(mask), false, true); } else { return ac_build_buffer_load_format(&ctx->ac, args->resource, - args->addr, + args->coords[0], ctx->ac.i32_0, util_last_bit(mask), false, true); @@ -1287,37 +1280,28 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, } args->opcode = ac_image_sample; - args->compare = instr->is_shadow; switch (instr->op) { case nir_texop_txf: case nir_texop_txf_ms: case nir_texop_samples_identical: - args->opcode = lod_is_zero || + args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? ac_image_load : ac_image_load_mip; - args->compare = false; - args->offset = false; - break; - case nir_texop_txb: - args->bias = true; - break; - case nir_texop_txl: - if (lod_is_zero) - args->level_zero = true; - else - args->lod = true; + args->level_zero = false; break; case nir_texop_txs: case nir_texop_query_levels: args->opcode = ac_image_get_resinfo; + if (!args->lod) + args->lod = ctx->ac.i32_0; + args->level_zero = false; break; case nir_texop_tex: - if (ctx->stage != MESA_SHADER_FRAGMENT) + if (ctx->stage != MESA_SHADER_FRAGMENT) { + assert(!args->lod); args->level_zero = true; - break; - case nir_texop_txd: - args->deriv = true; + } break; case nir_texop_tg4: args->opcode = ac_image_gather4; @@ -1325,8 +1309,6 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, break; case nir_texop_lod: args->opcode = ac_image_get_lod; - args->compare = false; - args->offset = false; break; default: break; @@ -2081,23 +2063,18 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx, LLVMValueRef sample_index, LLVMValueRef fmask_desc_ptr) { - LLVMValueRef fmask_load_address[4]; + struct ac_image_args args = {0}; LLVMValueRef res; - fmask_load_address[0] = coord_x; - fmask_load_address[1] = coord_y; - if (coord_z) { - fmask_load_address[2] = coord_z; - fmask_load_address[3] = LLVMGetUndef(ctx->i32); - } - - struct ac_image_args args = {0}; + args.coords[0] = coord_x; + args.coords[1] = coord_y; + if (coord_z) + args.coords[2] = coord_z; args.opcode = ac_image_load; args.dim = coord_z ? ac_image_2darray : ac_image_2d; args.resource = fmask_desc_ptr; args.dmask = 0xf; - args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2); res = ac_build_image_opcode(ctx, &args); @@ -2447,7 +2424,7 @@ static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); args.opcode = ac_image_get_resinfo; - args.addr = ctx->ac.i32_0; + args.lod = ctx->ac.i32_0; return ac_build_image_opcode(&ctx->ac, &args); } @@ -2471,7 +2448,7 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, args.dmask = 0xf; args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false); args.opcode = ac_image_get_resinfo; - args.addr = ctx->ac.i32_0; + args.lod = ctx->ac.i32_0; res = ac_build_image_opcode(&ctx->ac, &args); @@ -3217,38 +3194,6 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, desc_type, image, write, bindless); } -static void set_tex_fetch_args(struct ac_llvm_context *ctx, - struct ac_image_args *args, - const nir_tex_instr *instr, - nir_texop op, - LLVMValueRef res_ptr, LLVMValueRef samp_ptr, - LLVMValueRef *param, unsigned count, - unsigned dmask) -{ - unsigned is_rect = 0; - - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - param[count++] = LLVMGetUndef(ctx->i32); - - if (count > 1) - args->addr = ac_build_gather_values(ctx, param, count); - else - args->addr = param[0]; - - args->resource = res_ptr; - args->sampler = samp_ptr; - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) { - args->addr = param[0]; - return; - } - - args->dmask = dmask; - args->unorm = is_rect; - args->dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array); -} - /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL. * * SI-CI: @@ -3313,43 +3258,41 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) { LLVMValueRef result = NULL; struct ac_image_args args = { 0 }; - unsigned dmask = 0xf; - LLVMValueRef address[16]; - LLVMValueRef coords[5]; - LLVMValueRef coord = NULL, lod = NULL, comparator = NULL; - LLVMValueRef bias = NULL, offsets = NULL; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL; + LLVMValueRef fmask_ptr = NULL, sample_index = NULL; LLVMValueRef ddx = NULL, ddy = NULL; - LLVMValueRef derivs[6]; - unsigned chan, count = 0; - unsigned const_src = 0, num_deriv_comp = 0; - bool lod_is_zero = false; + unsigned offset_src = 0; - tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr); + tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr); for (unsigned i = 0; i < instr->num_srcs; i++) { switch (instr->src[i].src_type) { - case nir_tex_src_coord: - coord = get_src(ctx, instr->src[i].src); + case nir_tex_src_coord: { + LLVMValueRef coord = get_src(ctx, instr->src[i].src); + for (unsigned chan = 0; chan < instr->coord_components; ++chan) + args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); break; + } case nir_tex_src_projector: break; case nir_tex_src_comparator: - comparator = get_src(ctx, instr->src[i].src); + if (instr->is_shadow) + args.compare = get_src(ctx, instr->src[i].src); break; case nir_tex_src_offset: - offsets = get_src(ctx, instr->src[i].src); - const_src = i; + args.offset = get_src(ctx, instr->src[i].src); + offset_src = i; break; case nir_tex_src_bias: - bias = get_src(ctx, instr->src[i].src); + if (instr->op == nir_texop_txb) + args.bias = get_src(ctx, instr->src[i].src); break; case nir_tex_src_lod: { nir_const_value *val = nir_src_as_const_value(instr->src[i].src); if (val && val->i32[0] == 0) - lod_is_zero = true; - lod = get_src(ctx, instr->src[i].src); + args.level_zero = true; + else + args.lod = get_src(ctx, instr->src[i].src); break; } case nir_tex_src_ms_index: @@ -3359,7 +3302,6 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) break; case nir_tex_src_ddx: ddx = get_src(ctx, instr->src[i].src); - num_deriv_comp = instr->src[i].src.ssa->num_components; break; case nir_tex_src_ddy: ddy = get_src(ctx, instr->src[i].src); @@ -3373,13 +3315,13 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { - result = get_buffer_size(ctx, res_ptr, true); + result = get_buffer_size(ctx, args.resource, true); goto write_result; } if (instr->op == nir_texop_texture_samples) { LLVMValueRef res, samples, is_msaa; - res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, ""); + res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, ""); samples = LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), ""); is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, @@ -3401,18 +3343,14 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) goto write_result; } - if (coord) - for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan); - - if (offsets && instr->op != nir_texop_txf) { + if (args.offset && instr->op != nir_texop_txf) { LLVMValueRef offset[3], pack; - for (chan = 0; chan < 3; ++chan) + for (unsigned chan = 0; chan < 3; ++chan) offset[chan] = ctx->ac.i32_0; - args.offset = true; - for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) { - offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan); + unsigned num_components = ac_get_llvm_num_components(args.offset); + for (unsigned chan = 0; chan < num_components; chan++) { + offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan); offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), ""); if (chan) @@ -3421,31 +3359,18 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) } pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - address[count++] = pack; - + args.offset = pack; } - /* pack LOD bias value */ - if (instr->op == nir_texop_txb && bias) { - address[count++] = bias; - } - - /* Pack depth comparison value */ - if (instr->is_shadow && comparator) { - LLVMValueRef z = ac_to_float(&ctx->ac, - ac_llvm_extract_elem(&ctx->ac, comparator, 0)); - - /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, - * so the depth comparison value isn't clamped for Z16 and - * Z24 anymore. Do it manually here. - * - * It's unnecessary if the original texture format was - * Z32_FLOAT, but we don't know that here. - */ - if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) - z = ac_build_clamp(&ctx->ac, z); - address[count++] = z; - } + /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT, + * so the depth comparison value isn't clamped for Z16 and + * Z24 anymore. Do it manually here. + * + * It's unnecessary if the original texture format was + * Z32_FLOAT, but we don't know that here. + */ + if (args.compare && ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference) + args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare)); /* pack derivatives */ if (ddx || ddy) { @@ -3453,7 +3378,6 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) switch (instr->sampler_dim) { case GLSL_SAMPLER_DIM_3D: case GLSL_SAMPLER_DIM_CUBE: - num_deriv_comp = 3; num_src_deriv_channels = 3; num_dest_deriv_channels = 3; break; @@ -3461,121 +3385,76 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) default: num_src_deriv_channels = 2; num_dest_deriv_channels = 2; - num_deriv_comp = 2; break; case GLSL_SAMPLER_DIM_1D: num_src_deriv_channels = 1; if (ctx->ac.chip_class >= GFX9) { num_dest_deriv_channels = 2; - num_deriv_comp = 2; } else { num_dest_deriv_channels = 1; - num_deriv_comp = 1; } break; } for (unsigned i = 0; i < num_src_deriv_channels; i++) { - derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i)); - derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i)); + args.derivs[i] = ac_to_float(&ctx->ac, + ac_llvm_extract_elem(&ctx->ac, ddx, i)); + args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, + ac_llvm_extract_elem(&ctx->ac, ddy, i)); } for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) { - derivs[i] = ctx->ac.f32_0; - derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; + args.derivs[i] = ctx->ac.f32_0; + args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0; } } - if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) { - for (chan = 0; chan < instr->coord_components; chan++) - coords[chan] = ac_to_float(&ctx->ac, coords[chan]); + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) { + for (unsigned chan = 0; chan < instr->coord_components; chan++) + args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]); if (instr->coord_components == 3) - coords[3] = LLVMGetUndef(ctx->ac.f32); + args.coords[3] = LLVMGetUndef(ctx->ac.f32); ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array, - instr->op == nir_texop_lod, coords, derivs); - if (num_deriv_comp) - num_deriv_comp--; + instr->op == nir_texop_lod, args.coords, args.derivs); } - if (ddx || ddy) { - for (unsigned i = 0; i < num_deriv_comp * 2; i++) - address[count++] = derivs[i]; - } - - /* Pack texture coordinates */ - if (coord) { - address[count++] = coords[0]; - if (instr->coord_components > 1) { - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) { - coords[1] = apply_round_slice(&ctx->ac, coords[1]); - } - address[count++] = coords[1]; - } - if (instr->coord_components > 2) { - if ((instr->sampler_dim == GLSL_SAMPLER_DIM_2D || - instr->sampler_dim == GLSL_SAMPLER_DIM_MS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || - instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && - instr->is_array && - instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { - coords[2] = apply_round_slice(&ctx->ac, coords[2]); - } - address[count++] = coords[2]; - } - - if (ctx->ac.chip_class >= GFX9) { - LLVMValueRef filler; - if (instr->op == nir_texop_txf) - filler = ctx->ac.i32_0; - else - filler = LLVMConstReal(ctx->ac.f32, 0.5); - - if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) { - /* No nir_texop_lod, because it does not take a slice - * even with array textures. */ - if (instr->is_array && instr->op != nir_texop_lod ) { - address[count] = address[count - 1]; - address[count - 1] = filler; - count++; - } else - address[count++] = filler; - } - } + /* Texture coordinates fixups */ + if (instr->coord_components > 2 && + (instr->sampler_dim == GLSL_SAMPLER_DIM_2D || + instr->sampler_dim == GLSL_SAMPLER_DIM_MS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS || + instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) && + instr->is_array && + instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) { + args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]); } - /* Pack LOD */ - if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero)) { - address[count++] = lod; - } else if (instr->op == nir_texop_txf_ms && sample_index) { - address[count++] = sample_index; - } else if(instr->op == nir_texop_txs) { - count = 0; - if (lod) - address[count++] = lod; + if (ctx->ac.chip_class >= GFX9 && + instr->sampler_dim == GLSL_SAMPLER_DIM_1D && + instr->op != nir_texop_lod) { + LLVMValueRef filler; + if (instr->op == nir_texop_txf) + filler = ctx->ac.i32_0; else - address[count++] = ctx->ac.i32_0; - } + filler = LLVMConstReal(ctx->ac.f32, 0.5); - for (chan = 0; chan < count; chan++) { - address[chan] = LLVMBuildBitCast(ctx->ac.builder, - address[chan], ctx->ac.i32, ""); + if (instr->is_array) + args.coords[2] = args.coords[1]; + args.coords[1] = filler; } + /* Pack sample index */ + if (instr->op == nir_texop_txf_ms && sample_index) + args.coords[instr->coord_components] = sample_index; + if (instr->op == nir_texop_samples_identical) { - LLVMValueRef txf_address[4]; struct ac_image_args txf_args = { 0 }; - unsigned txf_count = count; - memcpy(txf_address, address, sizeof(txf_address)); - - if (!instr->is_array) - txf_address[2] = ctx->ac.i32_0; - txf_address[3] = ctx->ac.i32_0; - - set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf, - fmask_ptr, NULL, - txf_address, txf_count, 0xf); + memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords)); - result = build_tex_intrinsic(ctx, instr, false, &txf_args); + txf_args.dmask = 0xf; + txf_args.resource = fmask_ptr; + txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d; + result = build_tex_intrinsic(ctx, instr, &txf_args); result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, ""); result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0); @@ -3585,42 +3464,38 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr) if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS && instr->op != nir_texop_txs) { unsigned sample_chan = instr->is_array ? 3 : 2; - address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac, - address[0], - address[1], - instr->is_array ? address[2] : NULL, - address[sample_chan], - fmask_ptr); + args.coords[sample_chan] = adjust_sample_index_using_fmask( + &ctx->ac, args.coords[0], args.coords[1], + instr->is_array ? args.coords[2] : NULL, + args.coords[sample_chan], fmask_ptr); } - if (offsets && instr->op == nir_texop_txf) { + if (args.offset && instr->op == nir_texop_txf) { nir_const_value *const_offset = - nir_src_as_const_value(instr->src[const_src].src); - int num_offsets = instr->src[const_src].src.ssa->num_components; + nir_src_as_const_value(instr->src[offset_src].src); + int num_offsets = instr->src[offset_src].src.ssa->num_components; assert(const_offset); num_offsets = MIN2(num_offsets, instr->coord_components); - if (num_offsets > 2) - address[2] = LLVMBuildAdd(ctx->ac.builder, - address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), ""); - if (num_offsets > 1) - address[1] = LLVMBuildAdd(ctx->ac.builder, - address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), ""); - address[0] = LLVMBuildAdd(ctx->ac.builder, - address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), ""); - + for (unsigned i = 0; i < num_offsets; ++i) { + args.coords[i] = LLVMBuildAdd( + ctx->ac.builder, args.coords[i], + LLVMConstInt(ctx->ac.i32, const_offset->i32[i], false), ""); + } + args.offset = NULL; } /* TODO TG4 support */ + args.dmask = 0xf; if (instr->op == nir_texop_tg4) { if (instr->is_shadow) - dmask = 1; + args.dmask = 1; else - dmask = 1 << instr->component; + args.dmask = 1 << instr->component; } - set_tex_fetch_args(&ctx->ac, &args, instr, instr->op, - res_ptr, samp_ptr, address, count, dmask); - result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args); + if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) + args.dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array); + result = build_tex_intrinsic(ctx, instr, &args); if (instr->op == nir_texop_query_levels) result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), ""); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h index c92517fee28..d30f9da539e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h @@ -50,7 +50,7 @@ struct lp_build_emit_data { * args[0] = s0.x; * args[1] = s1.x; */ - LLVMValueRef args[12]; + LLVMValueRef args[18]; /** * Number of arguments in the args array. diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index a54db9e8596..1c653839aea 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -1008,32 +1008,16 @@ static void atomic_emit( static void set_tex_fetch_args(struct si_shader_context *ctx, struct lp_build_emit_data *emit_data, - unsigned target, - LLVMValueRef res_ptr, LLVMValueRef samp_ptr, - LLVMValueRef *param, unsigned count, - unsigned dmask) + struct ac_image_args *args, + unsigned target) { - struct ac_image_args args = {}; - - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - param[count++] = LLVMGetUndef(ctx->i32); - - if (count > 1) - args.addr = lp_build_gather_values(&ctx->gallivm, param, count); - else - args.addr = param[0]; - - args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); - args.resource = res_ptr; - args.sampler = samp_ptr; - args.dmask = dmask; - args.unorm = target == TGSI_TEXTURE_RECT || - target == TGSI_TEXTURE_SHADOWRECT; + args->dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); + args->unorm = target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT; /* Ugly, but we seem to have no other choice right now. */ - STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args)); - memcpy(emit_data->args, &args, sizeof(args)); + STATIC_ASSERT(sizeof(*args) <= sizeof(emit_data->args)); + memcpy(emit_data->args, args, sizeof(*args)); } static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, @@ -1083,7 +1067,7 @@ static void resq_fetch_args( &emit_data->args[0]); emit_data->arg_count = 1; } else { - LLVMValueRef res_ptr; + struct ac_image_args args = {}; unsigned image_target; if (inst->Memory.Texture == TGSI_TEXTURE_3D) @@ -1092,10 +1076,10 @@ static void resq_fetch_args( image_target = inst->Memory.Texture; image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, - &res_ptr); - set_tex_fetch_args(ctx, emit_data, image_target, - res_ptr, NULL, &ctx->i32_0, 1, - 0xf); + &args.resource); + args.lod = ctx->i32_0; + args.dmask = 0xf; + set_tex_fetch_args(ctx, emit_data, &args, image_target); } } @@ -1262,22 +1246,21 @@ static void txq_fetch_args( struct si_shader_context *ctx = si_shader_context(bld_base); const struct tgsi_full_instruction *inst = emit_data->inst; unsigned target = inst->Texture.Texture; - LLVMValueRef res_ptr; - LLVMValueRef address; + struct ac_image_args args = {}; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL); + tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL); if (target == TGSI_TEXTURE_BUFFER) { /* Read the size from the buffer descriptor directly. */ - emit_data->args[0] = get_buffer_size(bld_base, res_ptr); + emit_data->args[0] = get_buffer_size(bld_base, args.resource); return; } /* Textures - set the mip level. */ - address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + args.dmask = 0xf; - set_tex_fetch_args(ctx, emit_data, target, res_ptr, - NULL, &address, 1, 0xf); + set_tex_fetch_args(ctx, emit_data, &args, target); } static void txq_emit(const struct lp_build_tgsi_action *action, @@ -1310,22 +1293,17 @@ static void tex_fetch_args( const struct tgsi_full_instruction *inst = emit_data->inst; unsigned opcode = inst->Instruction.Opcode; unsigned target = inst->Texture.Texture; - LLVMValueRef coords[5], derivs[6]; - LLVMValueRef address[16]; - unsigned num_coords = tgsi_util_get_texture_coord_dim(target); + struct ac_image_args args = {}; int ref_pos = tgsi_util_get_shadow_ref_src_index(target); - unsigned count = 0; unsigned chan; - unsigned num_deriv_channels = 0; bool has_offset = inst->Texture.NumOffsets > 0; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL; - unsigned dmask = 0xf; + LLVMValueRef fmask_ptr = NULL; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); + tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr); if (target == TGSI_TEXTURE_BUFFER) { emit_data->dst_type = ctx->v4f32; - emit_data->args[0] = res_ptr; + emit_data->args[0] = args.resource; emit_data->args[1] = ctx->i32_0; emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); emit_data->arg_count = 3; @@ -1333,20 +1311,19 @@ static void tex_fetch_args( } /* Fetch and project texture coordinates */ - coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + args.coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); for (chan = 0; chan < 3; chan++) { - coords[chan] = lp_build_emit_fetch(bld_base, + args.coords[chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan); if (opcode == TGSI_OPCODE_TXP) - coords[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_DIV, - coords[chan], - coords[3]); + args.coords[chan] = lp_build_emit_llvm_binary( + bld_base, TGSI_OPCODE_DIV, + args.coords[chan], args.coords[3]); } if (opcode == TGSI_OPCODE_TXP) - coords[3] = ctx->ac.f32_1; + args.coords[3] = ctx->ac.f32_1; /* Pack offsets. */ if (has_offset && @@ -1371,14 +1348,14 @@ static void tex_fetch_args( pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - address[count++] = pack; + args.offset = pack; } /* Pack LOD bias value */ if (opcode == TGSI_OPCODE_TXB) - address[count++] = coords[3]; + args.bias = args.coords[3]; if (opcode == TGSI_OPCODE_TXB2) - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); /* Pack depth comparison value */ if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) { @@ -1388,7 +1365,7 @@ static void tex_fetch_args( z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); } else { assert(ref_pos >= 0); - z = coords[ref_pos]; + z = args.coords[ref_pos]; } /* Section 8.23.1 (Depth Texture Comparison Mode) of the @@ -1405,7 +1382,7 @@ static void tex_fetch_args( if (ctx->screen->info.chip_class >= VI) { LLVMValueRef upgraded; LLVMValueRef clamped; - upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr, + upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, LLVMConstInt(ctx->i32, 3, false), ""); upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->i32, 29, false), ""); @@ -1414,7 +1391,7 @@ static void tex_fetch_args( z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, ""); } - address[count++] = z; + args.compare = z; } /* Pack user derivatives */ @@ -1425,7 +1402,6 @@ static void tex_fetch_args( case TGSI_TEXTURE_3D: num_src_deriv_channels = 3; num_dst_deriv_channels = 3; - num_deriv_channels = 3; break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: @@ -1435,7 +1411,6 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOW2D_ARRAY: num_src_deriv_channels = 2; num_dst_deriv_channels = 2; - num_deriv_channels = 2; break; case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_SHADOWCUBE: @@ -1444,7 +1419,6 @@ static void tex_fetch_args( /* Cube derivatives will be converted to 2D. */ num_src_deriv_channels = 3; num_dst_deriv_channels = 3; - num_deriv_channels = 2; break; case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -1455,10 +1429,8 @@ static void tex_fetch_args( /* 1D textures are allocated and used as 2D on GFX9. */ if (ctx->screen->info.chip_class >= GFX9) { num_dst_deriv_channels = 2; - num_deriv_channels = 2; } else { num_dst_deriv_channels = 1; - num_deriv_channels = 1; } break; default: @@ -1467,13 +1439,13 @@ static void tex_fetch_args( for (param = 0; param < 2; param++) { for (chan = 0; chan < num_src_deriv_channels; chan++) - derivs[param * num_dst_deriv_channels + chan] = + args.derivs[param * num_dst_deriv_channels + chan] = lp_build_emit_fetch(bld_base, inst, param+1, chan); /* Fill in the rest with zeros. */ for (chan = num_src_deriv_channels; chan < num_dst_deriv_channels; chan++) - derivs[param * num_dst_deriv_channels + chan] = + args.derivs[param * num_dst_deriv_channels + chan] = ctx->ac.f32_0; } } @@ -1487,28 +1459,17 @@ static void tex_fetch_args( target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY, opcode == TGSI_OPCODE_LODQ, - coords, derivs); + args.coords, args.derivs); } else if (tgsi_is_array_sampler(target) && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ && ctx->screen->info.chip_class <= VI) { unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; - coords[array_coord] = + args.coords[array_coord] = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, - &coords[array_coord], 1, 0); + &args.coords[array_coord], 1, 0); } - if (opcode == TGSI_OPCODE_TXD) - for (int i = 0; i < num_deriv_channels * 2; i++) - address[count++] = derivs[i]; - - /* Pack texture coordinates */ - address[count++] = coords[0]; - if (num_coords > 1) - address[count++] = coords[1]; - if (num_coords > 2) - address[count++] = coords[2]; - /* 1D textures are allocated and used as 2D on GFX9. */ if (ctx->screen->info.chip_class >= GFX9) { LLVMValueRef filler; @@ -1522,32 +1483,31 @@ static void tex_fetch_args( if (target == TGSI_TEXTURE_1D || target == TGSI_TEXTURE_SHADOW1D) { - address[count++] = filler; + args.coords[1] = filler; } else if (target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY) { - address[count] = address[count - 1]; - address[count - 1] = filler; - count++; + args.coords[2] = args.coords[1]; + args.coords[1] = filler; } } /* Pack LOD or sample index */ - if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF) - address[count++] = coords[3]; + if (opcode == TGSI_OPCODE_TXL) + args.lod = args.coords[3]; else if (opcode == TGSI_OPCODE_TXL2) - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); - - if (count > 16) { - assert(!"Cannot handle more than 16 texture address parameters"); - count = 16; + args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + else if (opcode == TGSI_OPCODE_TXF) { + if (target == TGSI_TEXTURE_2D_MSAA) { + /* No LOD, but move sample index into the right place. */ + args.coords[2] = args.coords[3]; + } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) { + args.lod = args.coords[3]; + } } - for (chan = 0; chan < count; chan++) - address[chan] = ac_to_integer(&ctx->ac, address[chan]); - if (target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, address, + ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords, target == TGSI_TEXTURE_2D_ARRAY_MSAA); } @@ -1562,7 +1522,7 @@ static void tex_fetch_args( switch (target) { case TGSI_TEXTURE_3D: - address[2] = lp_build_add(uint_bld, address[2], + args.coords[2] = lp_build_add(uint_bld, args.coords[2], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]); /* fall through */ case TGSI_TEXTURE_2D: @@ -1571,16 +1531,16 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D_ARRAY: - address[1] = - lp_build_add(uint_bld, address[1], + args.coords[1] = + lp_build_add(uint_bld, args.coords[1], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]); /* fall through */ case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_1D_ARRAY: case TGSI_TEXTURE_SHADOW1D_ARRAY: - address[0] = - lp_build_add(uint_bld, address[0], + args.coords[0] = + lp_build_add(uint_bld, args.coords[0], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]); break; /* texture offsets do not apply to other texture targets */ @@ -1588,6 +1548,8 @@ static void tex_fetch_args( } } + args.dmask = 0xf; + if (opcode == TGSI_OPCODE_TG4) { unsigned gather_comp = 0; @@ -1611,11 +1573,10 @@ static void tex_fetch_args( gather_comp = CLAMP(gather_comp, 0, 3); } - dmask = 1 << gather_comp; + args.dmask = 1 << gather_comp; } - set_tex_fetch_args(ctx, emit_data, target, res_ptr, - samp_ptr, address, count, dmask); + set_tex_fetch_args(ctx, emit_data, &args, target); } /* Gather4 should follow the same rules as bilinear filtering, but the hardware @@ -1641,14 +1602,7 @@ si_lower_gather4_integer(struct si_shader_context *ctx, { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef wa_8888 = NULL; - LLVMValueRef coord = args->addr; LLVMValueRef half_texel[2]; - /* Texture coordinates start after: - * {offset, bias, z-compare, derivatives} - * Only the offset and z-compare can occur here. - */ - unsigned coord_vgpr_index = (int)args->offset + (int)args->compare; - int c; assert(return_type == TGSI_RETURN_TYPE_SINT || return_type == TGSI_RETURN_TYPE_UINT); @@ -1691,6 +1645,7 @@ si_lower_gather4_integer(struct si_shader_context *ctx, half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); } else { struct tgsi_full_instruction txq_inst = {}; + struct ac_image_args txq_args = {}; struct lp_build_emit_data txq_emit_data = {}; struct lp_build_if_state if_ctx; @@ -1703,13 +1658,15 @@ si_lower_gather4_integer(struct si_shader_context *ctx, txq_inst.Texture.Texture = target; txq_emit_data.inst = &txq_inst; txq_emit_data.dst_type = ctx->v4i32; - set_tex_fetch_args(ctx, &txq_emit_data, target, - args->resource, NULL, &ctx->i32_0, - 1, 0xf); + txq_args.resource = args->resource; + txq_args.sampler = args->sampler; + txq_args.lod = ctx->ac.i32_0; + txq_args.dmask = 0xf; + set_tex_fetch_args(ctx, &txq_emit_data, &txq_args, target); txq_emit(NULL, &ctx->bld_base, &txq_emit_data); /* Compute -0.5 / size. */ - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(builder, txq_emit_data.output[0], LLVMConstInt(ctx->i32, c, 0), ""); @@ -1726,7 +1683,7 @@ si_lower_gather4_integer(struct si_shader_context *ctx, LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block }; - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 }; half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2, values, bb); @@ -1734,19 +1691,13 @@ si_lower_gather4_integer(struct si_shader_context *ctx, } } - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef tmp; - LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); - - tmp = LLVMBuildExtractElement(builder, coord, index, ""); - tmp = ac_to_float(&ctx->ac, tmp); + tmp = ac_to_float(&ctx->ac, args->coords[c]); tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], ""); - tmp = ac_to_integer(&ctx->ac, tmp); - coord = LLVMBuildInsertElement(builder, coord, tmp, index, ""); + args->coords[c] = ac_to_integer(&ctx->ac, tmp); } - args->addr = coord; - return wa_8888; } @@ -1811,8 +1762,6 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ args.opcode = ac_image_sample; - args.compare = tgsi_is_shadow_target(target); - args.offset = inst->Texture.NumOffsets > 0; switch (opcode) { case TGSI_OPCODE_TXF: @@ -1821,13 +1770,9 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA ? ac_image_load : ac_image_load_mip; - args.compare = false; - args.offset = false; break; case TGSI_OPCODE_LODQ: args.opcode = ac_image_get_lod; - args.compare = false; - args.offset = false; break; case TGSI_OPCODE_TEX: case TGSI_OPCODE_TEX2: @@ -1841,14 +1786,11 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXB2: assert(ctx->type == PIPE_SHADER_FRAGMENT); - args.bias = true; break; case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXL2: - args.lod = true; break; case TGSI_OPCODE_TXD: - args.deriv = true; break; case TGSI_OPCODE_TG4: args.opcode = ac_image_gather4; @@ -1897,7 +1839,6 @@ static void si_llvm_emit_txqs( tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); - /* Read the samples from the descriptor directly. */ res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, ""); samples = LLVMBuildExtractElement(ctx->ac.builder, res, @@ -1932,36 +1873,29 @@ static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action, image = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); - LLVMValueRef addr[4]; unsigned chan = 0; - addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) - addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); /* Get the current render target layer index. */ if (ctx->shader->key.mono.u.ps.fbfetch_layered) - addr[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); if (ctx->shader->key.mono.u.ps.fbfetch_msaa) - addr[chan++] = si_get_sample_id(ctx); - - while (chan < 4) - addr[chan++] = LLVMGetUndef(ctx->i32); + args.coords[chan++] = si_get_sample_id(ctx); if (ctx->shader->key.mono.u.ps.fbfetch_msaa) { fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); - ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false); + ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, false); } - addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr)); - args.opcode = ac_image_load; args.resource = image; - args.addr = addr_vec; args.dmask = 0xf; if (ctx->shader->key.mono.u.ps.fbfetch_msaa) args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? -- 2.30.2