src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c

   1 /*
   2  * Copyright 2017 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * on the rights to use, copy, modify, merge, publish, distribute, sub
   9  * license, and/or sell copies of the Software, and to permit persons to whom
  10  * the Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 #include "si_shader_internal.h"
  26 #include "si_pipe.h"
  27 #include "sid.h"
  28 #include "tgsi/tgsi_build.h"
  29 #include "tgsi/tgsi_util.h"
  30 #include "ac_llvm_util.h"
  31
  32 static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
  33                            struct lp_build_emit_data *emit_data,
  34                            LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
  35                            LLVMValueRef *fmask_ptr);
  36
  37 /**
  38  * Given a v8i32 resource descriptor for a buffer, extract the size of the
  39  * buffer in number of elements and return it as an i32.
  40  */
  41 static LLVMValueRef get_buffer_size(
  42         struct lp_build_tgsi_context *bld_base,
  43         LLVMValueRef descriptor)
  44 {
  45         struct si_shader_context *ctx = si_shader_context(bld_base);
  46         LLVMBuilderRef builder = ctx->ac.builder;
  47         LLVMValueRef size =
  48                 LLVMBuildExtractElement(builder, descriptor,
  49                                         LLVMConstInt(ctx->i32, 2, 0), "");
  50
  51         if (ctx->screen->info.chip_class == VI) {
  52                 /* On VI, the descriptor contains the size in bytes,
  53                  * but TXQ must return the size in elements.
  54                  * The stride is always non-zero for resources using TXQ.
  55                  */
  56                 LLVMValueRef stride =
  57                         LLVMBuildExtractElement(builder, descriptor,
  58                                                 ctx->i32_1, "");
  59                 stride = LLVMBuildLShr(builder, stride,
  60                                        LLVMConstInt(ctx->i32, 16, 0), "");
  61                 stride = LLVMBuildAnd(builder, stride,
  62                                       LLVMConstInt(ctx->i32, 0x3FFF, 0), "");
  63
  64                 size = LLVMBuildUDiv(builder, size, stride, "");
  65         }
  66
  67         return size;
  68 }
  69
  70 static LLVMValueRef
  71 shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
  72                          const struct tgsi_full_src_register *reg,
  73                          bool ubo)
  74 {
  75         LLVMValueRef index;
  76
  77         if (!reg->Register.Indirect) {
  78                 index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
  79         } else {
  80                 index = si_get_indirect_index(ctx, &reg->Indirect,
  81                                               1, reg->Register.Index);
  82         }
  83
  84         if (ubo)
  85                 return ctx->abi.load_ubo(&ctx->abi, index);
  86         else
  87                 return ctx->abi.load_ssbo(&ctx->abi, index, false);
  88 }
  89
  90 static enum ac_image_dim
  91 ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
  92 {
  93         switch (target) {
  94         case TGSI_TEXTURE_1D:
  95         case TGSI_TEXTURE_SHADOW1D:
  96                 if (screen->info.chip_class >= GFX9)
  97                         return ac_image_2d;
  98                 return ac_image_1d;
  99         case TGSI_TEXTURE_2D:
 100         case TGSI_TEXTURE_SHADOW2D:
 101         case TGSI_TEXTURE_RECT:
 102         case TGSI_TEXTURE_SHADOWRECT:
 103                 return ac_image_2d;
 104         case TGSI_TEXTURE_3D:
 105                 return ac_image_3d;
 106         case TGSI_TEXTURE_CUBE:
 107         case TGSI_TEXTURE_SHADOWCUBE:
 108         case TGSI_TEXTURE_CUBE_ARRAY:
 109         case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
 110                 return ac_image_cube;
 111         case TGSI_TEXTURE_1D_ARRAY:
 112         case TGSI_TEXTURE_SHADOW1D_ARRAY:
 113                 if (screen->info.chip_class >= GFX9)
 114                         return ac_image_2darray;
 115                 return ac_image_1darray;
 116         case TGSI_TEXTURE_2D_ARRAY:
 117         case TGSI_TEXTURE_SHADOW2D_ARRAY:
 118                 return ac_image_2darray;
 119         case TGSI_TEXTURE_2D_MSAA:
 120                 return ac_image_2dmsaa;
 121         case TGSI_TEXTURE_2D_ARRAY_MSAA:
 122                 return ac_image_2darraymsaa;
 123         default:
 124                 unreachable("unhandled texture type");
 125         }
 126 }
 127
 128 static enum ac_image_dim
 129 ac_image_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
 130 {
 131         enum ac_image_dim dim = ac_texture_dim_from_tgsi_target(screen, target);
 132
 133         /* Match the resource type set in the descriptor. */
 134         if (dim == ac_image_cube ||
 135             (screen->info.chip_class <= VI && dim == ac_image_3d))
 136                 dim = ac_image_2darray;
 137         else if (target == TGSI_TEXTURE_2D && screen->info.chip_class >= GFX9) {
 138                 /* When a single layer of a 3D texture is bound, the shader
 139                  * will refer to a 2D target, but the descriptor has a 3D type.
 140                  * Since the HW ignores BASE_ARRAY in this case, we need to
 141                  * send 3 coordinates. This doesn't hurt when the underlying
 142                  * texture is non-3D.
 143                  */
 144                 dim = ac_image_3d;
 145         }
 146
 147         return dim;
 148 }
 149
 150 /**
 151  * Given a 256-bit resource descriptor, force the DCC enable bit to off.
 152  *
 153  * At least on Tonga, executing image stores on images with DCC enabled and
 154  * non-trivial can eventually lead to lockups. This can occur when an
 155  * application binds an image as read-only but then uses a shader that writes
 156  * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
 157  * program termination) in this case, but it doesn't cost much to be a bit
 158  * nicer: disabling DCC in the shader still leads to undefined results but
 159  * avoids the lockup.
 160  */
 161 static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
 162                                   LLVMValueRef rsrc)
 163 {
 164         if (ctx->screen->info.chip_class <= CIK) {
 165                 return rsrc;
 166         } else {
 167                 LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
 168                 LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
 169                 LLVMValueRef tmp;
 170
 171                 tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
 172                 tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
 173                 return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
 174         }
 175 }
 176
 177 LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
 178                                 LLVMValueRef list, LLVMValueRef index,
 179                                 enum ac_descriptor_type desc_type, bool dcc_off)
 180 {
 181         LLVMBuilderRef builder = ctx->ac.builder;
 182         LLVMValueRef rsrc;
 183
 184         if (desc_type == AC_DESC_BUFFER) {
 185                 index = LLVMBuildMul(builder, index,
 186                                      LLVMConstInt(ctx->i32, 2, 0), "");
 187                 index = LLVMBuildAdd(builder, index,
 188                                      ctx->i32_1, "");
 189                 list = LLVMBuildPointerCast(builder, list,
 190                                             ac_array_in_const32_addr_space(ctx->v4i32), "");
 191         } else {
 192                 assert(desc_type == AC_DESC_IMAGE);
 193         }
 194
 195         rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
 196         if (desc_type == AC_DESC_IMAGE && dcc_off)
 197                 rsrc = force_dcc_off(ctx, rsrc);
 198         return rsrc;
 199 }
 200
 201 /**
 202  * Load the resource descriptor for \p image.
 203  */
 204 static void
 205 image_fetch_rsrc(
 206         struct lp_build_tgsi_context *bld_base,
 207         const struct tgsi_full_src_register *image,
 208         bool is_store, unsigned target,
 209         LLVMValueRef *rsrc)
 210 {
 211         struct si_shader_context *ctx = si_shader_context(bld_base);
 212         LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
 213                                              ctx->param_samplers_and_images);
 214         LLVMValueRef index;
 215         bool dcc_off = is_store;
 216
 217         if (!image->Register.Indirect) {
 218                 const struct tgsi_shader_info *info = bld_base->info;
 219                 unsigned images_writemask = info->images_store |
 220                                             info->images_atomic;
 221
 222                 index = LLVMConstInt(ctx->i32,
 223                                      si_get_image_slot(image->Register.Index), 0);
 224
 225                 if (images_writemask & (1 << image->Register.Index))
 226                         dcc_off = true;
 227         } else {
 228                 /* From the GL_ARB_shader_image_load_store extension spec:
 229                  *
 230                  *    If a shader performs an image load, store, or atomic
 231                  *    operation using an image variable declared as an array,
 232                  *    and if the index used to select an individual element is
 233                  *    negative or greater than or equal to the size of the
 234                  *    array, the results of the operation are undefined but may
 235                  *    not lead to termination.
 236                  */
 237                 index = si_get_bounded_indirect_index(ctx, &image->Indirect,
 238                                                       image->Register.Index,
 239                                                       ctx->num_images);
 240                 index = LLVMBuildSub(ctx->ac.builder,
 241                                      LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
 242                                      index, "");
 243         }
 244
 245         if (image->Register.File != TGSI_FILE_IMAGE) {
 246                 /* Bindless descriptors are accessible from a different pair of
 247                  * user SGPR indices.
 248                  */
 249                 rsrc_ptr = LLVMGetParam(ctx->main_fn,
 250                                         ctx->param_bindless_samplers_and_images);
 251                 index = lp_build_emit_fetch_src(bld_base, image,
 252                                                 TGSI_TYPE_UNSIGNED, 0);
 253
 254                 /* For simplicity, bindless image descriptors use fixed
 255                  * 16-dword slots for now.
 256                  */
 257                 index = LLVMBuildMul(ctx->ac.builder, index,
 258                                      LLVMConstInt(ctx->i32, 2, 0), "");
 259         }
 260
 261         *rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
 262                                    target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
 263                                    dcc_off);
 264 }
 265
 266 static void image_fetch_coords(
 267                 struct lp_build_tgsi_context *bld_base,
 268                 const struct tgsi_full_instruction *inst,
 269                 unsigned src, LLVMValueRef desc,
 270                 LLVMValueRef *coords)
 271 {
 272         struct si_shader_context *ctx = si_shader_context(bld_base);
 273         LLVMBuilderRef builder = ctx->ac.builder;
 274         unsigned target = inst->Memory.Texture;
 275         unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
 276         LLVMValueRef tmp;
 277         int chan;
 278
 279         if (target == TGSI_TEXTURE_2D_MSAA ||
 280             target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
 281                 /* Need the sample index as well. */
 282                 num_coords++;
 283         }
 284
 285         for (chan = 0; chan < num_coords; ++chan) {
 286                 tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
 287                 tmp = ac_to_integer(&ctx->ac, tmp);
 288                 coords[chan] = tmp;
 289         }
 290
 291         if (ctx->screen->info.chip_class >= GFX9) {
 292                 /* 1D textures are allocated and used as 2D on GFX9. */
 293                 if (target == TGSI_TEXTURE_1D) {
 294                         coords[1] = ctx->i32_0;
 295                 } else if (target == TGSI_TEXTURE_1D_ARRAY) {
 296                         coords[2] = coords[1];
 297                         coords[1] = ctx->i32_0;
 298                 } else if (target == TGSI_TEXTURE_2D) {
 299                         /* The hw can't bind a slice of a 3D image as a 2D
 300                          * image, because it ignores BASE_ARRAY if the target
 301                          * is 3D. The workaround is to read BASE_ARRAY and set
 302                          * it as the 3rd address operand for all 2D images.
 303                          */
 304                         LLVMValueRef first_layer, const5, mask;
 305
 306                         const5 = LLVMConstInt(ctx->i32, 5, 0);
 307                         mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
 308                         first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
 309                         first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
 310
 311                         coords[2] = first_layer;
 312                 }
 313         }
 314 }
 315
 316 /**
 317  * Append the resource and indexing arguments for buffer intrinsics.
 318  *
 319  * \param rsrc the v4i32 buffer resource
 320  * \param index index into the buffer (stride-based)
 321  * \param offset byte offset into the buffer
 322  */
 323 static void buffer_append_args(
 324                 struct si_shader_context *ctx,
 325                 struct lp_build_emit_data *emit_data,
 326                 LLVMValueRef rsrc,
 327                 LLVMValueRef index,
 328                 LLVMValueRef offset,
 329                 bool atomic,
 330                 bool force_glc)
 331 {
 332         const struct tgsi_full_instruction *inst = emit_data->inst;
 333         LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
 334         LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
 335
 336         emit_data->args[emit_data->arg_count++] = rsrc;
 337         emit_data->args[emit_data->arg_count++] = index; /* vindex */
 338         emit_data->args[emit_data->arg_count++] = offset; /* voffset */
 339         if (!atomic) {
 340                 emit_data->args[emit_data->arg_count++] =
 341                         force_glc ||
 342                         inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
 343                         i1true : i1false; /* glc */
 344         }
 345         emit_data->args[emit_data->arg_count++] = i1false; /* slc */
 346 }
 347
 348 static void load_emit_buffer(struct si_shader_context *ctx,
 349                              struct lp_build_emit_data *emit_data,
 350                              bool can_speculate, bool allow_smem)
 351 {
 352         const struct tgsi_full_instruction *inst = emit_data->inst;
 353         uint writemask = inst->Dst[0].Register.WriteMask;
 354         uint count = util_last_bit(writemask);
 355         LLVMValueRef *args = emit_data->args;
 356
 357         /* Don't use SMEM for shader buffer loads, because LLVM doesn't
 358          * select SMEM for SI.load.const with a non-constant offset, and
 359          * constant offsets practically don't exist with shader buffers.
 360          *
 361          * Also, SI.load.const doesn't use inst_offset when it's lowered
 362          * to VMEM, so we just end up with more VALU instructions in the end
 363          * and no benefit.
 364          *
 365          * TODO: Remove this line once LLVM can select SMEM with a non-constant
 366          *       offset, and can derive inst_offset when VMEM is selected.
 367          *       After that, si_memory_barrier should invalidate sL1 for shader
 368          *       buffers.
 369          */
 370
 371         assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */
 372         emit_data->output[emit_data->chan] =
 373                 ac_build_buffer_load(&ctx->ac, args[0], count, NULL,
 374                                      args[2], NULL, 0,
 375                                      LLVMConstIntGetZExtValue(args[3]),
 376                                      LLVMConstIntGetZExtValue(args[4]),
 377                                      can_speculate, allow_smem);
 378 }
 379
 380 static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
 381                                    const struct tgsi_full_instruction *inst,
 382                                    LLVMTypeRef type, int arg)
 383 {
 384         LLVMBuilderRef builder = ctx->ac.builder;
 385         LLVMValueRef offset, ptr;
 386         int addr_space;
 387
 388         offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);
 389         offset = ac_to_integer(&ctx->ac, offset);
 390
 391         ptr = ctx->ac.lds;
 392         ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
 393         addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
 394         ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");
 395
 396         return ptr;
 397 }
 398
 399 static void load_emit_memory(
 400                 struct si_shader_context *ctx,
 401                 struct lp_build_emit_data *emit_data)
 402 {
 403         const struct tgsi_full_instruction *inst = emit_data->inst;
 404         unsigned writemask = inst->Dst[0].Register.WriteMask;
 405         LLVMValueRef channels[4], ptr, derived_ptr, index;
 406         int chan;
 407
 408         ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
 409
 410         for (chan = 0; chan < 4; ++chan) {
 411                 if (!(writemask & (1 << chan))) {
 412                         channels[chan] = LLVMGetUndef(ctx->f32);
 413                         continue;
 414                 }
 415
 416                 index = LLVMConstInt(ctx->i32, chan, 0);
 417                 derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
 418                 channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
 419         }
 420         emit_data->output[emit_data->chan] = ac_build_gather_values(&ctx->ac, channels, 4);
 421 }
 422
 423 /**
 424  * Return true if the memory accessed by a LOAD or STORE instruction is
 425  * read-only or write-only, respectively.
 426  *
 427  * \param shader_buffers_reverse_access_mask
 428  *      For LOAD, set this to (store | atomic) slot usage in the shader.
 429  *      For STORE, set this to (load | atomic) slot usage in the shader.
 430  * \param images_reverse_access_mask  Same as above, but for images.
 431  */
 432 static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
 433                                   const struct tgsi_shader_info *info,
 434                                   unsigned shader_buffers_reverse_access_mask,
 435                                   unsigned images_reverse_access_mask)
 436 {
 437         /* RESTRICT means NOALIAS.
 438          * If there are no writes, we can assume the accessed memory is read-only.
 439          * If there are no reads, we can assume the accessed memory is write-only.
 440          */
 441         if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT) {
 442                 unsigned reverse_access_mask;
 443
 444                 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
 445                         reverse_access_mask = shader_buffers_reverse_access_mask;
 446                 } else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
 447                         reverse_access_mask = info->images_buffers &
 448                                               images_reverse_access_mask;
 449                 } else {
 450                         reverse_access_mask = ~info->images_buffers &
 451                                               images_reverse_access_mask;
 452                 }
 453
 454                 if (inst->Src[0].Register.Indirect) {
 455                         if (!reverse_access_mask)
 456                                 return true;
 457                 } else {
 458                         if (!(reverse_access_mask &
 459                               (1u << inst->Src[0].Register.Index)))
 460                                 return true;
 461                 }
 462         }
 463
 464         /* If there are no buffer writes (for both shader buffers & image
 465          * buffers), it implies that buffer memory is read-only.
 466          * If there are no buffer reads (for both shader buffers & image
 467          * buffers), it implies that buffer memory is write-only.
 468          *
 469          * Same for the case when there are no writes/reads for non-buffer
 470          * images.
 471          */
 472         if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
 473             (inst->Memory.Texture == TGSI_TEXTURE_BUFFER &&
 474              (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
 475               tgsi_is_bindless_image_file(inst->Src[0].Register.File)))) {
 476                 if (!shader_buffers_reverse_access_mask &&
 477                     !(info->images_buffers & images_reverse_access_mask))
 478                         return true;
 479         } else {
 480                 if (!(~info->images_buffers & images_reverse_access_mask))
 481                         return true;
 482         }
 483         return false;
 484 }
 485
 486 static void load_emit(
 487                 const struct lp_build_tgsi_action *action,
 488                 struct lp_build_tgsi_context *bld_base,
 489                 struct lp_build_emit_data *emit_data)
 490 {
 491         struct si_shader_context *ctx = si_shader_context(bld_base);
 492         const struct tgsi_full_instruction * inst = emit_data->inst;
 493         const struct tgsi_shader_info *info = &ctx->shader->selector->info;
 494         bool can_speculate = false;
 495
 496         if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
 497                 load_emit_memory(ctx, emit_data);
 498                 return;
 499         }
 500
 501         if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
 502             inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
 503                 LLVMValueRef offset, tmp, rsrc;
 504
 505                 bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
 506                 rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
 507
 508                 tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
 509                 offset = ac_to_integer(&ctx->ac, tmp);
 510
 511                 buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 512                                    offset, false, false);
 513         } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
 514                    tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
 515                 LLVMValueRef rsrc;
 516                 unsigned target = inst->Memory.Texture;
 517
 518                 image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
 519                 image_fetch_coords(bld_base, inst, 1, rsrc, &emit_data->args[1]);
 520
 521                 if (target == TGSI_TEXTURE_BUFFER) {
 522                         buffer_append_args(ctx, emit_data, rsrc, emit_data->args[1],
 523                                            ctx->i32_0, false, false);
 524                 } else {
 525                         emit_data->args[0] = rsrc;
 526                 }
 527         }
 528
 529         if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
 530                 load_emit_buffer(ctx, emit_data, true, true);
 531                 return;
 532         }
 533
 534         if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
 535                 ac_build_waitcnt(&ctx->ac, VM_CNT);
 536
 537         can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
 538                           is_oneway_access_only(inst, info,
 539                                                 info->shader_buffers_store |
 540                                                 info->shader_buffers_atomic,
 541                                                 info->images_store |
 542                                                 info->images_atomic);
 543
 544         if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
 545                 load_emit_buffer(ctx, emit_data, can_speculate, false);
 546                 return;
 547         }
 548
 549         if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
 550                 unsigned num_channels = util_last_bit(inst->Dst[0].Register.WriteMask);
 551                 LLVMValueRef result =
 552                         ac_build_buffer_load_format(&ctx->ac,
 553                                                     emit_data->args[0],
 554                                                     emit_data->args[1],
 555                                                     emit_data->args[2],
 556                                                     num_channels,
 557                                                     LLVMConstIntGetZExtValue(emit_data->args[3]),
 558                                                     can_speculate);
 559                 emit_data->output[emit_data->chan] =
 560                         ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
 561         } else {
 562                 struct ac_image_args args = {};
 563                 args.opcode = ac_image_load;
 564                 args.resource = emit_data->args[0];
 565                 memcpy(args.coords, &emit_data->args[1], sizeof(args.coords));
 566                 args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
 567                 if (inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
 568                         args.cache_policy = ac_glc;
 569                 args.attributes = ac_get_load_intr_attribs(can_speculate);
 570                 args.dmask = 0xf;
 571
 572                 emit_data->output[emit_data->chan] =
 573                         ac_build_image_opcode(&ctx->ac, &args);
 574         }
 575 }
 576
 577 static void store_emit_buffer(
 578                 struct si_shader_context *ctx,
 579                 struct lp_build_emit_data *emit_data,
 580                 bool writeonly_memory)
 581 {
 582         const struct tgsi_full_instruction *inst = emit_data->inst;
 583         LLVMBuilderRef builder = ctx->ac.builder;
 584         LLVMValueRef base_data = emit_data->args[0];
 585         LLVMValueRef base_offset = emit_data->args[3];
 586         unsigned writemask = inst->Dst[0].Register.WriteMask;
 587
 588         /* If this is write-only, don't keep data in L1 to prevent
 589          * evicting L1 cache lines that may be needed by other
 590          * instructions.
 591          */
 592         if (writeonly_memory)
 593                 emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
 594
 595         while (writemask) {
 596                 int start, count;
 597                 const char *intrinsic_name;
 598                 LLVMValueRef data;
 599                 LLVMValueRef offset;
 600                 LLVMValueRef tmp;
 601
 602                 u_bit_scan_consecutive_range(&writemask, &start, &count);
 603
 604                 /* Due to an LLVM limitation, split 3-element writes
 605                  * into a 2-element and a 1-element write. */
 606                 if (count == 3) {
 607                         writemask |= 1 << (start + 2);
 608                         count = 2;
 609                 }
 610
 611                 if (count == 4) {
 612                         data = base_data;
 613                         intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
 614                 } else if (count == 2) {
 615                         LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
 616
 617                         tmp = LLVMBuildExtractElement(
 618                                 builder, base_data,
 619                                 LLVMConstInt(ctx->i32, start, 0), "");
 620                         data = LLVMBuildInsertElement(
 621                                 builder, LLVMGetUndef(v2f32), tmp,
 622                                 ctx->i32_0, "");
 623
 624                         tmp = LLVMBuildExtractElement(
 625                                 builder, base_data,
 626                                 LLVMConstInt(ctx->i32, start + 1, 0), "");
 627                         data = LLVMBuildInsertElement(
 628                                 builder, data, tmp, ctx->i32_1, "");
 629
 630                         intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
 631                 } else {
 632                         assert(count == 1);
 633                         data = LLVMBuildExtractElement(
 634                                 builder, base_data,
 635                                 LLVMConstInt(ctx->i32, start, 0), "");
 636                         intrinsic_name = "llvm.amdgcn.buffer.store.f32";
 637                 }
 638
 639                 offset = base_offset;
 640                 if (start != 0) {
 641                         offset = LLVMBuildAdd(
 642                                 builder, offset,
 643                                 LLVMConstInt(ctx->i32, start * 4, 0), "");
 644                 }
 645
 646                 emit_data->args[0] = data;
 647                 emit_data->args[3] = offset;
 648
 649                 ac_build_intrinsic(
 650                         &ctx->ac, intrinsic_name, ctx->voidt,
 651                         emit_data->args, emit_data->arg_count,
 652                         ac_get_store_intr_attribs(writeonly_memory));
 653         }
 654 }
 655
 656 static void store_emit_memory(
 657                 struct si_shader_context *ctx,
 658                 struct lp_build_emit_data *emit_data)
 659 {
 660         const struct tgsi_full_instruction *inst = emit_data->inst;
 661         LLVMBuilderRef builder = ctx->ac.builder;
 662         unsigned writemask = inst->Dst[0].Register.WriteMask;
 663         LLVMValueRef ptr, derived_ptr, data, index;
 664         int chan;
 665
 666         ptr = get_memory_ptr(ctx, inst, ctx->f32, 0);
 667
 668         for (chan = 0; chan < 4; ++chan) {
 669                 if (!(writemask & (1 << chan))) {
 670                         continue;
 671                 }
 672                 data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan);
 673                 index = LLVMConstInt(ctx->i32, chan, 0);
 674                 derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
 675                 LLVMBuildStore(builder, data, derived_ptr);
 676         }
 677 }
 678
 679 static void store_emit(
 680                 const struct lp_build_tgsi_action *action,
 681                 struct lp_build_tgsi_context *bld_base,
 682                 struct lp_build_emit_data *emit_data)
 683 {
 684         struct si_shader_context *ctx = si_shader_context(bld_base);
 685         const struct tgsi_full_instruction * inst = emit_data->inst;
 686         const struct tgsi_shader_info *info = &ctx->shader->selector->info;
 687         struct tgsi_full_src_register resource_reg =
 688                 tgsi_full_src_register_from_dst(&inst->Dst[0]);
 689         unsigned target = inst->Memory.Texture;
 690         bool writeonly_memory = false;
 691         LLVMValueRef chans[4], rsrc;
 692
 693         if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
 694                 store_emit_memory(ctx, emit_data);
 695                 return;
 696         }
 697
 698         for (unsigned chan = 0; chan < 4; ++chan)
 699                 chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
 700
 701         emit_data->args[emit_data->arg_count++] =
 702                 ac_build_gather_values(&ctx->ac, chans, 4);
 703
 704         if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
 705                 LLVMValueRef offset, tmp;
 706
 707                 rsrc = shader_buffer_fetch_rsrc(ctx, &resource_reg, false);
 708
 709                 tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
 710                 offset = ac_to_integer(&ctx->ac, tmp);
 711
 712                 buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 713                                    offset, false, false);
 714         } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
 715                    tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
 716                 /* 8bit/16bit TC L1 write corruption bug on SI.
 717                  * All store opcodes not aligned to a dword are affected.
 718                  *
 719                  * The only way to get unaligned stores in radeonsi is through
 720                  * shader images.
 721                  */
 722                 bool force_glc = ctx->screen->info.chip_class == SI;
 723
 724                 image_fetch_rsrc(bld_base, &resource_reg, true, target, &rsrc);
 725                 image_fetch_coords(bld_base, inst, 0, rsrc, &emit_data->args[2]);
 726
 727                 if (target == TGSI_TEXTURE_BUFFER) {
 728                         buffer_append_args(ctx, emit_data, rsrc, emit_data->args[2],
 729                                            ctx->i32_0, false, force_glc);
 730                 } else {
 731                         emit_data->args[1] = rsrc;
 732                 }
 733         }
 734
 735         if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
 736                 ac_build_waitcnt(&ctx->ac, VM_CNT);
 737
 738         writeonly_memory = is_oneway_access_only(inst, info,
 739                                                  info->shader_buffers_load |
 740                                                  info->shader_buffers_atomic,
 741                                                  info->images_load |
 742                                                  info->images_atomic);
 743
 744         if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
 745                 store_emit_buffer(ctx, emit_data, writeonly_memory);
 746                 return;
 747         }
 748
 749         if (target == TGSI_TEXTURE_BUFFER) {
 750                 /* If this is write-only, don't keep data in L1 to prevent
 751                  * evicting L1 cache lines that may be needed by other
 752                  * instructions.
 753                  */
 754                 if (writeonly_memory)
 755                         emit_data->args[4] = LLVMConstInt(ctx->i1, 1, 0); /* GLC = 1 */
 756
 757                 emit_data->output[emit_data->chan] = ac_build_intrinsic(
 758                         &ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32",
 759                         ctx->voidt, emit_data->args,
 760                         emit_data->arg_count,
 761                         ac_get_store_intr_attribs(writeonly_memory));
 762         } else {
 763                 struct ac_image_args args = {};
 764                 args.opcode = ac_image_store;
 765                 args.data[0] = emit_data->args[0];
 766                 args.resource = emit_data->args[1];
 767                 memcpy(args.coords, &emit_data->args[2], sizeof(args.coords));
 768                 args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
 769                 args.attributes = ac_get_store_intr_attribs(writeonly_memory);
 770                 args.dmask = 0xf;
 771
 772                 /* Workaround for 8bit/16bit TC L1 write corruption bug on SI.
 773                  * All store opcodes not aligned to a dword are affected.
 774                  */
 775                 if (ctx->screen->info.chip_class == SI ||
 776                     /* If this is write-only, don't keep data in L1 to prevent
 777                      * evicting L1 cache lines that may be needed by other
 778                      * instructions. */
 779                     writeonly_memory ||
 780                     inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE))
 781                         args.cache_policy = ac_glc;
 782
 783                 emit_data->output[emit_data->chan] =
 784                         ac_build_image_opcode(&ctx->ac, &args);
 785         }
 786 }
 787
 788 static void atomic_fetch_args(
 789                 struct lp_build_tgsi_context * bld_base,
 790                 struct lp_build_emit_data * emit_data)
 791 {
 792         struct si_shader_context *ctx = si_shader_context(bld_base);
 793         const struct tgsi_full_instruction * inst = emit_data->inst;
 794         LLVMValueRef data1, data2;
 795         LLVMValueRef rsrc;
 796         LLVMValueRef tmp;
 797
 798         emit_data->dst_type = ctx->f32;
 799
 800         tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
 801         data1 = ac_to_integer(&ctx->ac, tmp);
 802
 803         if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
 804                 tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
 805                 data2 = ac_to_integer(&ctx->ac, tmp);
 806         }
 807
 808         /* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order
 809          * of arguments, which is reversed relative to TGSI (and GLSL)
 810          */
 811         if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
 812                 emit_data->args[emit_data->arg_count++] = data2;
 813         emit_data->args[emit_data->arg_count++] = data1;
 814
 815         if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
 816                 LLVMValueRef offset;
 817
 818                 rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
 819
 820                 tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
 821                 offset = ac_to_integer(&ctx->ac, tmp);
 822
 823                 buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
 824                                    offset, true, false);
 825         } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
 826                    tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
 827                 unsigned target = inst->Memory.Texture;
 828
 829                 image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
 830                 image_fetch_coords(bld_base, inst, 1, rsrc,
 831                                    &emit_data->args[emit_data->arg_count + 1]);
 832
 833                 if (target == TGSI_TEXTURE_BUFFER) {
 834                         buffer_append_args(ctx, emit_data, rsrc,
 835                                            emit_data->args[emit_data->arg_count + 1],
 836                                            ctx->i32_0, true, false);
 837                 } else {
 838                         emit_data->args[emit_data->arg_count] = rsrc;
 839                 }
 840         }
 841 }
 842
 843 static void atomic_emit_memory(struct si_shader_context *ctx,
 844                                struct lp_build_emit_data *emit_data) {
 845         LLVMBuilderRef builder = ctx->ac.builder;
 846         const struct tgsi_full_instruction * inst = emit_data->inst;
 847         LLVMValueRef ptr, result, arg;
 848
 849         ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);
 850
 851         arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);
 852         arg = ac_to_integer(&ctx->ac, arg);
 853
 854         if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
 855                 LLVMValueRef new_data;
 856                 new_data = lp_build_emit_fetch(&ctx->bld_base,
 857                                                inst, 3, 0);
 858
 859                 new_data = ac_to_integer(&ctx->ac, new_data);
 860
 861                 result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,
 862                                        LLVMAtomicOrderingSequentiallyConsistent,
 863                                        LLVMAtomicOrderingSequentiallyConsistent,
 864                                        false);
 865
 866                 result = LLVMBuildExtractValue(builder, result, 0, "");
 867         } else {
 868                 LLVMAtomicRMWBinOp op;
 869
 870                 switch(inst->Instruction.Opcode) {
 871                         case TGSI_OPCODE_ATOMUADD:
 872                                 op = LLVMAtomicRMWBinOpAdd;
 873                                 break;
 874                         case TGSI_OPCODE_ATOMXCHG:
 875                                 op = LLVMAtomicRMWBinOpXchg;
 876                                 break;
 877                         case TGSI_OPCODE_ATOMAND:
 878                                 op = LLVMAtomicRMWBinOpAnd;
 879                                 break;
 880                         case TGSI_OPCODE_ATOMOR:
 881                                 op = LLVMAtomicRMWBinOpOr;
 882                                 break;
 883                         case TGSI_OPCODE_ATOMXOR:
 884                                 op = LLVMAtomicRMWBinOpXor;
 885                                 break;
 886                         case TGSI_OPCODE_ATOMUMIN:
 887                                 op = LLVMAtomicRMWBinOpUMin;
 888                                 break;
 889                         case TGSI_OPCODE_ATOMUMAX:
 890                                 op = LLVMAtomicRMWBinOpUMax;
 891                                 break;
 892                         case TGSI_OPCODE_ATOMIMIN:
 893                                 op = LLVMAtomicRMWBinOpMin;
 894                                 break;
 895                         case TGSI_OPCODE_ATOMIMAX:
 896                                 op = LLVMAtomicRMWBinOpMax;
 897                                 break;
 898                         default:
 899                                 unreachable("unknown atomic opcode");
 900                 }
 901
 902                 result = LLVMBuildAtomicRMW(builder, op, ptr, arg,
 903                                        LLVMAtomicOrderingSequentiallyConsistent,
 904                                        false);
 905         }
 906         emit_data->output[emit_data->chan] = LLVMBuildBitCast(builder, result, emit_data->dst_type, "");
 907 }
 908
 909 static void atomic_emit(
 910                 const struct lp_build_tgsi_action *action,
 911                 struct lp_build_tgsi_context *bld_base,
 912                 struct lp_build_emit_data *emit_data)
 913 {
 914         struct si_shader_context *ctx = si_shader_context(bld_base);
 915         const struct tgsi_full_instruction * inst = emit_data->inst;
 916         LLVMValueRef tmp;
 917
 918         if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
 919                 atomic_emit_memory(ctx, emit_data);
 920                 return;
 921         }
 922
 923         if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
 924             inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
 925                 char intrinsic_name[40];
 926                 snprintf(intrinsic_name, sizeof(intrinsic_name),
 927                          "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
 928                 tmp = ac_build_intrinsic(
 929                         &ctx->ac, intrinsic_name, ctx->i32,
 930                         emit_data->args, emit_data->arg_count, 0);
 931                 emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
 932         } else {
 933                 unsigned num_data = inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS ? 2 : 1;
 934                 struct ac_image_args args = {};
 935
 936                 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
 937                         args.opcode = ac_image_atomic_cmpswap;
 938                 } else {
 939                         args.opcode = ac_image_atomic;
 940                         switch (inst->Instruction.Opcode) {
 941                         case TGSI_OPCODE_ATOMXCHG: args.atomic = ac_atomic_swap; break;
 942                         case TGSI_OPCODE_ATOMUADD: args.atomic = ac_atomic_add; break;
 943                         case TGSI_OPCODE_ATOMAND: args.atomic = ac_atomic_and; break;
 944                         case TGSI_OPCODE_ATOMOR: args.atomic = ac_atomic_or; break;
 945                         case TGSI_OPCODE_ATOMXOR: args.atomic = ac_atomic_xor; break;
 946                         case TGSI_OPCODE_ATOMUMIN: args.atomic = ac_atomic_umin; break;
 947                         case TGSI_OPCODE_ATOMUMAX: args.atomic = ac_atomic_umax; break;
 948                         case TGSI_OPCODE_ATOMIMIN: args.atomic = ac_atomic_smin; break;
 949                         case TGSI_OPCODE_ATOMIMAX: args.atomic = ac_atomic_smax; break;
 950                         default: unreachable("unhandled image atomic");
 951                         }
 952                 }
 953
 954                 for (unsigned i = 0; i < num_data; ++i)
 955                         args.data[i] = emit_data->args[i];
 956
 957                 args.resource = emit_data->args[num_data];
 958                 memcpy(args.coords, &emit_data->args[num_data + 1], sizeof(args.coords));
 959                 args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture);
 960
 961                 emit_data->output[emit_data->chan] =
 962                         ac_to_float(&ctx->ac, ac_build_image_opcode(&ctx->ac, &args));
 963         }
 964 }
 965
 966 static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
 967                                 unsigned target, LLVMValueRef out)
 968 {
 969         LLVMBuilderRef builder = ctx->ac.builder;
 970
 971         /* 1D textures are allocated and used as 2D on GFX9. */
 972         if (ctx->screen->info.chip_class >= GFX9 &&
 973             (target == TGSI_TEXTURE_1D_ARRAY ||
 974              target == TGSI_TEXTURE_SHADOW1D_ARRAY)) {
 975                 LLVMValueRef layers =
 976                         LLVMBuildExtractElement(builder, out,
 977                                                 LLVMConstInt(ctx->i32, 2, 0), "");
 978                 out = LLVMBuildInsertElement(builder, out, layers,
 979                                              ctx->i32_1, "");
 980         }
 981
 982         /* Divide the number of layers by 6 to get the number of cubes. */
 983         if (target == TGSI_TEXTURE_CUBE_ARRAY ||
 984             target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
 985                 LLVMValueRef imm2 = LLVMConstInt(ctx->i32, 2, 0);
 986
 987                 LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
 988                 z = LLVMBuildSDiv(builder, z, LLVMConstInt(ctx->i32, 6, 0), "");
 989
 990                 out = LLVMBuildInsertElement(builder, out, z, imm2, "");
 991         }
 992         return out;
 993 }
 994
 995 static void resq_emit(
 996                 const struct lp_build_tgsi_action *action,
 997                 struct lp_build_tgsi_context *bld_base,
 998                 struct lp_build_emit_data *emit_data)
 999 {
1000         struct si_shader_context *ctx = si_shader_context(bld_base);
1001         LLVMBuilderRef builder = ctx->ac.builder;
1002         const struct tgsi_full_instruction *inst = emit_data->inst;
1003         const struct tgsi_full_src_register *reg =
1004                 &inst->Src[inst->Instruction.Opcode == TGSI_OPCODE_TXQ ? 1 : 0];
1005
1006         if (reg->Register.File == TGSI_FILE_BUFFER) {
1007                 LLVMValueRef rsrc = shader_buffer_fetch_rsrc(ctx, reg, false);
1008
1009                 emit_data->output[emit_data->chan] =
1010                         LLVMBuildExtractElement(builder, rsrc,
1011                                                 LLVMConstInt(ctx->i32, 2, 0), "");
1012                 return;
1013         }
1014
1015         if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ &&
1016             inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
1017                 LLVMValueRef rsrc;
1018
1019                 tex_fetch_ptrs(bld_base, emit_data, &rsrc, NULL, NULL);
1020                 /* Read the size from the buffer descriptor directly. */
1021                 emit_data->output[emit_data->chan] =
1022                         get_buffer_size(bld_base, rsrc);
1023                 return;
1024         }
1025
1026         if (inst->Instruction.Opcode == TGSI_OPCODE_RESQ &&
1027             inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
1028                 LLVMValueRef rsrc;
1029
1030                 image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, &rsrc);
1031                 emit_data->output[emit_data->chan] =
1032                         get_buffer_size(bld_base, rsrc);
1033                 return;
1034         }
1035
1036         unsigned target;
1037
1038         if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
1039                 target = inst->Texture.Texture;
1040         } else {
1041                 if (inst->Memory.Texture == TGSI_TEXTURE_3D)
1042                         target = TGSI_TEXTURE_2D_ARRAY;
1043                 else
1044                         target = inst->Memory.Texture;
1045         }
1046
1047         struct ac_image_args args = {};
1048         args.opcode = ac_image_get_resinfo;
1049         args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
1050         args.dmask = 0xf;
1051
1052         if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
1053                 tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL);
1054                 args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
1055         } else {
1056                 image_fetch_rsrc(bld_base, reg, false, target, &args.resource);
1057                 args.lod = ctx->i32_0;
1058         }
1059
1060         emit_data->output[emit_data->chan] =
1061                 fix_resinfo(ctx, target, ac_build_image_opcode(&ctx->ac, &args));
1062 }
1063
1064 /**
1065  * Load an image view, fmask view. or sampler state descriptor.
1066  */
1067 LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
1068                                   LLVMValueRef list, LLVMValueRef index,
1069                                   enum ac_descriptor_type type)
1070 {
1071         LLVMBuilderRef builder = ctx->ac.builder;
1072
1073         switch (type) {
1074         case AC_DESC_IMAGE:
1075                 /* The image is at [0:7]. */
1076                 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
1077                 break;
1078         case AC_DESC_BUFFER:
1079                 /* The buffer is in [4:7]. */
1080                 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
1081                 index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
1082                 list = LLVMBuildPointerCast(builder, list,
1083                                             ac_array_in_const32_addr_space(ctx->v4i32), "");
1084                 break;
1085         case AC_DESC_FMASK:
1086                 /* The FMASK is at [8:15]. */
1087                 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
1088                 index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
1089                 break;
1090         case AC_DESC_SAMPLER:
1091                 /* The sampler state is at [12:15]. */
1092                 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
1093                 index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
1094                 list = LLVMBuildPointerCast(builder, list,
1095                                             ac_array_in_const32_addr_space(ctx->v4i32), "");
1096                 break;
1097         }
1098
1099         return ac_build_load_to_sgpr(&ctx->ac, list, index);
1100 }
1101
1102 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
1103  *
1104  * SI-CI:
1105  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
1106  *   filtering manually. The driver sets img7 to a mask clearing
1107  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
1108  *     s_and_b32 samp0, samp0, img7
1109  *
1110  * VI:
1111  *   The ANISO_OVERRIDE sampler field enables this fix in TA.
1112  */
1113 static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
1114                                            LLVMValueRef res, LLVMValueRef samp)
1115 {
1116         LLVMValueRef img7, samp0;
1117
1118         if (ctx->screen->info.chip_class >= VI)
1119                 return samp;
1120
1121         img7 = LLVMBuildExtractElement(ctx->ac.builder, res,
1122                                        LLVMConstInt(ctx->i32, 7, 0), "");
1123         samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp,
1124                                         ctx->i32_0, "");
1125         samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, "");
1126         return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0,
1127                                       ctx->i32_0, "");
1128 }
1129
1130 static void tex_fetch_ptrs(struct lp_build_tgsi_context *bld_base,
1131                            struct lp_build_emit_data *emit_data,
1132                            LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
1133                            LLVMValueRef *fmask_ptr)
1134 {
1135         struct si_shader_context *ctx = si_shader_context(bld_base);
1136         LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
1137         const struct tgsi_full_instruction *inst = emit_data->inst;
1138         const struct tgsi_full_src_register *reg;
1139         unsigned target = inst->Texture.Texture;
1140         unsigned sampler_src;
1141         LLVMValueRef index;
1142
1143         sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
1144         reg = &emit_data->inst->Src[sampler_src];
1145
1146         if (reg->Register.Indirect) {
1147                 index = si_get_bounded_indirect_index(ctx,
1148                                                       &reg->Indirect,
1149                                                       reg->Register.Index,
1150                                                       ctx->num_samplers);
1151                 index = LLVMBuildAdd(ctx->ac.builder, index,
1152                                      LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
1153         } else {
1154                 index = LLVMConstInt(ctx->i32,
1155                                      si_get_sampler_slot(reg->Register.Index), 0);
1156         }
1157
1158         if (reg->Register.File != TGSI_FILE_SAMPLER) {
1159                 /* Bindless descriptors are accessible from a different pair of
1160                  * user SGPR indices.
1161                  */
1162                 list = LLVMGetParam(ctx->main_fn,
1163                                     ctx->param_bindless_samplers_and_images);
1164                 index = lp_build_emit_fetch_src(bld_base, reg,
1165                                                 TGSI_TYPE_UNSIGNED, 0);
1166         }
1167
1168         if (target == TGSI_TEXTURE_BUFFER)
1169                 *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_BUFFER);
1170         else
1171                 *res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_IMAGE);
1172
1173         if (samp_ptr)
1174                 *samp_ptr = NULL;
1175         if (fmask_ptr)
1176                 *fmask_ptr = NULL;
1177
1178         if (target == TGSI_TEXTURE_2D_MSAA ||
1179             target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
1180                 if (fmask_ptr)
1181                         *fmask_ptr = si_load_sampler_desc(ctx, list, index,
1182                                                           AC_DESC_FMASK);
1183         } else if (target != TGSI_TEXTURE_BUFFER) {
1184                 if (samp_ptr) {
1185                         *samp_ptr = si_load_sampler_desc(ctx, list, index,
1186                                                          AC_DESC_SAMPLER);
1187                         *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
1188                 }
1189         }
1190 }
1191
1192 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
1193  * incorrectly forces nearest filtering if the texture format is integer.
1194  * The only effect it has on Gather4, which always returns 4 texels for
1195  * bilinear filtering, is that the final coordinates are off by 0.5 of
1196  * the texel size.
1197  *
1198  * The workaround is to subtract 0.5 from the unnormalized coordinates,
1199  * or (0.5 / size) from the normalized coordinates.
1200  *
1201  * However, cube textures with 8_8_8_8 data formats require a different
1202  * workaround of overriding the num format to USCALED/SSCALED. This would lose
1203  * precision in 32-bit data formats, so it needs to be applied dynamically at
1204  * runtime. In this case, return an i1 value that indicates whether the
1205  * descriptor was overridden (and hence a fixup of the sampler result is needed).
1206  */
1207 static LLVMValueRef
1208 si_lower_gather4_integer(struct si_shader_context *ctx,
1209                          struct ac_image_args *args,
1210                          unsigned target,
1211                          enum tgsi_return_type return_type)
1212 {
1213         LLVMBuilderRef builder = ctx->ac.builder;
1214         LLVMValueRef wa_8888 = NULL;
1215         LLVMValueRef half_texel[2];
1216
1217         assert(return_type == TGSI_RETURN_TYPE_SINT ||
1218                return_type == TGSI_RETURN_TYPE_UINT);
1219
1220         if (target == TGSI_TEXTURE_CUBE ||
1221             target == TGSI_TEXTURE_CUBE_ARRAY) {
1222                 LLVMValueRef formats;
1223                 LLVMValueRef data_format;
1224                 LLVMValueRef wa_formats;
1225
1226                 formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, "");
1227
1228                 data_format = LLVMBuildLShr(builder, formats,
1229                                             LLVMConstInt(ctx->i32, 20, false), "");
1230                 data_format = LLVMBuildAnd(builder, data_format,
1231                                            LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
1232                 wa_8888 = LLVMBuildICmp(
1233                         builder, LLVMIntEQ, data_format,
1234                         LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
1235                         "");
1236
1237                 uint32_t wa_num_format =
1238                         return_type == TGSI_RETURN_TYPE_UINT ?
1239                         S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_USCALED) :
1240                         S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_SSCALED);
1241                 wa_formats = LLVMBuildAnd(builder, formats,
1242                                           LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false),
1243                                           "");
1244                 wa_formats = LLVMBuildOr(builder, wa_formats,
1245                                         LLVMConstInt(ctx->i32, wa_num_format, false), "");
1246
1247                 formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, "");
1248                 args->resource = LLVMBuildInsertElement(
1249                         builder, args->resource, formats, ctx->i32_1, "");
1250         }
1251
1252         if (target == TGSI_TEXTURE_RECT ||
1253             target == TGSI_TEXTURE_SHADOWRECT) {
1254                 assert(!wa_8888);
1255                 half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
1256         } else {
1257                 struct ac_image_args resinfo = {};
1258                 struct lp_build_if_state if_ctx;
1259
1260                 if (wa_8888) {
1261                         /* Skip the texture size query entirely if we don't need it. */
1262                         lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, ""));
1263                 }
1264
1265                 /* Query the texture size. */
1266                 resinfo.opcode = ac_image_get_resinfo;
1267                 resinfo.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
1268                 resinfo.resource = args->resource;
1269                 resinfo.sampler = args->sampler;
1270                 resinfo.lod = ctx->ac.i32_0;
1271                 resinfo.dmask = 0xf;
1272
1273                 LLVMValueRef texsize =
1274                         fix_resinfo(ctx, target,
1275                                     ac_build_image_opcode(&ctx->ac, &resinfo));
1276
1277                 /* Compute -0.5 / size. */
1278                 for (unsigned c = 0; c < 2; c++) {
1279                         half_texel[c] =
1280                                 LLVMBuildExtractElement(builder, texsize,
1281                                                         LLVMConstInt(ctx->i32, c, 0), "");
1282                         half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, "");
1283                         half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, half_texel[c]);
1284                         half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
1285                                                       LLVMConstReal(ctx->f32, -0.5), "");
1286                 }
1287
1288                 if (wa_8888) {
1289                         lp_build_endif(&if_ctx);
1290
1291                         LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block };
1292
1293                         for (unsigned c = 0; c < 2; c++) {
1294                                 LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 };
1295                                 half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2,
1296                                                              values, bb);
1297                         }
1298                 }
1299         }
1300
1301         for (unsigned c = 0; c < 2; c++) {
1302                 LLVMValueRef tmp;
1303                 tmp = ac_to_float(&ctx->ac, args->coords[c]);
1304                 tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
1305                 args->coords[c] = ac_to_integer(&ctx->ac, tmp);
1306         }
1307
1308         return wa_8888;
1309 }
1310
1311 /* The second half of the cube texture 8_8_8_8 integer workaround: adjust the
1312  * result after the gather operation.
      *
      * Each of the four channels of \p result is extracted, converted from
      * floating point to a 32-bit integer (unsigned or signed according to
      * \p return_type), passed through ac_to_float() and then selected against
      * the untouched channel using \p wa as the condition. The chosen value is
      * written back into the same channel.
      *
      * \param ctx          shader context providing the builder and the i32 type
      * \param result       vector whose channels 0..3 are rewritten
      * \param return_type  must be TGSI_RETURN_TYPE_SINT or TGSI_RETURN_TYPE_UINT
      *                     (asserted)
      * \param wa           select condition: where true, the converted value
      *                     replaces the original channel
      * \return the vector with all four channels re-inserted
1313  */
1314 static LLVMValueRef
1315 si_fix_gather4_integer_result(struct si_shader_context *ctx,
1316                            LLVMValueRef result,
1317                            enum tgsi_return_type return_type,
1318                            LLVMValueRef wa)
1319 {
1320         LLVMBuilderRef builder = ctx->ac.builder;
1321
1322         assert(return_type == TGSI_RETURN_TYPE_SINT ||
1323                return_type == TGSI_RETURN_TYPE_UINT);
1324
1325         for (unsigned chan = 0; chan < 4; ++chan) {
1326                 LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
1327                 LLVMValueRef value;
1328                 LLVMValueRef wa_value;
1329
1330                 value = LLVMBuildExtractElement(builder, result, chanv, "");
1331
                     /* Build the workaround value: a float-to-int conversion
                      * whose signedness follows the declared return type.
                      */
1332                 if (return_type == TGSI_RETURN_TYPE_UINT)
1333                         wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
1334                 else
1335                         wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
                     /* NOTE(review): ac_to_float() presumably only reinterprets
                      * the integer bits as a float so that both select operands
                      * have the same type - confirm against its definition.
                      */
1336                 wa_value = ac_to_float(&ctx->ac, wa_value);
                     /* The choice is made at shader run time, per the value of wa. */
1337                 value = LLVMBuildSelect(builder, wa, wa_value, value, "");
1338
1339                 result = LLVMBuildInsertElement(builder, result, value, chanv, "");
1340         }
1341
1342         return result;
1343 }
1344
/**
 * Emit callback shared by the TGSI texture opcodes (TEX, TEX_LZ, TEX2, TXB,
 * TXB2, TXD, TXF, TXF_LZ, TXL, TXL2, TXP, TG4 and LODQ are pointed at it by
 * si_shader_context_init_mem).
 *
 * Translates the instruction in \p emit_data into a struct ac_image_args
 * (coordinates, packed offsets, bias, compare value, derivatives, LOD, dmask,
 * dim and opcode), hands it to ac_build_image_opcode() and stores the
 * returned value in emit_data->output[emit_data->chan].
 *
 * TGSI_TEXTURE_BUFFER is handled separately at the top through
 * ac_build_buffer_load_format(). An opcode that is not listed in the final
 * switch hits assert(0) and returns without writing an output.
 *
 * \param action     unused
 * \param bld_base   TGSI build context; the si_shader_context is derived from it
 * \param emit_data  carries the instruction and receives the result
 */
1345 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
1346                                 struct lp_build_tgsi_context *bld_base,
1347                                 struct lp_build_emit_data *emit_data)
1348 {
1349         struct si_shader_context *ctx = si_shader_context(bld_base);
1350         const struct tgsi_full_instruction *inst = emit_data->inst;
1351         unsigned opcode = inst->Instruction.Opcode;
1352         unsigned target = inst->Texture.Texture;
1353         struct ac_image_args args = {};
1354         int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
1355         unsigned chan;
1356         bool has_offset = inst->Texture.NumOffsets > 0;
1357         LLVMValueRef fmask_ptr = NULL;
1358
1359         tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr);
1360
             /* Buffer textures do not go through the image path: load
              * num_channels components (derived from the last bit set in the
              * destination write mask) and expand the result to a vec4.
              */
1361         if (target == TGSI_TEXTURE_BUFFER) {
1362                 LLVMValueRef vindex = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
1363                 unsigned num_channels =
1364                         util_last_bit(inst->Dst[0].Register.WriteMask);
1365                 LLVMValueRef result =
1366                         ac_build_buffer_load_format(&ctx->ac,
1367                                                     args.resource,
1368                                                     vindex,
1369                                                     ctx->i32_0,
1370                                                     num_channels, false, true);
1371                 emit_data->output[emit_data->chan] =
1372                         ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
1373                 return;
1374         }
1375
1376         /* Fetch and project texture coordinates */
1377         args.coords[3] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_W);
1378         for (chan = 0; chan < 3; chan++) {
1379                 args.coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan);
1380                 if (opcode == TGSI_OPCODE_TXP)
1381                         args.coords[chan] = ac_build_fdiv(&ctx->ac,
1382                                 args.coords[chan], args.coords[3]);
1383         }
1384
             /* W has been divided out of X/Y/Z above; replace it with 1.0. */
1385         if (opcode == TGSI_OPCODE_TXP)
1386                 args.coords[3] = ctx->ac.f32_1;
1387
1388         /* Pack offsets. */
1389         if (has_offset &&
1390             opcode != TGSI_OPCODE_TXF &&
1391             opcode != TGSI_OPCODE_TXF_LZ) {
1392                 /* The offsets are six-bit signed integers packed like this:
1393                  *   X=[5:0], Y=[13:8], and Z=[21:16].
1394                  */
1395                 LLVMValueRef offset[3], pack;
1396
1397                 assert(inst->Texture.NumOffsets == 1);
1398
1399                 for (chan = 0; chan < 3; chan++) {
1400                         offset[chan] = lp_build_emit_fetch_texoffset(bld_base, inst, 0, chan);
1401                         offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
1402                                                     LLVMConstInt(ctx->i32, 0x3f, 0), "");
1403                         if (chan)
1404                                 offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
1405                                                             LLVMConstInt(ctx->i32, chan*8, 0), "");
1406                 }
1407
1408                 pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
1409                 pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
1410                 args.offset = pack;
1411         }
1412
1413         /* Pack LOD bias value */
1414         if (opcode == TGSI_OPCODE_TXB)
1415                 args.bias = args.coords[3];
1416         if (opcode == TGSI_OPCODE_TXB2)
1417                 args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
1418
1419         /* Pack depth comparison value */
1420         if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
1421                 LLVMValueRef z;
1422
                     /* Shadow cube arrays take the reference value from the
                      * second source operand; every other shadow target takes
                      * it from the coordinate channel given by ref_pos.
                      */
1423                 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1424                         z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
1425                 } else {
1426                         assert(ref_pos >= 0);
1427                         z = args.coords[ref_pos];
1428                 }
1429
1430                 /* Section 8.23.1 (Depth Texture Comparison Mode) of the
1431                  * OpenGL 4.5 spec says:
1432                  *
1433                  *    "If the texture’s internal format indicates a fixed-point
1434                  *     depth texture, then D_t and D_ref are clamped to the
1435                  *     range [0, 1]; otherwise no clamping is performed."
1436                  *
1437                  * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
1438                  * so the depth comparison value isn't clamped for Z16 and
1439                  * Z24 anymore. Do it manually here.
1440                  */
1441                 if (ctx->screen->info.chip_class >= VI) {
1442                         LLVMValueRef upgraded;
1443                         LLVMValueRef clamped;
                             /* Bit 29 of element 3 of the sampler descriptor
                              * selects between the clamped and the raw
                              * reference value.
                              */
1444                         upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
1445                                                            LLVMConstInt(ctx->i32, 3, false), "");
1446                         upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
1447                                                  LLVMConstInt(ctx->i32, 29, false), "");
1448                         upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, "");
1449                         clamped = ac_build_clamp(&ctx->ac, z);
1450                         z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, "");
1451                 }
1452
1453                 args.compare = z;
1454         }
1455
1456         /* Pack user derivatives */
1457         if (opcode == TGSI_OPCODE_TXD) {
1458                 int param, num_src_deriv_channels, num_dst_deriv_channels;
1459
                     /* num_src_deriv_channels: channels read from each
                      * derivative operand. num_dst_deriv_channels: slots each
                      * derivative occupies in args.derivs.
                      */
1460                 switch (target) {
1461                 case TGSI_TEXTURE_3D:
1462                         num_src_deriv_channels = 3;
1463                         num_dst_deriv_channels = 3;
1464                         break;
1465                 case TGSI_TEXTURE_2D:
1466                 case TGSI_TEXTURE_SHADOW2D:
1467                 case TGSI_TEXTURE_RECT:
1468                 case TGSI_TEXTURE_SHADOWRECT:
1469                 case TGSI_TEXTURE_2D_ARRAY:
1470                 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1471                         num_src_deriv_channels = 2;
1472                         num_dst_deriv_channels = 2;
1473                         break;
1474                 case TGSI_TEXTURE_CUBE:
1475                 case TGSI_TEXTURE_SHADOWCUBE:
1476                 case TGSI_TEXTURE_CUBE_ARRAY:
1477                 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1478                         /* Cube derivatives will be converted to 2D. */
1479                         num_src_deriv_channels = 3;
1480                         num_dst_deriv_channels = 3;
1481                         break;
1482                 case TGSI_TEXTURE_1D:
1483                 case TGSI_TEXTURE_SHADOW1D:
1484                 case TGSI_TEXTURE_1D_ARRAY:
1485                 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1486                         num_src_deriv_channels = 1;
1487
1488                         /* 1D textures are allocated and used as 2D on GFX9. */
1489                         if (ctx->screen->info.chip_class >= GFX9) {
1490                                 num_dst_deriv_channels = 2;
1491                         } else {
1492                                 num_dst_deriv_channels = 1;
1493                         }
1494                         break;
1495                 default:
1496                         unreachable("invalid target");
1497                 }
1498
                     /* param 0 and 1 are read from source operands 1 and 2. */
1499                 for (param = 0; param < 2; param++) {
1500                         for (chan = 0; chan < num_src_deriv_channels; chan++)
1501                                 args.derivs[param * num_dst_deriv_channels + chan] =
1502                                         lp_build_emit_fetch(bld_base, inst, param+1, chan);
1503
1504                         /* Fill in the rest with zeros. */
1505                         for (chan = num_src_deriv_channels;
1506                              chan < num_dst_deriv_channels; chan++)
1507                                 args.derivs[param * num_dst_deriv_channels + chan] =
1508                                         ctx->ac.f32_0;
1509                 }
1510         }
1511
             /* Cube targets have their coordinates (and derivatives) prepared
              * by ac_prepare_cube_coords(). Other array targets on chips up to
              * VI get one coordinate rounded with llvm.rint, except for
              * TXF/TXF_LZ.
              *
              * NOTE(review): array_coord is 1 only for TGSI_TEXTURE_1D_ARRAY.
              * If tgsi_is_array_sampler() also accepts
              * TGSI_TEXTURE_SHADOW1D_ARRAY, that target rounds coords[2] -
              * confirm that this is the intended coordinate.
              */
1512         if (target == TGSI_TEXTURE_CUBE ||
1513             target == TGSI_TEXTURE_CUBE_ARRAY ||
1514             target == TGSI_TEXTURE_SHADOWCUBE ||
1515             target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1516                 ac_prepare_cube_coords(&ctx->ac,
1517                                        opcode == TGSI_OPCODE_TXD,
1518                                        target == TGSI_TEXTURE_CUBE_ARRAY ||
1519                                        target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
1520                                        opcode == TGSI_OPCODE_LODQ,
1521                                        args.coords, args.derivs);
1522         } else if (tgsi_is_array_sampler(target) &&
1523                    opcode != TGSI_OPCODE_TXF &&
1524                    opcode != TGSI_OPCODE_TXF_LZ &&
1525                    ctx->screen->info.chip_class <= VI) {
1526                 unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
1527                 args.coords[array_coord] =
1528                         ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32,
1529                                            &args.coords[array_coord], 1, 0);
1530         }
1531
1532         /* 1D textures are allocated and used as 2D on GFX9. */
1533         if (ctx->screen->info.chip_class >= GFX9) {
1534                 LLVMValueRef filler;
1535
1536                 /* Use 0.5, so that we don't sample the border color. */
                     /* TXF/TXF_LZ get the integer constant 0 instead. */
1537                 if (opcode == TGSI_OPCODE_TXF ||
1538                     opcode == TGSI_OPCODE_TXF_LZ)
1539                         filler = ctx->i32_0;
1540                 else
1541                         filler = LLVMConstReal(ctx->f32, 0.5);
1542
                     /* For 1D arrays the former coords[1] moves up to
                      * coords[2] before coords[1] is overwritten.
                      */
1543                 if (target == TGSI_TEXTURE_1D ||
1544                     target == TGSI_TEXTURE_SHADOW1D) {
1545                         args.coords[1] = filler;
1546                 } else if (target == TGSI_TEXTURE_1D_ARRAY ||
1547                            target == TGSI_TEXTURE_SHADOW1D_ARRAY) {
1548                         args.coords[2] = args.coords[1];
1549                         args.coords[1] = filler;
1550                 }
1551         }
1552
1553         /* Pack LOD or sample index */
1554         if (opcode == TGSI_OPCODE_TXL)
1555                 args.lod = args.coords[3];
1556         else if (opcode == TGSI_OPCODE_TXL2)
1557                 args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
1558         else if (opcode == TGSI_OPCODE_TXF) {
1559                 if (target == TGSI_TEXTURE_2D_MSAA) {
1560                         /* No LOD, but move sample index into the right place. */
1561                         args.coords[2] = args.coords[3];
1562                 } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
1563                         args.lod = args.coords[3];
1564                 }
1565         }
1566
             /* MSAA targets: let ac_apply_fmask_to_sample() adjust the
              * coordinates using the FMASK descriptor fetched above.
              */
1567         if (target == TGSI_TEXTURE_2D_MSAA ||
1568             target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
1569                 ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords,
1570                                          target == TGSI_TEXTURE_2D_ARRAY_MSAA);
1571         }
1572
1573         if (opcode == TGSI_OPCODE_TXF ||
1574             opcode == TGSI_OPCODE_TXF_LZ) {
1575                 /* add tex offsets */
1576                 if (inst->Texture.NumOffsets) {
1577                         const struct tgsi_texture_offset *off = inst->TexOffsets;
1578
1579                         assert(inst->Texture.NumOffsets == 1);
1580
                             /* The offsets are read from ctx->imms and added
                              * to the coordinates directly; the cases fall
                              * through so that a 3D target adjusts Z, Y and X,
                              * a 2D target Y and X, and a 1D target only X.
                              */
1581                         switch (target) {
1582                         case TGSI_TEXTURE_3D:
1583                                 args.coords[2] =
1584                                         LLVMBuildAdd(ctx->ac.builder, args.coords[2],
1585                                                 ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ], "");
1586                                 /* fall through */
1587                         case TGSI_TEXTURE_2D:
1588                         case TGSI_TEXTURE_SHADOW2D:
1589                         case TGSI_TEXTURE_RECT:
1590                         case TGSI_TEXTURE_SHADOWRECT:
1591                         case TGSI_TEXTURE_2D_ARRAY:
1592                         case TGSI_TEXTURE_SHADOW2D_ARRAY:
1593                                 args.coords[1] =
1594                                         LLVMBuildAdd(ctx->ac.builder, args.coords[1],
1595                                                 ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY], "");
1596                                 /* fall through */
1597                         case TGSI_TEXTURE_1D:
1598                         case TGSI_TEXTURE_SHADOW1D:
1599                         case TGSI_TEXTURE_1D_ARRAY:
1600                         case TGSI_TEXTURE_SHADOW1D_ARRAY:
1601                                 args.coords[0] =
1602                                         LLVMBuildAdd(ctx->ac.builder, args.coords[0],
1603                                                 ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX], "");
1604                                 break;
1605                                 /* texture offsets do not apply to other texture targets */
1606                         }
1607                 }
1608         }
1609
1610         if (opcode == TGSI_OPCODE_TG4) {
1611                 unsigned gather_comp = 0;
1612
1613                 /* DMASK was repurposed for GATHER4. 4 components are always
1614                  * returned and DMASK works like a swizzle - it selects
1615                  * the component to fetch. The only valid DMASK values are
1616                  * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
1617                  * (red,red,red,red) etc.) The ISA document doesn't mention
1618                  * this.
1619                  */
1620
1621                 /* Get the component index from src1.x for Gather4. */
                     /* Shadow targets keep component 0. */
1622                 if (!tgsi_is_shadow_target(target)) {
1623                         LLVMValueRef comp_imm;
1624                         struct tgsi_src_register src1 = inst->Src[1].Register;
1625
1626                         assert(src1.File == TGSI_FILE_IMMEDIATE);
1627
1628                         comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
1629                         gather_comp = LLVMConstIntGetZExtValue(comp_imm);
1630                         gather_comp = CLAMP(gather_comp, 0, 3);
1631                 }
1632
1633                 args.dmask = 1 << gather_comp;
1634         } else {
1635                 args.dmask = 0xf;
1636         }
1637
1638         args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
1639         args.unorm = target == TGSI_TEXTURE_RECT ||
1640                      target == TGSI_TEXTURE_SHADOWRECT;
             /* Default to a plain sample; the switch below overrides the
              * opcode and sets level_zero where required.
              */
1641         args.opcode = ac_image_sample;
1642
1643         switch (opcode) {
1644         case TGSI_OPCODE_TXF:
1645         case TGSI_OPCODE_TXF_LZ:
                     /* TXF_LZ and MSAA fetches use a load without a mip
                      * level; the remaining TXF cases use load_mip.
                      */
1646                 args.opcode = opcode == TGSI_OPCODE_TXF_LZ ||
1647                               target == TGSI_TEXTURE_2D_MSAA ||
1648                               target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
1649                                       ac_image_load : ac_image_load_mip;
1650                 break;
1651         case TGSI_OPCODE_LODQ:
1652                 args.opcode = ac_image_get_lod;
1653                 break;
1654         case TGSI_OPCODE_TEX:
1655         case TGSI_OPCODE_TEX2:
1656         case TGSI_OPCODE_TXP:
                     /* Outside fragment shaders these sample level zero. */
1657                 if (ctx->type != PIPE_SHADER_FRAGMENT)
1658                         args.level_zero = true;
1659                 break;
1660         case TGSI_OPCODE_TEX_LZ:
1661                 args.level_zero = true;
1662                 break;
1663         case TGSI_OPCODE_TXB:
1664         case TGSI_OPCODE_TXB2:
1665                 assert(ctx->type == PIPE_SHADER_FRAGMENT);
1666                 break;
1667         case TGSI_OPCODE_TXL:
1668         case TGSI_OPCODE_TXL2:
1669                 break;
1670         case TGSI_OPCODE_TXD:
1671                 break;
1672         case TGSI_OPCODE_TG4:
1673                 args.opcode = ac_image_gather4;
1674                 args.level_zero = true;
1675                 break;
1676         default:
                     /* Unknown opcode: no output is written. */
1677                 assert(0);
1678                 return;
1679         }
1680
1681         /* The hardware needs special lowering for Gather4 with integer formats. */
1682         LLVMValueRef gather4_int_result_workaround = NULL;
1683
             /* Applies to chips up to VI only. si_lower_gather4_integer()
              * receives args by pointer; a non-NULL return value is forwarded
              * further down to si_fix_gather4_integer_result() as its select
              * condition.
              */
1684         if (ctx->screen->info.chip_class <= VI &&
1685             opcode == TGSI_OPCODE_TG4) {
1686                 assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
1687
1688                 if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
1689                     inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
1690                         gather4_int_result_workaround =
1691                                 si_lower_gather4_integer(ctx, &args, target,
1692                                                          inst->Texture.ReturnType);
1693                 }
1694         }
1695
1696         args.attributes = AC_FUNC_ATTR_READNONE;
1697         LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
1698
1699         if (gather4_int_result_workaround) {
1700                 result = si_fix_gather4_integer_result(ctx, result,
1701                                                        inst->Texture.ReturnType,
1702                                                        gather4_int_result_workaround);
1703         }
1704
1705         emit_data->output[emit_data->chan] = result;
1706 }
1707
/**
 * Emit callback for TGSI_OPCODE_TXQS.
 *
 * Reads element 3 of the resource descriptor (viewed as v8i32), extracts the
 * 4-bit field at bits [19:16] and stores (1 << field) in
 * emit_data->output[emit_data->chan].
 *
 * \param action     unused
 * \param bld_base   TGSI build context; the si_shader_context is derived from it
 * \param emit_data  carries the instruction and receives the result
 */
1708 static void si_llvm_emit_txqs(
1709         const struct lp_build_tgsi_action *action,
1710         struct lp_build_tgsi_context *bld_base,
1711         struct lp_build_emit_data *emit_data)
1712 {
1713         struct si_shader_context *ctx = si_shader_context(bld_base);
1714         LLVMValueRef res, samples;
1715         LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
1716
             /* Only res_ptr is used below; samp_ptr and fmask_ptr exist to
              * satisfy the out-parameters of tex_fetch_ptrs().
              */
1717         tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
1718
1719         /* Read the samples from the descriptor directly. */
1720         res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, "");
1721         samples = LLVMBuildExtractElement(ctx->ac.builder, res,
1722                                           LLVMConstInt(ctx->i32, 3, 0), "");
1723         samples = LLVMBuildLShr(ctx->ac.builder, samples,
1724                                 LLVMConstInt(ctx->i32, 16, 0), "");
1725         samples = LLVMBuildAnd(ctx->ac.builder, samples,
1726                                LLVMConstInt(ctx->i32, 0xf, 0), "");
             /* The extracted field is used as a shift amount: 1 << field. */
1727         samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1,
1728                                samples, "");
1729
1730         emit_data->output[emit_data->chan] = samples;
1731 }
1732
/**
 * Emit callback for TGSI_OPCODE_FBFETCH.
 *
 * Loads the SI_PS_IMAGE_COLORBUF0 image descriptor from the rw_buffers
 * parameter, builds the coordinates from shader input parameters according
 * to the fbfetch_* bits of the shader key (1D, layered, MSAA), and issues an
 * ac_image_load with all four channels enabled. The result is stored in
 * emit_data->output[emit_data->chan].
 *
 * \param action     unused
 * \param bld_base   TGSI build context; the si_shader_context is derived from it
 * \param emit_data  receives the result; its source operands are not read
 */
1733 static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action,
1734                                  struct lp_build_tgsi_context *bld_base,
1735                                  struct lp_build_emit_data *emit_data)
1736 {
1737         struct si_shader_context *ctx = si_shader_context(bld_base);
1738         struct ac_image_args args = {};
1739         LLVMValueRef ptr, image, fmask;
1740
1741         /* Ignore src0, because KHR_blend_func_extended disallows multiple render
1742          * targets.
1743          */
1744
1745         /* Load the image descriptor. */
             /* The slot index is halved below because the pointer is viewed
              * as an array of v8i32, hence the evenness check.
              * NOTE(review): presumably the slot constants count units half
              * the size of a v8i32 - confirm against their definition.
              */
1746         STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
1747         ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
1748         ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
1749                                    ac_array_in_const32_addr_space(ctx->v8i32), "");
1750         image = ac_build_load_to_sgpr(&ctx->ac, ptr,
1751                         LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
1752
             /* Coordinates are appended in this order: X, Y (unless 1D),
              * layer (if layered), sample index (if MSAA).
              * NOTE(review): the last two si_unpack_param() arguments look
              * like (bit offset, bit count) - confirm against its definition.
              */
1753         unsigned chan = 0;
1754
1755         args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16);
1756
1757         if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
1758                 args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16);
1759
1760         /* Get the current render target layer index. */
1761         if (ctx->shader->key.mono.u.ps.fbfetch_layered)
1762                 args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11);
1763
1764         if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
1765                 args.coords[chan++] = si_get_sample_id(ctx);
1766
             /* For MSAA, load the FMASK descriptor from the same list and let
              * ac_apply_fmask_to_sample() adjust the coordinates.
              */
1767         if (ctx->shader->key.mono.u.ps.fbfetch_msaa) {
1768                 fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
1769                         LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
1770
1771                 ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
1772                                          ctx->shader->key.mono.u.ps.fbfetch_layered);
1773         }
1774
1775         args.opcode = ac_image_load;
1776         args.resource = image;
1777         args.dmask = 0xf;
             /* Pick the image dimension: MSAA takes precedence over 1D, and
              * each variant has an array form for layered framebuffers.
              */
1778         if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
1779                 args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
1780                         ac_image_2darraymsaa : ac_image_2dmsaa;
1781         else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
1782                 args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
1783                         ac_image_1darray : ac_image_1d;
1784         else
1785                 args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
1786                         ac_image_2darray : ac_image_2d;
1787
1788         emit_data->output[emit_data->chan] =
1789                 ac_build_image_opcode(&ctx->ac, &args);
1790 }
1791
1792 /**
1793  * Set up the actions for TGSI memory opcodes, including texture opcodes.
      *
      * Installs the emit callbacks of this file into ctx->bld_base.op_actions:
      * the texture opcodes, FBFETCH, LOAD/STORE/RESQ and the atomic opcodes.
      * All atomics are initialized from one template (atomic_fetch_args +
      * atomic_emit) and differ only in their intr_name string.
      *
      * \param ctx  shader context whose bld_base action table is filled in
1794  */
1795 void si_shader_context_init_mem(struct si_shader_context *ctx)
1796 {
1797         struct lp_build_tgsi_context *bld_base;
1798         struct lp_build_tgsi_action tmpl = {};
1799
1800         bld_base = &ctx->bld_base;
1801
             /* Texture opcodes. TXQ uses resq_emit, like RESQ further down. */
1802         bld_base->op_actions[TGSI_OPCODE_TEX].emit = build_tex_intrinsic;
1803         bld_base->op_actions[TGSI_OPCODE_TEX_LZ].emit = build_tex_intrinsic;
1804         bld_base->op_actions[TGSI_OPCODE_TEX2].emit = build_tex_intrinsic;
1805         bld_base->op_actions[TGSI_OPCODE_TXB].emit = build_tex_intrinsic;
1806         bld_base->op_actions[TGSI_OPCODE_TXB2].emit = build_tex_intrinsic;
1807         bld_base->op_actions[TGSI_OPCODE_TXD].emit = build_tex_intrinsic;
1808         bld_base->op_actions[TGSI_OPCODE_TXF].emit = build_tex_intrinsic;
1809         bld_base->op_actions[TGSI_OPCODE_TXF_LZ].emit = build_tex_intrinsic;
1810         bld_base->op_actions[TGSI_OPCODE_TXL].emit = build_tex_intrinsic;
1811         bld_base->op_actions[TGSI_OPCODE_TXL2].emit = build_tex_intrinsic;
1812         bld_base->op_actions[TGSI_OPCODE_TXP].emit = build_tex_intrinsic;
1813         bld_base->op_actions[TGSI_OPCODE_TXQ].emit = resq_emit;
1814         bld_base->op_actions[TGSI_OPCODE_TG4].emit = build_tex_intrinsic;
1815         bld_base->op_actions[TGSI_OPCODE_LODQ].emit = build_tex_intrinsic;
1816         bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
1817
1818         bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_llvm_emit_fbfetch;
1819
             /* Image/buffer memory opcodes. */
1820         bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
1821         bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
1822         bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
1823
             /* Atomics: the template is copied by value into each entry, then
              * the entry's intr_name is set to the operation's name.
              */
1824         tmpl.fetch_args = atomic_fetch_args;
1825         tmpl.emit = atomic_emit;
1826         bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
1827         bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
1828         bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl;
1829         bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
1830         bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl;
1831         bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
1832         bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl;
1833         bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
1834         bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl;
1835         bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
1836         bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl;
1837         bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
1838         bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl;
1839         bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
1840         bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl;
1841         bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
1842         bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl;
1843         bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
1844         bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl;
1845         bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
1846 }