radeonsi: move declaring streamout parameters to its own function
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Tom Stellard <thomas.stellard@amd.com>
25 * Michel Dänzer <michel.daenzer@amd.com>
26 * Christian König <christian.koenig@amd.com>
27 */
28
29 #include "gallivm/lp_bld_const.h"
30 #include "gallivm/lp_bld_gather.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_logic.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_flow.h"
35 #include "radeon/r600_cs.h"
36 #include "radeon/radeon_llvm.h"
37 #include "radeon/radeon_elf_util.h"
38 #include "radeon/radeon_llvm_emit.h"
39 #include "util/u_memory.h"
40 #include "util/u_pstipple.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_util.h"
43 #include "tgsi/tgsi_dump.h"
44
45 #include "si_pipe.h"
46 #include "si_shader.h"
47 #include "sid.h"
48
49 #include <errno.h>
50
51 static const char *scratch_rsrc_dword0_symbol =
52 "SCRATCH_RSRC_DWORD0";
53
54 static const char *scratch_rsrc_dword1_symbol =
55 "SCRATCH_RSRC_DWORD1";
56
57 struct si_shader_output_values
58 {
59 LLVMValueRef values[4];
60 unsigned name;
61 unsigned sid;
62 };
63
64 struct si_shader_context
65 {
66 struct radeon_llvm_context radeon_bld;
67 struct si_shader *shader;
68 struct si_screen *screen;
69 unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
70 int param_streamout_config;
71 int param_streamout_write_index;
72 int param_streamout_offset[4];
73 int param_vertex_id;
74 int param_instance_id;
75 int param_es2gs_offset;
76 LLVMTargetMachineRef tm;
77 LLVMValueRef const_md;
78 LLVMValueRef const_resource[SI_NUM_CONST_BUFFERS];
79 LLVMValueRef ddxy_lds;
80 LLVMValueRef *constants[SI_NUM_CONST_BUFFERS];
81 LLVMValueRef resources[SI_NUM_SAMPLER_VIEWS];
82 LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
83 LLVMValueRef so_buffers[4];
84 LLVMValueRef esgs_ring;
85 LLVMValueRef gsvs_ring;
86 LLVMValueRef gs_next_vertex;
87 };
88
89 static struct si_shader_context * si_shader_context(
90 struct lp_build_tgsi_context * bld_base)
91 {
92 return (struct si_shader_context *)bld_base;
93 }
94
95
96 #define PERSPECTIVE_BASE 0
97 #define LINEAR_BASE 9
98
99 #define SAMPLE_OFFSET 0
100 #define CENTER_OFFSET 2
101 #define CENTROID_OFSET 4
102
103 #define USE_SGPR_MAX_SUFFIX_LEN 5
104 #define CONST_ADDR_SPACE 2
105 #define LOCAL_ADDR_SPACE 3
106 #define USER_SGPR_ADDR_SPACE 8
107
108
109 #define SENDMSG_GS 2
110 #define SENDMSG_GS_DONE 3
111
112 #define SENDMSG_GS_OP_NOP (0 << 4)
113 #define SENDMSG_GS_OP_CUT (1 << 4)
114 #define SENDMSG_GS_OP_EMIT (2 << 4)
115 #define SENDMSG_GS_OP_EMIT_CUT (3 << 4)
116
117 /**
118  * Returns a unique index for a semantic name and index. The returned index
119  * must be less than 64, so that a 64-bit bitmask of used inputs or outputs
120  * can be calculated.
121 */
122 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
123 {
124 switch (semantic_name) {
125 case TGSI_SEMANTIC_POSITION:
126 return 0;
127 case TGSI_SEMANTIC_PSIZE:
128 return 1;
129 case TGSI_SEMANTIC_CLIPDIST:
130 assert(index <= 1);
131 return 2 + index;
132 case TGSI_SEMANTIC_GENERIC:
133 assert(index <= 63-4);
134 return 4 + index;
135
136 default:
137 /* Don't fail here. The result of this function is only used
138 * for LS, TCS, TES, and GS, where legacy GL semantics can't
139 * occur, but this function is called for all vertex shaders
140 * before it's known whether LS will be compiled or not.
141 */
142 return 0;
143 }
144 }
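
/* Illustrative sketch (not part of the original file, kept compiled out):
 * how a caller might build the 64-bit usage mask that get_param_index()
 * below consumes. The tgsi_shader_info fields are real; the helper itself
 * is a hypothetical example.
 */
#if 0
static uint64_t build_usage_mask(const struct tgsi_shader_info *info)
{
	uint64_t mask = 0;
	unsigned i;

	for (i = 0; i < info->num_outputs; i++)
		mask |= 1llu << si_shader_io_get_unique_index(
					info->output_semantic_name[i],
					info->output_semantic_index[i]);
	return mask;
}
#endif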
145
146 /**
147 * Given a semantic name and index of a parameter and a mask of used parameters
148 * (inputs or outputs), return the index of the parameter in the list of all
149 * used parameters.
150 *
151 * For example, assume this list of parameters:
152 * POSITION, PSIZE, GENERIC0, GENERIC2
153 * which has the mask:
154  *   1010011
155 * Then:
156 * querying POSITION returns 0,
157 * querying PSIZE returns 1,
158 * querying GENERIC0 returns 2,
159 * querying GENERIC2 returns 3.
160 *
161  * The returned value can be used as an offset into a parameter buffer
162  * in units of vec4s.
162 */
163 static int get_param_index(unsigned semantic_name, unsigned index,
164 uint64_t mask)
165 {
166 unsigned unique_index = si_shader_io_get_unique_index(semantic_name, index);
167 int i, param_index = 0;
168
169 /* If not present... */
170 if (!((1llu << unique_index) & mask))
171 return -1;
172
173 for (i = 0; mask; i++) {
174 uint64_t bit = 1llu << i;
175
176 if (bit & mask) {
177 if (i == unique_index)
178 return param_index;
179
180 mask &= ~bit;
181 param_index++;
182 }
183 }
184
185 assert(!"unreachable");
186 return -1;
187 }
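
/* Worked example (sketch), using the mapping from si_shader_io_get_unique_index
 * above: POSITION, PSIZE, GENERIC0, GENERIC2 occupy unique indices 0, 1, 4, 6,
 * giving mask 0x53 (binary 1010011); querying GENERIC2 then yields 3.
 */
#if 0
assert(get_param_index(TGSI_SEMANTIC_GENERIC, 2, 0x53) == 3);
assert(get_param_index(TGSI_SEMANTIC_CLIPDIST, 0, 0x53) == -1); /* not present */
#endif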
188
189 /**
190 * Get the value of a shader input parameter and extract a bitfield.
191 */
192 static LLVMValueRef unpack_param(struct si_shader_context *si_shader_ctx,
193 unsigned param, unsigned rshift,
194 unsigned bitwidth)
195 {
196 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
197 LLVMValueRef value = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
198 param);
199
200 if (rshift)
201 value = LLVMBuildLShr(gallivm->builder, value,
202 lp_build_const_int32(gallivm, rshift), "");
203
204 if (rshift + bitwidth < 32) {
205 unsigned mask = (1 << bitwidth) - 1;
206 value = LLVMBuildAnd(gallivm->builder, value,
207 lp_build_const_int32(gallivm, mask), "");
208 }
209
210 return value;
211 }
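
/* Example from later in this file (see si_llvm_emit_streamout): reading the
 * streamout vertex count from bits [22:16] of the streamout config SGPR
 * boils down to
 *   unpack_param(ctx, ctx->param_streamout_config, 16, 7);
 * i.e. (value >> 16) & 0x7f.
 */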
212
213 /**
214  * Build an LLVM IR indexed load using LLVMBuildGEP + LLVMBuildLoad.
215 * It's equivalent to doing a load from &base_ptr[index].
216 *
217 * \param base_ptr Where the array starts.
218 * \param index The element index into the array.
219 */
220 static LLVMValueRef build_indexed_load(struct si_shader_context *si_shader_ctx,
221 LLVMValueRef base_ptr, LLVMValueRef index)
222 {
223 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
224 struct gallivm_state *gallivm = bld_base->base.gallivm;
225 LLVMValueRef indices[2], pointer;
226
227 indices[0] = bld_base->uint_bld.zero;
228 indices[1] = index;
229
230 pointer = LLVMBuildGEP(gallivm->builder, base_ptr, indices, 2, "");
231 return LLVMBuildLoad(gallivm->builder, pointer, "");
232 }
233
234 /**
235 * Do a load from &base_ptr[index], but also add a flag that it's loading
236 * a constant.
237 */
238 static LLVMValueRef build_indexed_load_const(
239 struct si_shader_context * si_shader_ctx,
240 LLVMValueRef base_ptr, LLVMValueRef index)
241 {
242 LLVMValueRef result = build_indexed_load(si_shader_ctx, base_ptr, index);
243 LLVMSetMetadata(result, 1, si_shader_ctx->const_md);
244 return result;
245 }
246
247 static LLVMValueRef get_instance_index_for_fetch(
248 struct radeon_llvm_context * radeon_bld,
249 unsigned divisor)
250 {
251 struct si_shader_context *si_shader_ctx =
252 si_shader_context(&radeon_bld->soa.bld_base);
253 struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm;
254
255 LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn,
256 si_shader_ctx->param_instance_id);
257
258 /* The division must be done before START_INSTANCE is added. */
259 if (divisor > 1)
260 result = LLVMBuildUDiv(gallivm->builder, result,
261 lp_build_const_int32(gallivm, divisor), "");
262
263 return LLVMBuildAdd(gallivm->builder, result, LLVMGetParam(
264 radeon_bld->main_fn, SI_PARAM_START_INSTANCE), "");
265 }
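
/* Example: with an instance divisor of 3, instances 0..2 fetch element
 * START_INSTANCE + 0, instances 3..5 fetch START_INSTANCE + 1, and so on,
 * which is why the division must happen before START_INSTANCE is added. */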
266
267 static void declare_input_vs(
268 struct radeon_llvm_context *radeon_bld,
269 unsigned input_index,
270 const struct tgsi_full_declaration *decl)
271 {
272 struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
273 struct gallivm_state *gallivm = base->gallivm;
274 struct si_shader_context *si_shader_ctx =
275 si_shader_context(&radeon_bld->soa.bld_base);
276 unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index];
277
278 unsigned chan;
279
280 LLVMValueRef t_list_ptr;
281 LLVMValueRef t_offset;
282 LLVMValueRef t_list;
283 LLVMValueRef attribute_offset;
284 LLVMValueRef buffer_index;
285 LLVMValueRef args[3];
286 LLVMTypeRef vec4_type;
287 LLVMValueRef input;
288
289 /* Load the T list */
290 t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);
291
292 t_offset = lp_build_const_int32(gallivm, input_index);
293
294 t_list = build_indexed_load_const(si_shader_ctx, t_list_ptr, t_offset);
295
296 /* Build the attribute offset */
297 attribute_offset = lp_build_const_int32(gallivm, 0);
298
299 if (divisor) {
300 /* Build index from instance ID, start instance and divisor */
301 si_shader_ctx->shader->uses_instanceid = true;
302 buffer_index = get_instance_index_for_fetch(&si_shader_ctx->radeon_bld, divisor);
303 } else {
304 /* Load the buffer index for vertices. */
305 LLVMValueRef vertex_id = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
306 si_shader_ctx->param_vertex_id);
307 LLVMValueRef base_vertex = LLVMGetParam(radeon_bld->main_fn,
308 SI_PARAM_BASE_VERTEX);
309 buffer_index = LLVMBuildAdd(gallivm->builder, base_vertex, vertex_id, "");
310 }
311
312 vec4_type = LLVMVectorType(base->elem_type, 4);
313 args[0] = t_list;
314 args[1] = attribute_offset;
315 args[2] = buffer_index;
316 input = build_intrinsic(gallivm->builder,
317 "llvm.SI.vs.load.input", vec4_type, args, 3,
318 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
319
320 /* Break up the vec4 into individual components */
321 for (chan = 0; chan < 4; chan++) {
322 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
323 /* XXX: Use a helper function for this. There is one in
324 * tgsi_llvm.c. */
325 si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
326 LLVMBuildExtractElement(gallivm->builder,
327 input, llvm_chan, "");
328 }
329 }
330
331 static LLVMValueRef fetch_input_gs(
332 struct lp_build_tgsi_context *bld_base,
333 const struct tgsi_full_src_register *reg,
334 enum tgsi_opcode_type type,
335 unsigned swizzle)
336 {
337 struct lp_build_context *base = &bld_base->base;
338 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
339 struct si_shader *shader = si_shader_ctx->shader;
340 struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
341 struct gallivm_state *gallivm = base->gallivm;
342 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
343 LLVMValueRef vtx_offset;
344 LLVMValueRef args[9];
345 unsigned vtx_offset_param;
346 struct tgsi_shader_info *info = &shader->selector->info;
347 unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
348 unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
349
350 if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID) {
351 if (swizzle == 0)
352 return LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
353 SI_PARAM_PRIMITIVE_ID);
354 else
355 return uint->zero;
356 }
357
358 if (!reg->Register.Dimension)
359 return NULL;
360
361 if (swizzle == ~0) {
362 LLVMValueRef values[TGSI_NUM_CHANNELS];
363 unsigned chan;
364 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
365 values[chan] = fetch_input_gs(bld_base, reg, type, chan);
366 }
367 return lp_build_gather_values(bld_base->base.gallivm, values,
368 TGSI_NUM_CHANNELS);
369 }
370
371 /* Get the vertex offset parameter */
372 vtx_offset_param = reg->Dimension.Index;
373 if (vtx_offset_param < 2) {
374 vtx_offset_param += SI_PARAM_VTX0_OFFSET;
375 } else {
376 assert(vtx_offset_param < 6);
377 vtx_offset_param += SI_PARAM_VTX2_OFFSET - 2;
378 }
379 vtx_offset = lp_build_mul_imm(uint,
380 LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
381 vtx_offset_param),
382 4);
383
384 args[0] = si_shader_ctx->esgs_ring;
385 args[1] = vtx_offset;
386 args[2] = lp_build_const_int32(gallivm,
387 (get_param_index(semantic_name, semantic_index,
388 shader->selector->gs_used_inputs) * 4 +
389 swizzle) * 256);
390 args[3] = uint->zero;
391 args[4] = uint->one; /* OFFEN */
392 args[5] = uint->zero; /* IDXEN */
393 args[6] = uint->one; /* GLC */
394 args[7] = uint->zero; /* SLC */
395 args[8] = uint->zero; /* TFE */
396
397 return LLVMBuildBitCast(gallivm->builder,
398 build_intrinsic(gallivm->builder,
399 "llvm.SI.buffer.load.dword.i32.i32",
400 i32, args, 9,
401 LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
402 tgsi2llvmtype(bld_base, type), "");
403 }
404
405 static void declare_input_fs(
406 struct radeon_llvm_context *radeon_bld,
407 unsigned input_index,
408 const struct tgsi_full_declaration *decl)
409 {
410 struct lp_build_context *base = &radeon_bld->soa.bld_base.base;
411 struct si_shader_context *si_shader_ctx =
412 si_shader_context(&radeon_bld->soa.bld_base);
413 struct si_shader *shader = si_shader_ctx->shader;
414 struct lp_build_context *uint = &radeon_bld->soa.bld_base.uint_bld;
415 struct gallivm_state *gallivm = base->gallivm;
416 LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
417 LLVMValueRef main_fn = radeon_bld->main_fn;
418
419 LLVMValueRef interp_param;
420 const char * intr_name;
421
422 /* This value is:
423 	 * [15:0] NewPrimMask (Bit mask for each quad. It is set if the
424 	 *        quad begins a new primitive. Bit 0 always needs
425 	 *        to be unset)
426 	 * [31:16] ParamOffset
427 *
428 */
429 LLVMValueRef params = LLVMGetParam(main_fn, SI_PARAM_PRIM_MASK);
430 LLVMValueRef attr_number;
431
432 unsigned chan;
433
434 if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
435 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
436 unsigned soa_index =
437 radeon_llvm_reg_index_soa(input_index, chan);
438 radeon_bld->inputs[soa_index] =
439 LLVMGetParam(main_fn, SI_PARAM_POS_X_FLOAT + chan);
440
441 if (chan == 3)
442 /* RCP for fragcoord.w */
443 radeon_bld->inputs[soa_index] =
444 LLVMBuildFDiv(gallivm->builder,
445 lp_build_const_float(gallivm, 1.0f),
446 radeon_bld->inputs[soa_index],
447 "");
448 }
449 return;
450 }
451
452 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
453 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
454 LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
455 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
456 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
457 lp_build_const_float(gallivm, 0.0f);
458 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
459 lp_build_const_float(gallivm, 1.0f);
460
461 return;
462 }
463
464 shader->ps_input_param_offset[input_index] = shader->nparam++;
465 attr_number = lp_build_const_int32(gallivm,
466 shader->ps_input_param_offset[input_index]);
467
468 switch (decl->Interp.Interpolate) {
469 case TGSI_INTERPOLATE_CONSTANT:
470 interp_param = 0;
471 break;
472 case TGSI_INTERPOLATE_LINEAR:
473 if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
474 interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_SAMPLE);
475 else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
476 interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID);
477 else
478 interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
479 break;
480 case TGSI_INTERPOLATE_COLOR:
481 case TGSI_INTERPOLATE_PERSPECTIVE:
482 if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
483 interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_SAMPLE);
484 else if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID)
485 interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
486 else
487 interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
488 break;
489 default:
490 fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
491 return;
492 }
493
494 /* fs.constant returns the param from the middle vertex, so it's not
495 * really useful for flat shading. It's meant to be used for custom
496 * interpolation (but the intrinsic can't fetch from the other two
497 * vertices).
498 *
499 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
500 * to do the right thing. The only reason we use fs.constant is that
501 	 * fs.interp cannot be used on integers, because their bit patterns
502 	 * can alias NaN when interpreted as floats.
503 */
504 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
505
506 if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
507 si_shader_ctx->shader->key.ps.color_two_side) {
508 LLVMValueRef args[4];
509 LLVMValueRef face, is_face_positive;
510 LLVMValueRef back_attr_number =
511 lp_build_const_int32(gallivm,
512 shader->ps_input_param_offset[input_index] + 1);
513
514 face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);
515
516 is_face_positive = LLVMBuildFCmp(gallivm->builder,
517 LLVMRealOGT, face,
518 lp_build_const_float(gallivm, 0.0f),
519 "");
520
521 args[2] = params;
522 args[3] = interp_param;
523 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
524 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
525 unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
526 LLVMValueRef front, back;
527
528 args[0] = llvm_chan;
529 args[1] = attr_number;
530 front = build_intrinsic(gallivm->builder, intr_name,
531 input_type, args, args[3] ? 4 : 3,
532 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
533
534 args[1] = back_attr_number;
535 back = build_intrinsic(gallivm->builder, intr_name,
536 input_type, args, args[3] ? 4 : 3,
537 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
538
539 radeon_bld->inputs[soa_index] =
540 LLVMBuildSelect(gallivm->builder,
541 is_face_positive,
542 front,
543 back,
544 "");
545 }
546
547 shader->nparam++;
548 } else if (decl->Semantic.Name == TGSI_SEMANTIC_FOG) {
549 LLVMValueRef args[4];
550
551 args[0] = uint->zero;
552 args[1] = attr_number;
553 args[2] = params;
554 args[3] = interp_param;
555 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
556 build_intrinsic(gallivm->builder, intr_name,
557 input_type, args, args[3] ? 4 : 3,
558 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
559 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
560 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
561 lp_build_const_float(gallivm, 0.0f);
562 radeon_bld->inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
563 lp_build_const_float(gallivm, 1.0f);
564 } else {
565 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
566 LLVMValueRef args[4];
567 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
568 unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
569 args[0] = llvm_chan;
570 args[1] = attr_number;
571 args[2] = params;
572 args[3] = interp_param;
573 radeon_bld->inputs[soa_index] =
574 build_intrinsic(gallivm->builder, intr_name,
575 input_type, args, args[3] ? 4 : 3,
576 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
577 }
578 }
579 }
580
581 static LLVMValueRef get_sample_id(struct radeon_llvm_context *radeon_bld)
582 {
583 return unpack_param(si_shader_context(&radeon_bld->soa.bld_base),
584 SI_PARAM_ANCILLARY, 8, 4);
585 }
586
587 /**
588 * Load a dword from a constant buffer.
589 */
590 static LLVMValueRef buffer_load_const(LLVMBuilderRef builder, LLVMValueRef resource,
591 LLVMValueRef offset, LLVMTypeRef return_type)
592 {
593 LLVMValueRef args[2] = {resource, offset};
594
595 return build_intrinsic(builder, "llvm.SI.load.const", return_type, args, 2,
596 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
597 }
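
/* Usage sketch: fetch two consecutive dwords, as declare_system_value()
 * below does for TGSI_SEMANTIC_SAMPLEPOS. "resource" is a loaded buffer
 * descriptor, the offsets are byte offsets, and "f32" stands in for the
 * float element type; all three names are placeholders.
 */
#if 0
LLVMValueRef x = buffer_load_const(builder, resource, offset0, f32);
LLVMValueRef y = buffer_load_const(builder, resource, offset1, f32);
#endif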
598
599 static void declare_system_value(
600 struct radeon_llvm_context * radeon_bld,
601 unsigned index,
602 const struct tgsi_full_declaration *decl)
603 {
604 struct si_shader_context *si_shader_ctx =
605 si_shader_context(&radeon_bld->soa.bld_base);
606 struct lp_build_context *uint_bld = &radeon_bld->soa.bld_base.uint_bld;
607 struct gallivm_state *gallivm = &radeon_bld->gallivm;
608 LLVMValueRef value = 0;
609
610 switch (decl->Semantic.Name) {
611 case TGSI_SEMANTIC_INSTANCEID:
612 value = LLVMGetParam(radeon_bld->main_fn,
613 si_shader_ctx->param_instance_id);
614 break;
615
616 case TGSI_SEMANTIC_VERTEXID:
617 value = LLVMBuildAdd(gallivm->builder,
618 LLVMGetParam(radeon_bld->main_fn,
619 si_shader_ctx->param_vertex_id),
620 LLVMGetParam(radeon_bld->main_fn,
621 SI_PARAM_BASE_VERTEX), "");
622 break;
623
624 case TGSI_SEMANTIC_VERTEXID_NOBASE:
625 value = LLVMGetParam(radeon_bld->main_fn,
626 si_shader_ctx->param_vertex_id);
627 break;
628
629 case TGSI_SEMANTIC_BASEVERTEX:
630 value = LLVMGetParam(radeon_bld->main_fn,
631 SI_PARAM_BASE_VERTEX);
632 break;
633
634 case TGSI_SEMANTIC_INVOCATIONID:
635 value = LLVMGetParam(radeon_bld->main_fn,
636 SI_PARAM_GS_INSTANCE_ID);
637 break;
638
639 case TGSI_SEMANTIC_SAMPLEID:
640 value = get_sample_id(radeon_bld);
641 break;
642
643 case TGSI_SEMANTIC_SAMPLEPOS:
644 {
645 LLVMBuilderRef builder = gallivm->builder;
646 LLVMValueRef desc = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
647 LLVMValueRef buf_index = lp_build_const_int32(gallivm, SI_DRIVER_STATE_CONST_BUF);
648 LLVMValueRef resource = build_indexed_load_const(si_shader_ctx, desc, buf_index);
649
650 		/* offset = sample_id * 8  (8 bytes = 2 floats containing samplepos.xy) */
651 LLVMValueRef offset0 = lp_build_mul_imm(uint_bld, get_sample_id(radeon_bld), 8);
652 LLVMValueRef offset1 = LLVMBuildAdd(builder, offset0, lp_build_const_int32(gallivm, 4), "");
653
654 LLVMValueRef pos[4] = {
655 buffer_load_const(builder, resource, offset0, radeon_bld->soa.bld_base.base.elem_type),
656 buffer_load_const(builder, resource, offset1, radeon_bld->soa.bld_base.base.elem_type),
657 lp_build_const_float(gallivm, 0),
658 lp_build_const_float(gallivm, 0)
659 };
660 value = lp_build_gather_values(gallivm, pos, 4);
661 break;
662 }
663
664 case TGSI_SEMANTIC_SAMPLEMASK:
665 /* Smoothing isn't MSAA in GL, but it's MSAA in hardware.
666 * Therefore, force gl_SampleMaskIn to 1 for GL. */
667 if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
668 value = uint_bld->one;
669 else
670 value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_SAMPLE_COVERAGE);
671 break;
672
673 default:
674 assert(!"unknown system value");
675 return;
676 }
677
678 radeon_bld->system_values[index] = value;
679 }
680
681 static LLVMValueRef fetch_constant(
682 struct lp_build_tgsi_context * bld_base,
683 const struct tgsi_full_src_register *reg,
684 enum tgsi_opcode_type type,
685 unsigned swizzle)
686 {
687 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
688 struct lp_build_context * base = &bld_base->base;
689 const struct tgsi_ind_register *ireg = &reg->Indirect;
690 unsigned buf, idx;
691
692 LLVMValueRef addr;
693 LLVMValueRef result;
694
695 if (swizzle == LP_CHAN_ALL) {
696 unsigned chan;
697 LLVMValueRef values[4];
698 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
699 values[chan] = fetch_constant(bld_base, reg, type, chan);
700
701 return lp_build_gather_values(bld_base->base.gallivm, values, 4);
702 }
703
704 buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
705 idx = reg->Register.Index * 4 + swizzle;
706
707 if (!reg->Register.Indirect) {
708 if (type != TGSI_TYPE_DOUBLE)
709 return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
710 else {
711 return radeon_llvm_emit_fetch_double(bld_base,
712 si_shader_ctx->constants[buf][idx],
713 si_shader_ctx->constants[buf][idx + 1]);
714 }
715 }
716
717 addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
718 addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
719 addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16);
720 addr = lp_build_add(&bld_base->uint_bld, addr,
721 lp_build_const_int32(base->gallivm, idx * 4));
722
723 result = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
724 addr, bld_base->base.elem_type);
725
726 if (type != TGSI_TYPE_DOUBLE)
727 result = bitcast(bld_base, type, result);
728 else {
729 LLVMValueRef addr2, result2;
730 addr2 = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
731 addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
732 addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
733 addr2 = lp_build_add(&bld_base->uint_bld, addr2,
734 lp_build_const_int32(base->gallivm, idx * 4));
735
736 result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
737 addr2, bld_base->base.elem_type);
738
739 result = radeon_llvm_emit_fetch_double(bld_base,
740 result, result2);
741 }
742 return result;
743 }
744
745 /* Initialize arguments for the shader export intrinsic */
746 static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
747 LLVMValueRef *values,
748 unsigned target,
749 LLVMValueRef *args)
750 {
751 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
752 struct lp_build_context *uint =
753 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
754 struct lp_build_context *base = &bld_base->base;
755 unsigned compressed = 0;
756 unsigned chan;
757
758 if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
759 int cbuf = target - V_008DFC_SQ_EXP_MRT;
760
761 if (cbuf >= 0 && cbuf < 8) {
762 compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1;
763
764 if (compressed)
765 si_shader_ctx->shader->spi_shader_col_format |=
766 V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf);
767 else
768 si_shader_ctx->shader->spi_shader_col_format |=
769 V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
770
771 si_shader_ctx->shader->cb_shader_mask |= 0xf << (4 * cbuf);
772 }
773 }
774
775 if (compressed) {
776 /* Pixel shader needs to pack output values before export */
777 for (chan = 0; chan < 2; chan++ ) {
778 args[0] = values[2 * chan];
779 args[1] = values[2 * chan + 1];
780 args[chan + 5] =
781 build_intrinsic(base->gallivm->builder,
782 "llvm.SI.packf16",
783 LLVMInt32TypeInContext(base->gallivm->context),
784 args, 2,
785 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
786 args[chan + 7] = args[chan + 5] =
787 LLVMBuildBitCast(base->gallivm->builder,
788 args[chan + 5],
789 LLVMFloatTypeInContext(base->gallivm->context),
790 "");
791 }
792
793 /* Set COMPR flag */
794 args[4] = uint->one;
795 } else {
796 for (chan = 0; chan < 4; chan++ )
797 /* +5 because the first output value will be
798 * the 6th argument to the intrinsic. */
799 args[chan + 5] = values[chan];
800
801 /* Clear COMPR flag */
802 args[4] = uint->zero;
803 }
804
805 /* XXX: This controls which components of the output
806 	 * registers actually get exported. (e.g. bit 0 means export
807 	 * X component, bit 1 means export Y component, etc.) I'm
808 	 * hardcoding this to 0xf for now. In the future, we might
809 * want to do something else. */
810 args[0] = lp_build_const_int32(base->gallivm, 0xf);
811
812 /* Specify whether the EXEC mask represents the valid mask */
813 args[1] = uint->zero;
814
815 /* Specify whether this is the last export */
816 args[2] = uint->zero;
817
818 /* Specify the target we are exporting */
819 args[3] = lp_build_const_int32(base->gallivm, target);
820
821 /* XXX: We probably need to keep track of the output
822 * values, so we know what we are passing to the next
823 * stage. */
824 }
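
/* For reference, the llvm.SI.export argument layout that the code above
 * fills in (derived from the assignments in si_llvm_init_export_args):
 *   args[0]    = component writemask (0xf = xyzw)
 *   args[1]    = whether the EXEC mask represents the valid mask
 *   args[2]    = "last export" flag
 *   args[3]    = export target (MRT/MRTZ/POS/PARAM)
 *   args[4]    = COMPR flag (packed fp16 data)
 *   args[5..8] = the four output values (two packed values if COMPR)
 */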
825
826 /* Load from output pointers and initialize arguments for the shader export intrinsic */
827 static void si_llvm_init_export_args_load(struct lp_build_tgsi_context *bld_base,
828 LLVMValueRef *out_ptr,
829 unsigned target,
830 LLVMValueRef *args)
831 {
832 struct gallivm_state *gallivm = bld_base->base.gallivm;
833 LLVMValueRef values[4];
834 int i;
835
836 for (i = 0; i < 4; i++)
837 values[i] = LLVMBuildLoad(gallivm->builder, out_ptr[i], "");
838
839 si_llvm_init_export_args(bld_base, values, target, args);
840 }
841
842 static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
843 LLVMValueRef alpha_ptr)
844 {
845 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
846 struct gallivm_state *gallivm = bld_base->base.gallivm;
847
848 if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) {
849 LLVMValueRef alpha_ref = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
850 SI_PARAM_ALPHA_REF);
851
852 LLVMValueRef alpha_pass =
853 lp_build_cmp(&bld_base->base,
854 si_shader_ctx->shader->key.ps.alpha_func,
855 LLVMBuildLoad(gallivm->builder, alpha_ptr, ""),
856 alpha_ref);
857 LLVMValueRef arg =
858 lp_build_select(&bld_base->base,
859 alpha_pass,
860 lp_build_const_float(gallivm, 1.0f),
861 lp_build_const_float(gallivm, -1.0f));
862
863 build_intrinsic(gallivm->builder,
864 "llvm.AMDGPU.kill",
865 LLVMVoidTypeInContext(gallivm->context),
866 &arg, 1, 0);
867 } else {
868 build_intrinsic(gallivm->builder,
869 "llvm.AMDGPU.kilp",
870 LLVMVoidTypeInContext(gallivm->context),
871 NULL, 0, 0);
872 }
873
874 si_shader_ctx->shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
875 }
876
877 static void si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *bld_base,
878 LLVMValueRef alpha_ptr)
879 {
880 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
881 struct gallivm_state *gallivm = bld_base->base.gallivm;
882 LLVMValueRef coverage, alpha;
883
884 /* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */
885 coverage = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
886 SI_PARAM_SAMPLE_COVERAGE);
887 coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
888
889 coverage = build_intrinsic(gallivm->builder, "llvm.ctpop.i32",
890 bld_base->int_bld.elem_type,
891 &coverage, 1, LLVMReadNoneAttribute);
892
893 coverage = LLVMBuildUIToFP(gallivm->builder, coverage,
894 bld_base->base.elem_type, "");
895
896 coverage = LLVMBuildFMul(gallivm->builder, coverage,
897 lp_build_const_float(gallivm,
898 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");
899
900 alpha = LLVMBuildLoad(gallivm->builder, alpha_ptr, "");
901 alpha = LLVMBuildFMul(gallivm->builder, alpha, coverage, "");
902 LLVMBuildStore(gallivm->builder, alpha, alpha_ptr);
903 }
904
905 static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base,
906 LLVMValueRef (*pos)[9], LLVMValueRef *out_elts)
907 {
908 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
909 struct lp_build_context *base = &bld_base->base;
910 struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
911 unsigned reg_index;
912 unsigned chan;
913 unsigned const_chan;
914 LLVMValueRef base_elt;
915 LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
916 LLVMValueRef constbuf_index = lp_build_const_int32(base->gallivm, SI_DRIVER_STATE_CONST_BUF);
917 LLVMValueRef const_resource = build_indexed_load_const(si_shader_ctx, ptr, constbuf_index);
918
919 for (reg_index = 0; reg_index < 2; reg_index ++) {
920 LLVMValueRef *args = pos[2 + reg_index];
921
922 args[5] =
923 args[6] =
924 args[7] =
925 args[8] = lp_build_const_float(base->gallivm, 0.0f);
926
927 /* Compute dot products of position and user clip plane vectors */
928 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
929 for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) {
930 args[1] = lp_build_const_int32(base->gallivm,
931 ((reg_index * 4 + chan) * 4 +
932 const_chan) * 4);
933 base_elt = buffer_load_const(base->gallivm->builder, const_resource,
934 args[1], base->elem_type);
935 args[5 + chan] =
936 lp_build_add(base, args[5 + chan],
937 lp_build_mul(base, base_elt,
938 out_elts[const_chan]));
939 }
940 }
941
942 args[0] = lp_build_const_int32(base->gallivm, 0xf);
943 args[1] = uint->zero;
944 args[2] = uint->zero;
945 args[3] = lp_build_const_int32(base->gallivm,
946 V_008DFC_SQ_EXP_POS + 2 + reg_index);
947 args[4] = uint->zero;
948 }
949 }
950
951 static void si_dump_streamout(struct pipe_stream_output_info *so)
952 {
953 unsigned i;
954
955 if (so->num_outputs)
956 fprintf(stderr, "STREAMOUT\n");
957
958 for (i = 0; i < so->num_outputs; i++) {
959 unsigned mask = ((1 << so->output[i].num_components) - 1) <<
960 so->output[i].start_component;
961 fprintf(stderr, " %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n",
962 i, so->output[i].output_buffer,
963 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1,
964 so->output[i].register_index,
965 mask & 1 ? "x" : "",
966 mask & 2 ? "y" : "",
967 mask & 4 ? "z" : "",
968 mask & 8 ? "w" : "");
969 }
970 }
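
/* Example of the dump format produced above (illustrative values):
 *   STREAMOUT
 *    0: BUF0[0..3] <- OUT[2].xyzw
 *    1: BUF1[4..5] <- OUT[3].xy
 */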
971
972 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
973 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
974 * or v4i32 (num_channels=3,4). */
975 static void build_tbuffer_store(struct si_shader_context *shader,
976 LLVMValueRef rsrc,
977 LLVMValueRef vdata,
978 unsigned num_channels,
979 LLVMValueRef vaddr,
980 LLVMValueRef soffset,
981 unsigned inst_offset,
982 unsigned dfmt,
983 unsigned nfmt,
984 unsigned offen,
985 unsigned idxen,
986 unsigned glc,
987 unsigned slc,
988 unsigned tfe)
989 {
990 struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
991 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
992 LLVMValueRef args[] = {
993 rsrc,
994 vdata,
995 LLVMConstInt(i32, num_channels, 0),
996 vaddr,
997 soffset,
998 LLVMConstInt(i32, inst_offset, 0),
999 LLVMConstInt(i32, dfmt, 0),
1000 LLVMConstInt(i32, nfmt, 0),
1001 LLVMConstInt(i32, offen, 0),
1002 LLVMConstInt(i32, idxen, 0),
1003 LLVMConstInt(i32, glc, 0),
1004 LLVMConstInt(i32, slc, 0),
1005 LLVMConstInt(i32, tfe, 0)
1006 };
1007
1008 /* The instruction offset field has 12 bits */
1009 assert(offen || inst_offset < (1 << 12));
1010
1011 /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
1012 unsigned func = CLAMP(num_channels, 1, 3) - 1;
1013 const char *types[] = {"i32", "v2i32", "v4i32"};
1014 char name[256];
1015 snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
1016
1017 lp_build_intrinsic(gallivm->builder, name,
1018 LLVMVoidTypeInContext(gallivm->context),
1019 args, Elements(args));
1020 }
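
/* Call sketch (mirrors the ESGS-ring store in si_llvm_emit_es_epilogue
 * below): write one dword at a constant instruction offset with OFFEN=0,
 * IDXEN=0, GLC=1, SLC=1, TFE=0. "ctx", "rsrc", "value", "i32" and
 * "soffset" are placeholders.
 */
#if 0
build_tbuffer_store(ctx, rsrc, value, 1,
		    LLVMGetUndef(i32), soffset,
		    /* inst_offset */ 16,
		    V_008F0C_BUF_DATA_FORMAT_32,
		    V_008F0C_BUF_NUM_FORMAT_UINT,
		    0, 0, 1, 1, 0);
#endif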
1021
1022 static void build_streamout_store(struct si_shader_context *shader,
1023 LLVMValueRef rsrc,
1024 LLVMValueRef vdata,
1025 unsigned num_channels,
1026 LLVMValueRef vaddr,
1027 LLVMValueRef soffset,
1028 unsigned inst_offset)
1029 {
1030 static unsigned dfmt[] = {
1031 V_008F0C_BUF_DATA_FORMAT_32,
1032 V_008F0C_BUF_DATA_FORMAT_32_32,
1033 V_008F0C_BUF_DATA_FORMAT_32_32_32,
1034 V_008F0C_BUF_DATA_FORMAT_32_32_32_32
1035 };
1036 assert(num_channels >= 1 && num_channels <= 4);
1037
1038 build_tbuffer_store(shader, rsrc, vdata, num_channels, vaddr, soffset,
1039 inst_offset, dfmt[num_channels-1],
1040 V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
1041 }
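
/* Note on the dfmt table above: num_channels=3 selects DATA_FORMAT_32_32_32,
 * so only three dwords reach memory even though the vdata operand must be a
 * v4i32 (see the "aligned to 4" packing in si_llvm_emit_streamout below). */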
1042
1043 /* On SI, the vertex shader is responsible for writing streamout data
1044 * to buffers. */
1045 static void si_llvm_emit_streamout(struct si_shader_context *shader,
1046 struct si_shader_output_values *outputs,
1047 unsigned noutput)
1048 {
1049 struct pipe_stream_output_info *so = &shader->shader->selector->so;
1050 struct gallivm_state *gallivm = &shader->radeon_bld.gallivm;
1051 LLVMBuilderRef builder = gallivm->builder;
1052 int i, j;
1053 struct lp_build_if_state if_ctx;
1054
1055 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
1056
1057 /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
1058 LLVMValueRef so_vtx_count =
1059 unpack_param(shader, shader->param_streamout_config, 16, 7);
1060
1061 LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32,
1062 NULL, 0, LLVMReadNoneAttribute);
1063
1064 /* can_emit = tid < so_vtx_count; */
1065 LLVMValueRef can_emit =
1066 LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
1067
1068 /* Emit the streamout code conditionally. This actually avoids
1069 * out-of-bounds buffer access. The hw tells us via the SGPR
1070 * (so_vtx_count) which threads are allowed to emit streamout data. */
1071 lp_build_if(&if_ctx, gallivm, can_emit);
1072 {
1073 /* The buffer offset is computed as follows:
1074 * ByteOffset = streamout_offset[buffer_id]*4 +
1075 * (streamout_write_index + thread_id)*stride[buffer_id] +
1076 * attrib_offset
1077 */
1078
1079 LLVMValueRef so_write_index =
1080 LLVMGetParam(shader->radeon_bld.main_fn,
1081 shader->param_streamout_write_index);
1082
1083 /* Compute (streamout_write_index + thread_id). */
1084 so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");
1085
1086 /* Compute the write offset for each enabled buffer. */
1087 LLVMValueRef so_write_offset[4] = {};
1088 for (i = 0; i < 4; i++) {
1089 if (!so->stride[i])
1090 continue;
1091
1092 LLVMValueRef so_offset = LLVMGetParam(shader->radeon_bld.main_fn,
1093 shader->param_streamout_offset[i]);
1094 so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(i32, 4, 0), "");
1095
1096 so_write_offset[i] = LLVMBuildMul(builder, so_write_index,
1097 LLVMConstInt(i32, so->stride[i]*4, 0), "");
1098 so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, "");
1099 }
1100
1101 /* Write streamout data. */
1102 for (i = 0; i < so->num_outputs; i++) {
1103 unsigned buf_idx = so->output[i].output_buffer;
1104 unsigned reg = so->output[i].register_index;
1105 unsigned start = so->output[i].start_component;
1106 unsigned num_comps = so->output[i].num_components;
1107 LLVMValueRef out[4];
1108
1109 assert(num_comps && num_comps <= 4);
1110 if (!num_comps || num_comps > 4)
1111 continue;
1112
1113 if (reg >= noutput)
1114 continue;
1115
1116 /* Load the output as int. */
1117 for (j = 0; j < num_comps; j++) {
1118 out[j] = LLVMBuildBitCast(builder,
1119 outputs[reg].values[start+j],
1120 i32, "");
1121 }
1122
1123 /* Pack the output. */
1124 LLVMValueRef vdata = NULL;
1125
1126 switch (num_comps) {
1127 case 1: /* as i32 */
1128 vdata = out[0];
1129 break;
1130 case 2: /* as v2i32 */
1131 case 3: /* as v4i32 (aligned to 4) */
1132 case 4: /* as v4i32 */
1133 vdata = LLVMGetUndef(LLVMVectorType(i32, util_next_power_of_two(num_comps)));
1134 for (j = 0; j < num_comps; j++) {
1135 vdata = LLVMBuildInsertElement(builder, vdata, out[j],
1136 LLVMConstInt(i32, j, 0), "");
1137 }
1138 break;
1139 }
1140
1141 build_streamout_store(shader, shader->so_buffers[buf_idx],
1142 vdata, num_comps,
1143 so_write_offset[buf_idx],
1144 LLVMConstInt(i32, 0, 0),
1145 so->output[i].dst_offset*4);
1146 }
1147 }
1148 lp_build_endif(&if_ctx);
1149 }
1150
1151
1152 /* Generate export instructions for hardware VS shader stage */
1153 static void si_llvm_export_vs(struct lp_build_tgsi_context *bld_base,
1154 struct si_shader_output_values *outputs,
1155 unsigned noutput)
1156 {
1157 struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
1158 struct si_shader * shader = si_shader_ctx->shader;
1159 struct lp_build_context * base = &bld_base->base;
1160 struct lp_build_context * uint =
1161 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
1162 LLVMValueRef args[9];
1163 LLVMValueRef pos_args[4][9] = { { 0 } };
1164 LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL, viewport_index_value = NULL;
1165 unsigned semantic_name, semantic_index;
1166 unsigned target;
1167 unsigned param_count = 0;
1168 unsigned pos_idx;
1169 int i;
1170
1171 if (outputs && si_shader_ctx->shader->selector->so.num_outputs) {
1172 si_llvm_emit_streamout(si_shader_ctx, outputs, noutput);
1173 }
1174
1175 for (i = 0; i < noutput; i++) {
1176 semantic_name = outputs[i].name;
1177 semantic_index = outputs[i].sid;
1178
1179 handle_semantic:
1180 /* Select the correct target */
1181 switch(semantic_name) {
1182 case TGSI_SEMANTIC_PSIZE:
1183 psize_value = outputs[i].values[0];
1184 continue;
1185 case TGSI_SEMANTIC_EDGEFLAG:
1186 edgeflag_value = outputs[i].values[0];
1187 continue;
1188 case TGSI_SEMANTIC_LAYER:
1189 layer_value = outputs[i].values[0];
1190 semantic_name = TGSI_SEMANTIC_GENERIC;
1191 goto handle_semantic;
1192 case TGSI_SEMANTIC_VIEWPORT_INDEX:
1193 viewport_index_value = outputs[i].values[0];
1194 semantic_name = TGSI_SEMANTIC_GENERIC;
1195 goto handle_semantic;
1196 case TGSI_SEMANTIC_POSITION:
1197 target = V_008DFC_SQ_EXP_POS;
1198 break;
1199 case TGSI_SEMANTIC_COLOR:
1200 case TGSI_SEMANTIC_BCOLOR:
1201 target = V_008DFC_SQ_EXP_PARAM + param_count;
1202 shader->vs_output_param_offset[i] = param_count;
1203 param_count++;
1204 break;
1205 case TGSI_SEMANTIC_CLIPDIST:
1206 target = V_008DFC_SQ_EXP_POS + 2 + semantic_index;
1207 break;
1208 case TGSI_SEMANTIC_CLIPVERTEX:
1209 si_llvm_emit_clipvertex(bld_base, pos_args, outputs[i].values);
1210 continue;
1211 case TGSI_SEMANTIC_PRIMID:
1212 case TGSI_SEMANTIC_FOG:
1213 case TGSI_SEMANTIC_TEXCOORD:
1214 case TGSI_SEMANTIC_GENERIC:
1215 target = V_008DFC_SQ_EXP_PARAM + param_count;
1216 shader->vs_output_param_offset[i] = param_count;
1217 param_count++;
1218 break;
1219 default:
1220 target = 0;
1221 fprintf(stderr,
1222 "Warning: SI unhandled vs output type:%d\n",
1223 semantic_name);
1224 }
1225
1226 si_llvm_init_export_args(bld_base, outputs[i].values, target, args);
1227
1228 if (target >= V_008DFC_SQ_EXP_POS &&
1229 target <= (V_008DFC_SQ_EXP_POS + 3)) {
1230 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
1231 args, sizeof(args));
1232 } else {
1233 lp_build_intrinsic(base->gallivm->builder,
1234 "llvm.SI.export",
1235 LLVMVoidTypeInContext(base->gallivm->context),
1236 args, 9);
1237 }
1238
1239 if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
1240 semantic_name = TGSI_SEMANTIC_GENERIC;
1241 goto handle_semantic;
1242 }
1243 }
1244
1245 shader->nr_param_exports = param_count;
1246
1247 /* We need to add the position output manually if it's missing. */
1248 if (!pos_args[0][0]) {
1249 pos_args[0][0] = lp_build_const_int32(base->gallivm, 0xf); /* writemask */
1250 pos_args[0][1] = uint->zero; /* EXEC mask */
1251 pos_args[0][2] = uint->zero; /* last export? */
1252 pos_args[0][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS);
1253 pos_args[0][4] = uint->zero; /* COMPR flag */
1254 pos_args[0][5] = base->zero; /* X */
1255 pos_args[0][6] = base->zero; /* Y */
1256 pos_args[0][7] = base->zero; /* Z */
1257 pos_args[0][8] = base->one; /* W */
1258 }
1259
1260 /* Write the misc vector (point size, edgeflag, layer, viewport). */
1261 if (shader->selector->info.writes_psize ||
1262 shader->selector->info.writes_edgeflag ||
1263 shader->selector->info.writes_viewport_index ||
1264 shader->selector->info.writes_layer) {
1265 pos_args[1][0] = lp_build_const_int32(base->gallivm, /* writemask */
1266 shader->selector->info.writes_psize |
1267 (shader->selector->info.writes_edgeflag << 1) |
1268 (shader->selector->info.writes_layer << 2) |
1269 (shader->selector->info.writes_viewport_index << 3));
1270 pos_args[1][1] = uint->zero; /* EXEC mask */
1271 pos_args[1][2] = uint->zero; /* last export? */
1272 pos_args[1][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + 1);
1273 pos_args[1][4] = uint->zero; /* COMPR flag */
1274 pos_args[1][5] = base->zero; /* X */
1275 pos_args[1][6] = base->zero; /* Y */
1276 pos_args[1][7] = base->zero; /* Z */
1277 pos_args[1][8] = base->zero; /* W */
1278
1279 if (shader->selector->info.writes_psize)
1280 pos_args[1][5] = psize_value;
1281
1282 if (shader->selector->info.writes_edgeflag) {
1283 /* The output is a float, but the hw expects an integer
1284 * with the first bit containing the edge flag. */
1285 edgeflag_value = LLVMBuildFPToUI(base->gallivm->builder,
1286 edgeflag_value,
1287 bld_base->uint_bld.elem_type, "");
1288 edgeflag_value = lp_build_min(&bld_base->int_bld,
1289 edgeflag_value,
1290 bld_base->int_bld.one);
1291
1292 /* The LLVM intrinsic expects a float. */
1293 pos_args[1][6] = LLVMBuildBitCast(base->gallivm->builder,
1294 edgeflag_value,
1295 base->elem_type, "");
1296 }
1297
1298 if (shader->selector->info.writes_layer)
1299 pos_args[1][7] = layer_value;
1300
1301 if (shader->selector->info.writes_viewport_index)
1302 pos_args[1][8] = viewport_index_value;
1303 }
1304
1305 for (i = 0; i < 4; i++)
1306 if (pos_args[i][0])
1307 shader->nr_pos_exports++;
1308
1309 pos_idx = 0;
1310 for (i = 0; i < 4; i++) {
1311 if (!pos_args[i][0])
1312 continue;
1313
1314 /* Specify the target we are exporting */
1315 pos_args[i][3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_POS + pos_idx++);
1316
1317 if (pos_idx == shader->nr_pos_exports)
1318 /* Specify that this is the last export */
1319 pos_args[i][2] = uint->one;
1320
1321 lp_build_intrinsic(base->gallivm->builder,
1322 "llvm.SI.export",
1323 LLVMVoidTypeInContext(base->gallivm->context),
1324 pos_args[i], 9);
1325 }
1326 }
1327
1328 static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
1329 {
1330 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1331 struct gallivm_state *gallivm = bld_base->base.gallivm;
1332 struct si_shader *es = si_shader_ctx->shader;
1333 struct tgsi_shader_info *info = &es->selector->info;
1334 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
1335 LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
1336 si_shader_ctx->param_es2gs_offset);
1337 unsigned chan;
1338 int i;
1339
1340 for (i = 0; i < info->num_outputs; i++) {
1341 LLVMValueRef *out_ptr =
1342 si_shader_ctx->radeon_bld.soa.outputs[i];
1343 int param_index;
1344
1345 if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
1346 info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
1347 continue;
1348
1349 param_index = get_param_index(info->output_semantic_name[i],
1350 info->output_semantic_index[i],
1351 es->key.vs.gs_used_inputs);
1352 if (param_index < 0)
1353 continue;
1354
1355 for (chan = 0; chan < 4; chan++) {
1356 LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
1357 out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
1358
1359 build_tbuffer_store(si_shader_ctx,
1360 si_shader_ctx->esgs_ring,
1361 out_val, 1,
1362 LLVMGetUndef(i32), soffset,
1363 (4 * param_index + chan) * 4,
1364 V_008F0C_BUF_DATA_FORMAT_32,
1365 V_008F0C_BUF_NUM_FORMAT_UINT,
1366 0, 0, 1, 1, 0);
1367 }
1368 }
1369 }
1370
1371 static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
1372 {
1373 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1374 struct gallivm_state *gallivm = bld_base->base.gallivm;
1375 LLVMValueRef args[2];
1376
1377 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_NOP | SENDMSG_GS_DONE);
1378 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
1379 build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
1380 LLVMVoidTypeInContext(gallivm->context), args, 2,
1381 LLVMNoUnwindAttribute);
1382 }
1383
1384 static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context * bld_base)
1385 {
1386 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1387 struct gallivm_state *gallivm = bld_base->base.gallivm;
1388 struct tgsi_shader_info *info = &si_shader_ctx->shader->selector->info;
1389 struct si_shader_output_values *outputs = NULL;
1390 int i,j;
1391
1392 outputs = MALLOC(info->num_outputs * sizeof(outputs[0]));
1393
1394 for (i = 0; i < info->num_outputs; i++) {
1395 outputs[i].name = info->output_semantic_name[i];
1396 outputs[i].sid = info->output_semantic_index[i];
1397
1398 for (j = 0; j < 4; j++)
1399 outputs[i].values[j] =
1400 LLVMBuildLoad(gallivm->builder,
1401 si_shader_ctx->radeon_bld.soa.outputs[i][j],
1402 "");
1403 }
1404
1405 si_llvm_export_vs(bld_base, outputs, info->num_outputs);
1406 FREE(outputs);
1407 }
1408
1409 static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
1410 {
1411 struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
1412 struct si_shader * shader = si_shader_ctx->shader;
1413 struct lp_build_context * base = &bld_base->base;
1414 struct lp_build_context * uint = &bld_base->uint_bld;
1415 struct tgsi_shader_info *info = &shader->selector->info;
1416 LLVMValueRef args[9];
1417 LLVMValueRef last_args[9] = { 0 };
1418 int depth_index = -1, stencil_index = -1, samplemask_index = -1;
1419 int i;
1420
1421 for (i = 0; i < info->num_outputs; i++) {
1422 unsigned semantic_name = info->output_semantic_name[i];
1423 unsigned semantic_index = info->output_semantic_index[i];
1424 unsigned target;
1425 LLVMValueRef alpha_ptr;
1426
1427 /* Select the correct target */
1428 switch (semantic_name) {
1429 case TGSI_SEMANTIC_POSITION:
1430 depth_index = i;
1431 continue;
1432 case TGSI_SEMANTIC_STENCIL:
1433 stencil_index = i;
1434 continue;
1435 case TGSI_SEMANTIC_SAMPLEMASK:
1436 samplemask_index = i;
1437 continue;
1438 case TGSI_SEMANTIC_COLOR:
1439 target = V_008DFC_SQ_EXP_MRT + semantic_index;
1440 alpha_ptr = si_shader_ctx->radeon_bld.soa.outputs[i][3];
1441
1442 if (si_shader_ctx->shader->key.ps.alpha_to_one)
1443 LLVMBuildStore(base->gallivm->builder,
1444 base->one, alpha_ptr);
1445
1446 if (semantic_index == 0 &&
1447 si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
1448 si_alpha_test(bld_base, alpha_ptr);
1449
1450 if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
1451 si_scale_alpha_by_sample_mask(bld_base, alpha_ptr);
1452 break;
1453 default:
1454 target = 0;
1455 fprintf(stderr,
1456 "Warning: SI unhandled fs output type:%d\n",
1457 semantic_name);
1458 }
1459
1460 si_llvm_init_export_args_load(bld_base,
1461 si_shader_ctx->radeon_bld.soa.outputs[i],
1462 target, args);
1463
1464 if (semantic_name == TGSI_SEMANTIC_COLOR) {
1465 /* If there is an export instruction waiting to be emitted, do so now. */
1466 if (last_args[0]) {
1467 lp_build_intrinsic(base->gallivm->builder,
1468 "llvm.SI.export",
1469 LLVMVoidTypeInContext(base->gallivm->context),
1470 last_args, 9);
1471 }
1472
1473 /* This instruction will be emitted at the end of the shader. */
1474 memcpy(last_args, args, sizeof(args));
1475
1476 /* Handle FS_COLOR0_WRITES_ALL_CBUFS. */
1477 if (shader->selector->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
1478 semantic_index == 0 &&
1479 si_shader_ctx->shader->key.ps.last_cbuf > 0) {
1480 for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
1481 si_llvm_init_export_args_load(bld_base,
1482 si_shader_ctx->radeon_bld.soa.outputs[i],
1483 V_008DFC_SQ_EXP_MRT + c, args);
1484 lp_build_intrinsic(base->gallivm->builder,
1485 "llvm.SI.export",
1486 LLVMVoidTypeInContext(base->gallivm->context),
1487 args, 9);
1488 }
1489 }
1490 } else {
1491 lp_build_intrinsic(base->gallivm->builder,
1492 "llvm.SI.export",
1493 LLVMVoidTypeInContext(base->gallivm->context),
1494 args, 9);
1495 }
1496 }
1497
1498 if (depth_index >= 0 || stencil_index >= 0 || samplemask_index >= 0) {
1499 LLVMValueRef out_ptr;
1500 unsigned mask = 0;
1501
1502 /* Specify the target we are exporting */
1503 args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);
1504
1505 args[5] = base->zero; /* R, depth */
1506 args[6] = base->zero; /* G, stencil test value[0:7], stencil op value[8:15] */
1507 args[7] = base->zero; /* B, sample mask */
1508 args[8] = base->zero; /* A, alpha to mask */
1509
1510 if (depth_index >= 0) {
1511 out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
1512 args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
1513 mask |= 0x1;
1514 si_shader_ctx->shader->db_shader_control |= S_02880C_Z_EXPORT_ENABLE(1);
1515 }
1516
1517 if (stencil_index >= 0) {
1518 out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
1519 args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
1520 mask |= 0x2;
1521 si_shader_ctx->shader->db_shader_control |=
1522 S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1);
1523 }
1524
1525 if (samplemask_index >= 0) {
1526 out_ptr = si_shader_ctx->radeon_bld.soa.outputs[samplemask_index][0];
1527 args[7] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
1528 mask |= 0x4;
1529 si_shader_ctx->shader->db_shader_control |= S_02880C_MASK_EXPORT_ENABLE(1);
1530 }
1531
1532 		/* SI (except OLAND) has a bug where it only looks
1533 		 * at the X writemask component. */
1534 if (si_shader_ctx->screen->b.chip_class == SI &&
1535 si_shader_ctx->screen->b.family != CHIP_OLAND)
1536 mask |= 0x1;
1537
1538 if (samplemask_index >= 0)
1539 si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_ABGR;
1540 else if (stencil_index >= 0)
1541 si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_GR;
1542 else
1543 si_shader_ctx->shader->spi_shader_z_format = V_028710_SPI_SHADER_32_R;
1544
1545 /* Specify which components to enable */
1546 args[0] = lp_build_const_int32(base->gallivm, mask);
1547
1548 args[1] =
1549 args[2] =
1550 args[4] = uint->zero;
1551
1552 if (last_args[0])
1553 lp_build_intrinsic(base->gallivm->builder,
1554 "llvm.SI.export",
1555 LLVMVoidTypeInContext(base->gallivm->context),
1556 args, 9);
1557 else
1558 memcpy(last_args, args, sizeof(args));
1559 }
1560
1561 if (!last_args[0]) {
1562 /* Specify which components to enable */
1563 last_args[0] = lp_build_const_int32(base->gallivm, 0x0);
1564
1565 /* Specify the target we are exporting */
1566 last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
1567
1568 /* Set COMPR flag to zero to export data as 32-bit */
1569 last_args[4] = uint->zero;
1570
1571 /* dummy bits */
1572 last_args[5]= uint->zero;
1573 last_args[6]= uint->zero;
1574 last_args[7]= uint->zero;
1575 last_args[8]= uint->zero;
1576 }
1577
1578 /* Specify whether the EXEC mask represents the valid mask */
1579 last_args[1] = uint->one;
1580
1581 /* Specify that this is the last export */
1582 last_args[2] = lp_build_const_int32(base->gallivm, 1);
1583
1584 lp_build_intrinsic(base->gallivm->builder,
1585 "llvm.SI.export",
1586 LLVMVoidTypeInContext(base->gallivm->context),
1587 last_args, 9);
1588 }
1589
1590 static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
1591 struct lp_build_tgsi_context * bld_base,
1592 struct lp_build_emit_data * emit_data);
1593
1594 static bool tgsi_is_shadow_sampler(unsigned target)
1595 {
1596 return target == TGSI_TEXTURE_SHADOW1D ||
1597 target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
1598 target == TGSI_TEXTURE_SHADOW2D ||
1599 target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
1600 target == TGSI_TEXTURE_SHADOWCUBE ||
1601 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY ||
1602 target == TGSI_TEXTURE_SHADOWRECT;
1603 }
1604
1605 static const struct lp_build_tgsi_action tex_action;
1606
1607 static void tex_fetch_args(
1608 struct lp_build_tgsi_context * bld_base,
1609 struct lp_build_emit_data * emit_data)
1610 {
1611 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
1612 struct gallivm_state *gallivm = bld_base->base.gallivm;
1613 const struct tgsi_full_instruction * inst = emit_data->inst;
1614 unsigned opcode = inst->Instruction.Opcode;
1615 unsigned target = inst->Texture.Texture;
1616 LLVMValueRef coords[5];
1617 LLVMValueRef address[16];
1618 int ref_pos;
1619 unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos);
1620 unsigned count = 0;
1621 unsigned chan;
1622 unsigned sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
1623 unsigned sampler_index = emit_data->inst->Src[sampler_src].Register.Index;
1624 bool has_offset = HAVE_LLVM >= 0x0305 ? inst->Texture.NumOffsets > 0 : false;
1625
1626 if (target == TGSI_TEXTURE_BUFFER) {
1627 LLVMTypeRef i128 = LLVMIntTypeInContext(gallivm->context, 128);
1628 LLVMTypeRef v2i128 = LLVMVectorType(i128, 2);
1629 LLVMTypeRef i8 = LLVMInt8TypeInContext(gallivm->context);
1630 LLVMTypeRef v16i8 = LLVMVectorType(i8, 16);
1631
1632 /* Bitcast and truncate v8i32 to v16i8. */
1633 LLVMValueRef res = si_shader_ctx->resources[sampler_index];
1634 res = LLVMBuildBitCast(gallivm->builder, res, v2i128, "");
1635 res = LLVMBuildExtractElement(gallivm->builder, res, bld_base->uint_bld.one, "");
1636 res = LLVMBuildBitCast(gallivm->builder, res, v16i8, "");
1637
1638 emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
1639 emit_data->args[0] = res;
1640 emit_data->args[1] = bld_base->uint_bld.zero;
1641 emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, 0);
1642 emit_data->arg_count = 3;
1643 return;
1644 }
1645
1646 /* Fetch and project texture coordinates */
1647 coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
1648 for (chan = 0; chan < 3; chan++) {
1649 coords[chan] = lp_build_emit_fetch(bld_base,
1650 emit_data->inst, 0,
1651 chan);
1652 if (opcode == TGSI_OPCODE_TXP)
1653 coords[chan] = lp_build_emit_llvm_binary(bld_base,
1654 TGSI_OPCODE_DIV,
1655 coords[chan],
1656 coords[3]);
1657 }
1658
1659 if (opcode == TGSI_OPCODE_TXP)
1660 coords[3] = bld_base->base.one;
1661
1662 /* Pack offsets. */
1663 if (has_offset && opcode != TGSI_OPCODE_TXF) {
1664 /* The offsets are six-bit signed integers packed like this:
1665 * X=[5:0], Y=[13:8], and Z=[21:16].
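 * For example, offsets (1, -2, 3) pack as
 * 0x01 | (0x3e << 8) | (0x03 << 16) = 0x00033e01, since -2 & 0x3f = 0x3e.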
1666 */
1667 LLVMValueRef offset[3], pack;
1668
1669 assert(inst->Texture.NumOffsets == 1);
1670
1671 for (chan = 0; chan < 3; chan++) {
1672 offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
1673 emit_data->inst, 0, chan);
1674 offset[chan] = LLVMBuildAnd(gallivm->builder, offset[chan],
1675 lp_build_const_int32(gallivm, 0x3f), "");
1676 if (chan)
1677 offset[chan] = LLVMBuildShl(gallivm->builder, offset[chan],
1678 lp_build_const_int32(gallivm, chan*8), "");
1679 }
1680
1681 pack = LLVMBuildOr(gallivm->builder, offset[0], offset[1], "");
1682 pack = LLVMBuildOr(gallivm->builder, pack, offset[2], "");
1683 address[count++] = pack;
1684 }
1685
1686 /* Pack LOD bias value */
1687 if (opcode == TGSI_OPCODE_TXB)
1688 address[count++] = coords[3];
1689 if (opcode == TGSI_OPCODE_TXB2)
1690 address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
1691
1692 /* Pack depth comparison value */
1693 if (tgsi_is_shadow_sampler(target) && opcode != TGSI_OPCODE_LODQ) {
1694 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1695 address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
1696 } else {
1697 assert(ref_pos >= 0);
1698 address[count++] = coords[ref_pos];
1699 }
1700 }
1701
1702 if (target == TGSI_TEXTURE_CUBE ||
1703 target == TGSI_TEXTURE_CUBE_ARRAY ||
1704 target == TGSI_TEXTURE_SHADOWCUBE ||
1705 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
1706 radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);
1707
1708 /* Pack user derivatives */
1709 if (opcode == TGSI_OPCODE_TXD) {
1710 int num_deriv_channels, param;
1711
1712 switch (target) {
1713 case TGSI_TEXTURE_3D:
1714 num_deriv_channels = 3;
1715 break;
1716 case TGSI_TEXTURE_2D:
1717 case TGSI_TEXTURE_SHADOW2D:
1718 case TGSI_TEXTURE_RECT:
1719 case TGSI_TEXTURE_SHADOWRECT:
1720 case TGSI_TEXTURE_2D_ARRAY:
1721 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1722 case TGSI_TEXTURE_CUBE:
1723 case TGSI_TEXTURE_SHADOWCUBE:
1724 case TGSI_TEXTURE_CUBE_ARRAY:
1725 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1726 num_deriv_channels = 2;
1727 break;
1728 case TGSI_TEXTURE_1D:
1729 case TGSI_TEXTURE_SHADOW1D:
1730 case TGSI_TEXTURE_1D_ARRAY:
1731 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1732 num_deriv_channels = 1;
1733 break;
1734 default:
1735 assert(0); num_deriv_channels = 0; /* no other targets are valid here; avoid uninitialized use in release builds */
1736 }
1737
1738 for (param = 1; param <= 2; param++)
1739 for (chan = 0; chan < num_deriv_channels; chan++)
1740 address[count++] = lp_build_emit_fetch(bld_base, inst, param, chan);
1741 }
1742
1743 /* Pack texture coordinates */
1744 address[count++] = coords[0];
1745 if (num_coords > 1)
1746 address[count++] = coords[1];
1747 if (num_coords > 2)
1748 address[count++] = coords[2];
1749
1750 /* Pack LOD or sample index */
1751 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
1752 address[count++] = coords[3];
1753 else if (opcode == TGSI_OPCODE_TXL2)
1754 address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
1755
1756 if (count > 16) {
1757 assert(!"Cannot handle more than 16 texture address parameters");
1758 count = 16;
1759 }
1760
1761 for (chan = 0; chan < count; chan++) {
1762 address[chan] = LLVMBuildBitCast(gallivm->builder,
1763 address[chan],
1764 LLVMInt32TypeInContext(gallivm->context),
1765 "");
1766 }
1767
1768 /* Adjust the sample index according to FMASK.
1769 *
1770 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
1771 * which is the identity mapping. Each nibble says which physical sample
1772 * should be fetched to get that sample.
1773 *
1774 * For example, 0x11111100 means there are only 2 samples stored and
1775 * the second sample covers 3/4 of the pixel. When reading samples 0
1776 * and 1, return physical sample 0 (determined by the first two 0s
1777 * in FMASK), otherwise return physical sample 1.
1778 *
1779 * The sample index should be adjusted as follows:
1780 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
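 *
 * With the example above, FMASK = 0x11111100 gives:
 * sample 0 -> (fmask >> 0) & 0xF = 0, sample 1 -> (fmask >> 4) & 0xF = 0,
 * sample 2 -> (fmask >> 8) & 0xF = 1.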
1781 */
1782 if (target == TGSI_TEXTURE_2D_MSAA ||
1783 target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
1784 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1785 struct lp_build_emit_data txf_emit_data = *emit_data;
1786 LLVMValueRef txf_address[4];
1787 unsigned txf_count = count;
1788 struct tgsi_full_instruction txf_inst = {};
1789
1790 memcpy(txf_address, address, sizeof(txf_address));
1791
1792 if (target == TGSI_TEXTURE_2D_MSAA) {
1793 txf_address[2] = bld_base->uint_bld.zero;
1794 }
1795 txf_address[3] = bld_base->uint_bld.zero;
1796
1797 /* Pad to a power-of-two size. */
1798 while (txf_count < util_next_power_of_two(txf_count))
1799 txf_address[txf_count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
1800
1801 /* Read FMASK using TXF. */
1802 txf_inst.Instruction.Opcode = TGSI_OPCODE_TXF;
1803 txf_inst.Texture.Texture = target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY;
1804 txf_emit_data.inst = &txf_inst;
1805 txf_emit_data.chan = 0;
1806 txf_emit_data.dst_type = LLVMVectorType(
1807 LLVMInt32TypeInContext(gallivm->context), 4);
1808 txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count);
1809 txf_emit_data.args[1] = si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index];
1810 txf_emit_data.args[2] = lp_build_const_int32(gallivm, txf_inst.Texture.Texture);
1811 txf_emit_data.arg_count = 3;
1812
1813 build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);
1814
1815 /* Initialize some constants. */
1816 LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0);
1817 LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0);
1818
1819 /* Apply the formula. */
1820 LLVMValueRef fmask =
1821 LLVMBuildExtractElement(gallivm->builder,
1822 txf_emit_data.output[0],
1823 uint_bld->zero, "");
1824
1825 unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;
1826
1827 LLVMValueRef sample_index4 =
1828 LLVMBuildMul(gallivm->builder, address[sample_chan], four, "");
1829
1830 LLVMValueRef shifted_fmask =
1831 LLVMBuildLShr(gallivm->builder, fmask, sample_index4, "");
1832
1833 LLVMValueRef final_sample =
1834 LLVMBuildAnd(gallivm->builder, shifted_fmask, F, "");
1835
1836 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
1837 * resource descriptor is 0 (invalid).
1838 */
1839 LLVMValueRef fmask_desc =
1840 LLVMBuildBitCast(gallivm->builder,
1841 si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + sampler_index],
1842 LLVMVectorType(uint_bld->elem_type, 8), "");
1843
1844 LLVMValueRef fmask_word1 =
1845 LLVMBuildExtractElement(gallivm->builder, fmask_desc,
1846 uint_bld->one, "");
1847
1848 LLVMValueRef word1_is_nonzero =
1849 LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1850 fmask_word1, uint_bld->zero, "");
1851
1852 /* Replace the MSAA sample index. */
1853 address[sample_chan] =
1854 LLVMBuildSelect(gallivm->builder, word1_is_nonzero,
1855 final_sample, address[sample_chan], "");
1856 }
1857
1858 /* Resource */
1859 emit_data->args[1] = si_shader_ctx->resources[sampler_index];
1860
1861 if (opcode == TGSI_OPCODE_TXF) {
1862 /* add tex offsets */
1863 if (inst->Texture.NumOffsets) {
1864 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1865 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
1866 const struct tgsi_texture_offset * off = inst->TexOffsets;
1867
1868 assert(inst->Texture.NumOffsets == 1);
1869
1870 switch (target) {
1871 case TGSI_TEXTURE_3D:
1872 address[2] = lp_build_add(uint_bld, address[2],
1873 bld->immediates[off->Index][off->SwizzleZ]);
1874 /* fall through */
1875 case TGSI_TEXTURE_2D:
1876 case TGSI_TEXTURE_SHADOW2D:
1877 case TGSI_TEXTURE_RECT:
1878 case TGSI_TEXTURE_SHADOWRECT:
1879 case TGSI_TEXTURE_2D_ARRAY:
1880 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1881 address[1] =
1882 lp_build_add(uint_bld, address[1],
1883 bld->immediates[off->Index][off->SwizzleY]);
1884 /* fall through */
1885 case TGSI_TEXTURE_1D:
1886 case TGSI_TEXTURE_SHADOW1D:
1887 case TGSI_TEXTURE_1D_ARRAY:
1888 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1889 address[0] =
1890 lp_build_add(uint_bld, address[0],
1891 bld->immediates[off->Index][off->SwizzleX]);
1892 break;
1893 /* texture offsets do not apply to other texture targets */
1894 }
1895 }
1896
1897 emit_data->args[2] = lp_build_const_int32(gallivm, target);
1898 emit_data->arg_count = 3;
1899
1900 emit_data->dst_type = LLVMVectorType(
1901 LLVMInt32TypeInContext(gallivm->context),
1902 4);
1903 } else if (opcode == TGSI_OPCODE_TG4 ||
1904 opcode == TGSI_OPCODE_LODQ ||
1905 has_offset) {
1906 unsigned is_array = target == TGSI_TEXTURE_1D_ARRAY ||
1907 target == TGSI_TEXTURE_SHADOW1D_ARRAY ||
1908 target == TGSI_TEXTURE_2D_ARRAY ||
1909 target == TGSI_TEXTURE_SHADOW2D_ARRAY ||
1910 target == TGSI_TEXTURE_CUBE_ARRAY ||
1911 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY;
1912 unsigned is_rect = target == TGSI_TEXTURE_RECT;
1913 unsigned dmask = 0xf;
1914
1915 if (opcode == TGSI_OPCODE_TG4) {
1916 unsigned gather_comp = 0;
1917
1918 /* DMASK was repurposed for GATHER4. 4 components are always
1919 * returned and DMASK works like a swizzle - it selects
1920 * the component to fetch. The only valid DMASK values are
1921 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
1922 * (red,red,red,red) etc.) The ISA document doesn't mention
1923 * this.
1924 */
1925
1926 /* Get the component index from src1.x for Gather4. */
1927 if (!tgsi_is_shadow_sampler(target)) {
1928 LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
1929 LLVMValueRef comp_imm;
1930 struct tgsi_src_register src1 = inst->Src[1].Register;
1931
1932 assert(src1.File == TGSI_FILE_IMMEDIATE);
1933
1934 comp_imm = imms[src1.Index][src1.SwizzleX];
1935 gather_comp = LLVMConstIntGetZExtValue(comp_imm);
1936 gather_comp = CLAMP(gather_comp, 0, 3);
1937 }
1938
1939 dmask = 1 << gather_comp;
1940 }
1941
1942 emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
1943 emit_data->args[3] = lp_build_const_int32(gallivm, dmask);
1944 emit_data->args[4] = lp_build_const_int32(gallivm, is_rect); /* unorm */
1945 emit_data->args[5] = lp_build_const_int32(gallivm, 0); /* r128 */
1946 emit_data->args[6] = lp_build_const_int32(gallivm, is_array); /* da */
1947 emit_data->args[7] = lp_build_const_int32(gallivm, 0); /* glc */
1948 emit_data->args[8] = lp_build_const_int32(gallivm, 0); /* slc */
1949 emit_data->args[9] = lp_build_const_int32(gallivm, 0); /* tfe */
1950 emit_data->args[10] = lp_build_const_int32(gallivm, 0); /* lwe */
1951
1952 emit_data->arg_count = 11;
1953
1954 emit_data->dst_type = LLVMVectorType(
1955 LLVMFloatTypeInContext(gallivm->context),
1956 4);
1957 } else {
1958 emit_data->args[2] = si_shader_ctx->samplers[sampler_index];
1959 emit_data->args[3] = lp_build_const_int32(gallivm, target);
1960 emit_data->arg_count = 4;
1961
1962 emit_data->dst_type = LLVMVectorType(
1963 LLVMFloatTypeInContext(gallivm->context),
1964 4);
1965 }
1966
1967 /* Convert cube array fetches to 2D array fetches.
1968 * This simplifies the LLVM backend. */
1969 if (target == TGSI_TEXTURE_CUBE_ARRAY)
1970 target = TGSI_TEXTURE_2D_ARRAY;
1971 else if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
1972 target = TGSI_TEXTURE_SHADOW2D_ARRAY;
1973
1974 /* Pad to power of two vector */
1975 while (count < util_next_power_of_two(count))
1976 address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
1977
1978 emit_data->args[0] = lp_build_gather_values(gallivm, address, count);
1979 }
1980
1981 static void build_tex_intrinsic(const struct lp_build_tgsi_action * action,
1982 struct lp_build_tgsi_context * bld_base,
1983 struct lp_build_emit_data * emit_data)
1984 {
1985 struct lp_build_context * base = &bld_base->base;
1986 unsigned opcode = emit_data->inst->Instruction.Opcode;
1987 unsigned target = emit_data->inst->Texture.Texture;
1988 char intr_name[127];
1989 bool has_offset = HAVE_LLVM >= 0x0305 ?
1990 emit_data->inst->Texture.NumOffsets > 0 : false;
1991
1992 if (target == TGSI_TEXTURE_BUFFER) {
1993 emit_data->output[emit_data->chan] = build_intrinsic(
1994 base->gallivm->builder,
1995 "llvm.SI.vs.load.input", emit_data->dst_type,
1996 emit_data->args, emit_data->arg_count,
1997 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
1998 return;
1999 }
2000
2001 if (opcode == TGSI_OPCODE_TG4 ||
2002 opcode == TGSI_OPCODE_LODQ ||
2003 (opcode != TGSI_OPCODE_TXF && has_offset)) {
2004 bool is_shadow = tgsi_is_shadow_sampler(target);
2005 const char *name = "llvm.SI.image.sample";
2006 const char *infix = "";
2007
2008 switch (opcode) {
2009 case TGSI_OPCODE_TEX:
2010 case TGSI_OPCODE_TEX2:
2011 case TGSI_OPCODE_TXP:
2012 break;
2013 case TGSI_OPCODE_TXB:
2014 case TGSI_OPCODE_TXB2:
2015 infix = ".b";
2016 break;
2017 case TGSI_OPCODE_TXL:
2018 case TGSI_OPCODE_TXL2:
2019 infix = ".l";
2020 break;
2021 case TGSI_OPCODE_TXD:
2022 infix = ".d";
2023 break;
2024 case TGSI_OPCODE_TG4:
2025 name = "llvm.SI.gather4";
2026 break;
2027 case TGSI_OPCODE_LODQ:
2028 name = "llvm.SI.getlod";
2029 is_shadow = false;
2030 has_offset = false;
2031 break;
2032 default:
2033 assert(0);
2034 return;
2035 }
2036
2037 /* Add the type and suffixes .c, .o if needed. */
2038 sprintf(intr_name, "%s%s%s%s.v%ui32", name,
2039 is_shadow ? ".c" : "", infix, has_offset ? ".o" : "",
2040 LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
2041
2042 emit_data->output[emit_data->chan] = build_intrinsic(
2043 base->gallivm->builder, intr_name, emit_data->dst_type,
2044 emit_data->args, emit_data->arg_count,
2045 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
2046 } else {
2047 LLVMTypeRef i8, v16i8, v32i8;
2048 const char *name;
2049
2050 switch (opcode) {
2051 case TGSI_OPCODE_TEX:
2052 case TGSI_OPCODE_TEX2:
2053 case TGSI_OPCODE_TXP:
2054 name = "llvm.SI.sample";
2055 break;
2056 case TGSI_OPCODE_TXB:
2057 case TGSI_OPCODE_TXB2:
2058 name = "llvm.SI.sampleb";
2059 break;
2060 case TGSI_OPCODE_TXD:
2061 name = "llvm.SI.sampled";
2062 break;
2063 case TGSI_OPCODE_TXF:
2064 name = "llvm.SI.imageload";
2065 break;
2066 case TGSI_OPCODE_TXL:
2067 case TGSI_OPCODE_TXL2:
2068 name = "llvm.SI.samplel";
2069 break;
2070 default:
2071 assert(0);
2072 return;
2073 }
2074
2075 i8 = LLVMInt8TypeInContext(base->gallivm->context);
2076 v16i8 = LLVMVectorType(i8, 16);
2077 v32i8 = LLVMVectorType(i8, 32);
2078
2079 emit_data->args[1] = LLVMBuildBitCast(base->gallivm->builder,
2080 emit_data->args[1], v32i8, "");
2081 if (opcode != TGSI_OPCODE_TXF) {
2082 emit_data->args[2] = LLVMBuildBitCast(base->gallivm->builder,
2083 emit_data->args[2], v16i8, "");
2084 }
2085
2086 sprintf(intr_name, "%s.v%ui32", name,
2087 LLVMGetVectorSize(LLVMTypeOf(emit_data->args[0])));
2088
2089 emit_data->output[emit_data->chan] = build_intrinsic(
2090 base->gallivm->builder, intr_name, emit_data->dst_type,
2091 emit_data->args, emit_data->arg_count,
2092 LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
2093 }
2094 }
2095
2096 static void txq_fetch_args(
2097 struct lp_build_tgsi_context * bld_base,
2098 struct lp_build_emit_data * emit_data)
2099 {
2100 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2101 const struct tgsi_full_instruction *inst = emit_data->inst;
2102 struct gallivm_state *gallivm = bld_base->base.gallivm;
2103 unsigned target = inst->Texture.Texture;
2104
2105 if (target == TGSI_TEXTURE_BUFFER) {
2106 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
2107 LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
2108
2109 /* Read the size from the buffer descriptor directly. */
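/* Dword 6 of the v8i32 view appears to be the third dword of the
 * 128-bit buffer descriptor kept in the upper half of the slot (see
 * the v2i128 extract in tex_fetch_args), i.e. the NUM_RECORDS field.
 */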
2110 LLVMValueRef size = si_shader_ctx->resources[inst->Src[1].Register.Index];
2111 size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
2112 size = LLVMBuildExtractElement(gallivm->builder, size,
2113 lp_build_const_int32(gallivm, 6), "");
2114 emit_data->args[0] = size;
2115 return;
2116 }
2117
2118 /* Mip level */
2119 emit_data->args[0] = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
2120
2121 /* Resource */
2122 emit_data->args[1] = si_shader_ctx->resources[inst->Src[1].Register.Index];
2123
2124 /* Texture target */
2125 if (target == TGSI_TEXTURE_CUBE_ARRAY ||
2126 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
2127 target = TGSI_TEXTURE_2D_ARRAY;
2128
2129 emit_data->args[2] = lp_build_const_int32(bld_base->base.gallivm,
2130 target);
2131
2132 emit_data->arg_count = 3;
2133
2134 emit_data->dst_type = LLVMVectorType(
2135 LLVMInt32TypeInContext(bld_base->base.gallivm->context),
2136 4);
2137 }
2138
2139 static void build_txq_intrinsic(const struct lp_build_tgsi_action * action,
2140 struct lp_build_tgsi_context * bld_base,
2141 struct lp_build_emit_data * emit_data)
2142 {
2143 unsigned target = emit_data->inst->Texture.Texture;
2144
2145 if (target == TGSI_TEXTURE_BUFFER) {
2146 /* Just return the buffer size. */
2147 emit_data->output[emit_data->chan] = emit_data->args[0];
2148 return;
2149 }
2150
2151 build_tgsi_intrinsic_nomem(action, bld_base, emit_data);
2152
2153 /* Divide the number of layers by 6 to get the number of cubes. */
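/* For example, a resinfo result of 12 layers describes 2 cubes. */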
2154 if (target == TGSI_TEXTURE_CUBE_ARRAY ||
2155 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
2156 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2157 LLVMValueRef two = lp_build_const_int32(bld_base->base.gallivm, 2);
2158 LLVMValueRef six = lp_build_const_int32(bld_base->base.gallivm, 6);
2159
2160 LLVMValueRef v4 = emit_data->output[emit_data->chan];
2161 LLVMValueRef z = LLVMBuildExtractElement(builder, v4, two, "");
2162 z = LLVMBuildSDiv(builder, z, six, "");
2163
2164 emit_data->output[emit_data->chan] =
2165 LLVMBuildInsertElement(builder, v4, z, two, "");
2166 }
2167 }
2168
2169 static void si_llvm_emit_ddxy(
2170 const struct lp_build_tgsi_action * action,
2171 struct lp_build_tgsi_context * bld_base,
2172 struct lp_build_emit_data * emit_data)
2173 {
2174 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2175 struct gallivm_state *gallivm = bld_base->base.gallivm;
2176 struct lp_build_context * base = &bld_base->base;
2177 const struct tgsi_full_instruction *inst = emit_data->inst;
2178 unsigned opcode = inst->Instruction.Opcode;
2179 LLVMValueRef indices[2];
2180 LLVMValueRef store_ptr, load_ptr0, load_ptr1;
2181 LLVMValueRef tl, trbl, result[4];
2182 LLVMTypeRef i32;
2183 unsigned swizzle[4];
2184 unsigned c;
2185
2186 i32 = LLVMInt32TypeInContext(gallivm->context);
2187
2188 indices[0] = bld_base->uint_bld.zero;
2189 indices[1] = build_intrinsic(gallivm->builder, "llvm.SI.tid", i32,
2190 NULL, 0, LLVMReadNoneAttribute);
2191 store_ptr = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
2192 indices, 2, "");
2193
2194 indices[1] = LLVMBuildAnd(gallivm->builder, indices[1],
2195 lp_build_const_int32(gallivm, 0xfffffffc), "");
2196 load_ptr0 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
2197 indices, 2, "");
2198
2199 indices[1] = LLVMBuildAdd(gallivm->builder, indices[1],
2200 lp_build_const_int32(gallivm,
2201 opcode == TGSI_OPCODE_DDX ? 1 : 2),
2202 "");
2203 load_ptr1 = LLVMBuildGEP(gallivm->builder, si_shader_ctx->ddxy_lds,
2204 indices, 2, "");
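/* Sketch of the scheme above: each lane stores its value at LDS slot
 * "tid", and the four lanes of a 2x2 quad occupy four consecutive
 * slots. Masking with ~3 addresses the quad's top-left lane, and the
 * +1 / +2 offset selects its right / bottom neighbor, so trbl - tl
 * below yields DDX or DDY respectively.
 */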
2205
2206 for (c = 0; c < 4; ++c) {
2207 unsigned i;
2208
2209 swizzle[c] = tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c);
2210 for (i = 0; i < c; ++i) {
2211 if (swizzle[i] == swizzle[c]) {
2212 result[c] = result[i];
2213 break;
2214 }
2215 }
2216 if (i != c)
2217 continue;
2218
2219 LLVMBuildStore(gallivm->builder,
2220 LLVMBuildBitCast(gallivm->builder,
2221 lp_build_emit_fetch(bld_base, inst, 0, c),
2222 i32, ""),
2223 store_ptr);
2224
2225 tl = LLVMBuildLoad(gallivm->builder, load_ptr0, "");
2226 tl = LLVMBuildBitCast(gallivm->builder, tl, base->elem_type, "");
2227
2228 trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, "");
2229 trbl = LLVMBuildBitCast(gallivm->builder, trbl, base->elem_type, "");
2230
2231 result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, "");
2232 }
2233
2234 emit_data->output[0] = lp_build_gather_values(gallivm, result, 4);
2235 }
2236
2237 /* Emit one vertex from the geometry shader */
2238 static void si_llvm_emit_vertex(
2239 const struct lp_build_tgsi_action *action,
2240 struct lp_build_tgsi_context *bld_base,
2241 struct lp_build_emit_data *emit_data)
2242 {
2243 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2244 struct lp_build_context *uint = &bld_base->uint_bld;
2245 struct si_shader *shader = si_shader_ctx->shader;
2246 struct tgsi_shader_info *info = &shader->selector->info;
2247 struct gallivm_state *gallivm = bld_base->base.gallivm;
2248 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
2249 LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2250 SI_PARAM_GS2VS_OFFSET);
2251 LLVMValueRef gs_next_vertex;
2252 LLVMValueRef can_emit, kill;
2253 LLVMValueRef args[2];
2254 unsigned chan;
2255 int i;
2256
2257 /* Write vertex attribute values to GSVS ring */
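/* Ring layout, as implied by the voffset computation below: component
 * "chan" of output "i" for vertex "v" lives at dword offset
 * (i * 4 + chan) * gs_max_out_vertices + v, relative to the soffset
 * base passed in SI_PARAM_GS2VS_OFFSET.
 */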
2258 gs_next_vertex = LLVMBuildLoad(gallivm->builder, si_shader_ctx->gs_next_vertex, "");
2259
2260 /* If this thread has already emitted the declared maximum number of
2261 * vertices, kill it: excessive vertex emissions are not supposed to
2262 * have any effect, and GS threads have no externally observable
2263 * effects other than emitting vertices.
2264 */
2265 can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
2266 lp_build_const_int32(gallivm,
2267 shader->selector->gs_max_out_vertices), "");
2268 kill = lp_build_select(&bld_base->base, can_emit,
2269 lp_build_const_float(gallivm, 1.0f),
2270 lp_build_const_float(gallivm, -1.0f));
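/* llvm.AMDGPU.kill discards the invocation when its argument is
 * negative, so -1.0f kills the thread and 1.0f is a no-op.
 */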
2271 build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
2272 LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
2273
2274 for (i = 0; i < info->num_outputs; i++) {
2275 LLVMValueRef *out_ptr =
2276 si_shader_ctx->radeon_bld.soa.outputs[i];
2277
2278 for (chan = 0; chan < 4; chan++) {
2279 LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");
2280 LLVMValueRef voffset =
2281 lp_build_const_int32(gallivm, (i * 4 + chan) *
2282 shader->selector->gs_max_out_vertices);
2283
2284 voffset = lp_build_add(uint, voffset, gs_next_vertex);
2285 voffset = lp_build_mul_imm(uint, voffset, 4);
2286
2287 out_val = LLVMBuildBitCast(gallivm->builder, out_val, i32, "");
2288
2289 build_tbuffer_store(si_shader_ctx,
2290 si_shader_ctx->gsvs_ring,
2291 out_val, 1,
2292 voffset, soffset, 0,
2293 V_008F0C_BUF_DATA_FORMAT_32,
2294 V_008F0C_BUF_NUM_FORMAT_UINT,
2295 1, 0, 1, 1, 0);
2296 }
2297 }
2298 gs_next_vertex = lp_build_add(uint, gs_next_vertex,
2299 lp_build_const_int32(gallivm, 1));
2300 LLVMBuildStore(gallivm->builder, gs_next_vertex, si_shader_ctx->gs_next_vertex);
2301
2302 /* Signal vertex emission */
2303 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | SENDMSG_GS);
2304 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
2305 build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
2306 LLVMVoidTypeInContext(gallivm->context), args, 2,
2307 LLVMNoUnwindAttribute);
2308 }
2309
2310 /* Cut one primitive from the geometry shader */
2311 static void si_llvm_emit_primitive(
2312 const struct lp_build_tgsi_action *action,
2313 struct lp_build_tgsi_context *bld_base,
2314 struct lp_build_emit_data *emit_data)
2315 {
2316 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
2317 struct gallivm_state *gallivm = bld_base->base.gallivm;
2318 LLVMValueRef args[2];
2319
2320 /* Signal primitive cut */
2321 args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | SENDMSG_GS);
2322 args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_GS_WAVE_ID);
2323 build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
2324 LLVMVoidTypeInContext(gallivm->context), args, 2,
2325 LLVMNoUnwindAttribute);
2326 }
2327
2328 static const struct lp_build_tgsi_action tex_action = {
2329 .fetch_args = tex_fetch_args,
2330 .emit = build_tex_intrinsic,
2331 };
2332
2333 static const struct lp_build_tgsi_action txq_action = {
2334 .fetch_args = txq_fetch_args,
2335 .emit = build_txq_intrinsic,
2336 .intr_name = "llvm.SI.resinfo"
2337 };
2338
2339 static void create_meta_data(struct si_shader_context *si_shader_ctx)
2340 {
2341 struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
2342 LLVMValueRef args[3];
2343
2344 args[0] = LLVMMDStringInContext(gallivm->context, "const", 5);
2345 args[1] = NULL;
2346 args[2] = lp_build_const_int32(gallivm, 1);
2347
2348 si_shader_ctx->const_md = LLVMMDNodeInContext(gallivm->context, args, 3);
2349 }
2350
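/* Build the type "elem_type[num_elements] *" in the constant address
 * space; used below for the descriptor arrays passed to the shader.
 */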
2351 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
2352 {
2353 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
2354 CONST_ADDR_SPACE);
2355 }
2356
2357 static void declare_streamout_params(struct si_shader_context *si_shader_ctx,
2358 struct pipe_stream_output_info *so,
2359 LLVMTypeRef *params, LLVMTypeRef i32,
2360 unsigned *num_params)
2361 {
2362 int i;
2363
2364 /* Streamout SGPRs. */
2365 if (so->num_outputs) {
2366 params[si_shader_ctx->param_streamout_config = (*num_params)++] = i32;
2367 params[si_shader_ctx->param_streamout_write_index = (*num_params)++] = i32;
2368 }
2369 /* A streamout buffer offset is loaded if the stride is non-zero. */
2370 for (i = 0; i < 4; i++) {
2371 if (!so->stride[i])
2372 continue;
2373
2374 params[si_shader_ctx->param_streamout_offset[i] = (*num_params)++] = i32;
2375 }
2376 }
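/* Worked example: assuming so->num_outputs != 0 and only stride[0] is
 * non-zero, with *num_params == N on entry, this declares:
 *   params[N]   = i32;   param_streamout_config
 *   params[N+1] = i32;   param_streamout_write_index
 *   params[N+2] = i32;   param_streamout_offset[0]
 * and leaves *num_params == N + 3. Zero-stride buffers get no offset
 * parameter.
 */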
2377
2378 static void create_function(struct si_shader_context *si_shader_ctx)
2379 {
2380 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2381 struct gallivm_state *gallivm = bld_base->base.gallivm;
2382 struct si_shader *shader = si_shader_ctx->shader;
2383 LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32, v16i8, v4i32, v8i32;
2384 unsigned i, last_array_pointer, last_sgpr, num_params;
2385
2386 i8 = LLVMInt8TypeInContext(gallivm->context);
2387 i32 = LLVMInt32TypeInContext(gallivm->context);
2388 f32 = LLVMFloatTypeInContext(gallivm->context);
2389 v2i32 = LLVMVectorType(i32, 2);
2390 v3i32 = LLVMVectorType(i32, 3);
2391 v4i32 = LLVMVectorType(i32, 4);
2392 v8i32 = LLVMVectorType(i32, 8);
2393 v16i8 = LLVMVectorType(i8, 16);
2394
2395 params[SI_PARAM_RW_BUFFERS] = const_array(v16i8, SI_NUM_RW_BUFFERS);
2396 params[SI_PARAM_CONST] = const_array(v16i8, SI_NUM_CONST_BUFFERS);
2397 params[SI_PARAM_SAMPLER] = const_array(v4i32, SI_NUM_SAMPLER_STATES);
2398 params[SI_PARAM_RESOURCE] = const_array(v8i32, SI_NUM_SAMPLER_VIEWS);
2399 last_array_pointer = SI_PARAM_RESOURCE;
2400
2401 switch (si_shader_ctx->type) {
2402 case TGSI_PROCESSOR_VERTEX:
2403 params[SI_PARAM_VERTEX_BUFFER] = const_array(v16i8, SI_NUM_VERTEX_BUFFERS);
2404 last_array_pointer = SI_PARAM_VERTEX_BUFFER;
2405 params[SI_PARAM_BASE_VERTEX] = i32;
2406 params[SI_PARAM_START_INSTANCE] = i32;
2407 num_params = SI_PARAM_START_INSTANCE+1;
2408
2409 if (shader->key.vs.as_es) {
2410 params[si_shader_ctx->param_es2gs_offset = num_params++] = i32;
2411 } else {
2412 if (shader->is_gs_copy_shader) {
2413 last_array_pointer = SI_PARAM_CONST;
2414 num_params = SI_PARAM_CONST+1;
2415 }
2416
2417 /* The locations of the other parameters are assigned dynamically. */
2418 declare_streamout_params(si_shader_ctx, &shader->selector->so,
2419 params, i32, &num_params);
2420 }
2421
2422 last_sgpr = num_params-1;
2423
2424 /* VGPRs */
2425 params[si_shader_ctx->param_vertex_id = num_params++] = i32;
2426 params[num_params++] = i32; /* unused */
2427 params[num_params++] = i32; /* unused */
2428 params[si_shader_ctx->param_instance_id = num_params++] = i32;
2429 break;
2430
2431 case TGSI_PROCESSOR_GEOMETRY:
2432 params[SI_PARAM_GS2VS_OFFSET] = i32;
2433 params[SI_PARAM_GS_WAVE_ID] = i32;
2434 last_sgpr = SI_PARAM_GS_WAVE_ID;
2435
2436 /* VGPRs */
2437 params[SI_PARAM_VTX0_OFFSET] = i32;
2438 params[SI_PARAM_VTX1_OFFSET] = i32;
2439 params[SI_PARAM_PRIMITIVE_ID] = i32;
2440 params[SI_PARAM_VTX2_OFFSET] = i32;
2441 params[SI_PARAM_VTX3_OFFSET] = i32;
2442 params[SI_PARAM_VTX4_OFFSET] = i32;
2443 params[SI_PARAM_VTX5_OFFSET] = i32;
2444 params[SI_PARAM_GS_INSTANCE_ID] = i32;
2445 num_params = SI_PARAM_GS_INSTANCE_ID+1;
2446 break;
2447
2448 case TGSI_PROCESSOR_FRAGMENT:
2449 params[SI_PARAM_ALPHA_REF] = f32;
2450 params[SI_PARAM_PRIM_MASK] = i32;
2451 last_sgpr = SI_PARAM_PRIM_MASK;
2452 params[SI_PARAM_PERSP_SAMPLE] = v2i32;
2453 params[SI_PARAM_PERSP_CENTER] = v2i32;
2454 params[SI_PARAM_PERSP_CENTROID] = v2i32;
2455 params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
2456 params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
2457 params[SI_PARAM_LINEAR_CENTER] = v2i32;
2458 params[SI_PARAM_LINEAR_CENTROID] = v2i32;
2459 params[SI_PARAM_LINE_STIPPLE_TEX] = f32;
2460 params[SI_PARAM_POS_X_FLOAT] = f32;
2461 params[SI_PARAM_POS_Y_FLOAT] = f32;
2462 params[SI_PARAM_POS_Z_FLOAT] = f32;
2463 params[SI_PARAM_POS_W_FLOAT] = f32;
2464 params[SI_PARAM_FRONT_FACE] = f32;
2465 params[SI_PARAM_ANCILLARY] = i32;
2466 params[SI_PARAM_SAMPLE_COVERAGE] = f32;
2467 params[SI_PARAM_POS_FIXED_PT] = f32;
2468 num_params = SI_PARAM_POS_FIXED_PT+1;
2469 break;
2470
2471 default:
2472 assert(0 && "unimplemented shader");
2473 return;
2474 }
2475
2476 assert(num_params <= Elements(params));
2477 radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
2478 radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
2479
2480 if (shader->dx10_clamp_mode)
2481 LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn,
2482 "enable-no-nans-fp-math", "true");
2483
2484 for (i = 0; i <= last_sgpr; ++i) {
2485 LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
2486
2487 /* We tell LLVM that array inputs are passed by value, which allows the
2488 * Sinking pass to move the loads. The inputs are constant, so this is fine. */
2489 if (i <= last_array_pointer)
2490 LLVMAddAttribute(P, LLVMByValAttribute);
2491 else
2492 LLVMAddAttribute(P, LLVMInRegAttribute);
2493 }
2494
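/* LDS scratch used by si_llvm_emit_ddxy: one i32 slot per lane of a
 * 64-thread wavefront, allocated only when DDX/DDY actually appear in
 * the shader.
 */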
2495 if (bld_base->info &&
2496 (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
2497 bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0))
2498 si_shader_ctx->ddxy_lds =
2499 LLVMAddGlobalInAddressSpace(gallivm->module,
2500 LLVMArrayType(i32, 64),
2501 "ddxy_lds",
2502 LOCAL_ADDR_SPACE);
2503 }
2504
2505 static void preload_constants(struct si_shader_context *si_shader_ctx)
2506 {
2507 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2508 struct gallivm_state * gallivm = bld_base->base.gallivm;
2509 const struct tgsi_shader_info * info = bld_base->info;
2510 unsigned buf;
2511 LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
2512
2513 for (buf = 0; buf < SI_NUM_CONST_BUFFERS; buf++) {
2514 unsigned i, num_const = info->const_file_max[buf] + 1;
2515
2516 if (num_const == 0)
2517 continue;
2518
2519 /* Allocate space for the constant values */
2520 si_shader_ctx->constants[buf] = CALLOC(num_const * 4, sizeof(LLVMValueRef));
2521
2522 /* Load the resource descriptor */
2523 si_shader_ctx->const_resource[buf] =
2524 build_indexed_load_const(si_shader_ctx, ptr, lp_build_const_int32(gallivm, buf));
2525
2526 /* Load the constants, we rely on the code sinking to do the rest */
2527 for (i = 0; i < num_const * 4; ++i) {
2528 si_shader_ctx->constants[buf][i] =
2529 buffer_load_const(gallivm->builder,
2530 si_shader_ctx->const_resource[buf],
2531 lp_build_const_int32(gallivm, i * 4),
2532 bld_base->base.elem_type);
2533 }
2534 }
2535 }
2536
2537 static void preload_samplers(struct si_shader_context *si_shader_ctx)
2538 {
2539 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2540 struct gallivm_state * gallivm = bld_base->base.gallivm;
2541 const struct tgsi_shader_info * info = bld_base->info;
2542
2543 unsigned i, num_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
2544
2545 LLVMValueRef res_ptr, samp_ptr;
2546 LLVMValueRef offset;
2547
2548 if (num_samplers == 0)
2549 return;
2550
2551 res_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
2552 samp_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
2553
2554 /* Load the resources and samplers, we rely on the code sinking to do the rest */
2555 for (i = 0; i < num_samplers; ++i) {
2556 /* Resource */
2557 offset = lp_build_const_int32(gallivm, i);
2558 si_shader_ctx->resources[i] = build_indexed_load_const(si_shader_ctx, res_ptr, offset);
2559
2560 /* Sampler */
2561 offset = lp_build_const_int32(gallivm, i);
2562 si_shader_ctx->samplers[i] = build_indexed_load_const(si_shader_ctx, samp_ptr, offset);
2563
2564 /* FMASK resource */
2565 if (info->is_msaa_sampler[i]) {
2566 offset = lp_build_const_int32(gallivm, SI_FMASK_TEX_OFFSET + i);
2567 si_shader_ctx->resources[SI_FMASK_TEX_OFFSET + i] =
2568 build_indexed_load_const(si_shader_ctx, res_ptr, offset);
2569 }
2570 }
2571 }
2572
2573 static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
2574 {
2575 struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2576 struct gallivm_state * gallivm = bld_base->base.gallivm;
2577 unsigned i;
2578
2579 if (si_shader_ctx->type != TGSI_PROCESSOR_VERTEX ||
2580 si_shader_ctx->shader->key.vs.as_es ||
2581 !si_shader_ctx->shader->selector->so.num_outputs)
2582 return;
2583
2584 LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2585 SI_PARAM_RW_BUFFERS);
2586
2587 /* Load the resources, we rely on the code sinking to do the rest */
2588 for (i = 0; i < 4; ++i) {
2589 if (si_shader_ctx->shader->selector->so.stride[i]) {
2590 LLVMValueRef offset = lp_build_const_int32(gallivm,
2591 SI_SO_BUF_OFFSET + i);
2592
2593 si_shader_ctx->so_buffers[i] = build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
2594 }
2595 }
2596 }
2597
2598 /**
2599 * Load ESGS and GSVS ring buffer resource descriptors and save the variables
2600 * for later use.
2601 */
2602 static void preload_ring_buffers(struct si_shader_context *si_shader_ctx)
2603 {
2604 struct gallivm_state *gallivm =
2605 si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
2606
2607 LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2608 SI_PARAM_RW_BUFFERS);
2609
2610 if ((si_shader_ctx->type == TGSI_PROCESSOR_VERTEX &&
2611 si_shader_ctx->shader->key.vs.as_es) ||
2612 si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
2613 LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_ESGS);
2614
2615 si_shader_ctx->esgs_ring =
2616 build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
2617 }
2618
2619 if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY ||
2620 si_shader_ctx->shader->is_gs_copy_shader) {
2621 LLVMValueRef offset = lp_build_const_int32(gallivm, SI_RING_GSVS);
2622
2623 si_shader_ctx->gsvs_ring =
2624 build_indexed_load_const(si_shader_ctx, buf_ptr, offset);
2625 }
2626 }
2627
2628 void si_shader_binary_read_config(const struct si_screen *sscreen,
2629 struct si_shader *shader,
2630 unsigned symbol_offset)
2631 {
2632 unsigned i;
2633 const unsigned char *config =
2634 radeon_shader_binary_config_start(&shader->binary,
2635 symbol_offset);
2636
2637 /* XXX: We may be able to emit some of these values directly rather than
2638 * extracting fields to be emitted later.
2639 */
2640
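/* The config section is a sequence of (register, value) pairs of
 * little-endian dwords, hence the stride of 8 bytes per entry.
 */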
2641 for (i = 0; i < shader->binary.config_size_per_symbol; i += 8) {
2642 unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
2643 unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
2644 switch (reg) {
2645 case R_00B028_SPI_SHADER_PGM_RSRC1_PS:
2646 case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
2647 case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
2648 case R_00B848_COMPUTE_PGM_RSRC1:
2649 shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
2650 shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
2651 shader->float_mode = G_00B028_FLOAT_MODE(value);
2652 break;
2653 case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
2654 shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
2655 break;
2656 case R_00B84C_COMPUTE_PGM_RSRC2:
2657 shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
2658 break;
2659 case R_0286CC_SPI_PS_INPUT_ENA:
2660 shader->spi_ps_input_ena = value;
2661 break;
2662 case R_0286E8_SPI_TMPRING_SIZE:
2663 case R_00B860_COMPUTE_TMPRING_SIZE:
2664 /* WAVESIZE is in units of 256 dwords; multiply by 4 to convert to bytes. */
2665 shader->scratch_bytes_per_wave =
2666 G_00B860_WAVESIZE(value) * 256 * 4;
2667 break;
2668 default:
2669 fprintf(stderr, "Warning: Compiler emitted unknown "
2670 "config register: 0x%x\n", reg);
2671 break;
2672 }
2673 }
2674 }
2675
2676 void si_shader_apply_scratch_relocs(struct si_context *sctx,
2677 struct si_shader *shader,
2678 uint64_t scratch_va)
2679 {
2680 unsigned i;
2681 uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff;
2682 uint32_t scratch_rsrc_dword1 =
2683 S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
2684 | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
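/* dword0 holds the low 32 bits of the scratch virtual address; dword1
 * combines the high address bits with the per-lane stride, i.e. the
 * per-wave scratch size divided across the 64 lanes of a wave.
 */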
2685
2686 for (i = 0; i < shader->binary.reloc_count; i++) {
2687 const struct radeon_shader_reloc *reloc =
2688 &shader->binary.relocs[i];
2689 if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
2690 util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
2691 &scratch_rsrc_dword0, 4);
2692 } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
2693 util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
2694 &scratch_rsrc_dword1, 4);
2695 }
2696 }
2697 }
2698
2699 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
2700 {
2701 const struct radeon_shader_binary *binary = &shader->binary;
2702 unsigned code_size = binary->code_size + binary->rodata_size;
2703 unsigned char *ptr;
2704
2705 r600_resource_reference(&shader->bo, NULL);
2706 shader->bo = si_resource_create_custom(&sscreen->b.b,
2707 PIPE_USAGE_IMMUTABLE,
2708 code_size);
2709 if (!shader->bo)
2710 return -ENOMEM;
2711
2712 ptr = sscreen->b.ws->buffer_map(shader->bo->cs_buf, NULL,
2713 PIPE_TRANSFER_READ_WRITE);
2714 util_memcpy_cpu_to_le32(ptr, binary->code, binary->code_size);
2715 if (binary->rodata_size > 0) {
2716 ptr += binary->code_size;
2717 util_memcpy_cpu_to_le32(ptr, binary->rodata,
2718 binary->rodata_size);
2719 }
2720
2721 sscreen->b.ws->buffer_unmap(shader->bo->cs_buf);
2722 return 0;
2723 }
2724
2725 int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
2726 {
2727 const struct radeon_shader_binary *binary = &shader->binary;
2728 unsigned i;
2729 bool dump = r600_can_dump_shader(&sscreen->b,
2730 shader->selector ? shader->selector->tokens : NULL);
2731
2732 si_shader_binary_read_config(sscreen, shader, 0);
2733 si_shader_binary_upload(sscreen, shader);
2734
2735 if (dump) {
2736 if (!binary->disassembled) {
2737 fprintf(stderr, "SI CODE:\n");
2738 for (i = 0; i < binary->code_size; i += 4) {
2739 fprintf(stderr, "@0x%x: %02x%02x%02x%02x\n", i, binary->code[i + 3],
2740 binary->code[i + 2], binary->code[i + 1],
2741 binary->code[i]);
2742 }
2743 }
2744
2745 fprintf(stderr, "*** SHADER STATS ***\n"
2746 "SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
2747 "Scratch: %d bytes per wave\n********************\n",
2748 shader->num_sgprs, shader->num_vgprs, binary->code_size,
2749 shader->lds_size, shader->scratch_bytes_per_wave);
2750 }
2751 return 0;
2752 }
2753
2754 int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
2755 LLVMTargetMachineRef tm, LLVMModuleRef mod)
2756 {
2757 int r = 0;
2758 bool dump = r600_can_dump_shader(&sscreen->b,
2759 shader->selector ? shader->selector->tokens : NULL);
2760 r = radeon_llvm_compile(mod, &shader->binary,
2761 r600_get_llvm_processor_name(sscreen->b.family), dump, tm);
2762
2763 if (r) {
2764 return r;
2765 }
2766 r = si_shader_binary_read(sscreen, shader);
2767
2768 FREE(shader->binary.config);
2769 FREE(shader->binary.rodata);
2770 FREE(shader->binary.global_symbol_offsets);
2771 if (shader->scratch_bytes_per_wave == 0) {
2772 FREE(shader->binary.code);
2773 FREE(shader->binary.relocs);
2774 memset(&shader->binary, 0, sizeof(shader->binary));
2775 }
2776 return r;
2777 }
2778
2779 /* Generate code for the hardware VS shader stage to go with a geometry shader */
2780 static int si_generate_gs_copy_shader(struct si_screen *sscreen,
2781 struct si_shader_context *si_shader_ctx,
2782 struct si_shader *gs, bool dump)
2783 {
2784 struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm;
2785 struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
2786 struct lp_build_context *base = &bld_base->base;
2787 struct lp_build_context *uint = &bld_base->uint_bld;
2788 struct si_shader *shader = si_shader_ctx->shader;
2789 struct si_shader_output_values *outputs;
2790 struct tgsi_shader_info *gsinfo = &gs->selector->info;
2791 LLVMValueRef args[9];
2792 int i, r;
2793
2794 outputs = MALLOC(gsinfo->num_outputs * sizeof(outputs[0]));
2795
2796 si_shader_ctx->type = TGSI_PROCESSOR_VERTEX;
2797 shader->is_gs_copy_shader = true;
2798
2799 radeon_llvm_context_init(&si_shader_ctx->radeon_bld);
2800
2801 create_meta_data(si_shader_ctx);
2802 create_function(si_shader_ctx);
2803 preload_streamout_buffers(si_shader_ctx);
2804 preload_ring_buffers(si_shader_ctx);
2805
2806 args[0] = si_shader_ctx->gsvs_ring;
2807 args[1] = lp_build_mul_imm(uint,
2808 LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
2809 si_shader_ctx->param_vertex_id),
2810 4);
2811 args[3] = uint->zero;
2812 args[4] = uint->one; /* OFFEN */
2813 args[5] = uint->zero; /* IDXEN */
2814 args[6] = uint->one; /* GLC */
2815 args[7] = uint->one; /* SLC */
2816 args[8] = uint->zero; /* TFE */
2817
2818 /* Fetch vertex data from GSVS ring */
2819 for (i = 0; i < gsinfo->num_outputs; ++i) {
2820 unsigned chan;
2821
2822 outputs[i].name = gsinfo->output_semantic_name[i];
2823 outputs[i].sid = gsinfo->output_semantic_index[i];
2824
2825 for (chan = 0; chan < 4; chan++) {
2826 args[2] = lp_build_const_int32(gallivm,
2827 (i * 4 + chan) *
2828 gs->selector->gs_max_out_vertices * 16 * 4);
2829
2830 outputs[i].values[chan] =
2831 LLVMBuildBitCast(gallivm->builder,
2832 build_intrinsic(gallivm->builder,
2833 "llvm.SI.buffer.load.dword.i32.i32",
2834 LLVMInt32TypeInContext(gallivm->context),
2835 args, 9,
2836 LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
2837 base->elem_type, "");
2838 }
2839 }
2840
2841 si_llvm_export_vs(bld_base, outputs, gsinfo->num_outputs);
2842
2843 radeon_llvm_finalize_module(&si_shader_ctx->radeon_bld);
2844
2845 if (dump)
2846 fprintf(stderr, "Copy Vertex Shader for Geometry Shader:\n\n");
2847
2848 r = si_compile_llvm(sscreen, si_shader_ctx->shader,
2849 si_shader_ctx->tm, bld_base->base.gallivm->module);
2850
2851 radeon_llvm_dispose(&si_shader_ctx->radeon_bld);
2852
2853 FREE(outputs);
2854 return r;
2855 }
2856
2857 static void si_dump_key(unsigned shader, union si_shader_key *key)
2858 {
2859 int i;
2860
2861 fprintf(stderr, "SHADER KEY\n");
2862
2863 switch (shader) {
2864 case PIPE_SHADER_VERTEX:
2865 fprintf(stderr, " instance_divisors = {");
2866 for (i = 0; i < Elements(key->vs.instance_divisors); i++)
2867 fprintf(stderr, !i ? "%u" : ", %u",
2868 key->vs.instance_divisors[i]);
2869 fprintf(stderr, "}\n");
2870
2871 if (key->vs.as_es)
2872 fprintf(stderr, " gs_used_inputs = 0x%"PRIx64"\n",
2873 key->vs.gs_used_inputs);
2874 fprintf(stderr, " as_es = %u\n", key->vs.as_es);
2875 break;
2876
2877 case PIPE_SHADER_GEOMETRY:
2878 break;
2879
2880 case PIPE_SHADER_FRAGMENT:
2881 fprintf(stderr, " export_16bpc = 0x%X\n", key->ps.export_16bpc);
2882 fprintf(stderr, " last_cbuf = %u\n", key->ps.last_cbuf);
2883 fprintf(stderr, " color_two_side = %u\n", key->ps.color_two_side);
2884 fprintf(stderr, " alpha_func = %u\n", key->ps.alpha_func);
2885 fprintf(stderr, " alpha_to_one = %u\n", key->ps.alpha_to_one);
2886 fprintf(stderr, " poly_stipple = %u\n", key->ps.poly_stipple);
2887 break;
2888
2889 default:
2890 assert(0);
2891 }
2892 }
2893
2894 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
2895 struct si_shader *shader)
2896 {
2897 struct si_shader_selector *sel = shader->selector;
2898 struct tgsi_token *tokens = sel->tokens;
2899 struct si_shader_context si_shader_ctx;
2900 struct lp_build_tgsi_context * bld_base;
2901 struct tgsi_shader_info stipple_shader_info;
2902 LLVMModuleRef mod;
2903 int r = 0;
2904 bool poly_stipple = sel->type == PIPE_SHADER_FRAGMENT &&
2905 shader->key.ps.poly_stipple;
2906 bool dump = r600_can_dump_shader(&sscreen->b, sel->tokens);
2907
2908 if (poly_stipple) {
2909 tokens = util_pstipple_create_fragment_shader(tokens, NULL,
2910 SI_POLY_STIPPLE_SAMPLER);
2911 tgsi_scan_shader(tokens, &stipple_shader_info);
2912 }
2913
2914 /* Dump TGSI code before doing TGSI->LLVM conversion in case the
2915 * conversion fails. */
2916 if (dump) {
2917 si_dump_key(sel->type, &shader->key);
2918 tgsi_dump(tokens, 0);
2919 si_dump_streamout(&sel->so);
2920 }
2921
2922 assert(shader->nparam == 0);
2923
2924 memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
2925 radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
2926 bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
2927
2928 if (sel->type != PIPE_SHADER_COMPUTE)
2929 shader->dx10_clamp_mode = true;
2930
2931 if (sel->info.uses_kill)
2932 shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
2933
2934 shader->uses_instanceid = sel->info.uses_instanceid;
2935 bld_base->info = poly_stipple ? &stipple_shader_info : &sel->info;
2936 bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
2937
2938 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
2939 bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
2940 bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
2941 bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
2942 bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action;
2943 bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action;
2944 bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
2945 bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
2946 bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
2947 bld_base->op_actions[TGSI_OPCODE_TXQ] = txq_action;
2948 bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
2949 bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
2950
2951 bld_base->op_actions[TGSI_OPCODE_DDX].emit = si_llvm_emit_ddxy;
2952 bld_base->op_actions[TGSI_OPCODE_DDY].emit = si_llvm_emit_ddxy;
2953
2954 bld_base->op_actions[TGSI_OPCODE_EMIT].emit = si_llvm_emit_vertex;
2955 bld_base->op_actions[TGSI_OPCODE_ENDPRIM].emit = si_llvm_emit_primitive;
2956
2957 if (HAVE_LLVM >= 0x0306) {
2958 bld_base->op_actions[TGSI_OPCODE_MAX].emit = build_tgsi_intrinsic_nomem;
2959 bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.maxnum.f32";
2960 bld_base->op_actions[TGSI_OPCODE_MIN].emit = build_tgsi_intrinsic_nomem;
2961 bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.minnum.f32";
2962 }
2963
2964 si_shader_ctx.radeon_bld.load_system_value = declare_system_value;
2965 si_shader_ctx.shader = shader;
2966 si_shader_ctx.type = tgsi_get_processor_type(tokens);
2967 si_shader_ctx.screen = sscreen;
2968 si_shader_ctx.tm = tm;
2969
2970 switch (si_shader_ctx.type) {
2971 case TGSI_PROCESSOR_VERTEX:
2972 si_shader_ctx.radeon_bld.load_input = declare_input_vs;
2973 if (shader->key.vs.as_es) {
2974 bld_base->emit_epilogue = si_llvm_emit_es_epilogue;
2975 } else {
2976 bld_base->emit_epilogue = si_llvm_emit_vs_epilogue;
2977 }
2978 break;
2979 case TGSI_PROCESSOR_GEOMETRY:
2980 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = fetch_input_gs;
2981 bld_base->emit_epilogue = si_llvm_emit_gs_epilogue;
2982 break;
2983 case TGSI_PROCESSOR_FRAGMENT:
2984 si_shader_ctx.radeon_bld.load_input = declare_input_fs;
2985 bld_base->emit_epilogue = si_llvm_emit_fs_epilogue;
2986
2987 switch (sel->info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]) {
2988 case TGSI_FS_DEPTH_LAYOUT_GREATER:
2989 shader->db_shader_control |=
2990 S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
2991 break;
2992 case TGSI_FS_DEPTH_LAYOUT_LESS:
2993 shader->db_shader_control |=
2994 S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
2995 break;
2996 }
2997 break;
2998 default:
2999 assert(!"Unsupported shader type");
3000 return -1;
3001 }
3002
3003 create_meta_data(&si_shader_ctx);
3004 create_function(&si_shader_ctx);
3005 preload_constants(&si_shader_ctx);
3006 preload_samplers(&si_shader_ctx);
3007 preload_streamout_buffers(&si_shader_ctx);
3008 preload_ring_buffers(&si_shader_ctx);
3009
3010 if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
3011 si_shader_ctx.gs_next_vertex =
3012 lp_build_alloca(bld_base->base.gallivm,
3013 bld_base->uint_bld.elem_type, "");
3014 }
3015
3016 if (!lp_build_tgsi_llvm(bld_base, tokens)) {
3017 fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
3018 goto out;
3019 }
3020
3021 radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
3022
3023 mod = bld_base->base.gallivm->module;
3024 r = si_compile_llvm(sscreen, shader, tm, mod);
3025 if (r) {
3026 fprintf(stderr, "LLVM failed to compile shader\n");
3027 goto out;
3028 }
3029
3030 radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
3031
3032 if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
3033 shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
3034 shader->gs_copy_shader->selector = shader->selector;
3035 shader->gs_copy_shader->key = shader->key;
3036 si_shader_ctx.shader = shader->gs_copy_shader;
3037 if ((r = si_generate_gs_copy_shader(sscreen, &si_shader_ctx,
3038 shader, dump))) {
3039 FREE(shader->gs_copy_shader);
3040 shader->gs_copy_shader = NULL;
3041 goto out;
3042 }
3043 }
3044
3045 out:
3046 for (int i = 0; i < SI_NUM_CONST_BUFFERS; i++)
3047 FREE(si_shader_ctx.constants[i]);
3048 if (poly_stipple)
3049 tgsi_free_tokens(tokens);
3050 return r;
3051 }
3052
3053 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
3054 {
3055 if (shader->gs_copy_shader)
3056 si_shader_destroy(ctx, shader->gs_copy_shader);
3057
3058 if (shader->scratch_bo)
3059 r600_resource_reference(&shader->scratch_bo, NULL);
3060
3061 r600_resource_reference(&shader->bo, NULL);
3062
3063 FREE(shader->binary.code);
3064 FREE(shader->binary.relocs);
3065 }