radeonsi: enable indirect addressing
[mesa.git] / src / gallium / drivers / radeonsi / radeonsi_shader.c

/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Tom Stellard <thomas.stellard@amd.com>
 *      Michel Dänzer <michel.daenzer@amd.com>
 *      Christian König <christian.koenig@amd.com>
 */

#include "gallivm/lp_bld_tgsi_action.h"
#include "gallivm/lp_bld_const.h"
#include "gallivm/lp_bld_gather.h"
#include "gallivm/lp_bld_intr.h"
#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_tgsi.h"
#include "gallivm/lp_bld_arit.h"
#include "radeon_llvm.h"
#include "radeon_llvm_emit.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_dump.h"

#include "radeonsi_pipe.h"
#include "radeonsi_shader.h"
#include "si_state.h"
#include "sid.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>

struct si_shader_context
{
	struct radeon_llvm_context radeon_bld;
	struct r600_context *rctx;
	struct tgsi_parse_context parse;
	struct tgsi_token *tokens;
	struct si_pipe_shader *shader;
	struct si_shader_key key;
	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
	unsigned ninput_emitted;
	/* struct list_head inputs; */
	/* unsigned * input_mappings *//* From TGSI to SI hw */
	/* struct tgsi_shader_info info;*/
};

static struct si_shader_context *si_shader_context(
	struct lp_build_tgsi_context *bld_base)
{
	return (struct si_shader_context *)bld_base;
}


#define PERSPECTIVE_BASE 0
#define LINEAR_BASE 9

#define SAMPLE_OFFSET 0
#define CENTER_OFFSET 2
#define CENTROID_OFFSET 4

#define USE_SGPR_MAX_SUFFIX_LEN 5
#define CONST_ADDR_SPACE 2
#define USER_SGPR_ADDR_SPACE 8

/**
 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad
 *
 * @param offset The offset parameter specifies the number of
 * elements to offset, not the number of bytes or dwords. An element is
 * the type pointed to by the base_ptr parameter (e.g. int is the element of
 * an int* pointer)
 *
 * When LLVM lowers the load instruction, it will convert the element offset
 * into a dword offset automatically.
 *
 */
static LLVMValueRef build_indexed_load(
	struct gallivm_state *gallivm,
	LLVMValueRef base_ptr,
	LLVMValueRef offset)
{
	LLVMValueRef computed_ptr = LLVMBuildGEP(
		gallivm->builder, base_ptr, &offset, 1, "");

	return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
}
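
/* Usage sketch (illustrative only, nothing in this file calls it this way):
 * the GEP counts in elements, not bytes, so fetching element 2 from a
 * <16 x i8>* base pointer loads the third 16-byte descriptor:
 *
 *	LLVMValueRef idx = lp_build_const_int32(gallivm, 2);
 *	LLVMValueRef desc = build_indexed_load(gallivm, base_ptr, idx);
 */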

static void declare_input_vs(
	struct si_shader_context *si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	LLVMValueRef t_list_ptr;
	LLVMValueRef t_offset;
	LLVMValueRef t_list;
	LLVMValueRef attribute_offset;
	LLVMValueRef buffer_index_reg;
	LLVMValueRef args[3];
	LLVMTypeRef vec4_type;
	LLVMValueRef input;
	struct lp_build_context *base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
	/*struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];*/
	unsigned chan;

	/* Load the T list */
	t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER);

	t_offset = lp_build_const_int32(base->gallivm, input_index);

	t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);

	/* Build the attribute offset */
	attribute_offset = lp_build_const_int32(base->gallivm, 0);

	/* Load the buffer index, which is always stored in VGPR0
	 * for Vertex Shaders */
	buffer_index_reg = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_INDEX);

	vec4_type = LLVMVectorType(base->elem_type, 4);
	args[0] = t_list;
	args[1] = attribute_offset;
	args[2] = buffer_index_reg;
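	/* llvm.SI.vs.load.input fetches one attribute as a vec4: args[0] is
	 * the vertex buffer resource descriptor loaded above, args[1] the
	 * offset within the element and args[2] the vertex index; the exact
	 * operand semantics are defined by the LLVM backend. */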
	input = lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.vs.load.input", vec4_type, args, 3);

	/* Break up the vec4 into individual components */
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
		/* XXX: Use a helper function for this. There is one in
		 * tgsi_llvm.c. */
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
			LLVMBuildExtractElement(base->gallivm->builder,
				input, llvm_chan, "");
	}
}

static void declare_input_fs(
	struct si_shader_context *si_shader_ctx,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct si_shader *shader = &si_shader_ctx->shader->shader;
	struct lp_build_context *base =
		&si_shader_ctx->radeon_bld.soa.bld_base.base;
	struct gallivm_state *gallivm = base->gallivm;
	LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
	LLVMValueRef main_fn = si_shader_ctx->radeon_bld.main_fn;

	LLVMValueRef interp_param;
	const char *intr_name;

	/* This value is:
	 * [15:0]  NewPrimMask (one bit per quad; it is set if the
	 *         quad begins a new primitive. Bit 0 always needs
	 *         to be unset)
	 * [31:16] ParamOffset
	 */
	LLVMValueRef params = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_PRIM_MASK);
	LLVMValueRef attr_number;

	unsigned chan;

	if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			unsigned soa_index =
				radeon_llvm_reg_index_soa(input_index, chan);
			si_shader_ctx->radeon_bld.inputs[soa_index] =
				LLVMGetParam(main_fn, SI_PARAM_POS_X_FLOAT + chan);

			if (chan == 3)
				/* RCP for fragcoord.w */
				si_shader_ctx->radeon_bld.inputs[soa_index] =
					LLVMBuildFDiv(gallivm->builder,
						lp_build_const_float(gallivm, 1.0f),
						si_shader_ctx->radeon_bld.inputs[soa_index],
						"");
		}
		return;
	}

	if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
		LLVMValueRef face, is_face_positive;

		face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);

		is_face_positive = LLVMBuildFCmp(gallivm->builder,
			LLVMRealUGT, face,
			lp_build_const_float(gallivm, 0.0f),
			"");

		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 0)] =
			LLVMBuildSelect(gallivm->builder,
				is_face_positive,
				lp_build_const_float(gallivm, 1.0f),
				lp_build_const_float(gallivm, 0.0f),
				"");
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 1)] =
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 2)] =
			lp_build_const_float(gallivm, 0.0f);
		si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, 3)] =
			lp_build_const_float(gallivm, 1.0f);

		return;
	}

	shader->input[input_index].param_offset = shader->ninterp++;
	attr_number = lp_build_const_int32(gallivm,
		shader->input[input_index].param_offset);

	/* XXX: Handle all possible interpolation modes */
	switch (decl->Interp.Interpolate) {
	case TGSI_INTERPOLATE_COLOR:
		if (si_shader_ctx->key.flatshade) {
			interp_param = 0;
		} else {
			if (decl->Interp.Centroid)
				interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
			else
				interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
		}
		break;
	case TGSI_INTERPOLATE_CONSTANT:
		interp_param = 0;
		break;
	case TGSI_INTERPOLATE_LINEAR:
		if (decl->Interp.Centroid)
			interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTROID);
		else
			interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
		break;
	case TGSI_INTERPOLATE_PERSPECTIVE:
		if (decl->Interp.Centroid)
			interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTROID);
		else
			interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_CENTER);
		break;
	default:
		fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
		return;
	}

	if (!si_shader_ctx->ninput_emitted++) {
		/* Enable whole quad mode */
		lp_build_intrinsic(gallivm->builder,
			"llvm.SI.wqm",
			LLVMVoidTypeInContext(gallivm->context),
			NULL, 0);
	}

	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
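
	/* Both intrinsics take (channel, attribute, prim_mask);
	 * llvm.SI.fs.interp additionally takes the barycentric coordinates
	 * in interp_param, which is why the calls below pass four arguments
	 * when interp_param is set and three otherwise. */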

	/* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
	if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
	    si_shader_ctx->key.color_two_side) {
		LLVMValueRef args[4];
		LLVMValueRef face, is_face_positive;
		LLVMValueRef back_attr_number =
			lp_build_const_int32(gallivm,
				shader->input[input_index].param_offset + 1);
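
		/* The back-facing color is read from the next parameter slot
		 * (param_offset + 1); both attributes are interpolated below
		 * and selected per fragment based on SI_PARAM_FRONT_FACE. */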

		face = LLVMGetParam(main_fn, SI_PARAM_FRONT_FACE);

		is_face_positive = LLVMBuildFCmp(gallivm->builder,
			LLVMRealUGT, face,
			lp_build_const_float(gallivm, 0.0f),
			"");

		args[2] = params;
		args[3] = interp_param;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
			LLVMValueRef front, back;

			args[0] = llvm_chan;
			args[1] = attr_number;
			front = build_intrinsic(base->gallivm->builder, intr_name,
				input_type, args, args[3] ? 4 : 3,
				LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);

			args[1] = back_attr_number;
			back = build_intrinsic(base->gallivm->builder, intr_name,
				input_type, args, args[3] ? 4 : 3,
				LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);

			si_shader_ctx->radeon_bld.inputs[soa_index] =
				LLVMBuildSelect(gallivm->builder,
					is_face_positive,
					front,
					back,
					"");
		}

		shader->ninterp++;
	} else {
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			LLVMValueRef args[4];
			LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
			unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
			args[0] = llvm_chan;
			args[1] = attr_number;
			args[2] = params;
			args[3] = interp_param;
			si_shader_ctx->radeon_bld.inputs[soa_index] =
				build_intrinsic(base->gallivm->builder, intr_name,
					input_type, args, args[3] ? 4 : 3,
					LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
		}
	}
}

static void declare_input(
	struct radeon_llvm_context *radeon_bld,
	unsigned input_index,
	const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *si_shader_ctx =
		si_shader_context(&radeon_bld->soa.bld_base);
	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
		declare_input_vs(si_shader_ctx, input_index, decl);
	} else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		declare_input_fs(si_shader_ctx, input_index, decl);
	} else {
		fprintf(stderr, "Warning: Unsupported shader type.\n");
	}
}

static LLVMValueRef fetch_constant(
	struct lp_build_tgsi_context *bld_base,
	const struct tgsi_full_src_register *reg,
	enum tgsi_opcode_type type,
	unsigned swizzle)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct lp_build_context *base = &bld_base->base;

	LLVMValueRef ptr;
	LLVMValueRef args[2];
	LLVMValueRef result;

	if (swizzle == LP_CHAN_ALL) {
		unsigned chan;
		LLVMValueRef values[4];
		for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan)
			values[chan] = fetch_constant(bld_base, reg, type, chan);

		return lp_build_gather_values(bld_base->base.gallivm, values, 4);
	}

	/* Load the resource descriptor */
	ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
	args[0] = build_indexed_load(base->gallivm, ptr, bld_base->uint_bld.zero);

	args[1] = lp_build_const_int32(base->gallivm, (reg->Register.Index * 4 + swizzle) * 4);
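	/* Indirect addressing: add the address register value, scaled to a
	 * byte offset, to the constant offset. Each constant slot is one
	 * vec4, i.e. four dwords or 16 bytes, hence the multiply by 16. */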
	if (reg->Register.Indirect) {
		const struct tgsi_ind_register *ireg = &reg->Indirect;
		LLVMValueRef addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
		LLVMValueRef idx = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
		idx = lp_build_mul_imm(&bld_base->uint_bld, idx, 16);
		args[1] = lp_build_add(&bld_base->uint_bld, idx, args[1]);
	}

	result = build_intrinsic(base->gallivm->builder, "llvm.SI.load.const", base->elem_type,
		args, 2, LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);

	return bitcast(bld_base, type, result);
}

/* Initialize arguments for the shader export intrinsic */
static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base,
				     struct tgsi_full_declaration *d,
				     unsigned index,
				     unsigned target,
				     LLVMValueRef *args)
{
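	/* llvm.SI.export takes nine arguments; as set up below, args[0] is
	 * the component write mask, args[1] the valid-mask flag, args[2] the
	 * "last export" flag, args[3] the export target, args[4] the COMPR
	 * flag and args[5..8] the four output values. */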
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct lp_build_context *uint =
		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct lp_build_context *base = &bld_base->base;
	unsigned compressed = 0;
	unsigned chan;

	if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
		int cbuf = target - V_008DFC_SQ_EXP_MRT;

		if (cbuf >= 0 && cbuf < 8) {
			compressed = (si_shader_ctx->key.export_16bpc >> cbuf) & 0x1;

			if (compressed)
				si_shader_ctx->shader->spi_shader_col_format |=
					V_028714_SPI_SHADER_FP16_ABGR << (4 * cbuf);
			else
				si_shader_ctx->shader->spi_shader_col_format |=
					V_028714_SPI_SHADER_32_ABGR << (4 * cbuf);
		}
	}

	if (compressed) {
		/* Pixel shader needs to pack output values before export */
		for (chan = 0; chan < 2; chan++) {
			LLVMValueRef *out_ptr =
				si_shader_ctx->radeon_bld.soa.outputs[index];
			args[0] = LLVMBuildLoad(base->gallivm->builder,
					out_ptr[2 * chan], "");
			args[1] = LLVMBuildLoad(base->gallivm->builder,
					out_ptr[2 * chan + 1], "");
			args[chan + 5] =
				build_intrinsic(base->gallivm->builder,
					"llvm.SI.packf16",
					LLVMInt32TypeInContext(base->gallivm->context),
					args, 2,
					LLVMReadNoneAttribute | LLVMNoUnwindAttribute);
			args[chan + 7] = args[chan + 5] =
				LLVMBuildBitCast(base->gallivm->builder,
					args[chan + 5],
					LLVMFloatTypeInContext(base->gallivm->context),
					"");
		}

		/* Set COMPR flag */
		args[4] = uint->one;
	} else {
		for (chan = 0; chan < 4; chan++) {
			LLVMValueRef out_ptr =
				si_shader_ctx->radeon_bld.soa.outputs[index][chan];
			/* +5 because the first output value will be
			 * the 6th argument to the intrinsic. */
			args[chan + 5] = LLVMBuildLoad(base->gallivm->builder,
					out_ptr, "");
		}

		/* Clear COMPR flag */
		args[4] = uint->zero;
	}

	/* XXX: This controls which components of the output
	 * registers actually get exported. (e.g. bit 0 means export
	 * X component, bit 1 means export Y component, etc.) I'm
	 * hard coding this to 0xf for now. In the future, we might
	 * want to do something else. */
	args[0] = lp_build_const_int32(base->gallivm, 0xf);

	/* Specify whether the EXEC mask represents the valid mask */
	args[1] = uint->zero;

	/* Specify whether this is the last export */
	args[2] = uint->zero;

	/* Specify the target we are exporting */
	args[3] = lp_build_const_int32(base->gallivm, target);

	/* XXX: We probably need to keep track of the output
	 * values, so we know what we are passing to the next
	 * stage. */
}

static void si_alpha_test(struct lp_build_tgsi_context *bld_base,
			  unsigned index)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;

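	/* Inline alpha test: compare the alpha output against the reference
	 * value and feed +1.0f/-1.0f into llvm.AMDGPU.kill, which discards
	 * fragments handed a negative value. PIPE_FUNC_NEVER discards
	 * everything, so the argument-less kilp is used instead. */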
	if (si_shader_ctx->key.alpha_func != PIPE_FUNC_NEVER) {
		LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3];
		LLVMValueRef alpha_pass =
			lp_build_cmp(&bld_base->base,
				si_shader_ctx->key.alpha_func,
				LLVMBuildLoad(gallivm->builder, out_ptr, ""),
				lp_build_const_float(gallivm, si_shader_ctx->key.alpha_ref));
		LLVMValueRef arg =
			lp_build_select(&bld_base->base,
				alpha_pass,
				lp_build_const_float(gallivm, 1.0f),
				lp_build_const_float(gallivm, -1.0f));

		build_intrinsic(gallivm->builder,
				"llvm.AMDGPU.kill",
				LLVMVoidTypeInContext(gallivm->context),
				&arg, 1, 0);
	} else {
		build_intrinsic(gallivm->builder,
				"llvm.AMDGPU.kilp",
				LLVMVoidTypeInContext(gallivm->context),
				NULL, 0, 0);
	}
}

/* XXX: Only partially implemented so far, and only for the VS. */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context *bld_base)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct si_shader *shader = &si_shader_ctx->shader->shader;
	struct lp_build_context *base = &bld_base->base;
	struct lp_build_context *uint =
		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
	LLVMValueRef args[9];
	LLVMValueRef last_args[9] = { 0 };
	unsigned color_count = 0;
	unsigned param_count = 0;
	int depth_index = -1, stencil_index = -1;

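	/* The final export of a shader must be flagged as the last one, so
	 * position (VS) and color (PS) exports are buffered in last_args and
	 * only flushed once it is known whether another export follows; the
	 * buffered export gets args[2] = 1 at the end of this function. */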
	while (!tgsi_parse_end_of_tokens(parse)) {
		struct tgsi_full_declaration *d =
			&parse->FullToken.FullDeclaration;
		unsigned target;
		unsigned index;
		int i;

		tgsi_parse_token(parse);

		if (parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_PROPERTY &&
		    parse->FullToken.FullProperty.Property.PropertyName ==
		    TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS)
			shader->fs_write_all = TRUE;

		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
			continue;

		switch (d->Declaration.File) {
		case TGSI_FILE_INPUT:
			i = shader->ninput++;
			shader->input[i].name = d->Semantic.Name;
			shader->input[i].sid = d->Semantic.Index;
			shader->input[i].interpolate = d->Interp.Interpolate;
			shader->input[i].centroid = d->Interp.Centroid;
			continue;

		case TGSI_FILE_OUTPUT:
			i = shader->noutput++;
			shader->output[i].name = d->Semantic.Name;
			shader->output[i].sid = d->Semantic.Index;
			shader->output[i].interpolate = d->Interp.Interpolate;
			break;

		default:
			continue;
		}

		for (index = d->Range.First; index <= d->Range.Last; index++) {
			/* Select the correct target */
			switch (d->Semantic.Name) {
			case TGSI_SEMANTIC_PSIZE:
				target = V_008DFC_SQ_EXP_POS;
				break;
			case TGSI_SEMANTIC_POSITION:
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
					target = V_008DFC_SQ_EXP_POS;
					break;
				} else {
					depth_index = index;
					continue;
				}
			case TGSI_SEMANTIC_STENCIL:
				stencil_index = index;
				continue;
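			/* Note the unusual construct below: the BCOLOR case
			 * label sits inside the vertex-shader branch of the
			 * COLOR case, so back colors jump straight into the
			 * parameter-export path while fragment-shader colors
			 * take the MRT path in the else branch. */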
			case TGSI_SEMANTIC_COLOR:
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
			case TGSI_SEMANTIC_BCOLOR:
					target = V_008DFC_SQ_EXP_PARAM + param_count;
					shader->output[i].param_offset = param_count;
					param_count++;
				} else {
					target = V_008DFC_SQ_EXP_MRT + color_count;
					if (color_count == 0 &&
					    si_shader_ctx->key.alpha_func != PIPE_FUNC_ALWAYS)
						si_alpha_test(bld_base, index);

					color_count++;
				}
				break;
			case TGSI_SEMANTIC_FOG:
			case TGSI_SEMANTIC_GENERIC:
				target = V_008DFC_SQ_EXP_PARAM + param_count;
				shader->output[i].param_offset = param_count;
				param_count++;
				break;
			default:
				target = 0;
				fprintf(stderr,
					"Warning: SI unhandled output type:%d\n",
					d->Semantic.Name);
			}

			si_llvm_init_export_args(bld_base, d, index, target, args);

			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
				if (last_args[0]) {
					lp_build_intrinsic(base->gallivm->builder,
						"llvm.SI.export",
						LLVMVoidTypeInContext(base->gallivm->context),
						last_args, 9);
				}

				memcpy(last_args, args, sizeof(args));
			} else {
				lp_build_intrinsic(base->gallivm->builder,
					"llvm.SI.export",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 9);
			}

		}
	}

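	/* Depth and stencil were only recorded above; they share a single
	 * export to the MRTZ target, with the component mask indicating
	 * which of the two values is present. */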
	if (depth_index >= 0 || stencil_index >= 0) {
		LLVMValueRef out_ptr;
		unsigned mask = 0;

		/* Specify the target we are exporting */
		args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRTZ);

		if (depth_index >= 0) {
			out_ptr = si_shader_ctx->radeon_bld.soa.outputs[depth_index][2];
			args[5] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
			mask |= 0x1;

			if (stencil_index < 0) {
				args[6] =
				args[7] =
				args[8] = args[5];
			}
		}

		if (stencil_index >= 0) {
			out_ptr = si_shader_ctx->radeon_bld.soa.outputs[stencil_index][1];
			args[7] =
			args[8] =
			args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, "");
			mask |= 0x2;

			if (depth_index < 0)
				args[5] = args[6];
		}

		/* Specify which components to enable */
		args[0] = lp_build_const_int32(base->gallivm, mask);

		args[1] =
		args[2] =
		args[4] = uint->zero;

		if (last_args[0])
			lp_build_intrinsic(base->gallivm->builder,
				"llvm.SI.export",
				LLVMVoidTypeInContext(base->gallivm->context),
				args, 9);
		else
			memcpy(last_args, args, sizeof(args));
	}

	if (!last_args[0]) {
		assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

		/* Specify which components to enable */
		last_args[0] = lp_build_const_int32(base->gallivm, 0x0);

		/* Specify the target we are exporting */
		last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);

		/* Set COMPR flag to zero to export data as 32-bit */
		last_args[4] = uint->zero;

		/* dummy bits */
		last_args[5] = uint->zero;
		last_args[6] = uint->zero;
		last_args[7] = uint->zero;
		last_args[8] = uint->zero;

		si_shader_ctx->shader->spi_shader_col_format |=
			V_028714_SPI_SHADER_32_ABGR;
	}

	/* Specify whether the EXEC mask represents the valid mask */
	last_args[1] = lp_build_const_int32(base->gallivm,
		si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

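	/* FS_COLOR0_WRITES_ALL_CBUFS: replicate the color0 export to every
	 * bound color buffer, reusing last_args and only changing the export
	 * target for each copy. */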
	if (shader->fs_write_all && shader->nr_cbufs > 1) {
		int i;

		/* Specify that this is not yet the last export */
		last_args[2] = lp_build_const_int32(base->gallivm, 0);

		for (i = 1; i < shader->nr_cbufs; i++) {
			/* Specify the target we are exporting */
			last_args[3] = lp_build_const_int32(base->gallivm,
				V_008DFC_SQ_EXP_MRT + i);

			lp_build_intrinsic(base->gallivm->builder,
				"llvm.SI.export",
				LLVMVoidTypeInContext(base->gallivm->context),
				last_args, 9);

			si_shader_ctx->shader->spi_shader_col_format |=
				si_shader_ctx->shader->spi_shader_col_format << 4;
		}

		last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
	}

	/* Specify that this is the last export */
	last_args[2] = lp_build_const_int32(base->gallivm, 1);

	lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.export",
		LLVMVoidTypeInContext(base->gallivm->context),
		last_args, 9);

	/* XXX: Look up what this function does */
	/* ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
}

static void tex_fetch_args(
	struct lp_build_tgsi_context *bld_base,
	struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	const struct tgsi_full_instruction *inst = emit_data->inst;
	unsigned opcode = inst->Instruction.Opcode;
	unsigned target = inst->Texture.Texture;
	LLVMValueRef ptr;
	LLVMValueRef offset;
	LLVMValueRef coords[4];
	LLVMValueRef address[16];
	unsigned count = 0;
	unsigned chan;

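	/* The texture address vector is packed in a fixed order: LOD bias
	 * (TXB), depth comparison value, texture coordinates, array slice
	 * and explicit LOD (TXL), as the blocks below append them. */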
	/* WriteMask */
	/* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask */
	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf);

	/* Fetch and project texture coordinates */
	coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
	for (chan = 0; chan < 3; chan++) {
		coords[chan] = lp_build_emit_fetch(bld_base,
						emit_data->inst, 0,
						chan);
		if (opcode == TGSI_OPCODE_TXP)
			coords[chan] = lp_build_emit_llvm_binary(bld_base,
							TGSI_OPCODE_DIV,
							coords[chan],
							coords[3]);
	}

	if (opcode == TGSI_OPCODE_TXP)
		coords[3] = bld_base->base.one;

	/* Pack LOD bias value */
	if (opcode == TGSI_OPCODE_TXB)
		address[count++] = coords[3];

	if ((target == TGSI_TEXTURE_CUBE || target == TGSI_TEXTURE_SHADOWCUBE) &&
	    opcode != TGSI_OPCODE_TXQ)
		radeon_llvm_emit_prepare_cube_coords(bld_base, emit_data, coords);

	/* Pack depth comparison value */
	switch (target) {
	case TGSI_TEXTURE_SHADOW1D:
	case TGSI_TEXTURE_SHADOW1D_ARRAY:
	case TGSI_TEXTURE_SHADOW2D:
	case TGSI_TEXTURE_SHADOWRECT:
		address[count++] = coords[2];
		break;
	case TGSI_TEXTURE_SHADOWCUBE:
	case TGSI_TEXTURE_SHADOW2D_ARRAY:
		address[count++] = coords[3];
		break;
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
	}

	/* Pack texture coordinates */
	address[count++] = coords[0];
	switch (target) {
	case TGSI_TEXTURE_2D:
	case TGSI_TEXTURE_2D_ARRAY:
	case TGSI_TEXTURE_3D:
	case TGSI_TEXTURE_CUBE:
	case TGSI_TEXTURE_RECT:
	case TGSI_TEXTURE_SHADOW2D:
	case TGSI_TEXTURE_SHADOWRECT:
	case TGSI_TEXTURE_SHADOW2D_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE:
	case TGSI_TEXTURE_2D_MSAA:
	case TGSI_TEXTURE_2D_ARRAY_MSAA:
	case TGSI_TEXTURE_CUBE_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		address[count++] = coords[1];
	}
	switch (target) {
	case TGSI_TEXTURE_3D:
	case TGSI_TEXTURE_CUBE:
	case TGSI_TEXTURE_SHADOWCUBE:
	case TGSI_TEXTURE_CUBE_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		address[count++] = coords[2];
	}

	/* Pack array slice */
	switch (target) {
	case TGSI_TEXTURE_1D_ARRAY:
		address[count++] = coords[1];
	}
	switch (target) {
	case TGSI_TEXTURE_2D_ARRAY:
	case TGSI_TEXTURE_2D_ARRAY_MSAA:
	case TGSI_TEXTURE_SHADOW2D_ARRAY:
		address[count++] = coords[2];
	}
	switch (target) {
	case TGSI_TEXTURE_CUBE_ARRAY:
	case TGSI_TEXTURE_SHADOW1D_ARRAY:
	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		address[count++] = coords[3];
	}

	/* Pack LOD */
	if (opcode == TGSI_OPCODE_TXL)
		address[count++] = coords[3];

	if (count > 16) {
		assert(!"Cannot handle more than 16 texture address parameters");
		count = 16;
	}

	for (chan = 0; chan < count; chan++) {
		address[chan] = LLVMBuildBitCast(gallivm->builder,
				address[chan],
				LLVMInt32TypeInContext(gallivm->context),
				"");
	}

	/* Pad to power of two vector */
	while (count < util_next_power_of_two(count))
		address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));

	emit_data->args[1] = lp_build_gather_values(gallivm, address, count);

	/* Resource */
	ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				emit_data->inst->Src[1].Register.Index);
	emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
				ptr, offset);

	/* Sampler */
	ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_SAMPLER);
	offset = lp_build_const_int32(bld_base->base.gallivm,
				emit_data->inst->Src[1].Register.Index);
	emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
				ptr, offset);

	/* Dimensions */
	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm, target);

	emit_data->arg_count = 5;
	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
	 * the writemask are clear */
	emit_data->dst_type = LLVMVectorType(
		LLVMFloatTypeInContext(bld_base->base.gallivm->context),
		4);
}

static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
				struct lp_build_tgsi_context *bld_base,
				struct lp_build_emit_data *emit_data)
{
	struct lp_build_context *base = &bld_base->base;
	char intr_name[23];

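	/* The intrinsic name is suffixed with the address vector type, e.g.
	 * "llvm.SI.sample." becomes "llvm.SI.sample.v4i32" for a four-dword
	 * address, so one action entry covers every address size. */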
	sprintf(intr_name, "%sv%ui32", action->intr_name,
		LLVMGetVectorSize(LLVMTypeOf(emit_data->args[1])));

	emit_data->output[emit_data->chan] = lp_build_intrinsic(
		base->gallivm->builder, intr_name, emit_data->dst_type,
		emit_data->args, emit_data->arg_count);
}

static const struct lp_build_tgsi_action tex_action = {
	.fetch_args = tex_fetch_args,
	.emit = build_tex_intrinsic,
	.intr_name = "llvm.SI.sample."
};

static const struct lp_build_tgsi_action txb_action = {
	.fetch_args = tex_fetch_args,
	.emit = build_tex_intrinsic,
	.intr_name = "llvm.SI.sampleb."
};

static const struct lp_build_tgsi_action txl_action = {
	.fetch_args = tex_fetch_args,
	.emit = build_tex_intrinsic,
	.intr_name = "llvm.SI.samplel."
};

static void create_function(struct si_shader_context *si_shader_ctx)
{
	struct gallivm_state *gallivm = si_shader_ctx->radeon_bld.soa.bld_base.base.gallivm;
	LLVMTypeRef params[20], f32, i8, i32, v2i32, v3i32;
	unsigned i;

	i8 = LLVMInt8TypeInContext(gallivm->context);
	i32 = LLVMInt32TypeInContext(gallivm->context);
	f32 = LLVMFloatTypeInContext(gallivm->context);
	v2i32 = LLVMVectorType(i32, 2);
	v3i32 = LLVMVectorType(i32, 3);

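	/* Parameters SI_PARAM_CONST through SI_PARAM_VERTEX_BUFFER get the
	 * "inreg" attribute below, which makes them arrive in scalar
	 * registers; the remaining parameters carry per-vertex or
	 * per-fragment data. */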
	params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), CONST_ADDR_SPACE);
	params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST];
	params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), CONST_ADDR_SPACE);

	if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
		params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_SAMPLER];
		params[SI_PARAM_VERTEX_INDEX] = i32;
		radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, 5);

	} else {
		params[SI_PARAM_PRIM_MASK] = i32;
		params[SI_PARAM_PERSP_SAMPLE] = v2i32;
		params[SI_PARAM_PERSP_CENTER] = v2i32;
		params[SI_PARAM_PERSP_CENTROID] = v2i32;
		params[SI_PARAM_PERSP_PULL_MODEL] = v3i32;
		params[SI_PARAM_LINEAR_SAMPLE] = v2i32;
		params[SI_PARAM_LINEAR_CENTER] = v2i32;
		params[SI_PARAM_LINEAR_CENTROID] = v2i32;
		params[SI_PARAM_LINE_STIPPLE_TEX] = f32;
		params[SI_PARAM_POS_X_FLOAT] = f32;
		params[SI_PARAM_POS_Y_FLOAT] = f32;
		params[SI_PARAM_POS_Z_FLOAT] = f32;
		params[SI_PARAM_POS_W_FLOAT] = f32;
		params[SI_PARAM_FRONT_FACE] = f32;
		params[SI_PARAM_ANCILLARY] = f32;
		params[SI_PARAM_SAMPLE_COVERAGE] = f32;
		params[SI_PARAM_POS_FIXED_PT] = f32;
		radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, 20);
	}

	radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
	for (i = SI_PARAM_CONST; i <= SI_PARAM_VERTEX_BUFFER; ++i) {
		LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
		LLVMAddAttribute(P, LLVMInRegAttribute);
	}
}

int si_pipe_shader_create(
	struct pipe_context *ctx,
	struct si_pipe_shader *shader,
	struct si_shader_key key)
{
	struct r600_context *rctx = (struct r600_context*)ctx;
	struct si_pipe_shader_selector *sel = shader->selector;
	struct si_shader_context si_shader_ctx;
	struct tgsi_shader_info shader_info;
	struct lp_build_tgsi_context *bld_base;
	LLVMModuleRef mod;
	unsigned char *inst_bytes;
	unsigned inst_byte_count;
	unsigned i;
	uint32_t *ptr;
	bool dump;

	dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE);

	assert(shader->shader.noutput == 0);
	assert(shader->shader.ninterp == 0);
	assert(shader->shader.ninput == 0);

	memset(&si_shader_ctx, 0, sizeof(si_shader_ctx));
	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;

	tgsi_scan_shader(sel->tokens, &shader_info);
	shader->shader.uses_kill = shader_info.uses_kill;
	bld_base->info = &shader_info;
	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
	bld_base->emit_epilogue = si_llvm_emit_epilogue;

	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
	bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action;
	bld_base->op_actions[TGSI_OPCODE_TXL] = txl_action;
	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;

	si_shader_ctx.radeon_bld.load_input = declare_input;
	si_shader_ctx.tokens = sel->tokens;
	tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
	si_shader_ctx.shader = shader;
	si_shader_ctx.key = key;
	si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
	si_shader_ctx.rctx = rctx;

	create_function(&si_shader_ctx);

	shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs;

	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
	 * conversion fails. */
	if (dump) {
		tgsi_dump(sel->tokens, 0);
	}

	if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
		fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
		return -EINVAL;
	}

	radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);

	mod = bld_base->base.gallivm->module;
	if (dump) {
		LLVMDumpModule(mod);
	}
	radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump);
	if (dump) {
		fprintf(stderr, "SI CODE:\n");
		for (i = 0; i < inst_byte_count; i += 4) {
			fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
				inst_bytes[i + 2], inst_bytes[i + 1],
				inst_bytes[i]);
		}
	}

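	/* The compiled binary starts with a 12-byte config header: the SGPR
	 * count, the VGPR count and the SPI_PS_INPUT_ENA value, one dword
	 * each; the shader code itself follows at offset 12. */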
	shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
	shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
	shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));

	radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
	tgsi_parse_free(&si_shader_ctx.parse);

	/* copy new shader */
	si_resource_reference(&shader->bo, NULL);
	shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE,
					       inst_byte_count - 12);
	if (shader->bo == NULL) {
		return -ENOMEM;
	}

	ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE);
	if (0 /*R600_BIG_ENDIAN*/) {
		for (i = 0; i < (inst_byte_count - 12) / 4; ++i) {
			ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes + 12 + i*4));
		}
	} else {
		memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
	}
	rctx->ws->buffer_unmap(shader->bo->cs_buf);

	free(inst_bytes);

	return 0;
}

void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
	si_resource_reference(&shader->bo, NULL);
}