src/amd/common/ac_llvm_util.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the
   6  * "Software"), to deal in the Software without restriction, including
   7  * without limitation the rights to use, copy, modify, merge, publish,
   8  * distribute, sub license, and/or sell copies of the Software, and to
   9  * permit persons to whom the Software is furnished to do so, subject to
  10  * the following conditions:
  11  *
  12  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  13  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  14  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  15  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  16  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  17  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  18  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  19  *
  20  * The above copyright notice and this permission notice (including the
  21  * next paragraph) shall be included in all copies or substantial portions
  22  * of the Software.
  23  *
  24  */
  25 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
  26 #include "ac_llvm_util.h"
  27
  28 #include <llvm-c/Core.h>
  29
  30 #include "c11/threads.h"
  31
  32 #include <assert.h>
  33 #include <stdio.h>
  34
  35 #include "util/bitscan.h"
  36 #include "util/macros.h"
  37
  38 static void ac_init_llvm_target()
  39 {
  40 #if HAVE_LLVM < 0x0307
  41         LLVMInitializeR600TargetInfo();
  42         LLVMInitializeR600Target();
  43         LLVMInitializeR600TargetMC();
  44         LLVMInitializeR600AsmPrinter();
  45 #else
  46         LLVMInitializeAMDGPUTargetInfo();
  47         LLVMInitializeAMDGPUTarget();
  48         LLVMInitializeAMDGPUTargetMC();
  49         LLVMInitializeAMDGPUAsmPrinter();
  50 #endif
  51 }
  52
  53 static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
  54
  55 static LLVMTargetRef ac_get_llvm_target(const char *triple)
  56 {
  57         LLVMTargetRef target = NULL;
  58         char *err_message = NULL;
  59
  60         call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
  61
  62         if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
  63                 fprintf(stderr, "Cannot find target for triple %s ", triple);
  64                 if (err_message) {
  65                         fprintf(stderr, "%s\n", err_message);
  66                 }
  67                 LLVMDisposeMessage(err_message);
  68                 return NULL;
  69         }
  70         return target;
  71 }
  72
  73 static const char *ac_get_llvm_processor_name(enum radeon_family family)
  74 {
  75         switch (family) {
  76         case CHIP_TAHITI:
  77                 return "tahiti";
  78         case CHIP_PITCAIRN:
  79                 return "pitcairn";
  80         case CHIP_VERDE:
  81                 return "verde";
  82         case CHIP_OLAND:
  83                 return "oland";
  84         case CHIP_HAINAN:
  85                 return "hainan";
  86         case CHIP_BONAIRE:
  87                 return "bonaire";
  88         case CHIP_KABINI:
  89                 return "kabini";
  90         case CHIP_KAVERI:
  91                 return "kaveri";
  92         case CHIP_HAWAII:
  93                 return "hawaii";
  94         case CHIP_MULLINS:
  95                 return "mullins";
  96         case CHIP_TONGA:
  97                 return "tonga";
  98         case CHIP_ICELAND:
  99                 return "iceland";
 100         case CHIP_CARRIZO:
 101                 return "carrizo";
 102 #if HAVE_LLVM <= 0x0307
 103         case CHIP_FIJI:
 104                 return "tonga";
 105         case CHIP_STONEY:
 106                 return "carrizo";
 107 #else
 108         case CHIP_FIJI:
 109                 return "fiji";
 110         case CHIP_STONEY:
 111                 return "stoney";
 112 #endif
 113 #if HAVE_LLVM <= 0x0308
 114         case CHIP_POLARIS10:
 115                 return "tonga";
 116         case CHIP_POLARIS11:
 117                 return "tonga";
 118 #else
 119         case CHIP_POLARIS10:
 120                 return "polaris10";
 121         case CHIP_POLARIS11:
 122                 return "polaris11";
 123 #endif
 124         default:
 125                 return "";
 126         }
 127 }
 128
 129 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)
 130 {
 131         assert(family >= CHIP_TAHITI);
 132
 133         const char *triple = "amdgcn--";
 134         LLVMTargetRef target = ac_get_llvm_target(triple);
 135         LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
 136                                      target,
 137                                      triple,
 138                                      ac_get_llvm_processor_name(family),
 139                                      "+DumpCode,+vgpr-spilling",
 140                                      LLVMCodeGenLevelDefault,
 141                                      LLVMRelocDefault,
 142                                      LLVMCodeModelDefault);
 143
 144         return tm;
 145 }
 146
 147 /* Initialize module-independent parts of the context.
 148  *
 149  * The caller is responsible for initializing ctx::module and ctx::builder.
 150  */
 151 void
 152 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
 153 {
 154         LLVMValueRef args[1];
 155
 156         ctx->context = context;
 157         ctx->module = NULL;
 158         ctx->builder = NULL;
 159
 160         ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
 161         ctx->f32 = LLVMFloatTypeInContext(ctx->context);
 162
 163         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
 164
 165         args[0] = LLVMConstReal(ctx->f32, 2.5);
 166         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
 167 }
 168
 169 #if HAVE_LLVM < 0x0400
 170 static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
 171 {
 172    switch (attr) {
 173    case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
 174    case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
 175    case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute;
 176    case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
 177    case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
 178    case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
 179    case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
 180    default:
 181            fprintf(stderr, "Unhandled function attribute: %x\n", attr);
 182            return 0;
 183    }
 184 }
 185
 186 #else
 187
 188 static const char *attr_to_str(enum ac_func_attr attr)
 189 {
 190    switch (attr) {
 191    case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
 192    case AC_FUNC_ATTR_BYVAL: return "byval";
 193    case AC_FUNC_ATTR_INREG: return "inreg";
 194    case AC_FUNC_ATTR_NOALIAS: return "noalias";
 195    case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
 196    case AC_FUNC_ATTR_READNONE: return "readnone";
 197    case AC_FUNC_ATTR_READONLY: return "readonly";
 198    default:
 199            fprintf(stderr, "Unhandled function attribute: %x\n", attr);
 200            return 0;
 201    }
 202 }
 203
 204 #endif
 205
 206 void
 207 ac_add_function_attr(LLVMValueRef function,
 208                      int attr_idx,
 209                      enum ac_func_attr attr)
 210 {
 211
 212 #if HAVE_LLVM < 0x0400
 213    LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
 214    if (attr_idx == -1) {
 215       LLVMAddFunctionAttr(function, llvm_attr);
 216    } else {
 217       LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
 218    }
 219 #else
 220    LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
 221    const char *attr_name = attr_to_str(attr);
 222    unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
 223                                                       strlen(attr_name));
 224    LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
 225    LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
 226 #endif
 227 }
 228
 229 LLVMValueRef
 230 ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
 231                        LLVMTypeRef return_type, LLVMValueRef *params,
 232                        unsigned param_count, unsigned attrib_mask)
 233 {
 234         LLVMValueRef function;
 235
 236         function = LLVMGetNamedFunction(ctx->module, name);
 237         if (!function) {
 238                 LLVMTypeRef param_types[32], function_type;
 239                 unsigned i;
 240
 241                 assert(param_count <= 32);
 242
 243                 for (i = 0; i < param_count; ++i) {
 244                         assert(params[i]);
 245                         param_types[i] = LLVMTypeOf(params[i]);
 246                 }
 247                 function_type =
 248                     LLVMFunctionType(return_type, param_types, param_count, 0);
 249                 function = LLVMAddFunction(ctx->module, name, function_type);
 250
 251                 LLVMSetFunctionCallConv(function, LLVMCCallConv);
 252                 LLVMSetLinkage(function, LLVMExternalLinkage);
 253
 254                 attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
 255                 while (attrib_mask) {
 256                         enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
 257                         ac_add_function_attr(function, -1, attr);
 258                 }
 259         }
 260         return LLVMBuildCall(ctx->builder, function, params, param_count, "");
 261 }
 262
 263 LLVMValueRef
 264 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
 265                                 LLVMValueRef *values,
 266                                 unsigned value_count,
 267                                 unsigned value_stride,
 268                                 bool load)
 269 {
 270         LLVMBuilderRef builder = ctx->builder;
 271         LLVMValueRef vec;
 272         unsigned i;
 273
 274
 275         if (value_count == 1) {
 276                 if (load)
 277                         return LLVMBuildLoad(builder, values[0], "");
 278                 return values[0];
 279         } else if (!value_count)
 280                 unreachable("value_count is 0");
 281
 282         for (i = 0; i < value_count; i++) {
 283                 LLVMValueRef value = values[i * value_stride];
 284                 if (load)
 285                         value = LLVMBuildLoad(builder, value, "");
 286
 287                 if (!i)
 288                         vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
 289                 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
 290                 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
 291         }
 292         return vec;
 293 }
 294
 295 LLVMValueRef
 296 ac_build_gather_values(struct ac_llvm_context *ctx,
 297                        LLVMValueRef *values,
 298                        unsigned value_count)
 299 {
 300         return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
 301 }
 302
 303 LLVMValueRef
 304 ac_emit_fdiv(struct ac_llvm_context *ctx,
 305              LLVMValueRef num,
 306              LLVMValueRef den)
 307 {
 308         LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
 309
 310         if (!LLVMIsConstant(ret))
 311                 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
 312         return ret;
 313 }
 314
 315 /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
 316  * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
 317  * already multiplied by two. id is the cube face number.
 318  */
 319 struct cube_selection_coords {
 320         LLVMValueRef stc[2];
 321         LLVMValueRef ma;
 322         LLVMValueRef id;
 323 };
 324
 325 static void
 326 build_cube_intrinsic(struct ac_llvm_context *ctx,
 327                      LLVMValueRef in[3],
 328                      struct cube_selection_coords *out)
 329 {
 330         LLVMBuilderRef builder = ctx->builder;
 331
 332         if (HAVE_LLVM >= 0x0309) {
 333                 LLVMTypeRef f32 = ctx->f32;
 334
 335                 out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
 336                                         f32, in, 3, AC_FUNC_ATTR_READNONE);
 337                 out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
 338                                         f32, in, 3, AC_FUNC_ATTR_READNONE);
 339                 out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
 340                                         f32, in, 3, AC_FUNC_ATTR_READNONE);
 341                 out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
 342                                         f32, in, 3, AC_FUNC_ATTR_READNONE);
 343         } else {
 344                 LLVMValueRef c[4] = {
 345                         in[0],
 346                         in[1],
 347                         in[2],
 348                         LLVMGetUndef(LLVMTypeOf(in[0]))
 349                 };
 350                 LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);
 351
 352                 LLVMValueRef tmp =
 353                         ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube",
 354                                           LLVMTypeOf(vec), &vec, 1,
 355                                           AC_FUNC_ATTR_READNONE);
 356
 357                 out->stc[1] = LLVMBuildExtractElement(builder, tmp,
 358                                 LLVMConstInt(ctx->i32, 0, 0), "");
 359                 out->stc[0] = LLVMBuildExtractElement(builder, tmp,
 360                                 LLVMConstInt(ctx->i32, 1, 0), "");
 361                 out->ma = LLVMBuildExtractElement(builder, tmp,
 362                                 LLVMConstInt(ctx->i32, 2, 0), "");
 363                 out->id = LLVMBuildExtractElement(builder, tmp,
 364                                 LLVMConstInt(ctx->i32, 3, 0), "");
 365         }
 366 }
 367
 368 /**
 369  * Build a manual selection sequence for cube face sc/tc coordinates and
 370  * major axis vector (multiplied by 2 for consistency) for the given
 371  * vec3 \p coords, for the face implied by \p selcoords.
 372  *
 373  * For the major axis, we always adjust the sign to be in the direction of
 374  * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
 375  * the selcoords major axis.
 376  */
 377 static void build_cube_select(LLVMBuilderRef builder,
 378                               const struct cube_selection_coords *selcoords,
 379                               const LLVMValueRef *coords,
 380                               LLVMValueRef *out_st,
 381                               LLVMValueRef *out_ma)
 382 {
 383         LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
 384         LLVMValueRef is_ma_positive;
 385         LLVMValueRef sgn_ma;
 386         LLVMValueRef is_ma_z, is_not_ma_z;
 387         LLVMValueRef is_ma_y;
 388         LLVMValueRef is_ma_x;
 389         LLVMValueRef sgn;
 390         LLVMValueRef tmp;
 391
 392         is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
 393                 selcoords->ma, LLVMConstReal(f32, 0.0), "");
 394         sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
 395                 LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
 396
 397         is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
 398         is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
 399         is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
 400                 LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
 401         is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
 402
 403         /* Select sc */
 404         tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
 405         sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
 406                 LLVMBuildSelect(builder, is_ma_x, sgn_ma,
 407                         LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
 408         out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
 409
 410         /* Select tc */
 411         tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
 412         sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
 413                 LLVMConstReal(f32, -1.0), "");
 414         out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
 415
 416         /* Select ma */
 417         tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
 418                 LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
 419         sgn = LLVMBuildSelect(builder, is_ma_positive,
 420                 LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
 421         *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
 422 }
 423
 424 void
 425 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 426                        bool is_deriv, bool is_array,
 427                        LLVMValueRef *coords_arg,
 428                        LLVMValueRef *derivs_arg)
 429 {
 430
 431         LLVMBuilderRef builder = ctx->builder;
 432         struct cube_selection_coords selcoords;
 433         LLVMValueRef coords[3];
 434         LLVMValueRef invma;
 435
 436         build_cube_intrinsic(ctx, coords_arg, &selcoords);
 437
 438         invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32",
 439                         ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
 440         invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
 441
 442         for (int i = 0; i < 2; ++i)
 443                 coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
 444
 445         coords[2] = selcoords.id;
 446
 447         if (is_deriv && derivs_arg) {
 448                 LLVMValueRef derivs[4];
 449                 int axis;
 450
 451                 /* Convert cube derivatives to 2D derivatives. */
 452                 for (axis = 0; axis < 2; axis++) {
 453                         LLVMValueRef deriv_st[2];
 454                         LLVMValueRef deriv_ma;
 455
 456                         /* Transform the derivative alongside the texture
 457                          * coordinate. Mathematically, the correct formula is
 458                          * as follows. Assume we're projecting onto the +Z face
 459                          * and denote by dx/dh the derivative of the (original)
 460                          * X texture coordinate with respect to horizontal
 461                          * window coordinates. The projection onto the +Z face
 462                          * plane is:
 463                          *
 464                          *   f(x,z) = x/z
 465                          *
 466                          * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
 467                          *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
 468                          *
 469                          * This motivatives the implementation below.
 470                          *
 471                          * Whether this actually gives the expected results for
 472                          * apps that might feed in derivatives obtained via
 473                          * finite differences is anyone's guess. The OpenGL spec
 474                          * seems awfully quiet about how textureGrad for cube
 475                          * maps should be handled.
 476                          */
 477                         build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
 478                                           deriv_st, &deriv_ma);
 479
 480                         deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
 481
 482                         for (int i = 0; i < 2; ++i)
 483                                 derivs[axis * 2 + i] =
 484                                         LLVMBuildFSub(builder,
 485                                                 LLVMBuildFMul(builder, deriv_st[i], invma, ""),
 486                                                 LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
 487                 }
 488
 489                 memcpy(derivs_arg, derivs, sizeof(derivs));
 490         }
 491
 492         /* Shift the texture coordinate. This must be applied after the
 493          * derivative calculation.
 494          */
 495         for (int i = 0; i < 2; ++i)
 496                 coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
 497
 498         if (is_array) {
 499                 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
 500                 /* coords_arg.w component - array_index for cube arrays */
 501                 LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
 502                 coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
 503         }
 504
 505         memcpy(coords_arg, coords, sizeof(coords));
 506 }