src/gallium/auxiliary/gallivm/gallivm.cpp

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28  /*
  29   * Authors:
  30   *   Zack Rusin zack@tungstengraphics.com
  31   */
  32 #ifdef MESA_LLVM
  33
  34 #include "gallivm.h"
  35 #include "gallivm_p.h"
  36
  37 #include "instructions.h"
  38 #include "loweringpass.h"
  39 #include "storage.h"
  40 #include "tgsitollvm.h"
  41
  42 #include "pipe/p_context.h"
  43 #include "pipe/p_shader_tokens.h"
  44
  45 #include "tgsi/tgsi_exec.h"
  46 #include "tgsi/tgsi_dump.h"
  47
  48 #include <llvm/Module.h>
  49 #include <llvm/CallingConv.h>
  50 #include <llvm/Constants.h>
  51 #include <llvm/DerivedTypes.h>
  52 #include <llvm/Instructions.h>
  53 #include <llvm/ModuleProvider.h>
  54 #include <llvm/Pass.h>
  55 #include <llvm/PassManager.h>
  56 #include <llvm/Attributes.h>
  57 #include <llvm/Support/PatternMatch.h>
  58 #include <llvm/ExecutionEngine/JIT.h>
  59 #include <llvm/ExecutionEngine/Interpreter.h>
  60 #include <llvm/ExecutionEngine/GenericValue.h>
  61 #include <llvm/Support/MemoryBuffer.h>
  62 #include <llvm/LinkAllPasses.h>
  63 #include <llvm/Analysis/Verifier.h>
  64 #include <llvm/Analysis/LoopPass.h>
  65 #include <llvm/Target/TargetData.h>
  66 #include <llvm/Bitcode/ReaderWriter.h>
  67 #include <llvm/Transforms/Utils/Cloning.h>
  68
  69 #include <sstream>
  70 #include <fstream>
  71 #include <iostream>
  72
/* Counter used to number shaders: gallivm_ir_new() increments it and stores
 * the new value as the id of the gallivm_ir it returns. */
static int GLOBAL_ID = 0;

/* Bring the llvm namespace into file scope so that passes and types can be
 * named without the llvm:: qualifier below. */
using namespace llvm;
  76
  77 static inline
  78 void AddStandardCompilePasses(PassManager &PM)
  79 {
  80    PM.add(new LoweringPass());
  81    PM.add(createVerifierPass());                  // Verify that input is correct
  82
  83    PM.add(createLowerSetJmpPass());          // Lower llvm.setjmp/.longjmp
  84
  85    //PM.add(createStripSymbolsPass(true));
  86
  87    PM.add(createRaiseAllocationsPass());     // call %malloc -> malloc inst
  88    PM.add(createCFGSimplificationPass());    // Clean up disgusting code
  89    PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas
  90    PM.add(createGlobalOptimizerPass());      // Optimize out global vars
  91    PM.add(createGlobalDCEPass());            // Remove unused fns and globs
  92    PM.add(createIPConstantPropagationPass());// IP Constant Propagation
  93    PM.add(createDeadArgEliminationPass());   // Dead argument elimination
  94    PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
  95    PM.add(createCFGSimplificationPass());    // Clean up after IPCP & DAE
  96
  97    PM.add(createPruneEHPass());              // Remove dead EH info
  98
  99    PM.add(createFunctionInliningPass());   // Inline small functions
 100    PM.add(createArgumentPromotionPass());    // Scalarize uninlined fn args
 101
 102    PM.add(createTailDuplicationPass());      // Simplify cfg by copying code
 103    PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
 104    PM.add(createCFGSimplificationPass());    // Merge & remove BBs
 105    PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas
 106    PM.add(createInstructionCombiningPass()); // Combine silly seq's
 107    PM.add(createCondPropagationPass());      // Propagate conditionals
 108
 109    PM.add(createTailCallEliminationPass());  // Eliminate tail calls
 110    PM.add(createCFGSimplificationPass());    // Merge & remove BBs
 111    PM.add(createReassociatePass());          // Reassociate expressions
 112    PM.add(createLoopRotatePass());
 113    PM.add(createLICMPass());                 // Hoist loop invariants
 114    PM.add(createLoopUnswitchPass());         // Unswitch loops.
 115    PM.add(createLoopIndexSplitPass());       // Index split loops.
 116    PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc
 117    PM.add(createIndVarSimplifyPass());       // Canonicalize indvars
 118    PM.add(createLoopUnrollPass());           // Unroll small loops
 119    PM.add(createInstructionCombiningPass()); // Clean up after the unroller
 120    PM.add(createGVNPass());                  // Remove redundancies
 121    PM.add(createSCCPPass());                 // Constant prop with SCCP
 122
 123    // Run instcombine after redundancy elimination to exploit opportunities
 124    // opened up by them.
 125    PM.add(createInstructionCombiningPass());
 126    PM.add(createCondPropagationPass());      // Propagate conditionals
 127
 128    PM.add(createDeadStoreEliminationPass()); // Delete dead stores
 129    PM.add(createAggressiveDCEPass());        // SSA based 'Aggressive DCE'
 130    PM.add(createCFGSimplificationPass());    // Merge & remove BBs
 131    PM.add(createSimplifyLibCallsPass());     // Library Call Optimizations
 132    PM.add(createDeadTypeEliminationPass());  // Eliminate dead types
 133    PM.add(createConstantMergePass());        // Merge dup global constants
 134 }
 135
 136 void gallivm_prog_delete(struct gallivm_prog *prog)
 137 {
 138    delete prog->module;
 139    prog->module = 0;
 140    prog->function = 0;
 141    free(prog);
 142 }
 143
 144 static inline void
 145 constant_interpolation(float (*inputs)[16][4],
 146                        const struct tgsi_interp_coef *coefs,
 147                        unsigned attrib,
 148                        unsigned chan)
 149 {
 150    unsigned i;
 151
 152    for (i = 0; i < QUAD_SIZE; ++i) {
 153       inputs[i][attrib][chan] = coefs[attrib].a0[chan];
 154    }
 155 }
 156
 157 static inline void
 158 linear_interpolation(float (*inputs)[16][4],
 159                      const struct tgsi_interp_coef *coefs,
 160                      unsigned attrib,
 161                      unsigned chan)
 162 {
 163    unsigned i;
 164
 165    for( i = 0; i < QUAD_SIZE; i++ ) {
 166       const float x = inputs[i][0][0];
 167       const float y = inputs[i][0][1];
 168
 169       inputs[i][attrib][chan] =
 170          coefs[attrib].a0[chan] +
 171          coefs[attrib].dadx[chan] * x +
 172          coefs[attrib].dady[chan] * y;
 173    }
 174 }
 175
 176 static inline void
 177 perspective_interpolation(float (*inputs)[16][4],
 178                           const struct tgsi_interp_coef *coefs,
 179                           unsigned attrib,
 180                           unsigned chan )
 181 {
 182    unsigned i;
 183
 184    for( i = 0; i < QUAD_SIZE; i++ ) {
 185       const float x = inputs[i][0][0];
 186       const float y = inputs[i][0][1];
 187       /* WPOS.w here is really 1/w */
 188       const float w = 1.0f / inputs[i][0][3];
 189       assert(inputs[i][0][3] != 0.0);
 190
 191       inputs[i][attrib][chan] =
 192          (coefs[attrib].a0[chan] +
 193           coefs[attrib].dadx[chan] * x +
 194           coefs[attrib].dady[chan] * y) * w;
 195    }
 196 }
 197
 198 void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix)
 199 {
 200    if (!ir || !ir->module)
 201       return;
 202
 203    if (file_prefix) {
 204       std::ostringstream stream;
 205       stream << file_prefix;
 206       stream << ir->id;
 207       stream << ".ll";
 208       std::string name = stream.str();
 209       std::ofstream out(name.c_str());
 210       if (!out) {
 211          std::cerr<<"Can't open file : "<<stream.str()<<std::endl;;
 212          return;
 213       }
 214       out << (*ir->module);
 215       out.close();
 216    } else {
 217       const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList();
 218       llvm::Module::FunctionListType::const_iterator itr;
 219       std::cout<<"; ---------- Start shader "<<ir->id<<std::endl;
 220       for (itr = funcs.begin(); itr != funcs.end(); ++itr) {
 221          const llvm::Function &func = (*itr);
 222          std::string name = func.getName();
 223          const llvm::Function *found = 0;
 224          if (name.find("vs_shader") != std::string::npos ||
 225              name.find("fs_shader") != std::string::npos ||
 226              name.find("function") != std::string::npos)
 227             found = &func;
 228          if (found) {
 229             std::cout<<*found<<std::endl;
 230          }
 231       }
 232       std::cout<<"; ---------- End shader "<<ir->id<<std::endl;
 233    }
 234 }
 235
 236
 237 void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
 238                                      float (*inputs)[16][4],
 239                                      const struct tgsi_interp_coef *coef)
 240 {
 241    for (int i = 0; i < prog->num_interp; ++i) {
 242       const gallivm_interpolate &interp = prog->interpolators[i];
 243       switch (interp.type) {
 244       case TGSI_INTERPOLATE_CONSTANT:
 245          constant_interpolation(inputs, coef, interp.attrib, interp.chan);
 246          break;
 247
 248       case TGSI_INTERPOLATE_LINEAR:
 249          linear_interpolation(inputs, coef, interp.attrib, interp.chan);
 250          break;
 251
 252       case TGSI_INTERPOLATE_PERSPECTIVE:
 253          perspective_interpolation(inputs, coef, interp.attrib, interp.chan);
 254          break;
 255
 256       default:
 257          assert( 0 );
 258       }
 259    }
 260 }
 261
 262
 263 struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type)
 264 {
 265    struct gallivm_ir *ir =
 266       (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir));
 267    ++GLOBAL_ID;
 268    ir->id   = GLOBAL_ID;
 269    ir->type = type;
 270
 271    return ir;
 272 }
 273
 274 void gallivm_ir_set_layout(struct gallivm_ir *ir,
 275                            enum gallivm_vector_layout layout)
 276 {
 277    ir->layout = layout;
 278 }
 279
 280 void gallivm_ir_set_components(struct gallivm_ir *ir, int num)
 281 {
 282    ir->num_components = num;
 283 }
 284
 285 void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
 286                                const struct tgsi_token *tokens)
 287 {
 288    std::cout << "Creating llvm from: " <<std::endl;
 289    tgsi_dump(tokens, 0);
 290
 291    llvm::Module *mod = tgsi_to_llvmir(ir, tokens);
 292    ir->module = mod;
 293    gallivm_ir_dump(ir, 0);
 294 }
 295
 296 void gallivm_ir_delete(struct gallivm_ir *ir)
 297 {
 298    delete ir->module;
 299    free(ir);
 300 }
 301
 302 struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
 303 {
 304    struct gallivm_prog *prog =
 305       (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
 306
 307    std::cout << "Before optimizations:"<<std::endl;
 308    ir->module->dump();
 309    std::cout<<"-------------------------------"<<std::endl;
 310
 311    PassManager veri;
 312    veri.add(createVerifierPass());
 313    veri.run(*ir->module);
 314    llvm::Module *mod = llvm::CloneModule(ir->module);
 315    prog->num_consts = ir->num_consts;
 316    memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators));
 317    prog->num_interp = ir->num_interp;
 318
 319    /* Run optimization passes over it */
 320    PassManager passes;
 321    passes.add(new TargetData(mod));
 322    AddStandardCompilePasses(passes);
 323    passes.run(*mod);
 324    prog->module = mod;
 325
 326    std::cout << "After optimizations:"<<std::endl;
 327    mod->dump();
 328
 329    return prog;
 330 }
 331
 332 #endif /* MESA_LLVM */