src/gallium/auxiliary/gallivm/gallivm_cpu.cpp

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28  /*
  29   * Authors:
  30   *   Zack Rusin zack@tungstengraphics.com
  31   */
  32 #ifdef MESA_LLVM
  33
  34 #include "gallivm.h"
  35 #include "gallivm_p.h"
  36
  37 #include "instructions.h"
  38 #include "loweringpass.h"
  39 #include "storage.h"
  40 #include "tgsitollvm.h"
  41
  42 #include "pipe/p_context.h"
  43 #include "pipe/p_shader_tokens.h"
  44
  45 #include "tgsi/tgsi_exec.h"
  46 #include "tgsi/tgsi_dump.h"
  47
  48 #include "util/u_memory.h"
  49 #include "util/u_math.h"
  50
  51 #include <llvm/Module.h>
  52 #include <llvm/CallingConv.h>
  53 #include <llvm/Constants.h>
  54 #include <llvm/DerivedTypes.h>
  55 #include <llvm/Instructions.h>
  56 #include <llvm/ModuleProvider.h>
  57 #include <llvm/Pass.h>
  58 #include <llvm/PassManager.h>
  59 #include <llvm/ParameterAttributes.h>
  60 #include <llvm/Support/PatternMatch.h>
  61 #include <llvm/ExecutionEngine/JIT.h>
  62 #include <llvm/ExecutionEngine/Interpreter.h>
  63 #include <llvm/ExecutionEngine/GenericValue.h>
  64 #include <llvm/Support/MemoryBuffer.h>
  65 #include <llvm/LinkAllPasses.h>
  66 #include <llvm/Analysis/Verifier.h>
  67 #include <llvm/Analysis/LoopPass.h>
  68 #include <llvm/Target/TargetData.h>
  69 #include <llvm/Bitcode/ReaderWriter.h>
  70 #include <llvm/Transforms/Utils/Cloning.h>
  71
  72 #include <sstream>
  73 #include <fstream>
  74 #include <iostream>
  75
  76 struct gallivm_cpu_engine {
  77    llvm::ExecutionEngine *engine;
  78 };
  79
  80 static struct gallivm_cpu_engine *CPU = 0;
  81
  82 typedef int (*fragment_shader_runner)(float x, float y,
  83                                       float (*dests)[16][4],
  84                                       float (*inputs)[16][4],
  85                                       int num_attribs,
  86                                       float (*consts)[4], int num_consts,
  87                                       struct tgsi_sampler *samplers);
  88
  89 int gallivm_cpu_fs_exec(struct gallivm_prog *prog,
  90                         float fx, float fy,
  91                         float (*dests)[16][4],
  92                         float (*inputs)[16][4],
  93                         float (*consts)[4],
  94                         struct tgsi_sampler *samplers)
  95 {
  96    fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function);
  97    assert(runner);
  98
  99    return runner(fx, fy, dests, inputs, prog->num_interp,
 100                  consts, prog->num_consts,
 101                  samplers);
 102 }
 103
 104 static inline llvm::Function *func_for_shader(struct gallivm_prog *prog)
 105 {
 106    llvm::Module *mod = prog->module;
 107    llvm::Function *func = 0;
 108
 109    switch (prog->type) {
 110    case GALLIVM_VS:
 111       func = mod->getFunction("vs_shader");
 112       break;
 113    case GALLIVM_FS:
 114       func = mod->getFunction("fs_shader");
 115       break;
 116    default:
 117       assert(!"Unknown shader type!");
 118       break;
 119    }
 120    return func;
 121 }
 122
 123 /*!
 124   This function creates a CPU based execution engine for the given gallivm_prog.
 125   gallivm_cpu_engine should be used as a singleton throughout the library. Before
 126   executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
 127   The gallivm_prog instance which is being passed to the constructor is being
 128   automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile
 129   with it again.
 130  */
 131 struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog)
 132 {
 133    struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *)
 134                                     calloc(1, sizeof(struct gallivm_cpu_engine));
 135    llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
 136    llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
 137    llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false);
 138    ee->DisableLazyCompilation();
 139    cpu->engine = ee;
 140
 141    llvm::Function *func = func_for_shader(prog);
 142
 143    prog->function = ee->getPointerToFunction(func);
 144    CPU = cpu;
 145    return cpu;
 146 }
 147
 148
 149 /*!
 150   This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
 151   The reference to the generated machine code entry point will be stored
 152   in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
 153   in order to execute the gallivm_prog on the CPU.
 154  */
 155 void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog)
 156 {
 157    llvm::Module *mod = static_cast<llvm::Module*>(prog->module);
 158    llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
 159    llvm::ExecutionEngine *ee = cpu->engine;
 160    assert(ee);
 161    /*FIXME : remove */
 162    ee->DisableLazyCompilation();
 163    ee->addModuleProvider(mp);
 164
 165    llvm::Function *func = func_for_shader(prog);
 166    prog->function = ee->getPointerToFunction(func);
 167 }
 168
 169 void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu)
 170 {
 171    free(cpu);
 172 }
 173
 174 struct gallivm_cpu_engine * gallivm_global_cpu_engine()
 175 {
 176    return CPU;
 177 }
 178
 179
 180 typedef void (*vertex_shader_runner)(void *ainputs,
 181                                      void *dests,
 182                                      float (*aconsts)[4],
 183                                      void *temps);
 184
 185 #define MAX_TGSI_VERTICES 4
 186 /*!
 187   This function is used to execute the gallivm_prog in software. Before calling
 188   this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile
 189   function.
 190  */
 191 int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
 192                         struct tgsi_exec_machine *machine,
 193                         const float (*input)[4],
 194                         unsigned num_inputs,
 195                         float (*output)[4],
 196                         unsigned num_outputs,
 197                         const float (*constants)[4],
 198                         unsigned count,
 199                         unsigned input_stride,
 200                         unsigned output_stride )
 201 {
 202    unsigned int i, j;
 203    unsigned slot;
 204    vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
 205
 206    assert(runner);
 207
 208    for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
 209       unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
 210
 211       /* Swizzle inputs.
 212        */
 213       for (j = 0; j < max_vertices; j++) {
 214          for (slot = 0; slot < num_inputs; slot++) {
 215             machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
 216             machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
 217             machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
 218             machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
 219          }
 220
 221          input = (const float (*)[4])((const char *)input + input_stride);
 222       }
 223
 224       /* run shader */
 225       runner(machine->Inputs,
 226              machine->Outputs,
 227              (float (*)[4]) constants,
 228              machine->Temps);
 229
 230       /* Unswizzle all output results
 231        */
 232       for (j = 0; j < max_vertices; j++) {
 233          for (slot = 0; slot < num_outputs; slot++) {
 234             output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
 235             output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
 236             output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
 237             output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
 238          }
 239          output = (float (*)[4])((char *)output + output_stride);
 240       }
 241    }
 242
 243    return 0;
 244 }
 245
 246 #endif