1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Zack Rusin zack@tungstengraphics.com
35 #include "gallivm_p.h"
37 #include "instructions.h"
38 #include "loweringpass.h"
40 #include "tgsitollvm.h"
42 #include "pipe/p_context.h"
43 #include "pipe/p_shader_tokens.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_dump.h"
48 #include "util/u_memory.h"
49 #include "util/u_math.h"
51 #include <llvm/Module.h>
52 #include <llvm/CallingConv.h>
53 #include <llvm/Constants.h>
54 #include <llvm/DerivedTypes.h>
55 #include <llvm/Instructions.h>
56 #include <llvm/ModuleProvider.h>
57 #include <llvm/Pass.h>
58 #include <llvm/PassManager.h>
59 #include <llvm/ParameterAttributes.h>
60 #include <llvm/Support/PatternMatch.h>
61 #include <llvm/ExecutionEngine/JIT.h>
62 #include <llvm/ExecutionEngine/Interpreter.h>
63 #include <llvm/ExecutionEngine/GenericValue.h>
64 #include <llvm/Support/MemoryBuffer.h>
65 #include <llvm/LinkAllPasses.h>
66 #include <llvm/Analysis/Verifier.h>
67 #include <llvm/Analysis/LoopPass.h>
68 #include <llvm/Target/TargetData.h>
69 #include <llvm/Bitcode/ReaderWriter.h>
70 #include <llvm/Transforms/Utils/Cloning.h>
/* CPU execution engine handle: wraps the LLVM ExecutionEngine that
 * gallivm programs are JIT compiled with. */
76 struct gallivm_cpu_engine
{
/* Engine that gallivm_cpu_jit_compile adds module providers to and asks
 * for the native entry point of a shader function. */
77 llvm::ExecutionEngine
*engine
;
/* File-local CPU engine pointer, null until assigned.
 * NOTE(review): presumably the singleton engine handed out by
 * gallivm_global_cpu_engine(); the assignment is not visible here - confirm. */
80 static struct gallivm_cpu_engine
*CPU
= 0;
/* Signature of a JIT-compiled fragment shader entry point.
 * gallivm_cpu_fs_exec casts prog->function to this type and calls it.
 *   x, y       - two float arguments (gallivm_cpu_fs_exec passes fx, fy)
 *   dests      - pointer to a [16][4] float array
 *   inputs     - pointer to a [16][4] float array
 *   consts     - pointer to rows of 4 floats
 *   num_consts - count passed alongside consts
 *   samplers   - pointer to tgsi_sampler
 * Returns int. */
82 typedef int (*fragment_shader_runner
)(float x
, float y
,
83 float (*dests
)[16][4],
84 float (*inputs
)[16][4],
86 float (*consts
)[4], int num_consts
,
87 struct tgsi_sampler
*samplers
);
/* Executes the compiled fragment shader of a gallivm program.
 * Reinterprets prog->function as a fragment_shader_runner and returns the
 * int that the call produces. The visible arguments forwarded to the runner
 * are fx, fy, dests, inputs, prog->num_interp, consts and prog->num_consts.
 * prog->function must already hold the native entry point; it is written by
 * gallivm_cpu_engine_create and gallivm_cpu_jit_compile. */
89 int gallivm_cpu_fs_exec(struct gallivm_prog
*prog
,
91 float (*dests
)[16][4],
92 float (*inputs
)[16][4],
94 struct tgsi_sampler
*samplers
)
/* prog->function is stored untyped, hence the reinterpret_cast. */
96 fragment_shader_runner runner
= reinterpret_cast<fragment_shader_runner
>(prog
->function
);
99 return runner(fx
, fy
, dests
, inputs
, prog
->num_interp
,
100 consts
, prog
->num_consts
,
/* Looks up the LLVM function that implements the shader of a program.
 * Switches on prog->type and fetches either "vs_shader" or "fs_shader" from
 * prog->module; any other type hits the "Unknown shader type!" assert.
 * NOTE(review): the case labels and the return statement are not visible
 * here; presumably func is returned - confirm. */
104 static inline llvm::Function
*func_for_shader(struct gallivm_prog
*prog
)
106 llvm::Module
*mod
= prog
->module
;
/* Stays null unless one of the lookups below assigns it. */
107 llvm::Function
*func
= 0;
109 switch (prog
->type
) {
111 func
= mod
->getFunction("vs_shader");
114 func
= mod
->getFunction("fs_shader");
117 assert(!"Unknown shader type!");
124 This function creates a CPU based execution engine for the given gallivm_prog.
125 gallivm_cpu_engine should be used as a singleton throughout the library. Before
126 executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile.
127 The gallivm_prog instance which is being passed to the constructor is being
128 automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile with it again.
/* Creates a CPU engine for prog and JIT compiles prog with it.
 * Steps visible here: zero-allocate the gallivm_cpu_engine with calloc, wrap
 * prog->module in a new ExistingModuleProvider, create an LLVM
 * ExecutionEngine from it, disable lazy compilation, then store the native
 * pointer of the shader function in prog->function.
 * NOTE(review): the end of the function (storing the engine and returning
 * cpu) is not visible here - confirm. */
131 struct gallivm_cpu_engine
* gallivm_cpu_engine_create(struct gallivm_prog
*prog
)
133 struct gallivm_cpu_engine
*cpu
= (struct gallivm_cpu_engine
*)
134 calloc(1, sizeof(struct gallivm_cpu_engine
));
135 llvm::Module
*mod
= static_cast<llvm::Module
*>(prog
->module
);
136 llvm::ExistingModuleProvider
*mp
= new llvm::ExistingModuleProvider(mod
);
137 llvm::ExecutionEngine
*ee
= llvm::ExecutionEngine::create(mp
, false);
138 ee
->DisableLazyCompilation();
/* Pick the vertex or fragment entry point according to prog->type. */
141 llvm::Function
*func
= func_for_shader(prog
);
/* Native entry point later cast and called by the *_exec functions. */
143 prog
->function
= ee
->getPointerToFunction(func
);
150 This function JIT compiles the given gallivm_prog with the given cpu based execution engine.
151 The reference to the generated machine code entry point will be stored
152 in the gallivm_prog program. After executing this function one can call gallivm_prog_exec
153 in order to execute the gallivm_prog on the CPU.
/* JIT compiles prog with an existing CPU engine.
 * Wraps prog->module in a new ExistingModuleProvider, adds it to
 * cpu->engine with lazy compilation disabled, then stores the native
 * pointer of the shader function (chosen by func_for_shader) in
 * prog->function. */
155 void gallivm_cpu_jit_compile(struct gallivm_cpu_engine
*cpu
, struct gallivm_prog
*prog
)
157 llvm::Module
*mod
= static_cast<llvm::Module
*>(prog
->module
);
158 llvm::ExistingModuleProvider
*mp
= new llvm::ExistingModuleProvider(mod
);
159 llvm::ExecutionEngine
*ee
= cpu
->engine
;
162 ee
->DisableLazyCompilation();
/* Make the program's module known to the shared engine. */
163 ee
->addModuleProvider(mp
);
165 llvm::Function
*func
= func_for_shader(prog
);
/* Native entry point later cast and called by the *_exec functions. */
166 prog
->function
= ee
->getPointerToFunction(func
);
/* Takes a gallivm_cpu_engine to dispose of.
 * NOTE(review): the body is not visible here; confirm what is released. */
169 void gallivm_cpu_engine_delete(struct gallivm_cpu_engine
*cpu
)
/* Returns a gallivm_cpu_engine pointer and takes no arguments.
 * NOTE(review): the body is not visible here; presumably it returns the
 * file-level CPU pointer - confirm. */
174 struct gallivm_cpu_engine
* gallivm_global_cpu_engine()
/* Signature of a JIT-compiled vertex shader entry point; returns void.
 * gallivm_cpu_vs_exec casts prog->function to this type.
 * NOTE(review): only the first parameter is visible here. */
180 typedef void (*vertex_shader_runner
)(void *ainputs
,
/* Batch size: gallivm_cpu_vs_exec steps through the vertices this many at
 * a time and clamps the last batch to the number remaining. */
185 #define MAX_TGSI_VERTICES 4
187 This function is used to execute the gallivm_prog in software. Before calling
188 this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile function.
/* Runs the compiled vertex shader of prog over a run of vertices.
 *
 * prog->function is reinterpreted as a vertex_shader_runner. Vertices are
 * handled in batches of at most MAX_TGSI_VERTICES. For each vertex j of a
 * batch, the four components of every input slot are copied into
 * machine->Inputs[slot].xyzw[c].f[j], and input is advanced by input_stride
 * bytes. The runner is then called with machine->Inputs and constants
 * (remaining arguments not visible here). Afterwards the four components of
 * every output slot are copied from machine->Outputs[slot].xyzw[c].f[j]
 * into output[slot][c], and output is advanced by output_stride bytes.
 *
 * input_stride and output_stride are byte offsets: the pointers are cast to
 * char pointers before the addition.
 *
 * NOTE(review): the declarations of output, num_inputs, count, i, j and
 * slot, the block braces and the return value are not visible here. */
191 int gallivm_cpu_vs_exec(struct gallivm_prog
*prog
,
192 struct tgsi_exec_machine
*machine
,
193 const float (*input
)[4],
196 unsigned num_outputs
,
197 const float (*constants
)[4],
199 unsigned input_stride
,
200 unsigned output_stride
)
/* prog->function is stored untyped, hence the reinterpret_cast. */
204 vertex_shader_runner runner
= reinterpret_cast<vertex_shader_runner
>(prog
->function
);
/* Walk the vertices one batch at a time. */
208 for (i
= 0; i
< count
; i
+= MAX_TGSI_VERTICES
) {
/* The final batch may hold fewer than MAX_TGSI_VERTICES vertices. */
209 unsigned int max_vertices
= MIN2(MAX_TGSI_VERTICES
, count
- i
);
/* Copy vertex j's inputs into element j of each component array. */
213 for (j
= 0; j
< max_vertices
; j
++) {
214 for (slot
= 0; slot
< num_inputs
; slot
++) {
215 machine
->Inputs
[slot
].xyzw
[0].f
[j
] = input
[slot
][0];
216 machine
->Inputs
[slot
].xyzw
[1].f
[j
] = input
[slot
][1];
217 machine
->Inputs
[slot
].xyzw
[2].f
[j
] = input
[slot
][2];
218 machine
->Inputs
[slot
].xyzw
[3].f
[j
] = input
[slot
][3];
/* Byte-wise advance of the input pointer by input_stride. */
221 input
= (const float (*)[4])((const char *)input
+ input_stride
);
/* Invoke the JIT-compiled shader; the cast drops const from constants. */
225 runner(machine
->Inputs
,
227 (float (*)[4]) constants
,
230 /* Unswizzle all output results
232 for (j
= 0; j
< max_vertices
; j
++) {
233 for (slot
= 0; slot
< num_outputs
; slot
++) {
234 output
[slot
][0] = machine
->Outputs
[slot
].xyzw
[0].f
[j
];
235 output
[slot
][1] = machine
->Outputs
[slot
].xyzw
[1].f
[j
];
236 output
[slot
][2] = machine
->Outputs
[slot
].xyzw
[2].f
[j
];
237 output
[slot
][3] = machine
->Outputs
[slot
].xyzw
[3].f
[j
];
239 output
= (float (*)[4])((char *)output
+ output_stride
);