cd183d5087c25baa11935a703a37ec60bfe85c3b
[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
1 #include "draw_llvm.h"
2
3 #include "draw_context.h"
4 #include "draw_vs.h"
5
6 #include "gallivm/lp_bld_arit.h"
7 #include "gallivm/lp_bld_interp.h"
8 #include "gallivm/lp_bld_struct.h"
9 #include "gallivm/lp_bld_type.h"
10 #include "gallivm/lp_bld_flow.h"
11 #include "gallivm/lp_bld_debug.h"
12 #include "gallivm/lp_bld_tgsi.h"
13
14 #include "util/u_cpu_detect.h"
15
16 #include <llvm-c/Transforms/Scalar.h>
17
18 static void
19 init_globals(struct draw_llvm *llvm)
20 {
21 LLVMTypeRef vertex_header;
22 LLVMTypeRef texture_type;
23
24 /* struct vertex_header */
25 {
26 LLVMTypeRef elem_types[3];
27
28 elem_types[0] = LLVMIntType(32);
29 elem_types[1] = LLVMArrayType(LLVMFloatType(), 4);
30 elem_types[2] = LLVMArrayType(elem_types[1], 0);
31
32 vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
33
34 /* these are bit-fields and we can't take address of them
35 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
36 llvm->target, vertex_header,
37 DRAW_JIT_VERTEX_CLIPMASK);
38 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
39 llvm->target, vertex_header,
40 DRAW_JIT_VERTEX_EDGEFLAG);
41 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
42 llvm->target, vertex_header,
43 DRAW_JIT_VERTEX_PAD);
44 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
45 llvm->target, vertex_header,
46 DRAW_JIT_VERTEX_VERTEX_ID);
47 */
48 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
49 llvm->target, vertex_header,
50 DRAW_JIT_VERTEX_CLIP);
51 LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
52 llvm->target, vertex_header,
53 DRAW_JIT_VERTEX_DATA);
54
55 LP_CHECK_STRUCT_SIZE(struct vertex_header,
56 llvm->target, vertex_header);
57
58 LLVMAddTypeName(llvm->module, "vertex_header", vertex_header);
59
60 llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
61 }
62 /* struct draw_jit_texture */
63 {
64 LLVMTypeRef elem_types[4];
65
66 elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
67 elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
68 elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
69 elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
70
71 texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
72
73 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
74 llvm->target, texture_type,
75 DRAW_JIT_TEXTURE_WIDTH);
76 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
77 llvm->target, texture_type,
78 DRAW_JIT_TEXTURE_HEIGHT);
79 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
80 llvm->target, texture_type,
81 DRAW_JIT_TEXTURE_STRIDE);
82 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
83 llvm->target, texture_type,
84 DRAW_JIT_TEXTURE_DATA);
85 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
86 llvm->target, texture_type);
87
88 LLVMAddTypeName(llvm->module, "texture", texture_type);
89 }
90
91
92 /* struct draw_jit_context */
93 {
94 LLVMTypeRef elem_types[3];
95 LLVMTypeRef context_type;
96
97 elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
98 elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
99 elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
100
101 context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
102
103 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
104 llvm->target, context_type, 0);
105 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
106 llvm->target, context_type, 1);
107 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
108 llvm->target, context_type,
109 DRAW_JIT_CONTEXT_TEXTURES_INDEX);
110 LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
111 llvm->target, context_type);
112
113 LLVMAddTypeName(llvm->module, "context", context_type);
114
115 llvm->context_ptr_type = LLVMPointerType(context_type, 0);
116 }
117 {
118 LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0);
119 llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0);
120 }
121 }
122
123 struct draw_llvm *
124 draw_llvm_create(struct draw_context *draw)
125 {
126 struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
127
128 util_cpu_detect();
129
130 llvm->draw = draw;
131 llvm->engine = draw->engine;
132
133 debug_assert(llvm->engine);
134
135 llvm->module = LLVMModuleCreateWithName("draw_llvm");
136 llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
137
138 LLVMAddModuleProvider(llvm->engine, llvm->provider);
139
140 llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
141
142 llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
143 LLVMAddTargetData(llvm->target, llvm->pass);
144 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
145 * but there are more on SVN. */
146 /* TODO: Add more passes */
147 LLVMAddConstantPropagationPass(llvm->pass);
148 if(util_cpu_caps.has_sse4_1) {
149 /* FIXME: There is a bug in this pass, whereby the combination of fptosi
150 * and sitofp (necessary for trunc/floor/ceil/round implementation)
151 * somehow becomes invalid code.
152 */
153 LLVMAddInstructionCombiningPass(llvm->pass);
154 }
155 LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
156 LLVMAddGVNPass(llvm->pass);
157 LLVMAddCFGSimplificationPass(llvm->pass);
158
159 init_globals(llvm);
160
161
162 #if 1
163 LLVMDumpModule(llvm->module);
164 #endif
165
166 return llvm;
167 }
168
169 void
170 draw_llvm_destroy(struct draw_llvm *llvm)
171 {
172 free(llvm);
173 }
174
/**
 * Prepare \p llvm for drawing.
 *
 * Currently this only forwards to draw_llvm_generate(), which builds and
 * JIT-compiles the vertex processing function.
 */
void
draw_llvm_prepare(struct draw_llvm *llvm)
{
   draw_llvm_generate(llvm);
}
180
181
182 struct draw_context *draw_create_with_llvm(LLVMExecutionEngineRef engine)
183 {
184 struct draw_context *draw = CALLOC_STRUCT( draw_context );
185 if (draw == NULL)
186 goto fail;
187 draw->engine = engine;
188
189 if (!draw_init(draw))
190 goto fail;
191
192 return draw;
193
194 fail:
195 draw_destroy( draw );
196 return NULL;
197 }
198
199 static void
200 generate_vs(struct draw_llvm *llvm,
201 LLVMBuilderRef builder,
202 LLVMValueRef (*outputs)[NUM_CHANNELS],
203 const LLVMValueRef (*inputs)[NUM_CHANNELS],
204 LLVMValueRef context_ptr,
205 LLVMValueRef io)
206 {
207 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
208 struct lp_type vs_type;
209 LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
210
211 memset(&vs_type, 0, sizeof vs_type);
212 vs_type.floating = TRUE; /* floating point values */
213 vs_type.sign = TRUE; /* values are signed */
214 vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
215 vs_type.width = 32; /* 32-bit float */
216 vs_type.length = 4; /* 4 elements per vector */
217 #if 0
218 num_vs = 4; /* number of vertices per block */
219 #endif
220
221 lp_build_tgsi_soa(builder,
222 tokens,
223 vs_type,
224 NULL /*struct lp_build_mask_context *mask*/,
225 consts_ptr,
226 NULL /*pos*/,
227 inputs,
228 outputs,
229 NULL/*sampler*/);
230 }
231
232 static void
233 generate_fetch(LLVMBuilderRef builder,
234 LLVMValueRef vbuffers_ptr,
235 LLVMValueRef *res,
236 struct pipe_vertex_element *velem,
237 struct pipe_vertex_buffer *vbuf,
238 LLVMValueRef index)
239 {
240 LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
241 LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
242 &indices, 1, "");
243 LLVMValueRef stride = LLVMBuildMul(builder,
244 LLVMConstInt(LLVMInt32Type(), vbuf->stride, 0),
245 index, "");
246
247 stride = LLVMBuildAdd(builder, stride,
248 LLVMConstInt(LLVMInt32Type(), vbuf->buffer_offset, 0),
249 "");
250 stride = LLVMBuildAdd(builder, stride,
251 LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0),
252 "");
253
254 vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
255
256 *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format);
257 }
258
259 static LLVMValueRef
260 aos_to_soa(LLVMBuilderRef builder,
261 LLVMValueRef val0,
262 LLVMValueRef val1,
263 LLVMValueRef val2,
264 LLVMValueRef val3,
265 LLVMValueRef channel)
266 {
267 LLVMValueRef ex, res;
268
269 ex = LLVMBuildExtractElement(builder, val0,
270 channel, "");
271 res = LLVMBuildInsertElement(builder,
272 LLVMConstNull(LLVMTypeOf(val0)),
273 ex,
274 LLVMConstInt(LLVMInt32Type(), 0, 0),
275 "");
276
277 ex = LLVMBuildExtractElement(builder, val1,
278 channel, "");
279 res = LLVMBuildInsertElement(builder,
280 res, ex,
281 LLVMConstInt(LLVMInt32Type(), 1, 0),
282 "");
283
284 ex = LLVMBuildExtractElement(builder, val2,
285 channel, "");
286 res = LLVMBuildInsertElement(builder,
287 res, ex,
288 LLVMConstInt(LLVMInt32Type(), 2, 0),
289 "");
290
291 ex = LLVMBuildExtractElement(builder, val3,
292 channel, "");
293 res = LLVMBuildInsertElement(builder,
294 res, ex,
295 LLVMConstInt(LLVMInt32Type(), 3, 0),
296 "");
297
298 return res;
299 }
300
301 static void
302 convert_to_soa(LLVMBuilderRef builder,
303 LLVMValueRef (*aos)[NUM_CHANNELS],
304 LLVMValueRef (*soa)[NUM_CHANNELS],
305 int num_attribs)
306 {
307 int i;
308
309 debug_assert(NUM_CHANNELS == 4);
310
311 for (i = 0; i < num_attribs; ++i) {
312 LLVMValueRef val0 = aos[i][0];
313 LLVMValueRef val1 = aos[i][1];
314 LLVMValueRef val2 = aos[i][2];
315 LLVMValueRef val3 = aos[i][3];
316
317 soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3,
318 LLVMConstInt(LLVMInt32Type(), 0, 0));
319 soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3,
320 LLVMConstInt(LLVMInt32Type(), 1, 0));
321 soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3,
322 LLVMConstInt(LLVMInt32Type(), 2, 0));
323 soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3,
324 LLVMConstInt(LLVMInt32Type(), 3, 0));
325 }
326 }
327
328 void
329 draw_llvm_generate(struct draw_llvm *llvm)
330 {
331 LLVMTypeRef arg_types[6];
332 LLVMTypeRef func_type;
333 LLVMValueRef context_ptr;
334 LLVMBasicBlockRef block;
335 LLVMBuilderRef builder;
336 LLVMValueRef function;
337 LLVMValueRef start, end, count, stride, step;
338 LLVMValueRef io_ptr, vbuffers_ptr;
339 struct draw_context *draw = llvm->draw;
340 unsigned i, j;
341 struct lp_build_context bld;
342 struct lp_build_loop_state lp_loop;
343 struct lp_type vs_type = lp_type_float_vec(32);
344 const int max_vertices = 4;
345 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
346
347 arg_types[0] = llvm->context_ptr_type; /* context */
348 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
349 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
350 arg_types[3] = LLVMInt32Type(); /* start */
351 arg_types[4] = LLVMInt32Type(); /* count */
352 arg_types[5] = LLVMInt32Type(); /* stride */
353
354 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
355
356 function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
357 LLVMSetFunctionCallConv(function, LLVMCCallConv);
358 for(i = 0; i < Elements(arg_types); ++i)
359 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
360 LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
361
362 context_ptr = LLVMGetParam(function, 0);
363 io_ptr = LLVMGetParam(function, 1);
364 vbuffers_ptr = LLVMGetParam(function, 2);
365 start = LLVMGetParam(function, 3);
366 count = LLVMGetParam(function, 4);
367 stride = LLVMGetParam(function, 5);
368
369 lp_build_name(context_ptr, "context");
370 lp_build_name(io_ptr, "io");
371 lp_build_name(vbuffers_ptr, "vbuffers");
372 lp_build_name(start, "start");
373 lp_build_name(count, "count");
374 lp_build_name(stride, "stride");
375
376 /*
377 * Function body
378 */
379
380 block = LLVMAppendBasicBlock(function, "entry");
381 builder = LLVMCreateBuilder();
382 LLVMPositionBuilderAtEnd(builder, block);
383
384 lp_build_context_init(&bld, builder, vs_type);
385
386 end = lp_build_add(&bld, start, count);
387
388 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
389 lp_build_loop_begin(builder, start, &lp_loop);
390 {
391 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
392 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
393 LLVMValueRef io = LLVMBuildGEP(builder, io_ptr, &lp_loop.counter, 1, "");
394
395 for (i = 0; i < NUM_CHANNELS; ++i) {
396 LLVMValueRef true_index = LLVMBuildAdd(
397 builder,
398 lp_loop.counter,
399 LLVMConstInt(LLVMInt32Type(), i, 0), "");
400 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
401 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
402 struct pipe_vertex_buffer *vbuf = &draw->pt.vertex_buffer[
403 velem->vertex_buffer_index];
404 generate_fetch(builder, vbuffers_ptr,
405 &aos_attribs[j][i], velem, vbuf, true_index);
406 }
407 }
408 convert_to_soa(builder, aos_attribs, inputs,
409 draw->pt.nr_vertex_elements);
410
411 generate_vs(llvm,
412 builder,
413 outputs,
414 inputs,
415 context_ptr,
416 io);
417 LLVMDumpModule(llvm->module);
418 }
419 lp_build_loop_end(builder, end, step, &lp_loop);
420
421
422 LLVMBuildRetVoid(builder);
423
424 LLVMDisposeBuilder(builder);
425
426 /*
427 * Translate the LLVM IR into machine code.
428 */
429
430 #ifdef DEBUG
431 if(LLVMVerifyFunction(function, LLVMPrintMessageAction)) {
432 LLVMDumpValue(function);
433 assert(0);
434 }
435 #endif
436
437 LLVMRunFunctionPassManager(llvm->pass, function);
438
439 if (1) {
440 LLVMDumpValue(function);
441 debug_printf("\n");
442 }
443
444 llvm->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, function);
445
446 if (1)
447 lp_disassemble(llvm->jit_func);
448 }