draw: limit the number of vertex shader variants kept around
[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "draw_llvm.h"
29
30 #include "draw_context.h"
31 #include "draw_vs.h"
32
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_struct.h"
35 #include "gallivm/lp_bld_type.h"
36 #include "gallivm/lp_bld_flow.h"
37 #include "gallivm/lp_bld_debug.h"
38 #include "gallivm/lp_bld_tgsi.h"
39 #include "gallivm/lp_bld_printf.h"
40
41 #include "tgsi/tgsi_exec.h"
42 #include "tgsi/tgsi_dump.h"
43
44 #include "util/u_cpu_detect.h"
45 #include "util/u_string.h"
46 #include "util/u_pointer.h"
47
48 #include <llvm-c/Transforms/Scalar.h>
49
50 #define DEBUG_STORE 0
51
52
/*
 * Forward declarations of the two JIT code generators defined further
 * down: one builds the function taking a start/count range, the other
 * builds the function taking an explicit list of fetch elements.
 */
static void
draw_llvm_generate(struct draw_llvm *llvm,
                   struct draw_llvm_variant *variant);

static void
draw_llvm_generate_elts(struct draw_llvm *llvm,
                        struct draw_llvm_variant *variant);
58
59 static void
60 init_globals(struct draw_llvm *llvm)
61 {
62 LLVMTypeRef texture_type;
63
64 /* struct draw_jit_texture */
65 {
66 LLVMTypeRef elem_types[4];
67
68 elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
69 elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
70 elem_types[DRAW_JIT_TEXTURE_STRIDE] = LLVMInt32Type();
71 elem_types[DRAW_JIT_TEXTURE_DATA] = LLVMPointerType(LLVMInt8Type(), 0);
72
73 texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
74
75 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
76 llvm->target, texture_type,
77 DRAW_JIT_TEXTURE_WIDTH);
78 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
79 llvm->target, texture_type,
80 DRAW_JIT_TEXTURE_HEIGHT);
81 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, stride,
82 llvm->target, texture_type,
83 DRAW_JIT_TEXTURE_STRIDE);
84 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
85 llvm->target, texture_type,
86 DRAW_JIT_TEXTURE_DATA);
87 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
88 llvm->target, texture_type);
89
90 LLVMAddTypeName(llvm->module, "texture", texture_type);
91 }
92
93
94 /* struct draw_jit_context */
95 {
96 LLVMTypeRef elem_types[3];
97 LLVMTypeRef context_type;
98
99 elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
100 elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
101 elem_types[2] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
102
103 context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
104
105 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
106 llvm->target, context_type, 0);
107 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
108 llvm->target, context_type, 1);
109 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
110 llvm->target, context_type,
111 DRAW_JIT_CONTEXT_TEXTURES_INDEX);
112 LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
113 llvm->target, context_type);
114
115 LLVMAddTypeName(llvm->module, "draw_jit_context", context_type);
116
117 llvm->context_ptr_type = LLVMPointerType(context_type, 0);
118 }
119 {
120 LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0);
121 llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0);
122 }
123 /* struct pipe_vertex_buffer */
124 {
125 LLVMTypeRef elem_types[4];
126 LLVMTypeRef vb_type;
127
128 elem_types[0] = LLVMInt32Type();
129 elem_types[1] = LLVMInt32Type();
130 elem_types[2] = LLVMInt32Type();
131 elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
132
133 vb_type = LLVMStructType(elem_types, Elements(elem_types), 0);
134
135 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
136 llvm->target, vb_type, 0);
137 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
138 llvm->target, vb_type, 2);
139 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer,
140 llvm->target, vb_type);
141
142 LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type);
143
144 llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
145 }
146 }
147
148 static LLVMTypeRef
149 create_vertex_header(struct draw_llvm *llvm, int data_elems)
150 {
151 /* struct vertex_header */
152 LLVMTypeRef elem_types[3];
153 LLVMTypeRef vertex_header;
154 char struct_name[24];
155
156 util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
157
158 elem_types[0] = LLVMIntType(32);
159 elem_types[1] = LLVMArrayType(LLVMFloatType(), 4);
160 elem_types[2] = LLVMArrayType(elem_types[1], data_elems);
161
162 vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
163
164 /* these are bit-fields and we can't take address of them
165 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
166 llvm->target, vertex_header,
167 DRAW_JIT_VERTEX_CLIPMASK);
168 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
169 llvm->target, vertex_header,
170 DRAW_JIT_VERTEX_EDGEFLAG);
171 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
172 llvm->target, vertex_header,
173 DRAW_JIT_VERTEX_PAD);
174 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
175 llvm->target, vertex_header,
176 DRAW_JIT_VERTEX_VERTEX_ID);
177 */
178 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
179 llvm->target, vertex_header,
180 DRAW_JIT_VERTEX_CLIP);
181 LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
182 llvm->target, vertex_header,
183 DRAW_JIT_VERTEX_DATA);
184
185 LLVMAddTypeName(llvm->module, struct_name, vertex_header);
186
187 return LLVMPointerType(vertex_header, 0);
188 }
189
190 struct draw_llvm *
191 draw_llvm_create(struct draw_context *draw)
192 {
193 struct draw_llvm *llvm;
194
195 #ifdef PIPE_ARCH_X86
196 util_cpu_detect();
197 /* require SSE2 due to LLVM PR6960. */
198 if (!util_cpu_caps.has_sse2)
199 return NULL;
200 #endif
201
202 llvm = CALLOC_STRUCT( draw_llvm );
203 if (!llvm)
204 return NULL;
205
206 llvm->draw = draw;
207 llvm->engine = draw->engine;
208
209 debug_assert(llvm->engine);
210
211 llvm->module = LLVMModuleCreateWithName("draw_llvm");
212 llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
213
214 LLVMAddModuleProvider(llvm->engine, llvm->provider);
215
216 llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
217
218 llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
219 LLVMAddTargetData(llvm->target, llvm->pass);
220
221 if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
222 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
223 * but there are more on SVN. */
224 /* TODO: Add more passes */
225 LLVMAddCFGSimplificationPass(llvm->pass);
226 LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
227 LLVMAddConstantPropagationPass(llvm->pass);
228 if(util_cpu_caps.has_sse4_1) {
229 /* FIXME: There is a bug in this pass, whereby the combination of fptosi
230 * and sitofp (necessary for trunc/floor/ceil/round implementation)
231 * somehow becomes invalid code.
232 */
233 LLVMAddInstructionCombiningPass(llvm->pass);
234 }
235 LLVMAddGVNPass(llvm->pass);
236 } else {
237 /* We need at least this pass to prevent the backends to fail in
238 * unexpected ways.
239 */
240 LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
241 }
242
243 init_globals(llvm);
244
245 if (gallivm_debug & GALLIVM_DEBUG_IR) {
246 LLVMDumpModule(llvm->module);
247 }
248
249 llvm->nr_variants = 0;
250 make_empty_list(&llvm->vs_variants_list);
251
252 return llvm;
253 }
254
255 void
256 draw_llvm_destroy(struct draw_llvm *llvm)
257 {
258 LLVMDisposePassManager(llvm->pass);
259
260 FREE(llvm);
261 }
262
263 struct draw_llvm_variant *
264 draw_llvm_create_variant(struct draw_llvm *llvm, int num_inputs)
265 {
266 struct draw_llvm_variant *variant = MALLOC(sizeof(struct draw_llvm_variant));
267 struct llvm_vertex_shader *shader =
268 llvm_vertex_shader(llvm->draw->vs.vertex_shader);
269
270 variant->llvm = llvm;
271
272 draw_llvm_make_variant_key(llvm, &variant->key);
273
274 llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
275
276 draw_llvm_generate(llvm, variant);
277 draw_llvm_generate_elts(llvm, variant);
278
279 variant->shader = shader;
280 variant->list_item_global.base = variant;
281 variant->list_item_local.base = variant;
282 /*variant->no = */shader->variants_created++;
283 variant->list_item_global.base = variant;
284
285 return variant;
286 }
287
288 static void
289 generate_vs(struct draw_llvm *llvm,
290 LLVMBuilderRef builder,
291 LLVMValueRef (*outputs)[NUM_CHANNELS],
292 const LLVMValueRef (*inputs)[NUM_CHANNELS],
293 LLVMValueRef context_ptr)
294 {
295 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
296 struct lp_type vs_type;
297 LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
298
299 memset(&vs_type, 0, sizeof vs_type);
300 vs_type.floating = TRUE; /* floating point values */
301 vs_type.sign = TRUE; /* values are signed */
302 vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
303 vs_type.width = 32; /* 32-bit float */
304 vs_type.length = 4; /* 4 elements per vector */
305 #if 0
306 num_vs = 4; /* number of vertices per block */
307 #endif
308
309 if (gallivm_debug & GALLIVM_DEBUG_IR) {
310 tgsi_dump(tokens, 0);
311 }
312
313 lp_build_tgsi_soa(builder,
314 tokens,
315 vs_type,
316 NULL /*struct lp_build_mask_context *mask*/,
317 consts_ptr,
318 NULL /*pos*/,
319 inputs,
320 outputs,
321 NULL/*sampler*/,
322 &llvm->draw->vs.vertex_shader->info);
323 }
324
#if DEBUG_STORE
/**
 * Debug helper: emit code that prints the four elements of \p vec.
 */
static void print_vectorf(LLVMBuilderRef builder,
                          LLVMValueRef vec)
{
   LLVMValueRef val[4];
   unsigned lane;

   for (lane = 0; lane < 4; ++lane) {
      LLVMValueRef lane_index = LLVMConstInt(LLVMInt32Type(), lane, 0);

      val[lane] = LLVMBuildExtractElement(builder, vec, lane_index, "");
   }

   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   val[0], val[1], val[2], val[3]);
}
#endif
342
343 static void
344 generate_fetch(LLVMBuilderRef builder,
345 LLVMValueRef vbuffers_ptr,
346 LLVMValueRef *res,
347 struct pipe_vertex_element *velem,
348 LLVMValueRef vbuf,
349 LLVMValueRef index)
350 {
351 LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
352 LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
353 &indices, 1, "");
354 LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
355 LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
356 LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
357 LLVMValueRef cond;
358 LLVMValueRef stride;
359
360 cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
361
362 index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
363
364 stride = LLVMBuildMul(builder, vb_stride, index, "");
365
366 vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
367
368 stride = LLVMBuildAdd(builder, stride,
369 vb_buffer_offset,
370 "");
371 stride = LLVMBuildAdd(builder, stride,
372 LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0),
373 "");
374
375 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
376 vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
377
378 *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format);
379 }
380
381 static LLVMValueRef
382 aos_to_soa(LLVMBuilderRef builder,
383 LLVMValueRef val0,
384 LLVMValueRef val1,
385 LLVMValueRef val2,
386 LLVMValueRef val3,
387 LLVMValueRef channel)
388 {
389 LLVMValueRef ex, res;
390
391 ex = LLVMBuildExtractElement(builder, val0,
392 channel, "");
393 res = LLVMBuildInsertElement(builder,
394 LLVMConstNull(LLVMTypeOf(val0)),
395 ex,
396 LLVMConstInt(LLVMInt32Type(), 0, 0),
397 "");
398
399 ex = LLVMBuildExtractElement(builder, val1,
400 channel, "");
401 res = LLVMBuildInsertElement(builder,
402 res, ex,
403 LLVMConstInt(LLVMInt32Type(), 1, 0),
404 "");
405
406 ex = LLVMBuildExtractElement(builder, val2,
407 channel, "");
408 res = LLVMBuildInsertElement(builder,
409 res, ex,
410 LLVMConstInt(LLVMInt32Type(), 2, 0),
411 "");
412
413 ex = LLVMBuildExtractElement(builder, val3,
414 channel, "");
415 res = LLVMBuildInsertElement(builder,
416 res, ex,
417 LLVMConstInt(LLVMInt32Type(), 3, 0),
418 "");
419
420 return res;
421 }
422
423 static void
424 soa_to_aos(LLVMBuilderRef builder,
425 LLVMValueRef soa[NUM_CHANNELS],
426 LLVMValueRef aos[NUM_CHANNELS])
427 {
428 LLVMValueRef comp;
429 int i = 0;
430
431 debug_assert(NUM_CHANNELS == 4);
432
433 aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
434 aos[1] = aos[2] = aos[3] = aos[0];
435
436 for (i = 0; i < NUM_CHANNELS; ++i) {
437 LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0);
438
439 comp = LLVMBuildExtractElement(builder, soa[i],
440 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
441 aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
442
443 comp = LLVMBuildExtractElement(builder, soa[i],
444 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
445 aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
446
447 comp = LLVMBuildExtractElement(builder, soa[i],
448 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
449 aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
450
451 comp = LLVMBuildExtractElement(builder, soa[i],
452 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
453 aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
454
455 }
456 }
457
458 static void
459 convert_to_soa(LLVMBuilderRef builder,
460 LLVMValueRef (*aos)[NUM_CHANNELS],
461 LLVMValueRef (*soa)[NUM_CHANNELS],
462 int num_attribs)
463 {
464 int i;
465
466 debug_assert(NUM_CHANNELS == 4);
467
468 for (i = 0; i < num_attribs; ++i) {
469 LLVMValueRef val0 = aos[i][0];
470 LLVMValueRef val1 = aos[i][1];
471 LLVMValueRef val2 = aos[i][2];
472 LLVMValueRef val3 = aos[i][3];
473
474 soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3,
475 LLVMConstInt(LLVMInt32Type(), 0, 0));
476 soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3,
477 LLVMConstInt(LLVMInt32Type(), 1, 0));
478 soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3,
479 LLVMConstInt(LLVMInt32Type(), 2, 0));
480 soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3,
481 LLVMConstInt(LLVMInt32Type(), 3, 0));
482 }
483 }
484
485 static void
486 store_aos(LLVMBuilderRef builder,
487 LLVMValueRef io_ptr,
488 LLVMValueRef index,
489 LLVMValueRef value)
490 {
491 LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);
492 LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);
493 LLVMValueRef indices[3];
494
495 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
496 indices[1] = index;
497 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
498
499 /* undefined vertex */
500 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(),
501 0xffff, 0), id_ptr);
502
503 #if DEBUG_STORE
504 lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
505 #endif
506 #if 0
507 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr);
508 print_vectorf(builder, value);*/
509 data_ptr = LLVMBuildBitCast(builder, data_ptr,
510 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
511 "datavec");
512 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
513
514 LLVMBuildStore(builder, value, data_ptr);
515 #else
516 {
517 LLVMValueRef x, y, z, w;
518 LLVMValueRef idx0, idx1, idx2, idx3;
519 LLVMValueRef gep0, gep1, gep2, gep3;
520 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
521
522 idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
523 idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
524 idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
525 idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
526
527 x = LLVMBuildExtractElement(builder, value,
528 idx0, "");
529 y = LLVMBuildExtractElement(builder, value,
530 idx1, "");
531 z = LLVMBuildExtractElement(builder, value,
532 idx2, "");
533 w = LLVMBuildExtractElement(builder, value,
534 idx3, "");
535
536 gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
537 gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
538 gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
539 gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
540
541 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
542 x, gep0, y, gep1, z, gep2, w, gep3);*/
543 LLVMBuildStore(builder, x, gep0);
544 LLVMBuildStore(builder, y, gep1);
545 LLVMBuildStore(builder, z, gep2);
546 LLVMBuildStore(builder, w, gep3);
547 }
548 #endif
549 }
550
551 static void
552 store_aos_array(LLVMBuilderRef builder,
553 LLVMValueRef io_ptr,
554 LLVMValueRef aos[NUM_CHANNELS],
555 int attrib,
556 int num_outputs)
557 {
558 LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);
559 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
560 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
561 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
562 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
563 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
564
565 debug_assert(NUM_CHANNELS == 4);
566
567 io0_ptr = LLVMBuildGEP(builder, io_ptr,
568 &ind0, 1, "");
569 io1_ptr = LLVMBuildGEP(builder, io_ptr,
570 &ind1, 1, "");
571 io2_ptr = LLVMBuildGEP(builder, io_ptr,
572 &ind2, 1, "");
573 io3_ptr = LLVMBuildGEP(builder, io_ptr,
574 &ind3, 1, "");
575
576 #if DEBUG_STORE
577 lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n",
578 io_ptr, ind0, ind1, ind2, ind3);
579 #endif
580
581 store_aos(builder, io0_ptr, attr_index, aos[0]);
582 store_aos(builder, io1_ptr, attr_index, aos[1]);
583 store_aos(builder, io2_ptr, attr_index, aos[2]);
584 store_aos(builder, io3_ptr, attr_index, aos[3]);
585 }
586
587 static void
588 convert_to_aos(LLVMBuilderRef builder,
589 LLVMValueRef io,
590 LLVMValueRef (*outputs)[NUM_CHANNELS],
591 int num_outputs,
592 int max_vertices)
593 {
594 unsigned chan, attrib;
595
596 #if DEBUG_STORE
597 lp_build_printf(builder, " # storing begin\n");
598 #endif
599 for (attrib = 0; attrib < num_outputs; ++attrib) {
600 LLVMValueRef soa[4];
601 LLVMValueRef aos[4];
602 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
603 if(outputs[attrib][chan]) {
604 LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
605 lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
606 /*lp_build_printf(builder, "output %d : %d ",
607 LLVMConstInt(LLVMInt32Type(), attrib, 0),
608 LLVMConstInt(LLVMInt32Type(), chan, 0));
609 print_vectorf(builder, out);*/
610 soa[chan] = out;
611 } else
612 soa[chan] = 0;
613 }
614 soa_to_aos(builder, soa, aos);
615 store_aos_array(builder,
616 io,
617 aos,
618 attrib,
619 num_outputs);
620 }
621 #if DEBUG_STORE
622 lp_build_printf(builder, " # storing end\n");
623 #endif
624 }
625
626 static void
627 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
628 {
629 LLVMTypeRef arg_types[7];
630 LLVMTypeRef func_type;
631 LLVMValueRef context_ptr;
632 LLVMBasicBlockRef block;
633 LLVMBuilderRef builder;
634 LLVMValueRef start, end, count, stride, step, io_itr;
635 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
636 struct draw_context *draw = llvm->draw;
637 unsigned i, j;
638 struct lp_build_context bld;
639 struct lp_build_loop_state lp_loop;
640 struct lp_type vs_type = lp_type_float_vec(32);
641 const int max_vertices = 4;
642 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
643 void *code;
644
645 arg_types[0] = llvm->context_ptr_type; /* context */
646 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
647 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
648 arg_types[3] = LLVMInt32Type(); /* start */
649 arg_types[4] = LLVMInt32Type(); /* count */
650 arg_types[5] = LLVMInt32Type(); /* stride */
651 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
652
653 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
654
655 variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
656 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
657 for(i = 0; i < Elements(arg_types); ++i)
658 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
659 LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
660
661 context_ptr = LLVMGetParam(variant->function, 0);
662 io_ptr = LLVMGetParam(variant->function, 1);
663 vbuffers_ptr = LLVMGetParam(variant->function, 2);
664 start = LLVMGetParam(variant->function, 3);
665 count = LLVMGetParam(variant->function, 4);
666 stride = LLVMGetParam(variant->function, 5);
667 vb_ptr = LLVMGetParam(variant->function, 6);
668
669 lp_build_name(context_ptr, "context");
670 lp_build_name(io_ptr, "io");
671 lp_build_name(vbuffers_ptr, "vbuffers");
672 lp_build_name(start, "start");
673 lp_build_name(count, "count");
674 lp_build_name(stride, "stride");
675 lp_build_name(vb_ptr, "vb");
676
677 /*
678 * Function body
679 */
680
681 block = LLVMAppendBasicBlock(variant->function, "entry");
682 builder = LLVMCreateBuilder();
683 LLVMPositionBuilderAtEnd(builder, block);
684
685 lp_build_context_init(&bld, builder, vs_type);
686
687 end = lp_build_add(&bld, start, count);
688
689 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
690
691 #if DEBUG_STORE
692 lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
693 start, end, step);
694 #endif
695 lp_build_loop_begin(builder, start, &lp_loop);
696 {
697 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
698 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
699 LLVMValueRef io;
700 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
701
702 io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
703 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
704 #if DEBUG_STORE
705 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
706 io_itr, io, lp_loop.counter);
707 #endif
708 for (i = 0; i < NUM_CHANNELS; ++i) {
709 LLVMValueRef true_index = LLVMBuildAdd(
710 builder,
711 lp_loop.counter,
712 LLVMConstInt(LLVMInt32Type(), i, 0), "");
713 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
714 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
715 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
716 velem->vertex_buffer_index,
717 0);
718 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
719 &vb_index, 1, "");
720 generate_fetch(builder, vbuffers_ptr,
721 &aos_attribs[j][i], velem, vb, true_index);
722 }
723 }
724 convert_to_soa(builder, aos_attribs, inputs,
725 draw->pt.nr_vertex_elements);
726
727 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
728 generate_vs(llvm,
729 builder,
730 outputs,
731 ptr_aos,
732 context_ptr);
733
734 convert_to_aos(builder, io, outputs,
735 draw->vs.vertex_shader->info.num_outputs,
736 max_vertices);
737 }
738 lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
739
740 LLVMBuildRetVoid(builder);
741
742 LLVMDisposeBuilder(builder);
743
744 /*
745 * Translate the LLVM IR into machine code.
746 */
747 #ifdef DEBUG
748 if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
749 lp_debug_dump_value(variant->function);
750 assert(0);
751 }
752 #endif
753
754 LLVMRunFunctionPassManager(llvm->pass, variant->function);
755
756 if (gallivm_debug & GALLIVM_DEBUG_IR) {
757 lp_debug_dump_value(variant->function);
758 debug_printf("\n");
759 }
760
761 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
762 variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
763
764 if (gallivm_debug & GALLIVM_DEBUG_ASM) {
765 lp_disassemble(code);
766 }
767 }
768
769
770 static void
771 draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
772 {
773 LLVMTypeRef arg_types[7];
774 LLVMTypeRef func_type;
775 LLVMValueRef context_ptr;
776 LLVMBasicBlockRef block;
777 LLVMBuilderRef builder;
778 LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
779 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
780 struct draw_context *draw = llvm->draw;
781 unsigned i, j;
782 struct lp_build_context bld;
783 struct lp_build_context bld_int;
784 struct lp_build_loop_state lp_loop;
785 struct lp_type vs_type = lp_type_float_vec(32);
786 const int max_vertices = 4;
787 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
788 LLVMValueRef fetch_max;
789 void *code;
790
791 arg_types[0] = llvm->context_ptr_type; /* context */
792 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
793 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
794 arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
795 arg_types[4] = LLVMInt32Type(); /* fetch_count */
796 arg_types[5] = LLVMInt32Type(); /* stride */
797 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
798
799 func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
800
801 variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
802 LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
803 for(i = 0; i < Elements(arg_types); ++i)
804 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
805 LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
806
807 context_ptr = LLVMGetParam(variant->function_elts, 0);
808 io_ptr = LLVMGetParam(variant->function_elts, 1);
809 vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
810 fetch_elts = LLVMGetParam(variant->function_elts, 3);
811 fetch_count = LLVMGetParam(variant->function_elts, 4);
812 stride = LLVMGetParam(variant->function_elts, 5);
813 vb_ptr = LLVMGetParam(variant->function_elts, 6);
814
815 lp_build_name(context_ptr, "context");
816 lp_build_name(io_ptr, "io");
817 lp_build_name(vbuffers_ptr, "vbuffers");
818 lp_build_name(fetch_elts, "fetch_elts");
819 lp_build_name(fetch_count, "fetch_count");
820 lp_build_name(stride, "stride");
821 lp_build_name(vb_ptr, "vb");
822
823 /*
824 * Function body
825 */
826
827 block = LLVMAppendBasicBlock(variant->function_elts, "entry");
828 builder = LLVMCreateBuilder();
829 LLVMPositionBuilderAtEnd(builder, block);
830
831 lp_build_context_init(&bld, builder, vs_type);
832 lp_build_context_init(&bld_int, builder, lp_type_int(32));
833
834 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
835
836 fetch_max = LLVMBuildSub(builder, fetch_count,
837 LLVMConstInt(LLVMInt32Type(), 1, 0),
838 "fetch_max");
839
840 lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
841 {
842 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
843 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
844 LLVMValueRef io;
845 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
846
847 io_itr = lp_loop.counter;
848 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
849 #if DEBUG_STORE
850 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
851 io_itr, io, lp_loop.counter);
852 #endif
853 for (i = 0; i < NUM_CHANNELS; ++i) {
854 LLVMValueRef true_index = LLVMBuildAdd(
855 builder,
856 lp_loop.counter,
857 LLVMConstInt(LLVMInt32Type(), i, 0), "");
858 LLVMValueRef fetch_ptr;
859
860 /* make sure we're not out of bounds which can happen
861 * if fetch_count % 4 != 0, because on the last iteration
862 * a few of the 4 vertex fetches will be out of bounds */
863 true_index = lp_build_min(&bld_int, true_index, fetch_max);
864
865 fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
866 &true_index, 1, "");
867 true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
868 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
869 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
870 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
871 velem->vertex_buffer_index,
872 0);
873 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
874 &vb_index, 1, "");
875 generate_fetch(builder, vbuffers_ptr,
876 &aos_attribs[j][i], velem, vb, true_index);
877 }
878 }
879 convert_to_soa(builder, aos_attribs, inputs,
880 draw->pt.nr_vertex_elements);
881
882 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
883 generate_vs(llvm,
884 builder,
885 outputs,
886 ptr_aos,
887 context_ptr);
888
889 convert_to_aos(builder, io, outputs,
890 draw->vs.vertex_shader->info.num_outputs,
891 max_vertices);
892 }
893 lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
894
895 LLVMBuildRetVoid(builder);
896
897 LLVMDisposeBuilder(builder);
898
899 /*
900 * Translate the LLVM IR into machine code.
901 */
902 #ifdef DEBUG
903 if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
904 lp_debug_dump_value(variant->function_elts);
905 assert(0);
906 }
907 #endif
908
909 LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
910
911 if (gallivm_debug & GALLIVM_DEBUG_IR) {
912 lp_debug_dump_value(variant->function_elts);
913 debug_printf("\n");
914 }
915
916 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
917 variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
918
919 if (gallivm_debug & GALLIVM_DEBUG_ASM) {
920 lp_disassemble(code);
921 }
922 }
923
924 void
925 draw_llvm_make_variant_key(struct draw_llvm *llvm,
926 struct draw_llvm_variant_key *key)
927 {
928 memset(key, 0, sizeof(struct draw_llvm_variant_key));
929
930 key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
931
932 memcpy(key->vertex_element,
933 llvm->draw->pt.vertex_element,
934 sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
935
936 memcpy(&key->vs,
937 &llvm->draw->vs.vertex_shader->state,
938 sizeof(struct pipe_shader_state));
939 }
940
941 void
942 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
943 {
944 struct draw_llvm *llvm = variant->llvm;
945 struct draw_context *draw = llvm->draw;
946
947 if (variant->function_elts) {
948 if (variant->function_elts)
949 LLVMFreeMachineCodeForFunction(draw->engine,
950 variant->function_elts);
951 LLVMDeleteFunction(variant->function_elts);
952 }
953
954 if (variant->function) {
955 if (variant->function)
956 LLVMFreeMachineCodeForFunction(draw->engine,
957 variant->function);
958 LLVMDeleteFunction(variant->function);
959 }
960
961 remove_from_list(&variant->list_item_local);
962 variant->shader->variants_cached--;
963 remove_from_list(&variant->list_item_global);
964 llvm->nr_variants--;
965 FREE(variant);
966 }