1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "draw_llvm.h"
30 #include "draw_context.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_struct.h"
35 #include "gallivm/lp_bld_type.h"
36 #include "gallivm/lp_bld_flow.h"
37 #include "gallivm/lp_bld_debug.h"
38 #include "gallivm/lp_bld_tgsi.h"
39 #include "gallivm/lp_bld_printf.h"
41 #include "tgsi/tgsi_exec.h"
42 #include "tgsi/tgsi_dump.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/u_string.h"
46 #include "util/u_pointer.h"
48 #include <llvm-c/Transforms/Scalar.h>
/*
 * Forward declarations of the two JIT code generators.  Both are defined
 * further down in this file and are called from draw_llvm_create_variant().
 */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);

static void
draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
60 init_globals(struct draw_llvm
*llvm
)
62 LLVMTypeRef texture_type
;
64 /* struct draw_jit_texture */
66 LLVMTypeRef elem_types
[4];
68 elem_types
[DRAW_JIT_TEXTURE_WIDTH
] = LLVMInt32Type();
69 elem_types
[DRAW_JIT_TEXTURE_HEIGHT
] = LLVMInt32Type();
70 elem_types
[DRAW_JIT_TEXTURE_STRIDE
] = LLVMInt32Type();
71 elem_types
[DRAW_JIT_TEXTURE_DATA
] = LLVMPointerType(LLVMInt8Type(), 0);
73 texture_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
75 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, width
,
76 llvm
->target
, texture_type
,
77 DRAW_JIT_TEXTURE_WIDTH
);
78 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, height
,
79 llvm
->target
, texture_type
,
80 DRAW_JIT_TEXTURE_HEIGHT
);
81 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, stride
,
82 llvm
->target
, texture_type
,
83 DRAW_JIT_TEXTURE_STRIDE
);
84 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, data
,
85 llvm
->target
, texture_type
,
86 DRAW_JIT_TEXTURE_DATA
);
87 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture
,
88 llvm
->target
, texture_type
);
90 LLVMAddTypeName(llvm
->module
, "texture", texture_type
);
94 /* struct draw_jit_context */
96 LLVMTypeRef elem_types
[3];
97 LLVMTypeRef context_type
;
99 elem_types
[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
100 elem_types
[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
101 elem_types
[2] = LLVMArrayType(texture_type
, PIPE_MAX_SAMPLERS
); /* textures */
103 context_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
105 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, vs_constants
,
106 llvm
->target
, context_type
, 0);
107 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, gs_constants
,
108 llvm
->target
, context_type
, 1);
109 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, textures
,
110 llvm
->target
, context_type
,
111 DRAW_JIT_CONTEXT_TEXTURES_INDEX
);
112 LP_CHECK_STRUCT_SIZE(struct draw_jit_context
,
113 llvm
->target
, context_type
);
115 LLVMAddTypeName(llvm
->module
, "draw_jit_context", context_type
);
117 llvm
->context_ptr_type
= LLVMPointerType(context_type
, 0);
120 LLVMTypeRef buffer_ptr
= LLVMPointerType(LLVMIntType(8), 0);
121 llvm
->buffer_ptr_type
= LLVMPointerType(buffer_ptr
, 0);
123 /* struct pipe_vertex_buffer */
125 LLVMTypeRef elem_types
[4];
128 elem_types
[0] = LLVMInt32Type();
129 elem_types
[1] = LLVMInt32Type();
130 elem_types
[2] = LLVMInt32Type();
131 elem_types
[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
133 vb_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
135 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer
, stride
,
136 llvm
->target
, vb_type
, 0);
137 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer
, buffer_offset
,
138 llvm
->target
, vb_type
, 2);
139 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer
,
140 llvm
->target
, vb_type
);
142 LLVMAddTypeName(llvm
->module
, "pipe_vertex_buffer", vb_type
);
144 llvm
->vb_ptr_type
= LLVMPointerType(vb_type
, 0);
149 create_vertex_header(struct draw_llvm
*llvm
, int data_elems
)
151 /* struct vertex_header */
152 LLVMTypeRef elem_types
[3];
153 LLVMTypeRef vertex_header
;
154 char struct_name
[24];
156 util_snprintf(struct_name
, 23, "vertex_header%d", data_elems
);
158 elem_types
[0] = LLVMIntType(32);
159 elem_types
[1] = LLVMArrayType(LLVMFloatType(), 4);
160 elem_types
[2] = LLVMArrayType(elem_types
[1], data_elems
);
162 vertex_header
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
164 /* these are bit-fields and we can't take address of them
165 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
166 llvm->target, vertex_header,
167 DRAW_JIT_VERTEX_CLIPMASK);
168 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
169 llvm->target, vertex_header,
170 DRAW_JIT_VERTEX_EDGEFLAG);
171 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
172 llvm->target, vertex_header,
173 DRAW_JIT_VERTEX_PAD);
174 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
175 llvm->target, vertex_header,
176 DRAW_JIT_VERTEX_VERTEX_ID);
178 LP_CHECK_MEMBER_OFFSET(struct vertex_header
, clip
,
179 llvm
->target
, vertex_header
,
180 DRAW_JIT_VERTEX_CLIP
);
181 LP_CHECK_MEMBER_OFFSET(struct vertex_header
, data
,
182 llvm
->target
, vertex_header
,
183 DRAW_JIT_VERTEX_DATA
);
185 LLVMAddTypeName(llvm
->module
, struct_name
, vertex_header
);
187 return LLVMPointerType(vertex_header
, 0);
191 draw_llvm_create(struct draw_context
*draw
)
193 struct draw_llvm
*llvm
;
197 /* require SSE2 due to LLVM PR6960. */
198 if (!util_cpu_caps
.has_sse2
)
202 llvm
= CALLOC_STRUCT( draw_llvm
);
207 llvm
->engine
= draw
->engine
;
209 debug_assert(llvm
->engine
);
211 llvm
->module
= LLVMModuleCreateWithName("draw_llvm");
212 llvm
->provider
= LLVMCreateModuleProviderForExistingModule(llvm
->module
);
214 LLVMAddModuleProvider(llvm
->engine
, llvm
->provider
);
216 llvm
->target
= LLVMGetExecutionEngineTargetData(llvm
->engine
);
218 llvm
->pass
= LLVMCreateFunctionPassManager(llvm
->provider
);
219 LLVMAddTargetData(llvm
->target
, llvm
->pass
);
221 if ((gallivm_debug
& GALLIVM_DEBUG_NO_OPT
) == 0) {
222 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
223 * but there are more on SVN. */
224 /* TODO: Add more passes */
225 LLVMAddCFGSimplificationPass(llvm
->pass
);
226 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
227 LLVMAddConstantPropagationPass(llvm
->pass
);
228 if(util_cpu_caps
.has_sse4_1
) {
229 /* FIXME: There is a bug in this pass, whereby the combination of fptosi
230 * and sitofp (necessary for trunc/floor/ceil/round implementation)
231 * somehow becomes invalid code.
233 LLVMAddInstructionCombiningPass(llvm
->pass
);
235 LLVMAddGVNPass(llvm
->pass
);
237 /* We need at least this pass to prevent the backends to fail in
240 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
245 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
246 LLVMDumpModule(llvm
->module
);
249 llvm
->nr_variants
= 0;
250 make_empty_list(&llvm
->vs_variants_list
);
256 draw_llvm_destroy(struct draw_llvm
*llvm
)
258 LLVMDisposePassManager(llvm
->pass
);
263 struct draw_llvm_variant
*
264 draw_llvm_create_variant(struct draw_llvm
*llvm
, int num_inputs
)
266 struct draw_llvm_variant
*variant
= MALLOC(sizeof(struct draw_llvm_variant
));
267 struct llvm_vertex_shader
*shader
=
268 llvm_vertex_shader(llvm
->draw
->vs
.vertex_shader
);
270 variant
->llvm
= llvm
;
272 draw_llvm_make_variant_key(llvm
, &variant
->key
);
274 llvm
->vertex_header_ptr_type
= create_vertex_header(llvm
, num_inputs
);
276 draw_llvm_generate(llvm
, variant
);
277 draw_llvm_generate_elts(llvm
, variant
);
279 variant
->shader
= shader
;
280 variant
->list_item_global
.base
= variant
;
281 variant
->list_item_local
.base
= variant
;
282 /*variant->no = */shader
->variants_created
++;
283 variant
->list_item_global
.base
= variant
;
289 generate_vs(struct draw_llvm
*llvm
,
290 LLVMBuilderRef builder
,
291 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
292 const LLVMValueRef (*inputs
)[NUM_CHANNELS
],
293 LLVMValueRef context_ptr
)
295 const struct tgsi_token
*tokens
= llvm
->draw
->vs
.vertex_shader
->state
.tokens
;
296 struct lp_type vs_type
;
297 LLVMValueRef consts_ptr
= draw_jit_context_vs_constants(builder
, context_ptr
);
299 memset(&vs_type
, 0, sizeof vs_type
);
300 vs_type
.floating
= TRUE
; /* floating point values */
301 vs_type
.sign
= TRUE
; /* values are signed */
302 vs_type
.norm
= FALSE
; /* values are not limited to [0,1] or [-1,1] */
303 vs_type
.width
= 32; /* 32-bit float */
304 vs_type
.length
= 4; /* 4 elements per vector */
306 num_vs
= 4; /* number of vertices per block */
309 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
310 tgsi_dump(tokens
, 0);
313 lp_build_tgsi_soa(builder
,
316 NULL
/*struct lp_build_mask_context *mask*/,
322 &llvm
->draw
->vs
.vertex_shader
->info
);
326 static void print_vectorf(LLVMBuilderRef builder
,
330 val
[0] = LLVMBuildExtractElement(builder
, vec
,
331 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
332 val
[1] = LLVMBuildExtractElement(builder
, vec
,
333 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
334 val
[2] = LLVMBuildExtractElement(builder
, vec
,
335 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
336 val
[3] = LLVMBuildExtractElement(builder
, vec
,
337 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
338 lp_build_printf(builder
, "vector = [%f, %f, %f, %f]\n",
339 val
[0], val
[1], val
[2], val
[3]);
344 generate_fetch(LLVMBuilderRef builder
,
345 LLVMValueRef vbuffers_ptr
,
347 struct pipe_vertex_element
*velem
,
351 LLVMValueRef indices
= LLVMConstInt(LLVMInt64Type(), velem
->vertex_buffer_index
, 0);
352 LLVMValueRef vbuffer_ptr
= LLVMBuildGEP(builder
, vbuffers_ptr
,
354 LLVMValueRef vb_stride
= draw_jit_vbuffer_stride(builder
, vbuf
);
355 LLVMValueRef vb_max_index
= draw_jit_vbuffer_max_index(builder
, vbuf
);
356 LLVMValueRef vb_buffer_offset
= draw_jit_vbuffer_offset(builder
, vbuf
);
360 cond
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, vb_max_index
, "");
362 index
= LLVMBuildSelect(builder
, cond
, index
, vb_max_index
, "");
364 stride
= LLVMBuildMul(builder
, vb_stride
, index
, "");
366 vbuffer_ptr
= LLVMBuildLoad(builder
, vbuffer_ptr
, "vbuffer");
368 stride
= LLVMBuildAdd(builder
, stride
,
371 stride
= LLVMBuildAdd(builder
, stride
,
372 LLVMConstInt(LLVMInt32Type(), velem
->src_offset
, 0),
375 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
376 vbuffer_ptr
= LLVMBuildGEP(builder
, vbuffer_ptr
, &stride
, 1, "");
378 *res
= draw_llvm_translate_from(builder
, vbuffer_ptr
, velem
->src_format
);
382 aos_to_soa(LLVMBuilderRef builder
,
387 LLVMValueRef channel
)
389 LLVMValueRef ex
, res
;
391 ex
= LLVMBuildExtractElement(builder
, val0
,
393 res
= LLVMBuildInsertElement(builder
,
394 LLVMConstNull(LLVMTypeOf(val0
)),
396 LLVMConstInt(LLVMInt32Type(), 0, 0),
399 ex
= LLVMBuildExtractElement(builder
, val1
,
401 res
= LLVMBuildInsertElement(builder
,
403 LLVMConstInt(LLVMInt32Type(), 1, 0),
406 ex
= LLVMBuildExtractElement(builder
, val2
,
408 res
= LLVMBuildInsertElement(builder
,
410 LLVMConstInt(LLVMInt32Type(), 2, 0),
413 ex
= LLVMBuildExtractElement(builder
, val3
,
415 res
= LLVMBuildInsertElement(builder
,
417 LLVMConstInt(LLVMInt32Type(), 3, 0),
424 soa_to_aos(LLVMBuilderRef builder
,
425 LLVMValueRef soa
[NUM_CHANNELS
],
426 LLVMValueRef aos
[NUM_CHANNELS
])
431 debug_assert(NUM_CHANNELS
== 4);
433 aos
[0] = LLVMConstNull(LLVMTypeOf(soa
[0]));
434 aos
[1] = aos
[2] = aos
[3] = aos
[0];
436 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
437 LLVMValueRef channel
= LLVMConstInt(LLVMInt32Type(), i
, 0);
439 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
440 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
441 aos
[0] = LLVMBuildInsertElement(builder
, aos
[0], comp
, channel
, "");
443 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
444 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
445 aos
[1] = LLVMBuildInsertElement(builder
, aos
[1], comp
, channel
, "");
447 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
448 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
449 aos
[2] = LLVMBuildInsertElement(builder
, aos
[2], comp
, channel
, "");
451 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
452 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
453 aos
[3] = LLVMBuildInsertElement(builder
, aos
[3], comp
, channel
, "");
459 convert_to_soa(LLVMBuilderRef builder
,
460 LLVMValueRef (*aos
)[NUM_CHANNELS
],
461 LLVMValueRef (*soa
)[NUM_CHANNELS
],
466 debug_assert(NUM_CHANNELS
== 4);
468 for (i
= 0; i
< num_attribs
; ++i
) {
469 LLVMValueRef val0
= aos
[i
][0];
470 LLVMValueRef val1
= aos
[i
][1];
471 LLVMValueRef val2
= aos
[i
][2];
472 LLVMValueRef val3
= aos
[i
][3];
474 soa
[i
][0] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
475 LLVMConstInt(LLVMInt32Type(), 0, 0));
476 soa
[i
][1] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
477 LLVMConstInt(LLVMInt32Type(), 1, 0));
478 soa
[i
][2] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
479 LLVMConstInt(LLVMInt32Type(), 2, 0));
480 soa
[i
][3] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
481 LLVMConstInt(LLVMInt32Type(), 3, 0));
486 store_aos(LLVMBuilderRef builder
,
491 LLVMValueRef id_ptr
= draw_jit_header_id(builder
, io_ptr
);
492 LLVMValueRef data_ptr
= draw_jit_header_data(builder
, io_ptr
);
493 LLVMValueRef indices
[3];
495 indices
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
497 indices
[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
499 /* undefined vertex */
500 LLVMBuildStore(builder
, LLVMConstInt(LLVMInt32Type(),
504 lp_build_printf(builder
, " ---- %p storing attribute %d (io = %p)\n", data_ptr
, index
, io_ptr
);
507 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr);
508 print_vectorf(builder, value);*/
509 data_ptr
= LLVMBuildBitCast(builder
, data_ptr
,
510 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
512 data_ptr
= LLVMBuildGEP(builder
, data_ptr
, indices
, 2, "");
514 LLVMBuildStore(builder
, value
, data_ptr
);
517 LLVMValueRef x
, y
, z
, w
;
518 LLVMValueRef idx0
, idx1
, idx2
, idx3
;
519 LLVMValueRef gep0
, gep1
, gep2
, gep3
;
520 data_ptr
= LLVMBuildGEP(builder
, data_ptr
, indices
, 3, "");
522 idx0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
523 idx1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
524 idx2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
525 idx3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
527 x
= LLVMBuildExtractElement(builder
, value
,
529 y
= LLVMBuildExtractElement(builder
, value
,
531 z
= LLVMBuildExtractElement(builder
, value
,
533 w
= LLVMBuildExtractElement(builder
, value
,
536 gep0
= LLVMBuildGEP(builder
, data_ptr
, &idx0
, 1, "");
537 gep1
= LLVMBuildGEP(builder
, data_ptr
, &idx1
, 1, "");
538 gep2
= LLVMBuildGEP(builder
, data_ptr
, &idx2
, 1, "");
539 gep3
= LLVMBuildGEP(builder
, data_ptr
, &idx3
, 1, "");
541 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
542 x, gep0, y, gep1, z, gep2, w, gep3);*/
543 LLVMBuildStore(builder
, x
, gep0
);
544 LLVMBuildStore(builder
, y
, gep1
);
545 LLVMBuildStore(builder
, z
, gep2
);
546 LLVMBuildStore(builder
, w
, gep3
);
552 store_aos_array(LLVMBuilderRef builder
,
554 LLVMValueRef aos
[NUM_CHANNELS
],
558 LLVMValueRef attr_index
= LLVMConstInt(LLVMInt32Type(), attrib
, 0);
559 LLVMValueRef ind0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
560 LLVMValueRef ind1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
561 LLVMValueRef ind2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
562 LLVMValueRef ind3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
563 LLVMValueRef io0_ptr
, io1_ptr
, io2_ptr
, io3_ptr
;
565 debug_assert(NUM_CHANNELS
== 4);
567 io0_ptr
= LLVMBuildGEP(builder
, io_ptr
,
569 io1_ptr
= LLVMBuildGEP(builder
, io_ptr
,
571 io2_ptr
= LLVMBuildGEP(builder
, io_ptr
,
573 io3_ptr
= LLVMBuildGEP(builder
, io_ptr
,
577 lp_build_printf(builder
, " io = %p, indexes[%d, %d, %d, %d]\n",
578 io_ptr
, ind0
, ind1
, ind2
, ind3
);
581 store_aos(builder
, io0_ptr
, attr_index
, aos
[0]);
582 store_aos(builder
, io1_ptr
, attr_index
, aos
[1]);
583 store_aos(builder
, io2_ptr
, attr_index
, aos
[2]);
584 store_aos(builder
, io3_ptr
, attr_index
, aos
[3]);
588 convert_to_aos(LLVMBuilderRef builder
,
590 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
594 unsigned chan
, attrib
;
597 lp_build_printf(builder
, " # storing begin\n");
599 for (attrib
= 0; attrib
< num_outputs
; ++attrib
) {
602 for(chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
603 if(outputs
[attrib
][chan
]) {
604 LLVMValueRef out
= LLVMBuildLoad(builder
, outputs
[attrib
][chan
], "");
605 lp_build_name(out
, "output%u.%c", attrib
, "xyzw"[chan
]);
606 /*lp_build_printf(builder, "output %d : %d ",
607 LLVMConstInt(LLVMInt32Type(), attrib, 0),
608 LLVMConstInt(LLVMInt32Type(), chan, 0));
609 print_vectorf(builder, out);*/
614 soa_to_aos(builder
, soa
, aos
);
615 store_aos_array(builder
,
622 lp_build_printf(builder
, " # storing end\n");
627 draw_llvm_generate(struct draw_llvm
*llvm
, struct draw_llvm_variant
*variant
)
629 LLVMTypeRef arg_types
[7];
630 LLVMTypeRef func_type
;
631 LLVMValueRef context_ptr
;
632 LLVMBasicBlockRef block
;
633 LLVMBuilderRef builder
;
634 LLVMValueRef start
, end
, count
, stride
, step
, io_itr
;
635 LLVMValueRef io_ptr
, vbuffers_ptr
, vb_ptr
;
636 struct draw_context
*draw
= llvm
->draw
;
638 struct lp_build_context bld
;
639 struct lp_build_loop_state lp_loop
;
640 struct lp_type vs_type
= lp_type_float_vec(32);
641 const int max_vertices
= 4;
642 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][NUM_CHANNELS
];
645 arg_types
[0] = llvm
->context_ptr_type
; /* context */
646 arg_types
[1] = llvm
->vertex_header_ptr_type
; /* vertex_header */
647 arg_types
[2] = llvm
->buffer_ptr_type
; /* vbuffers */
648 arg_types
[3] = LLVMInt32Type(); /* start */
649 arg_types
[4] = LLVMInt32Type(); /* count */
650 arg_types
[5] = LLVMInt32Type(); /* stride */
651 arg_types
[6] = llvm
->vb_ptr_type
; /* pipe_vertex_buffer's */
653 func_type
= LLVMFunctionType(LLVMVoidType(), arg_types
, Elements(arg_types
), 0);
655 variant
->function
= LLVMAddFunction(llvm
->module
, "draw_llvm_shader", func_type
);
656 LLVMSetFunctionCallConv(variant
->function
, LLVMCCallConv
);
657 for(i
= 0; i
< Elements(arg_types
); ++i
)
658 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
659 LLVMAddAttribute(LLVMGetParam(variant
->function
, i
), LLVMNoAliasAttribute
);
661 context_ptr
= LLVMGetParam(variant
->function
, 0);
662 io_ptr
= LLVMGetParam(variant
->function
, 1);
663 vbuffers_ptr
= LLVMGetParam(variant
->function
, 2);
664 start
= LLVMGetParam(variant
->function
, 3);
665 count
= LLVMGetParam(variant
->function
, 4);
666 stride
= LLVMGetParam(variant
->function
, 5);
667 vb_ptr
= LLVMGetParam(variant
->function
, 6);
669 lp_build_name(context_ptr
, "context");
670 lp_build_name(io_ptr
, "io");
671 lp_build_name(vbuffers_ptr
, "vbuffers");
672 lp_build_name(start
, "start");
673 lp_build_name(count
, "count");
674 lp_build_name(stride
, "stride");
675 lp_build_name(vb_ptr
, "vb");
681 block
= LLVMAppendBasicBlock(variant
->function
, "entry");
682 builder
= LLVMCreateBuilder();
683 LLVMPositionBuilderAtEnd(builder
, block
);
685 lp_build_context_init(&bld
, builder
, vs_type
);
687 end
= lp_build_add(&bld
, start
, count
);
689 step
= LLVMConstInt(LLVMInt32Type(), max_vertices
, 0);
692 lp_build_printf(builder
, "start = %d, end = %d, step = %d\n",
695 lp_build_loop_begin(builder
, start
, &lp_loop
);
697 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
];
698 LLVMValueRef aos_attribs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
] = { { 0 } };
700 const LLVMValueRef (*ptr_aos
)[NUM_CHANNELS
];
702 io_itr
= LLVMBuildSub(builder
, lp_loop
.counter
, start
, "");
703 io
= LLVMBuildGEP(builder
, io_ptr
, &io_itr
, 1, "");
705 lp_build_printf(builder
, " --- io %d = %p, loop counter %d\n",
706 io_itr
, io
, lp_loop
.counter
);
708 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
709 LLVMValueRef true_index
= LLVMBuildAdd(
712 LLVMConstInt(LLVMInt32Type(), i
, 0), "");
713 for (j
= 0; j
< draw
->pt
.nr_vertex_elements
; ++j
) {
714 struct pipe_vertex_element
*velem
= &draw
->pt
.vertex_element
[j
];
715 LLVMValueRef vb_index
= LLVMConstInt(LLVMInt32Type(),
716 velem
->vertex_buffer_index
,
718 LLVMValueRef vb
= LLVMBuildGEP(builder
, vb_ptr
,
720 generate_fetch(builder
, vbuffers_ptr
,
721 &aos_attribs
[j
][i
], velem
, vb
, true_index
);
724 convert_to_soa(builder
, aos_attribs
, inputs
,
725 draw
->pt
.nr_vertex_elements
);
727 ptr_aos
= (const LLVMValueRef (*)[NUM_CHANNELS
]) inputs
;
734 convert_to_aos(builder
, io
, outputs
,
735 draw
->vs
.vertex_shader
->info
.num_outputs
,
738 lp_build_loop_end_cond(builder
, end
, step
, LLVMIntUGE
, &lp_loop
);
740 LLVMBuildRetVoid(builder
);
742 LLVMDisposeBuilder(builder
);
745 * Translate the LLVM IR into machine code.
748 if(LLVMVerifyFunction(variant
->function
, LLVMPrintMessageAction
)) {
749 lp_debug_dump_value(variant
->function
);
754 LLVMRunFunctionPassManager(llvm
->pass
, variant
->function
);
756 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
757 lp_debug_dump_value(variant
->function
);
761 code
= LLVMGetPointerToGlobal(llvm
->draw
->engine
, variant
->function
);
762 variant
->jit_func
= (draw_jit_vert_func
)pointer_to_func(code
);
764 if (gallivm_debug
& GALLIVM_DEBUG_ASM
) {
765 lp_disassemble(code
);
771 draw_llvm_generate_elts(struct draw_llvm
*llvm
, struct draw_llvm_variant
*variant
)
773 LLVMTypeRef arg_types
[7];
774 LLVMTypeRef func_type
;
775 LLVMValueRef context_ptr
;
776 LLVMBasicBlockRef block
;
777 LLVMBuilderRef builder
;
778 LLVMValueRef fetch_elts
, fetch_count
, stride
, step
, io_itr
;
779 LLVMValueRef io_ptr
, vbuffers_ptr
, vb_ptr
;
780 struct draw_context
*draw
= llvm
->draw
;
782 struct lp_build_context bld
;
783 struct lp_build_context bld_int
;
784 struct lp_build_loop_state lp_loop
;
785 struct lp_type vs_type
= lp_type_float_vec(32);
786 const int max_vertices
= 4;
787 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][NUM_CHANNELS
];
788 LLVMValueRef fetch_max
;
791 arg_types
[0] = llvm
->context_ptr_type
; /* context */
792 arg_types
[1] = llvm
->vertex_header_ptr_type
; /* vertex_header */
793 arg_types
[2] = llvm
->buffer_ptr_type
; /* vbuffers */
794 arg_types
[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
795 arg_types
[4] = LLVMInt32Type(); /* fetch_count */
796 arg_types
[5] = LLVMInt32Type(); /* stride */
797 arg_types
[6] = llvm
->vb_ptr_type
; /* pipe_vertex_buffer's */
799 func_type
= LLVMFunctionType(LLVMVoidType(), arg_types
, Elements(arg_types
), 0);
801 variant
->function_elts
= LLVMAddFunction(llvm
->module
, "draw_llvm_shader_elts", func_type
);
802 LLVMSetFunctionCallConv(variant
->function_elts
, LLVMCCallConv
);
803 for(i
= 0; i
< Elements(arg_types
); ++i
)
804 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
805 LLVMAddAttribute(LLVMGetParam(variant
->function_elts
, i
), LLVMNoAliasAttribute
);
807 context_ptr
= LLVMGetParam(variant
->function_elts
, 0);
808 io_ptr
= LLVMGetParam(variant
->function_elts
, 1);
809 vbuffers_ptr
= LLVMGetParam(variant
->function_elts
, 2);
810 fetch_elts
= LLVMGetParam(variant
->function_elts
, 3);
811 fetch_count
= LLVMGetParam(variant
->function_elts
, 4);
812 stride
= LLVMGetParam(variant
->function_elts
, 5);
813 vb_ptr
= LLVMGetParam(variant
->function_elts
, 6);
815 lp_build_name(context_ptr
, "context");
816 lp_build_name(io_ptr
, "io");
817 lp_build_name(vbuffers_ptr
, "vbuffers");
818 lp_build_name(fetch_elts
, "fetch_elts");
819 lp_build_name(fetch_count
, "fetch_count");
820 lp_build_name(stride
, "stride");
821 lp_build_name(vb_ptr
, "vb");
827 block
= LLVMAppendBasicBlock(variant
->function_elts
, "entry");
828 builder
= LLVMCreateBuilder();
829 LLVMPositionBuilderAtEnd(builder
, block
);
831 lp_build_context_init(&bld
, builder
, vs_type
);
832 lp_build_context_init(&bld_int
, builder
, lp_type_int(32));
834 step
= LLVMConstInt(LLVMInt32Type(), max_vertices
, 0);
836 fetch_max
= LLVMBuildSub(builder
, fetch_count
,
837 LLVMConstInt(LLVMInt32Type(), 1, 0),
840 lp_build_loop_begin(builder
, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop
);
842 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
];
843 LLVMValueRef aos_attribs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
] = { { 0 } };
845 const LLVMValueRef (*ptr_aos
)[NUM_CHANNELS
];
847 io_itr
= lp_loop
.counter
;
848 io
= LLVMBuildGEP(builder
, io_ptr
, &io_itr
, 1, "");
850 lp_build_printf(builder
, " --- io %d = %p, loop counter %d\n",
851 io_itr
, io
, lp_loop
.counter
);
853 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
854 LLVMValueRef true_index
= LLVMBuildAdd(
857 LLVMConstInt(LLVMInt32Type(), i
, 0), "");
858 LLVMValueRef fetch_ptr
;
860 /* make sure we're not out of bounds which can happen
861 * if fetch_count % 4 != 0, because on the last iteration
862 * a few of the 4 vertex fetches will be out of bounds */
863 true_index
= lp_build_min(&bld_int
, true_index
, fetch_max
);
865 fetch_ptr
= LLVMBuildGEP(builder
, fetch_elts
,
867 true_index
= LLVMBuildLoad(builder
, fetch_ptr
, "fetch_elt");
868 for (j
= 0; j
< draw
->pt
.nr_vertex_elements
; ++j
) {
869 struct pipe_vertex_element
*velem
= &draw
->pt
.vertex_element
[j
];
870 LLVMValueRef vb_index
= LLVMConstInt(LLVMInt32Type(),
871 velem
->vertex_buffer_index
,
873 LLVMValueRef vb
= LLVMBuildGEP(builder
, vb_ptr
,
875 generate_fetch(builder
, vbuffers_ptr
,
876 &aos_attribs
[j
][i
], velem
, vb
, true_index
);
879 convert_to_soa(builder
, aos_attribs
, inputs
,
880 draw
->pt
.nr_vertex_elements
);
882 ptr_aos
= (const LLVMValueRef (*)[NUM_CHANNELS
]) inputs
;
889 convert_to_aos(builder
, io
, outputs
,
890 draw
->vs
.vertex_shader
->info
.num_outputs
,
893 lp_build_loop_end_cond(builder
, fetch_count
, step
, LLVMIntUGE
, &lp_loop
);
895 LLVMBuildRetVoid(builder
);
897 LLVMDisposeBuilder(builder
);
900 * Translate the LLVM IR into machine code.
903 if(LLVMVerifyFunction(variant
->function_elts
, LLVMPrintMessageAction
)) {
904 lp_debug_dump_value(variant
->function_elts
);
909 LLVMRunFunctionPassManager(llvm
->pass
, variant
->function_elts
);
911 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
912 lp_debug_dump_value(variant
->function_elts
);
916 code
= LLVMGetPointerToGlobal(llvm
->draw
->engine
, variant
->function_elts
);
917 variant
->jit_func_elts
= (draw_jit_vert_func_elts
)pointer_to_func(code
);
919 if (gallivm_debug
& GALLIVM_DEBUG_ASM
) {
920 lp_disassemble(code
);
925 draw_llvm_make_variant_key(struct draw_llvm
*llvm
,
926 struct draw_llvm_variant_key
*key
)
928 memset(key
, 0, sizeof(struct draw_llvm_variant_key
));
930 key
->nr_vertex_elements
= llvm
->draw
->pt
.nr_vertex_elements
;
932 memcpy(key
->vertex_element
,
933 llvm
->draw
->pt
.vertex_element
,
934 sizeof(struct pipe_vertex_element
) * key
->nr_vertex_elements
);
937 &llvm
->draw
->vs
.vertex_shader
->state
,
938 sizeof(struct pipe_shader_state
));
942 draw_llvm_destroy_variant(struct draw_llvm_variant
*variant
)
944 struct draw_llvm
*llvm
= variant
->llvm
;
945 struct draw_context
*draw
= llvm
->draw
;
947 if (variant
->function_elts
) {
948 if (variant
->function_elts
)
949 LLVMFreeMachineCodeForFunction(draw
->engine
,
950 variant
->function_elts
);
951 LLVMDeleteFunction(variant
->function_elts
);
954 if (variant
->function
) {
955 if (variant
->function
)
956 LLVMFreeMachineCodeForFunction(draw
->engine
,
958 LLVMDeleteFunction(variant
->function
);
961 remove_from_list(&variant
->list_item_local
);
962 variant
->shader
->variants_cached
--;
963 remove_from_list(&variant
->list_item_global
);