1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "draw_llvm.h"
30 #include "draw_context.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_struct.h"
35 #include "gallivm/lp_bld_type.h"
36 #include "gallivm/lp_bld_flow.h"
37 #include "gallivm/lp_bld_debug.h"
38 #include "gallivm/lp_bld_tgsi.h"
39 #include "gallivm/lp_bld_printf.h"
40 #include "gallivm/lp_bld_intr.h"
41 #include "gallivm/lp_bld_init.h"
43 #include "tgsi/tgsi_exec.h"
44 #include "tgsi/tgsi_dump.h"
46 #include "util/u_cpu_detect.h"
47 #include "util/u_math.h"
48 #include "util/u_pointer.h"
49 #include "util/u_string.h"
51 #include <llvm-c/Transforms/Scalar.h>
/*
 * Forward declarations of the two code generators that build the JIT
 * entry points of a shader variant: one for linear vertex ranges and one
 * for indexed (elts) fetches.
 */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
static void
draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
62 init_globals(struct draw_llvm
*llvm
)
64 LLVMTypeRef texture_type
;
66 /* struct draw_jit_texture */
68 LLVMTypeRef elem_types
[DRAW_JIT_TEXTURE_NUM_FIELDS
];
70 elem_types
[DRAW_JIT_TEXTURE_WIDTH
] = LLVMInt32Type();
71 elem_types
[DRAW_JIT_TEXTURE_HEIGHT
] = LLVMInt32Type();
72 elem_types
[DRAW_JIT_TEXTURE_DEPTH
] = LLVMInt32Type();
73 elem_types
[DRAW_JIT_TEXTURE_LAST_LEVEL
] = LLVMInt32Type();
74 elem_types
[DRAW_JIT_TEXTURE_ROW_STRIDE
] =
75 LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS
);
76 elem_types
[DRAW_JIT_TEXTURE_IMG_STRIDE
] =
77 LLVMArrayType(LLVMInt32Type(), PIPE_MAX_TEXTURE_LEVELS
);
78 elem_types
[DRAW_JIT_TEXTURE_DATA
] =
79 LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
80 PIPE_MAX_TEXTURE_LEVELS
);
81 elem_types
[DRAW_JIT_TEXTURE_MIN_LOD
] = LLVMFloatType();
82 elem_types
[DRAW_JIT_TEXTURE_MAX_LOD
] = LLVMFloatType();
83 elem_types
[DRAW_JIT_TEXTURE_LOD_BIAS
] = LLVMFloatType();
84 elem_types
[DRAW_JIT_TEXTURE_BORDER_COLOR
] =
85 LLVMArrayType(LLVMFloatType(), 4);
87 texture_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
89 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, width
,
90 llvm
->target
, texture_type
,
91 DRAW_JIT_TEXTURE_WIDTH
);
92 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, height
,
93 llvm
->target
, texture_type
,
94 DRAW_JIT_TEXTURE_HEIGHT
);
95 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, depth
,
96 llvm
->target
, texture_type
,
97 DRAW_JIT_TEXTURE_DEPTH
);
98 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, last_level
,
99 llvm
->target
, texture_type
,
100 DRAW_JIT_TEXTURE_LAST_LEVEL
);
101 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, row_stride
,
102 llvm
->target
, texture_type
,
103 DRAW_JIT_TEXTURE_ROW_STRIDE
);
104 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, img_stride
,
105 llvm
->target
, texture_type
,
106 DRAW_JIT_TEXTURE_IMG_STRIDE
);
107 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, data
,
108 llvm
->target
, texture_type
,
109 DRAW_JIT_TEXTURE_DATA
);
110 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, min_lod
,
111 llvm
->target
, texture_type
,
112 DRAW_JIT_TEXTURE_MIN_LOD
);
113 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, max_lod
,
114 llvm
->target
, texture_type
,
115 DRAW_JIT_TEXTURE_MAX_LOD
);
116 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, lod_bias
,
117 llvm
->target
, texture_type
,
118 DRAW_JIT_TEXTURE_LOD_BIAS
);
119 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, border_color
,
120 llvm
->target
, texture_type
,
121 DRAW_JIT_TEXTURE_BORDER_COLOR
);
122 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture
,
123 llvm
->target
, texture_type
);
125 LLVMAddTypeName(llvm
->module
, "texture", texture_type
);
129 /* struct draw_jit_context */
131 LLVMTypeRef elem_types
[3];
132 LLVMTypeRef context_type
;
134 elem_types
[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
135 elem_types
[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
136 elem_types
[2] = LLVMArrayType(texture_type
,
137 PIPE_MAX_VERTEX_SAMPLERS
); /* textures */
139 context_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
141 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, vs_constants
,
142 llvm
->target
, context_type
, 0);
143 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, gs_constants
,
144 llvm
->target
, context_type
, 1);
145 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, textures
,
146 llvm
->target
, context_type
,
147 DRAW_JIT_CTX_TEXTURES
);
148 LP_CHECK_STRUCT_SIZE(struct draw_jit_context
,
149 llvm
->target
, context_type
);
151 LLVMAddTypeName(llvm
->module
, "draw_jit_context", context_type
);
153 llvm
->context_ptr_type
= LLVMPointerType(context_type
, 0);
156 LLVMTypeRef buffer_ptr
= LLVMPointerType(LLVMIntType(8), 0);
157 llvm
->buffer_ptr_type
= LLVMPointerType(buffer_ptr
, 0);
159 /* struct pipe_vertex_buffer */
161 LLVMTypeRef elem_types
[4];
164 elem_types
[0] = LLVMInt32Type();
165 elem_types
[1] = LLVMInt32Type();
166 elem_types
[2] = LLVMInt32Type();
167 elem_types
[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
169 vb_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
171 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer
, stride
,
172 llvm
->target
, vb_type
, 0);
173 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer
, buffer_offset
,
174 llvm
->target
, vb_type
, 2);
175 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer
,
176 llvm
->target
, vb_type
);
178 LLVMAddTypeName(llvm
->module
, "pipe_vertex_buffer", vb_type
);
180 llvm
->vb_ptr_type
= LLVMPointerType(vb_type
, 0);
185 create_vertex_header(struct draw_llvm
*llvm
, int data_elems
)
187 /* struct vertex_header */
188 LLVMTypeRef elem_types
[3];
189 LLVMTypeRef vertex_header
;
190 char struct_name
[24];
192 util_snprintf(struct_name
, 23, "vertex_header%d", data_elems
);
194 elem_types
[0] = LLVMIntType(32);
195 elem_types
[1] = LLVMArrayType(LLVMFloatType(), 4);
196 elem_types
[2] = LLVMArrayType(elem_types
[1], data_elems
);
198 vertex_header
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
200 /* these are bit-fields and we can't take address of them
201 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
202 llvm->target, vertex_header,
203 DRAW_JIT_VERTEX_CLIPMASK);
204 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
205 llvm->target, vertex_header,
206 DRAW_JIT_VERTEX_EDGEFLAG);
207 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
208 llvm->target, vertex_header,
209 DRAW_JIT_VERTEX_PAD);
210 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
211 llvm->target, vertex_header,
212 DRAW_JIT_VERTEX_VERTEX_ID);
214 LP_CHECK_MEMBER_OFFSET(struct vertex_header
, clip
,
215 llvm
->target
, vertex_header
,
216 DRAW_JIT_VERTEX_CLIP
);
217 LP_CHECK_MEMBER_OFFSET(struct vertex_header
, data
,
218 llvm
->target
, vertex_header
,
219 DRAW_JIT_VERTEX_DATA
);
221 LLVMAddTypeName(llvm
->module
, struct_name
, vertex_header
);
223 return LLVMPointerType(vertex_header
, 0);
227 draw_llvm_create(struct draw_context
*draw
)
229 struct draw_llvm
*llvm
;
231 llvm
= CALLOC_STRUCT( draw_llvm
);
236 llvm
->engine
= draw
->engine
;
238 debug_assert(llvm
->engine
);
240 llvm
->module
= LLVMModuleCreateWithName("draw_llvm");
241 llvm
->provider
= LLVMCreateModuleProviderForExistingModule(llvm
->module
);
243 LLVMAddModuleProvider(llvm
->engine
, llvm
->provider
);
245 llvm
->target
= LLVMGetExecutionEngineTargetData(llvm
->engine
);
247 llvm
->pass
= LLVMCreateFunctionPassManager(llvm
->provider
);
248 LLVMAddTargetData(llvm
->target
, llvm
->pass
);
250 if ((gallivm_debug
& GALLIVM_DEBUG_NO_OPT
) == 0) {
251 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
252 * but there are more on SVN. */
253 /* TODO: Add more passes */
255 LLVMAddCFGSimplificationPass(llvm
->pass
);
257 if (HAVE_LLVM
>= 0x207 && sizeof(void*) == 4) {
258 /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
259 * avoid generating bad code.
260 * Test with piglit glsl-vs-sqrt-zero test.
262 LLVMAddConstantPropagationPass(llvm
->pass
);
263 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
266 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
267 LLVMAddConstantPropagationPass(llvm
->pass
);
270 if(util_cpu_caps
.has_sse4_1
) {
271 /* FIXME: There is a bug in this pass, whereby the combination of fptosi
272 * and sitofp (necessary for trunc/floor/ceil/round implementation)
273 * somehow becomes invalid code.
275 LLVMAddInstructionCombiningPass(llvm
->pass
);
277 LLVMAddGVNPass(llvm
->pass
);
279 /* We need at least this pass to prevent the backends to fail in
282 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
287 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
288 LLVMDumpModule(llvm
->module
);
291 llvm
->nr_variants
= 0;
292 make_empty_list(&llvm
->vs_variants_list
);
298 draw_llvm_destroy(struct draw_llvm
*llvm
)
300 LLVMDisposePassManager(llvm
->pass
);
305 struct draw_llvm_variant
*
306 draw_llvm_create_variant(struct draw_llvm
*llvm
,
308 const struct draw_llvm_variant_key
*key
)
310 struct draw_llvm_variant
*variant
;
311 struct llvm_vertex_shader
*shader
=
312 llvm_vertex_shader(llvm
->draw
->vs
.vertex_shader
);
314 variant
= MALLOC(sizeof *variant
+
315 shader
->variant_key_size
-
316 sizeof variant
->key
);
320 variant
->llvm
= llvm
;
322 memcpy(&variant
->key
, key
, shader
->variant_key_size
);
324 llvm
->vertex_header_ptr_type
= create_vertex_header(llvm
, num_inputs
);
326 draw_llvm_generate(llvm
, variant
);
327 draw_llvm_generate_elts(llvm
, variant
);
329 variant
->shader
= shader
;
330 variant
->list_item_global
.base
= variant
;
331 variant
->list_item_local
.base
= variant
;
332 /*variant->no = */shader
->variants_created
++;
333 variant
->list_item_global
.base
= variant
;
339 generate_vs(struct draw_llvm
*llvm
,
340 LLVMBuilderRef builder
,
341 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
342 const LLVMValueRef (*inputs
)[NUM_CHANNELS
],
343 LLVMValueRef context_ptr
,
344 struct lp_build_sampler_soa
*draw_sampler
)
346 const struct tgsi_token
*tokens
= llvm
->draw
->vs
.vertex_shader
->state
.tokens
;
347 struct lp_type vs_type
;
348 LLVMValueRef consts_ptr
= draw_jit_context_vs_constants(builder
, context_ptr
);
349 struct lp_build_sampler_soa
*sampler
= 0;
351 memset(&vs_type
, 0, sizeof vs_type
);
352 vs_type
.floating
= TRUE
; /* floating point values */
353 vs_type
.sign
= TRUE
; /* values are signed */
354 vs_type
.norm
= FALSE
; /* values are not limited to [0,1] or [-1,1] */
355 vs_type
.width
= 32; /* 32-bit float */
356 vs_type
.length
= 4; /* 4 elements per vector */
358 num_vs
= 4; /* number of vertices per block */
361 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
362 tgsi_dump(tokens
, 0);
365 if (llvm
->draw
->num_sampler_views
&&
366 llvm
->draw
->num_samplers
)
367 sampler
= draw_sampler
;
369 lp_build_tgsi_soa(builder
,
372 NULL
/*struct lp_build_mask_context *mask*/,
378 &llvm
->draw
->vs
.vertex_shader
->info
);
#if DEBUG_STORE
/**
 * Debug helper: emit a runtime printf of the four components of \p vec.
 */
static void print_vectorf(LLVMBuilderRef builder,
                          LLVMValueRef vec)
{
   LLVMValueRef val[4];
   unsigned c;

   for (c = 0; c < 4; ++c) {
      val[c] = LLVMBuildExtractElement(builder, vec,
                                       LLVMConstInt(LLVMInt32Type(), c, 0), "");
   }
   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   val[0], val[1], val[2], val[3]);
}
#endif
400 generate_fetch(LLVMBuilderRef builder
,
401 LLVMValueRef vbuffers_ptr
,
403 struct pipe_vertex_element
*velem
,
406 LLVMValueRef instance_id
)
408 LLVMValueRef indices
= LLVMConstInt(LLVMInt64Type(), velem
->vertex_buffer_index
, 0);
409 LLVMValueRef vbuffer_ptr
= LLVMBuildGEP(builder
, vbuffers_ptr
,
411 LLVMValueRef vb_stride
= draw_jit_vbuffer_stride(builder
, vbuf
);
412 LLVMValueRef vb_max_index
= draw_jit_vbuffer_max_index(builder
, vbuf
);
413 LLVMValueRef vb_buffer_offset
= draw_jit_vbuffer_offset(builder
, vbuf
);
417 if (velem
->instance_divisor
) {
418 /* array index = instance_id / instance_divisor */
419 index
= LLVMBuildUDiv(builder
, instance_id
,
420 LLVMConstInt(LLVMInt32Type(), velem
->instance_divisor
, 0),
424 /* limit index to min(inex, vb_max_index) */
425 cond
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, vb_max_index
, "");
426 index
= LLVMBuildSelect(builder
, cond
, index
, vb_max_index
, "");
428 stride
= LLVMBuildMul(builder
, vb_stride
, index
, "");
430 vbuffer_ptr
= LLVMBuildLoad(builder
, vbuffer_ptr
, "vbuffer");
432 stride
= LLVMBuildAdd(builder
, stride
,
435 stride
= LLVMBuildAdd(builder
, stride
,
436 LLVMConstInt(LLVMInt32Type(), velem
->src_offset
, 0),
439 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
440 vbuffer_ptr
= LLVMBuildGEP(builder
, vbuffer_ptr
, &stride
, 1, "");
442 *res
= draw_llvm_translate_from(builder
, vbuffer_ptr
, velem
->src_format
);
446 aos_to_soa(LLVMBuilderRef builder
,
451 LLVMValueRef channel
)
453 LLVMValueRef ex
, res
;
455 ex
= LLVMBuildExtractElement(builder
, val0
,
457 res
= LLVMBuildInsertElement(builder
,
458 LLVMConstNull(LLVMTypeOf(val0
)),
460 LLVMConstInt(LLVMInt32Type(), 0, 0),
463 ex
= LLVMBuildExtractElement(builder
, val1
,
465 res
= LLVMBuildInsertElement(builder
,
467 LLVMConstInt(LLVMInt32Type(), 1, 0),
470 ex
= LLVMBuildExtractElement(builder
, val2
,
472 res
= LLVMBuildInsertElement(builder
,
474 LLVMConstInt(LLVMInt32Type(), 2, 0),
477 ex
= LLVMBuildExtractElement(builder
, val3
,
479 res
= LLVMBuildInsertElement(builder
,
481 LLVMConstInt(LLVMInt32Type(), 3, 0),
488 soa_to_aos(LLVMBuilderRef builder
,
489 LLVMValueRef soa
[NUM_CHANNELS
],
490 LLVMValueRef aos
[NUM_CHANNELS
])
495 debug_assert(NUM_CHANNELS
== 4);
497 aos
[0] = LLVMConstNull(LLVMTypeOf(soa
[0]));
498 aos
[1] = aos
[2] = aos
[3] = aos
[0];
500 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
501 LLVMValueRef channel
= LLVMConstInt(LLVMInt32Type(), i
, 0);
503 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
504 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
505 aos
[0] = LLVMBuildInsertElement(builder
, aos
[0], comp
, channel
, "");
507 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
508 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
509 aos
[1] = LLVMBuildInsertElement(builder
, aos
[1], comp
, channel
, "");
511 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
512 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
513 aos
[2] = LLVMBuildInsertElement(builder
, aos
[2], comp
, channel
, "");
515 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
516 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
517 aos
[3] = LLVMBuildInsertElement(builder
, aos
[3], comp
, channel
, "");
523 convert_to_soa(LLVMBuilderRef builder
,
524 LLVMValueRef (*aos
)[NUM_CHANNELS
],
525 LLVMValueRef (*soa
)[NUM_CHANNELS
],
530 debug_assert(NUM_CHANNELS
== 4);
532 for (i
= 0; i
< num_attribs
; ++i
) {
533 LLVMValueRef val0
= aos
[i
][0];
534 LLVMValueRef val1
= aos
[i
][1];
535 LLVMValueRef val2
= aos
[i
][2];
536 LLVMValueRef val3
= aos
[i
][3];
538 soa
[i
][0] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
539 LLVMConstInt(LLVMInt32Type(), 0, 0));
540 soa
[i
][1] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
541 LLVMConstInt(LLVMInt32Type(), 1, 0));
542 soa
[i
][2] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
543 LLVMConstInt(LLVMInt32Type(), 2, 0));
544 soa
[i
][3] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
545 LLVMConstInt(LLVMInt32Type(), 3, 0));
550 store_aos(LLVMBuilderRef builder
,
555 LLVMValueRef id_ptr
= draw_jit_header_id(builder
, io_ptr
);
556 LLVMValueRef data_ptr
= draw_jit_header_data(builder
, io_ptr
);
557 LLVMValueRef indices
[3];
559 indices
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
561 indices
[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
563 /* undefined vertex */
564 LLVMBuildStore(builder
, LLVMConstInt(LLVMInt32Type(),
568 lp_build_printf(builder
, " ---- %p storing attribute %d (io = %p)\n", data_ptr
, index
, io_ptr
);
571 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr);
572 print_vectorf(builder, value);*/
573 data_ptr
= LLVMBuildBitCast(builder
, data_ptr
,
574 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
576 data_ptr
= LLVMBuildGEP(builder
, data_ptr
, indices
, 2, "");
578 LLVMBuildStore(builder
, value
, data_ptr
);
581 LLVMValueRef x
, y
, z
, w
;
582 LLVMValueRef idx0
, idx1
, idx2
, idx3
;
583 LLVMValueRef gep0
, gep1
, gep2
, gep3
;
584 data_ptr
= LLVMBuildGEP(builder
, data_ptr
, indices
, 3, "");
586 idx0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
587 idx1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
588 idx2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
589 idx3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
591 x
= LLVMBuildExtractElement(builder
, value
,
593 y
= LLVMBuildExtractElement(builder
, value
,
595 z
= LLVMBuildExtractElement(builder
, value
,
597 w
= LLVMBuildExtractElement(builder
, value
,
600 gep0
= LLVMBuildGEP(builder
, data_ptr
, &idx0
, 1, "");
601 gep1
= LLVMBuildGEP(builder
, data_ptr
, &idx1
, 1, "");
602 gep2
= LLVMBuildGEP(builder
, data_ptr
, &idx2
, 1, "");
603 gep3
= LLVMBuildGEP(builder
, data_ptr
, &idx3
, 1, "");
605 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
606 x, gep0, y, gep1, z, gep2, w, gep3);*/
607 LLVMBuildStore(builder
, x
, gep0
);
608 LLVMBuildStore(builder
, y
, gep1
);
609 LLVMBuildStore(builder
, z
, gep2
);
610 LLVMBuildStore(builder
, w
, gep3
);
616 store_aos_array(LLVMBuilderRef builder
,
618 LLVMValueRef aos
[NUM_CHANNELS
],
622 LLVMValueRef attr_index
= LLVMConstInt(LLVMInt32Type(), attrib
, 0);
623 LLVMValueRef ind0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
624 LLVMValueRef ind1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
625 LLVMValueRef ind2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
626 LLVMValueRef ind3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
627 LLVMValueRef io0_ptr
, io1_ptr
, io2_ptr
, io3_ptr
;
629 debug_assert(NUM_CHANNELS
== 4);
631 io0_ptr
= LLVMBuildGEP(builder
, io_ptr
,
633 io1_ptr
= LLVMBuildGEP(builder
, io_ptr
,
635 io2_ptr
= LLVMBuildGEP(builder
, io_ptr
,
637 io3_ptr
= LLVMBuildGEP(builder
, io_ptr
,
641 lp_build_printf(builder
, " io = %p, indexes[%d, %d, %d, %d]\n",
642 io_ptr
, ind0
, ind1
, ind2
, ind3
);
645 store_aos(builder
, io0_ptr
, attr_index
, aos
[0]);
646 store_aos(builder
, io1_ptr
, attr_index
, aos
[1]);
647 store_aos(builder
, io2_ptr
, attr_index
, aos
[2]);
648 store_aos(builder
, io3_ptr
, attr_index
, aos
[3]);
652 convert_to_aos(LLVMBuilderRef builder
,
654 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
658 unsigned chan
, attrib
;
661 lp_build_printf(builder
, " # storing begin\n");
663 for (attrib
= 0; attrib
< num_outputs
; ++attrib
) {
666 for(chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
667 if(outputs
[attrib
][chan
]) {
668 LLVMValueRef out
= LLVMBuildLoad(builder
, outputs
[attrib
][chan
], "");
669 lp_build_name(out
, "output%u.%c", attrib
, "xyzw"[chan
]);
670 /*lp_build_printf(builder, "output %d : %d ",
671 LLVMConstInt(LLVMInt32Type(), attrib, 0),
672 LLVMConstInt(LLVMInt32Type(), chan, 0));
673 print_vectorf(builder, out);*/
678 soa_to_aos(builder
, soa
, aos
);
679 store_aos_array(builder
,
686 lp_build_printf(builder
, " # storing end\n");
691 draw_llvm_generate(struct draw_llvm
*llvm
, struct draw_llvm_variant
*variant
)
693 LLVMTypeRef arg_types
[8];
694 LLVMTypeRef func_type
;
695 LLVMValueRef context_ptr
;
696 LLVMBasicBlockRef block
;
697 LLVMBuilderRef builder
;
698 LLVMValueRef start
, end
, count
, stride
, step
, io_itr
;
699 LLVMValueRef io_ptr
, vbuffers_ptr
, vb_ptr
;
700 LLVMValueRef instance_id
;
701 struct draw_context
*draw
= llvm
->draw
;
703 struct lp_build_context bld
;
704 struct lp_build_loop_state lp_loop
;
705 const int max_vertices
= 4;
706 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][NUM_CHANNELS
];
708 struct lp_build_sampler_soa
*sampler
= 0;
710 arg_types
[0] = llvm
->context_ptr_type
; /* context */
711 arg_types
[1] = llvm
->vertex_header_ptr_type
; /* vertex_header */
712 arg_types
[2] = llvm
->buffer_ptr_type
; /* vbuffers */
713 arg_types
[3] = LLVMInt32Type(); /* start */
714 arg_types
[4] = LLVMInt32Type(); /* count */
715 arg_types
[5] = LLVMInt32Type(); /* stride */
716 arg_types
[6] = llvm
->vb_ptr_type
; /* pipe_vertex_buffer's */
717 arg_types
[7] = LLVMInt32Type(); /* instance_id */
719 func_type
= LLVMFunctionType(LLVMVoidType(), arg_types
, Elements(arg_types
), 0);
721 variant
->function
= LLVMAddFunction(llvm
->module
, "draw_llvm_shader", func_type
);
722 LLVMSetFunctionCallConv(variant
->function
, LLVMCCallConv
);
723 for(i
= 0; i
< Elements(arg_types
); ++i
)
724 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
725 LLVMAddAttribute(LLVMGetParam(variant
->function
, i
), LLVMNoAliasAttribute
);
727 context_ptr
= LLVMGetParam(variant
->function
, 0);
728 io_ptr
= LLVMGetParam(variant
->function
, 1);
729 vbuffers_ptr
= LLVMGetParam(variant
->function
, 2);
730 start
= LLVMGetParam(variant
->function
, 3);
731 count
= LLVMGetParam(variant
->function
, 4);
732 stride
= LLVMGetParam(variant
->function
, 5);
733 vb_ptr
= LLVMGetParam(variant
->function
, 6);
734 instance_id
= LLVMGetParam(variant
->function
, 7);
736 lp_build_name(context_ptr
, "context");
737 lp_build_name(io_ptr
, "io");
738 lp_build_name(vbuffers_ptr
, "vbuffers");
739 lp_build_name(start
, "start");
740 lp_build_name(count
, "count");
741 lp_build_name(stride
, "stride");
742 lp_build_name(vb_ptr
, "vb");
743 lp_build_name(instance_id
, "instance_id");
749 block
= LLVMAppendBasicBlock(variant
->function
, "entry");
750 builder
= LLVMCreateBuilder();
751 LLVMPositionBuilderAtEnd(builder
, block
);
753 lp_build_context_init(&bld
, builder
, lp_type_int(32));
755 end
= lp_build_add(&bld
, start
, count
);
757 step
= LLVMConstInt(LLVMInt32Type(), max_vertices
, 0);
759 /* code generated texture sampling */
760 sampler
= draw_llvm_sampler_soa_create(
761 draw_llvm_variant_key_samplers(&variant
->key
),
765 lp_build_printf(builder
, "start = %d, end = %d, step = %d\n",
768 lp_build_loop_begin(builder
, start
, &lp_loop
);
770 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
];
771 LLVMValueRef aos_attribs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
] = { { 0 } };
773 const LLVMValueRef (*ptr_aos
)[NUM_CHANNELS
];
775 io_itr
= LLVMBuildSub(builder
, lp_loop
.counter
, start
, "");
776 io
= LLVMBuildGEP(builder
, io_ptr
, &io_itr
, 1, "");
778 lp_build_printf(builder
, " --- io %d = %p, loop counter %d\n",
779 io_itr
, io
, lp_loop
.counter
);
781 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
782 LLVMValueRef true_index
= LLVMBuildAdd(
785 LLVMConstInt(LLVMInt32Type(), i
, 0), "");
786 for (j
= 0; j
< draw
->pt
.nr_vertex_elements
; ++j
) {
787 struct pipe_vertex_element
*velem
= &draw
->pt
.vertex_element
[j
];
788 LLVMValueRef vb_index
= LLVMConstInt(LLVMInt32Type(),
789 velem
->vertex_buffer_index
,
791 LLVMValueRef vb
= LLVMBuildGEP(builder
, vb_ptr
,
793 generate_fetch(builder
, vbuffers_ptr
,
794 &aos_attribs
[j
][i
], velem
, vb
, true_index
,
798 convert_to_soa(builder
, aos_attribs
, inputs
,
799 draw
->pt
.nr_vertex_elements
);
801 ptr_aos
= (const LLVMValueRef (*)[NUM_CHANNELS
]) inputs
;
809 convert_to_aos(builder
, io
, outputs
,
810 draw
->vs
.vertex_shader
->info
.num_outputs
,
813 lp_build_loop_end_cond(builder
, end
, step
, LLVMIntUGE
, &lp_loop
);
815 sampler
->destroy(sampler
);
818 /* Avoid corrupting the FPU stack on 32bit OSes. */
819 lp_build_intrinsic(builder
, "llvm.x86.mmx.emms", LLVMVoidType(), NULL
, 0);
822 LLVMBuildRetVoid(builder
);
824 LLVMDisposeBuilder(builder
);
827 * Translate the LLVM IR into machine code.
830 if(LLVMVerifyFunction(variant
->function
, LLVMPrintMessageAction
)) {
831 lp_debug_dump_value(variant
->function
);
836 LLVMRunFunctionPassManager(llvm
->pass
, variant
->function
);
838 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
839 lp_debug_dump_value(variant
->function
);
843 code
= LLVMGetPointerToGlobal(llvm
->draw
->engine
, variant
->function
);
844 variant
->jit_func
= (draw_jit_vert_func
)pointer_to_func(code
);
846 if (gallivm_debug
& GALLIVM_DEBUG_ASM
) {
847 lp_disassemble(code
);
849 lp_func_delete_body(variant
->function
);
854 draw_llvm_generate_elts(struct draw_llvm
*llvm
, struct draw_llvm_variant
*variant
)
856 LLVMTypeRef arg_types
[8];
857 LLVMTypeRef func_type
;
858 LLVMValueRef context_ptr
;
859 LLVMBasicBlockRef block
;
860 LLVMBuilderRef builder
;
861 LLVMValueRef fetch_elts
, fetch_count
, stride
, step
, io_itr
;
862 LLVMValueRef io_ptr
, vbuffers_ptr
, vb_ptr
;
863 LLVMValueRef instance_id
;
864 struct draw_context
*draw
= llvm
->draw
;
866 struct lp_build_context bld
;
867 struct lp_build_loop_state lp_loop
;
868 const int max_vertices
= 4;
869 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][NUM_CHANNELS
];
870 LLVMValueRef fetch_max
;
872 struct lp_build_sampler_soa
*sampler
= 0;
874 arg_types
[0] = llvm
->context_ptr_type
; /* context */
875 arg_types
[1] = llvm
->vertex_header_ptr_type
; /* vertex_header */
876 arg_types
[2] = llvm
->buffer_ptr_type
; /* vbuffers */
877 arg_types
[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
878 arg_types
[4] = LLVMInt32Type(); /* fetch_count */
879 arg_types
[5] = LLVMInt32Type(); /* stride */
880 arg_types
[6] = llvm
->vb_ptr_type
; /* pipe_vertex_buffer's */
881 arg_types
[7] = LLVMInt32Type(); /* instance_id */
883 func_type
= LLVMFunctionType(LLVMVoidType(), arg_types
, Elements(arg_types
), 0);
885 variant
->function_elts
= LLVMAddFunction(llvm
->module
, "draw_llvm_shader_elts",
887 LLVMSetFunctionCallConv(variant
->function_elts
, LLVMCCallConv
);
888 for(i
= 0; i
< Elements(arg_types
); ++i
)
889 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
890 LLVMAddAttribute(LLVMGetParam(variant
->function_elts
, i
),
891 LLVMNoAliasAttribute
);
893 context_ptr
= LLVMGetParam(variant
->function_elts
, 0);
894 io_ptr
= LLVMGetParam(variant
->function_elts
, 1);
895 vbuffers_ptr
= LLVMGetParam(variant
->function_elts
, 2);
896 fetch_elts
= LLVMGetParam(variant
->function_elts
, 3);
897 fetch_count
= LLVMGetParam(variant
->function_elts
, 4);
898 stride
= LLVMGetParam(variant
->function_elts
, 5);
899 vb_ptr
= LLVMGetParam(variant
->function_elts
, 6);
900 instance_id
= LLVMGetParam(variant
->function_elts
, 7);
902 lp_build_name(context_ptr
, "context");
903 lp_build_name(io_ptr
, "io");
904 lp_build_name(vbuffers_ptr
, "vbuffers");
905 lp_build_name(fetch_elts
, "fetch_elts");
906 lp_build_name(fetch_count
, "fetch_count");
907 lp_build_name(stride
, "stride");
908 lp_build_name(vb_ptr
, "vb");
909 lp_build_name(instance_id
, "instance_id");
915 block
= LLVMAppendBasicBlock(variant
->function_elts
, "entry");
916 builder
= LLVMCreateBuilder();
917 LLVMPositionBuilderAtEnd(builder
, block
);
919 lp_build_context_init(&bld
, builder
, lp_type_int(32));
921 step
= LLVMConstInt(LLVMInt32Type(), max_vertices
, 0);
923 /* code generated texture sampling */
924 sampler
= draw_llvm_sampler_soa_create(
925 draw_llvm_variant_key_samplers(&variant
->key
),
928 fetch_max
= LLVMBuildSub(builder
, fetch_count
,
929 LLVMConstInt(LLVMInt32Type(), 1, 0),
932 lp_build_loop_begin(builder
, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop
);
934 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
];
935 LLVMValueRef aos_attribs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
] = { { 0 } };
937 const LLVMValueRef (*ptr_aos
)[NUM_CHANNELS
];
939 io_itr
= lp_loop
.counter
;
940 io
= LLVMBuildGEP(builder
, io_ptr
, &io_itr
, 1, "");
942 lp_build_printf(builder
, " --- io %d = %p, loop counter %d\n",
943 io_itr
, io
, lp_loop
.counter
);
945 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
946 LLVMValueRef true_index
= LLVMBuildAdd(
949 LLVMConstInt(LLVMInt32Type(), i
, 0), "");
950 LLVMValueRef fetch_ptr
;
952 /* make sure we're not out of bounds which can happen
953 * if fetch_count % 4 != 0, because on the last iteration
954 * a few of the 4 vertex fetches will be out of bounds */
955 true_index
= lp_build_min(&bld
, true_index
, fetch_max
);
957 fetch_ptr
= LLVMBuildGEP(builder
, fetch_elts
,
959 true_index
= LLVMBuildLoad(builder
, fetch_ptr
, "fetch_elt");
960 for (j
= 0; j
< draw
->pt
.nr_vertex_elements
; ++j
) {
961 struct pipe_vertex_element
*velem
= &draw
->pt
.vertex_element
[j
];
962 LLVMValueRef vb_index
= LLVMConstInt(LLVMInt32Type(),
963 velem
->vertex_buffer_index
,
965 LLVMValueRef vb
= LLVMBuildGEP(builder
, vb_ptr
,
967 generate_fetch(builder
, vbuffers_ptr
,
968 &aos_attribs
[j
][i
], velem
, vb
, true_index
,
972 convert_to_soa(builder
, aos_attribs
, inputs
,
973 draw
->pt
.nr_vertex_elements
);
975 ptr_aos
= (const LLVMValueRef (*)[NUM_CHANNELS
]) inputs
;
983 convert_to_aos(builder
, io
, outputs
,
984 draw
->vs
.vertex_shader
->info
.num_outputs
,
987 lp_build_loop_end_cond(builder
, fetch_count
, step
, LLVMIntUGE
, &lp_loop
);
989 sampler
->destroy(sampler
);
992 /* Avoid corrupting the FPU stack on 32bit OSes. */
993 lp_build_intrinsic(builder
, "llvm.x86.mmx.emms", LLVMVoidType(), NULL
, 0);
996 LLVMBuildRetVoid(builder
);
998 LLVMDisposeBuilder(builder
);
1001 * Translate the LLVM IR into machine code.
1004 if(LLVMVerifyFunction(variant
->function_elts
, LLVMPrintMessageAction
)) {
1005 lp_debug_dump_value(variant
->function_elts
);
1010 LLVMRunFunctionPassManager(llvm
->pass
, variant
->function_elts
);
1012 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
1013 lp_debug_dump_value(variant
->function_elts
);
1017 code
= LLVMGetPointerToGlobal(llvm
->draw
->engine
, variant
->function_elts
);
1018 variant
->jit_func_elts
= (draw_jit_vert_func_elts
)pointer_to_func(code
);
1020 if (gallivm_debug
& GALLIVM_DEBUG_ASM
) {
1021 lp_disassemble(code
);
1023 lp_func_delete_body(variant
->function_elts
);
1027 struct draw_llvm_variant_key
*
1028 draw_llvm_make_variant_key(struct draw_llvm
*llvm
, char *store
)
1031 struct draw_llvm_variant_key
*key
;
1032 struct lp_sampler_static_state
*sampler
;
1034 key
= (struct draw_llvm_variant_key
*)store
;
1036 /* Presumably all variants of the shader should have the same
1037 * number of vertex elements - ie the number of shader inputs.
1039 key
->nr_vertex_elements
= llvm
->draw
->pt
.nr_vertex_elements
;
1041 /* All variants of this shader will have the same value for
1042 * nr_samplers. Not yet trying to compact away holes in the
1045 key
->nr_samplers
= llvm
->draw
->vs
.vertex_shader
->info
.file_max
[TGSI_FILE_SAMPLER
] + 1;
1047 sampler
= draw_llvm_variant_key_samplers(key
);
1049 memcpy(key
->vertex_element
,
1050 llvm
->draw
->pt
.vertex_element
,
1051 sizeof(struct pipe_vertex_element
) * key
->nr_vertex_elements
);
1053 memset(sampler
, 0, key
->nr_samplers
* sizeof *sampler
);
1055 for (i
= 0 ; i
< key
->nr_samplers
; i
++) {
1056 lp_sampler_static_state(&sampler
[i
],
1057 llvm
->draw
->sampler_views
[i
],
1058 llvm
->draw
->samplers
[i
]);
1065 draw_llvm_set_mapped_texture(struct draw_context
*draw
,
1066 unsigned sampler_idx
,
1067 uint32_t width
, uint32_t height
, uint32_t depth
,
1068 uint32_t last_level
,
1069 uint32_t row_stride
[PIPE_MAX_TEXTURE_LEVELS
],
1070 uint32_t img_stride
[PIPE_MAX_TEXTURE_LEVELS
],
1071 const void *data
[PIPE_MAX_TEXTURE_LEVELS
])
1074 struct draw_jit_texture
*jit_tex
;
1076 assert(sampler_idx
< PIPE_MAX_VERTEX_SAMPLERS
);
1079 jit_tex
= &draw
->llvm
->jit_context
.textures
[sampler_idx
];
1081 jit_tex
->width
= width
;
1082 jit_tex
->height
= height
;
1083 jit_tex
->depth
= depth
;
1084 jit_tex
->last_level
= last_level
;
1086 for (j
= 0; j
<= last_level
; j
++) {
1087 jit_tex
->data
[j
] = data
[j
];
1088 jit_tex
->row_stride
[j
] = row_stride
[j
];
1089 jit_tex
->img_stride
[j
] = img_stride
[j
];
1095 draw_llvm_set_sampler_state(struct draw_context
*draw
)
1099 for (i
= 0; i
< draw
->num_samplers
; i
++) {
1100 struct draw_jit_texture
*jit_tex
= &draw
->llvm
->jit_context
.textures
[i
];
1102 if (draw
->samplers
[i
]) {
1103 jit_tex
->min_lod
= draw
->samplers
[i
]->min_lod
;
1104 jit_tex
->max_lod
= draw
->samplers
[i
]->max_lod
;
1105 jit_tex
->lod_bias
= draw
->samplers
[i
]->lod_bias
;
1106 COPY_4V(jit_tex
->border_color
, draw
->samplers
[i
]->border_color
);
1113 draw_llvm_destroy_variant(struct draw_llvm_variant
*variant
)
1115 struct draw_llvm
*llvm
= variant
->llvm
;
1116 struct draw_context
*draw
= llvm
->draw
;
1118 if (variant
->function_elts
) {
1119 if (variant
->function_elts
)
1120 LLVMFreeMachineCodeForFunction(draw
->engine
,
1121 variant
->function_elts
);
1122 LLVMDeleteFunction(variant
->function_elts
);
1125 if (variant
->function
) {
1126 if (variant
->function
)
1127 LLVMFreeMachineCodeForFunction(draw
->engine
,
1129 LLVMDeleteFunction(variant
->function
);
1132 remove_from_list(&variant
->list_item_local
);
1133 variant
->shader
->variants_cached
--;
1134 remove_from_list(&variant
->list_item_global
);
1135 llvm
->nr_variants
--;