1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "draw_llvm.h"
30 #include "draw_context.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_struct.h"
35 #include "gallivm/lp_bld_type.h"
36 #include "gallivm/lp_bld_flow.h"
37 #include "gallivm/lp_bld_debug.h"
38 #include "gallivm/lp_bld_tgsi.h"
39 #include "gallivm/lp_bld_printf.h"
41 #include "tgsi/tgsi_exec.h"
42 #include "tgsi/tgsi_dump.h"
44 #include "util/u_cpu_detect.h"
45 #include "util/u_pointer.h"
46 #include "util/u_string.h"
48 #include <llvm-c/Transforms/Scalar.h>
52 /* generates the draw jit function */
54 draw_llvm_generate(struct draw_llvm
*llvm
, struct draw_llvm_variant
*var
);
56 draw_llvm_generate_elts(struct draw_llvm
*llvm
, struct draw_llvm_variant
*var
);
59 init_globals(struct draw_llvm
*llvm
)
61 LLVMTypeRef texture_type
;
63 /* struct draw_jit_texture */
65 LLVMTypeRef elem_types
[DRAW_JIT_TEXTURE_NUM_FIELDS
];
67 elem_types
[DRAW_JIT_TEXTURE_WIDTH
] = LLVMInt32Type();
68 elem_types
[DRAW_JIT_TEXTURE_HEIGHT
] = LLVMInt32Type();
69 elem_types
[DRAW_JIT_TEXTURE_DEPTH
] = LLVMInt32Type();
70 elem_types
[DRAW_JIT_TEXTURE_LAST_LEVEL
] = LLVMInt32Type();
71 elem_types
[DRAW_JIT_TEXTURE_ROW_STRIDE
] =
72 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS
);
73 elem_types
[DRAW_JIT_TEXTURE_IMG_STRIDE
] =
74 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS
);
75 elem_types
[DRAW_JIT_TEXTURE_DATA
] =
76 LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
77 DRAW_MAX_TEXTURE_LEVELS
);
79 texture_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
81 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, width
,
82 llvm
->target
, texture_type
,
83 DRAW_JIT_TEXTURE_WIDTH
);
84 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, height
,
85 llvm
->target
, texture_type
,
86 DRAW_JIT_TEXTURE_HEIGHT
);
87 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, depth
,
88 llvm
->target
, texture_type
,
89 DRAW_JIT_TEXTURE_DEPTH
);
90 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, last_level
,
91 llvm
->target
, texture_type
,
92 DRAW_JIT_TEXTURE_LAST_LEVEL
);
93 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, row_stride
,
94 llvm
->target
, texture_type
,
95 DRAW_JIT_TEXTURE_ROW_STRIDE
);
96 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, img_stride
,
97 llvm
->target
, texture_type
,
98 DRAW_JIT_TEXTURE_IMG_STRIDE
);
99 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, data
,
100 llvm
->target
, texture_type
,
101 DRAW_JIT_TEXTURE_DATA
);
102 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture
,
103 llvm
->target
, texture_type
);
105 LLVMAddTypeName(llvm
->module
, "texture", texture_type
);
109 /* struct draw_jit_context */
111 LLVMTypeRef elem_types
[3];
112 LLVMTypeRef context_type
;
114 elem_types
[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
115 elem_types
[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
116 elem_types
[2] = LLVMArrayType(texture_type
,
117 PIPE_MAX_VERTEX_SAMPLERS
); /* textures */
119 context_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
121 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, vs_constants
,
122 llvm
->target
, context_type
, 0);
123 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, gs_constants
,
124 llvm
->target
, context_type
, 1);
125 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, textures
,
126 llvm
->target
, context_type
,
127 DRAW_JIT_CTX_TEXTURES
);
128 LP_CHECK_STRUCT_SIZE(struct draw_jit_context
,
129 llvm
->target
, context_type
);
131 LLVMAddTypeName(llvm
->module
, "draw_jit_context", context_type
);
133 llvm
->context_ptr_type
= LLVMPointerType(context_type
, 0);
136 LLVMTypeRef buffer_ptr
= LLVMPointerType(LLVMIntType(8), 0);
137 llvm
->buffer_ptr_type
= LLVMPointerType(buffer_ptr
, 0);
139 /* struct pipe_vertex_buffer */
141 LLVMTypeRef elem_types
[4];
144 elem_types
[0] = LLVMInt32Type();
145 elem_types
[1] = LLVMInt32Type();
146 elem_types
[2] = LLVMInt32Type();
147 elem_types
[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
149 vb_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
151 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer
, stride
,
152 llvm
->target
, vb_type
, 0);
153 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer
, buffer_offset
,
154 llvm
->target
, vb_type
, 2);
155 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer
,
156 llvm
->target
, vb_type
);
158 LLVMAddTypeName(llvm
->module
, "pipe_vertex_buffer", vb_type
);
160 llvm
->vb_ptr_type
= LLVMPointerType(vb_type
, 0);
165 create_vertex_header(struct draw_llvm
*llvm
, int data_elems
)
167 /* struct vertex_header */
168 LLVMTypeRef elem_types
[3];
169 LLVMTypeRef vertex_header
;
170 char struct_name
[24];
172 util_snprintf(struct_name
, 23, "vertex_header%d", data_elems
);
174 elem_types
[0] = LLVMIntType(32);
175 elem_types
[1] = LLVMArrayType(LLVMFloatType(), 4);
176 elem_types
[2] = LLVMArrayType(elem_types
[1], data_elems
);
178 vertex_header
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
180 /* these are bit-fields and we can't take address of them
181 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
182 llvm->target, vertex_header,
183 DRAW_JIT_VERTEX_CLIPMASK);
184 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
185 llvm->target, vertex_header,
186 DRAW_JIT_VERTEX_EDGEFLAG);
187 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
188 llvm->target, vertex_header,
189 DRAW_JIT_VERTEX_PAD);
190 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
191 llvm->target, vertex_header,
192 DRAW_JIT_VERTEX_VERTEX_ID);
194 LP_CHECK_MEMBER_OFFSET(struct vertex_header
, clip
,
195 llvm
->target
, vertex_header
,
196 DRAW_JIT_VERTEX_CLIP
);
197 LP_CHECK_MEMBER_OFFSET(struct vertex_header
, data
,
198 llvm
->target
, vertex_header
,
199 DRAW_JIT_VERTEX_DATA
);
201 LLVMAddTypeName(llvm
->module
, struct_name
, vertex_header
);
203 return LLVMPointerType(vertex_header
, 0);
207 draw_llvm_create(struct draw_context
*draw
)
209 struct draw_llvm
*llvm
;
213 /* require SSE2 due to LLVM PR6960. */
214 if (!util_cpu_caps
.has_sse2
)
218 llvm
= CALLOC_STRUCT( draw_llvm
);
223 llvm
->engine
= draw
->engine
;
225 debug_assert(llvm
->engine
);
227 llvm
->module
= LLVMModuleCreateWithName("draw_llvm");
228 llvm
->provider
= LLVMCreateModuleProviderForExistingModule(llvm
->module
);
230 LLVMAddModuleProvider(llvm
->engine
, llvm
->provider
);
232 llvm
->target
= LLVMGetExecutionEngineTargetData(llvm
->engine
);
234 llvm
->pass
= LLVMCreateFunctionPassManager(llvm
->provider
);
235 LLVMAddTargetData(llvm
->target
, llvm
->pass
);
237 if ((gallivm_debug
& GALLIVM_DEBUG_NO_OPT
) == 0) {
238 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
239 * but there are more on SVN. */
240 /* TODO: Add more passes */
242 LLVMAddCFGSimplificationPass(llvm
->pass
);
244 if (HAVE_LLVM
>= 0x207 && sizeof(void*) == 4) {
245 /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
246 * avoid generating bad code.
247 * Test with piglit glsl-vs-sqrt-zero test.
249 LLVMAddConstantPropagationPass(llvm
->pass
);
250 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
253 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
254 LLVMAddConstantPropagationPass(llvm
->pass
);
257 if(util_cpu_caps
.has_sse4_1
) {
258 /* FIXME: There is a bug in this pass, whereby the combination of fptosi
259 * and sitofp (necessary for trunc/floor/ceil/round implementation)
260 * somehow becomes invalid code.
262 LLVMAddInstructionCombiningPass(llvm
->pass
);
264 LLVMAddGVNPass(llvm
->pass
);
266 /* We need at least this pass to prevent the backends to fail in
269 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
274 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
275 LLVMDumpModule(llvm
->module
);
278 llvm
->nr_variants
= 0;
279 make_empty_list(&llvm
->vs_variants_list
);
285 draw_llvm_destroy(struct draw_llvm
*llvm
)
287 LLVMDisposePassManager(llvm
->pass
);
292 struct draw_llvm_variant
*
293 draw_llvm_create_variant(struct draw_llvm
*llvm
, int num_inputs
)
295 struct draw_llvm_variant
*variant
= MALLOC(sizeof(struct draw_llvm_variant
));
296 struct llvm_vertex_shader
*shader
=
297 llvm_vertex_shader(llvm
->draw
->vs
.vertex_shader
);
299 variant
->llvm
= llvm
;
301 draw_llvm_make_variant_key(llvm
, &variant
->key
);
303 llvm
->vertex_header_ptr_type
= create_vertex_header(llvm
, num_inputs
);
305 draw_llvm_generate(llvm
, variant
);
306 draw_llvm_generate_elts(llvm
, variant
);
308 variant
->shader
= shader
;
309 variant
->list_item_global
.base
= variant
;
310 variant
->list_item_local
.base
= variant
;
311 /*variant->no = */shader
->variants_created
++;
312 variant
->list_item_global
.base
= variant
;
318 generate_vs(struct draw_llvm
*llvm
,
319 LLVMBuilderRef builder
,
320 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
321 const LLVMValueRef (*inputs
)[NUM_CHANNELS
],
322 LLVMValueRef context_ptr
,
323 struct lp_build_sampler_soa
*draw_sampler
)
325 const struct tgsi_token
*tokens
= llvm
->draw
->vs
.vertex_shader
->state
.tokens
;
326 struct lp_type vs_type
;
327 LLVMValueRef consts_ptr
= draw_jit_context_vs_constants(builder
, context_ptr
);
328 struct lp_build_sampler_soa
*sampler
= 0;
330 memset(&vs_type
, 0, sizeof vs_type
);
331 vs_type
.floating
= TRUE
; /* floating point values */
332 vs_type
.sign
= TRUE
; /* values are signed */
333 vs_type
.norm
= FALSE
; /* values are not limited to [0,1] or [-1,1] */
334 vs_type
.width
= 32; /* 32-bit float */
335 vs_type
.length
= 4; /* 4 elements per vector */
337 num_vs
= 4; /* number of vertices per block */
340 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
341 tgsi_dump(tokens
, 0);
344 if (llvm
->draw
->num_sampler_views
&&
345 llvm
->draw
->num_samplers
)
346 sampler
= draw_sampler
;
348 lp_build_tgsi_soa(builder
,
351 NULL
/*struct lp_build_mask_context *mask*/,
357 &llvm
->draw
->vs
.vertex_shader
->info
);
361 static void print_vectorf(LLVMBuilderRef builder
,
365 val
[0] = LLVMBuildExtractElement(builder
, vec
,
366 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
367 val
[1] = LLVMBuildExtractElement(builder
, vec
,
368 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
369 val
[2] = LLVMBuildExtractElement(builder
, vec
,
370 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
371 val
[3] = LLVMBuildExtractElement(builder
, vec
,
372 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
373 lp_build_printf(builder
, "vector = [%f, %f, %f, %f]\n",
374 val
[0], val
[1], val
[2], val
[3]);
379 generate_fetch(LLVMBuilderRef builder
,
380 LLVMValueRef vbuffers_ptr
,
382 struct pipe_vertex_element
*velem
,
385 LLVMValueRef instance_id
)
387 LLVMValueRef indices
= LLVMConstInt(LLVMInt64Type(), velem
->vertex_buffer_index
, 0);
388 LLVMValueRef vbuffer_ptr
= LLVMBuildGEP(builder
, vbuffers_ptr
,
390 LLVMValueRef vb_stride
= draw_jit_vbuffer_stride(builder
, vbuf
);
391 LLVMValueRef vb_max_index
= draw_jit_vbuffer_max_index(builder
, vbuf
);
392 LLVMValueRef vb_buffer_offset
= draw_jit_vbuffer_offset(builder
, vbuf
);
396 if (velem
->instance_divisor
) {
397 /* array index = instance_id / instance_divisor */
398 index
= LLVMBuildUDiv(builder
, instance_id
,
399 LLVMConstInt(LLVMInt32Type(), velem
->instance_divisor
, 0),
403 /* limit index to min(index, vb_max_index) */
404 cond
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, vb_max_index
, "");
405 index
= LLVMBuildSelect(builder
, cond
, index
, vb_max_index
, "");
407 stride
= LLVMBuildMul(builder
, vb_stride
, index
, "");
409 vbuffer_ptr
= LLVMBuildLoad(builder
, vbuffer_ptr
, "vbuffer");
411 stride
= LLVMBuildAdd(builder
, stride
,
414 stride
= LLVMBuildAdd(builder
, stride
,
415 LLVMConstInt(LLVMInt32Type(), velem
->src_offset
, 0),
418 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
419 vbuffer_ptr
= LLVMBuildGEP(builder
, vbuffer_ptr
, &stride
, 1, "");
421 *res
= draw_llvm_translate_from(builder
, vbuffer_ptr
, velem
->src_format
);
425 aos_to_soa(LLVMBuilderRef builder
,
430 LLVMValueRef channel
)
432 LLVMValueRef ex
, res
;
434 ex
= LLVMBuildExtractElement(builder
, val0
,
436 res
= LLVMBuildInsertElement(builder
,
437 LLVMConstNull(LLVMTypeOf(val0
)),
439 LLVMConstInt(LLVMInt32Type(), 0, 0),
442 ex
= LLVMBuildExtractElement(builder
, val1
,
444 res
= LLVMBuildInsertElement(builder
,
446 LLVMConstInt(LLVMInt32Type(), 1, 0),
449 ex
= LLVMBuildExtractElement(builder
, val2
,
451 res
= LLVMBuildInsertElement(builder
,
453 LLVMConstInt(LLVMInt32Type(), 2, 0),
456 ex
= LLVMBuildExtractElement(builder
, val3
,
458 res
= LLVMBuildInsertElement(builder
,
460 LLVMConstInt(LLVMInt32Type(), 3, 0),
467 soa_to_aos(LLVMBuilderRef builder
,
468 LLVMValueRef soa
[NUM_CHANNELS
],
469 LLVMValueRef aos
[NUM_CHANNELS
])
474 debug_assert(NUM_CHANNELS
== 4);
476 aos
[0] = LLVMConstNull(LLVMTypeOf(soa
[0]));
477 aos
[1] = aos
[2] = aos
[3] = aos
[0];
479 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
480 LLVMValueRef channel
= LLVMConstInt(LLVMInt32Type(), i
, 0);
482 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
483 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
484 aos
[0] = LLVMBuildInsertElement(builder
, aos
[0], comp
, channel
, "");
486 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
487 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
488 aos
[1] = LLVMBuildInsertElement(builder
, aos
[1], comp
, channel
, "");
490 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
491 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
492 aos
[2] = LLVMBuildInsertElement(builder
, aos
[2], comp
, channel
, "");
494 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
495 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
496 aos
[3] = LLVMBuildInsertElement(builder
, aos
[3], comp
, channel
, "");
502 convert_to_soa(LLVMBuilderRef builder
,
503 LLVMValueRef (*aos
)[NUM_CHANNELS
],
504 LLVMValueRef (*soa
)[NUM_CHANNELS
],
509 debug_assert(NUM_CHANNELS
== 4);
511 for (i
= 0; i
< num_attribs
; ++i
) {
512 LLVMValueRef val0
= aos
[i
][0];
513 LLVMValueRef val1
= aos
[i
][1];
514 LLVMValueRef val2
= aos
[i
][2];
515 LLVMValueRef val3
= aos
[i
][3];
517 soa
[i
][0] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
518 LLVMConstInt(LLVMInt32Type(), 0, 0));
519 soa
[i
][1] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
520 LLVMConstInt(LLVMInt32Type(), 1, 0));
521 soa
[i
][2] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
522 LLVMConstInt(LLVMInt32Type(), 2, 0));
523 soa
[i
][3] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
524 LLVMConstInt(LLVMInt32Type(), 3, 0));
529 store_aos(LLVMBuilderRef builder
,
534 LLVMValueRef id_ptr
= draw_jit_header_id(builder
, io_ptr
);
535 LLVMValueRef data_ptr
= draw_jit_header_data(builder
, io_ptr
);
536 LLVMValueRef indices
[3];
538 indices
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
540 indices
[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
542 /* undefined vertex */
543 LLVMBuildStore(builder
, LLVMConstInt(LLVMInt32Type(),
547 lp_build_printf(builder
, " ---- %p storing attribute %d (io = %p)\n", data_ptr
, index
, io_ptr
);
550 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr);
551 print_vectorf(builder, value);*/
552 data_ptr
= LLVMBuildBitCast(builder
, data_ptr
,
553 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
555 data_ptr
= LLVMBuildGEP(builder
, data_ptr
, indices
, 2, "");
557 LLVMBuildStore(builder
, value
, data_ptr
);
560 LLVMValueRef x
, y
, z
, w
;
561 LLVMValueRef idx0
, idx1
, idx2
, idx3
;
562 LLVMValueRef gep0
, gep1
, gep2
, gep3
;
563 data_ptr
= LLVMBuildGEP(builder
, data_ptr
, indices
, 3, "");
565 idx0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
566 idx1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
567 idx2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
568 idx3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
570 x
= LLVMBuildExtractElement(builder
, value
,
572 y
= LLVMBuildExtractElement(builder
, value
,
574 z
= LLVMBuildExtractElement(builder
, value
,
576 w
= LLVMBuildExtractElement(builder
, value
,
579 gep0
= LLVMBuildGEP(builder
, data_ptr
, &idx0
, 1, "");
580 gep1
= LLVMBuildGEP(builder
, data_ptr
, &idx1
, 1, "");
581 gep2
= LLVMBuildGEP(builder
, data_ptr
, &idx2
, 1, "");
582 gep3
= LLVMBuildGEP(builder
, data_ptr
, &idx3
, 1, "");
584 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
585 x, gep0, y, gep1, z, gep2, w, gep3);*/
586 LLVMBuildStore(builder
, x
, gep0
);
587 LLVMBuildStore(builder
, y
, gep1
);
588 LLVMBuildStore(builder
, z
, gep2
);
589 LLVMBuildStore(builder
, w
, gep3
);
595 store_aos_array(LLVMBuilderRef builder
,
597 LLVMValueRef aos
[NUM_CHANNELS
],
601 LLVMValueRef attr_index
= LLVMConstInt(LLVMInt32Type(), attrib
, 0);
602 LLVMValueRef ind0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
603 LLVMValueRef ind1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
604 LLVMValueRef ind2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
605 LLVMValueRef ind3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
606 LLVMValueRef io0_ptr
, io1_ptr
, io2_ptr
, io3_ptr
;
608 debug_assert(NUM_CHANNELS
== 4);
610 io0_ptr
= LLVMBuildGEP(builder
, io_ptr
,
612 io1_ptr
= LLVMBuildGEP(builder
, io_ptr
,
614 io2_ptr
= LLVMBuildGEP(builder
, io_ptr
,
616 io3_ptr
= LLVMBuildGEP(builder
, io_ptr
,
620 lp_build_printf(builder
, " io = %p, indexes[%d, %d, %d, %d]\n",
621 io_ptr
, ind0
, ind1
, ind2
, ind3
);
624 store_aos(builder
, io0_ptr
, attr_index
, aos
[0]);
625 store_aos(builder
, io1_ptr
, attr_index
, aos
[1]);
626 store_aos(builder
, io2_ptr
, attr_index
, aos
[2]);
627 store_aos(builder
, io3_ptr
, attr_index
, aos
[3]);
631 convert_to_aos(LLVMBuilderRef builder
,
633 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
637 unsigned chan
, attrib
;
640 lp_build_printf(builder
, " # storing begin\n");
642 for (attrib
= 0; attrib
< num_outputs
; ++attrib
) {
645 for(chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
646 if(outputs
[attrib
][chan
]) {
647 LLVMValueRef out
= LLVMBuildLoad(builder
, outputs
[attrib
][chan
], "");
648 lp_build_name(out
, "output%u.%c", attrib
, "xyzw"[chan
]);
649 /*lp_build_printf(builder, "output %d : %d ",
650 LLVMConstInt(LLVMInt32Type(), attrib, 0),
651 LLVMConstInt(LLVMInt32Type(), chan, 0));
652 print_vectorf(builder, out);*/
657 soa_to_aos(builder
, soa
, aos
);
658 store_aos_array(builder
,
665 lp_build_printf(builder
, " # storing end\n");
670 draw_llvm_generate(struct draw_llvm
*llvm
, struct draw_llvm_variant
*variant
)
672 LLVMTypeRef arg_types
[8];
673 LLVMTypeRef func_type
;
674 LLVMValueRef context_ptr
;
675 LLVMBasicBlockRef block
;
676 LLVMBuilderRef builder
;
677 LLVMValueRef start
, end
, count
, stride
, step
, io_itr
;
678 LLVMValueRef io_ptr
, vbuffers_ptr
, vb_ptr
;
679 LLVMValueRef instance_id
;
680 struct draw_context
*draw
= llvm
->draw
;
682 struct lp_build_context bld
;
683 struct lp_build_loop_state lp_loop
;
684 struct lp_type vs_type
= lp_type_float_vec(32);
685 const int max_vertices
= 4;
686 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][NUM_CHANNELS
];
688 struct lp_build_sampler_soa
*sampler
= 0;
690 arg_types
[0] = llvm
->context_ptr_type
; /* context */
691 arg_types
[1] = llvm
->vertex_header_ptr_type
; /* vertex_header */
692 arg_types
[2] = llvm
->buffer_ptr_type
; /* vbuffers */
693 arg_types
[3] = LLVMInt32Type(); /* start */
694 arg_types
[4] = LLVMInt32Type(); /* count */
695 arg_types
[5] = LLVMInt32Type(); /* stride */
696 arg_types
[6] = llvm
->vb_ptr_type
; /* pipe_vertex_buffer's */
697 arg_types
[7] = LLVMInt32Type(); /* instance_id */
699 func_type
= LLVMFunctionType(LLVMVoidType(), arg_types
, Elements(arg_types
), 0);
701 variant
->function
= LLVMAddFunction(llvm
->module
, "draw_llvm_shader", func_type
);
702 LLVMSetFunctionCallConv(variant
->function
, LLVMCCallConv
);
703 for(i
= 0; i
< Elements(arg_types
); ++i
)
704 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
705 LLVMAddAttribute(LLVMGetParam(variant
->function
, i
), LLVMNoAliasAttribute
);
707 context_ptr
= LLVMGetParam(variant
->function
, 0);
708 io_ptr
= LLVMGetParam(variant
->function
, 1);
709 vbuffers_ptr
= LLVMGetParam(variant
->function
, 2);
710 start
= LLVMGetParam(variant
->function
, 3);
711 count
= LLVMGetParam(variant
->function
, 4);
712 stride
= LLVMGetParam(variant
->function
, 5);
713 vb_ptr
= LLVMGetParam(variant
->function
, 6);
714 instance_id
= LLVMGetParam(variant
->function
, 7);
716 lp_build_name(context_ptr
, "context");
717 lp_build_name(io_ptr
, "io");
718 lp_build_name(vbuffers_ptr
, "vbuffers");
719 lp_build_name(start
, "start");
720 lp_build_name(count
, "count");
721 lp_build_name(stride
, "stride");
722 lp_build_name(vb_ptr
, "vb");
723 lp_build_name(instance_id
, "instance_id");
729 block
= LLVMAppendBasicBlock(variant
->function
, "entry");
730 builder
= LLVMCreateBuilder();
731 LLVMPositionBuilderAtEnd(builder
, block
);
733 lp_build_context_init(&bld
, builder
, vs_type
);
735 end
= lp_build_add(&bld
, start
, count
);
737 step
= LLVMConstInt(LLVMInt32Type(), max_vertices
, 0);
739 /* code generated texture sampling */
740 sampler
= draw_llvm_sampler_soa_create(variant
->key
.sampler
,
744 lp_build_printf(builder
, "start = %d, end = %d, step = %d\n",
747 lp_build_loop_begin(builder
, start
, &lp_loop
);
749 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
];
750 LLVMValueRef aos_attribs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
] = { { 0 } };
752 const LLVMValueRef (*ptr_aos
)[NUM_CHANNELS
];
754 io_itr
= LLVMBuildSub(builder
, lp_loop
.counter
, start
, "");
755 io
= LLVMBuildGEP(builder
, io_ptr
, &io_itr
, 1, "");
757 lp_build_printf(builder
, " --- io %d = %p, loop counter %d\n",
758 io_itr
, io
, lp_loop
.counter
);
760 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
761 LLVMValueRef true_index
= LLVMBuildAdd(
764 LLVMConstInt(LLVMInt32Type(), i
, 0), "");
765 for (j
= 0; j
< draw
->pt
.nr_vertex_elements
; ++j
) {
766 struct pipe_vertex_element
*velem
= &draw
->pt
.vertex_element
[j
];
767 LLVMValueRef vb_index
= LLVMConstInt(LLVMInt32Type(),
768 velem
->vertex_buffer_index
,
770 LLVMValueRef vb
= LLVMBuildGEP(builder
, vb_ptr
,
772 generate_fetch(builder
, vbuffers_ptr
,
773 &aos_attribs
[j
][i
], velem
, vb
, true_index
,
777 convert_to_soa(builder
, aos_attribs
, inputs
,
778 draw
->pt
.nr_vertex_elements
);
780 ptr_aos
= (const LLVMValueRef (*)[NUM_CHANNELS
]) inputs
;
788 convert_to_aos(builder
, io
, outputs
,
789 draw
->vs
.vertex_shader
->info
.num_outputs
,
792 lp_build_loop_end_cond(builder
, end
, step
, LLVMIntUGE
, &lp_loop
);
794 sampler
->destroy(sampler
);
796 LLVMBuildRetVoid(builder
);
798 LLVMDisposeBuilder(builder
);
801 * Translate the LLVM IR into machine code.
804 if(LLVMVerifyFunction(variant
->function
, LLVMPrintMessageAction
)) {
805 lp_debug_dump_value(variant
->function
);
810 LLVMRunFunctionPassManager(llvm
->pass
, variant
->function
);
812 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
813 lp_debug_dump_value(variant
->function
);
817 code
= LLVMGetPointerToGlobal(llvm
->draw
->engine
, variant
->function
);
818 variant
->jit_func
= (draw_jit_vert_func
)pointer_to_func(code
);
820 if (gallivm_debug
& GALLIVM_DEBUG_ASM
) {
821 lp_disassemble(code
);
827 draw_llvm_generate_elts(struct draw_llvm
*llvm
, struct draw_llvm_variant
*variant
)
829 LLVMTypeRef arg_types
[8];
830 LLVMTypeRef func_type
;
831 LLVMValueRef context_ptr
;
832 LLVMBasicBlockRef block
;
833 LLVMBuilderRef builder
;
834 LLVMValueRef fetch_elts
, fetch_count
, stride
, step
, io_itr
;
835 LLVMValueRef io_ptr
, vbuffers_ptr
, vb_ptr
;
836 LLVMValueRef instance_id
;
837 struct draw_context
*draw
= llvm
->draw
;
839 struct lp_build_context bld
;
840 struct lp_build_context bld_int
;
841 struct lp_build_loop_state lp_loop
;
842 struct lp_type vs_type
= lp_type_float_vec(32);
843 const int max_vertices
= 4;
844 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][NUM_CHANNELS
];
845 LLVMValueRef fetch_max
;
847 struct lp_build_sampler_soa
*sampler
= 0;
849 arg_types
[0] = llvm
->context_ptr_type
; /* context */
850 arg_types
[1] = llvm
->vertex_header_ptr_type
; /* vertex_header */
851 arg_types
[2] = llvm
->buffer_ptr_type
; /* vbuffers */
852 arg_types
[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
853 arg_types
[4] = LLVMInt32Type(); /* fetch_count */
854 arg_types
[5] = LLVMInt32Type(); /* stride */
855 arg_types
[6] = llvm
->vb_ptr_type
; /* pipe_vertex_buffer's */
856 arg_types
[7] = LLVMInt32Type(); /* instance_id */
858 func_type
= LLVMFunctionType(LLVMVoidType(), arg_types
, Elements(arg_types
), 0);
860 variant
->function_elts
= LLVMAddFunction(llvm
->module
, "draw_llvm_shader_elts",
862 LLVMSetFunctionCallConv(variant
->function_elts
, LLVMCCallConv
);
863 for(i
= 0; i
< Elements(arg_types
); ++i
)
864 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
865 LLVMAddAttribute(LLVMGetParam(variant
->function_elts
, i
),
866 LLVMNoAliasAttribute
);
868 context_ptr
= LLVMGetParam(variant
->function_elts
, 0);
869 io_ptr
= LLVMGetParam(variant
->function_elts
, 1);
870 vbuffers_ptr
= LLVMGetParam(variant
->function_elts
, 2);
871 fetch_elts
= LLVMGetParam(variant
->function_elts
, 3);
872 fetch_count
= LLVMGetParam(variant
->function_elts
, 4);
873 stride
= LLVMGetParam(variant
->function_elts
, 5);
874 vb_ptr
= LLVMGetParam(variant
->function_elts
, 6);
875 instance_id
= LLVMGetParam(variant
->function_elts
, 7);
877 lp_build_name(context_ptr
, "context");
878 lp_build_name(io_ptr
, "io");
879 lp_build_name(vbuffers_ptr
, "vbuffers");
880 lp_build_name(fetch_elts
, "fetch_elts");
881 lp_build_name(fetch_count
, "fetch_count");
882 lp_build_name(stride
, "stride");
883 lp_build_name(vb_ptr
, "vb");
884 lp_build_name(instance_id
, "instance_id");
890 block
= LLVMAppendBasicBlock(variant
->function_elts
, "entry");
891 builder
= LLVMCreateBuilder();
892 LLVMPositionBuilderAtEnd(builder
, block
);
894 lp_build_context_init(&bld
, builder
, vs_type
);
895 lp_build_context_init(&bld_int
, builder
, lp_type_int(32));
897 step
= LLVMConstInt(LLVMInt32Type(), max_vertices
, 0);
899 /* code generated texture sampling */
900 sampler
= draw_llvm_sampler_soa_create(variant
->key
.sampler
,
903 fetch_max
= LLVMBuildSub(builder
, fetch_count
,
904 LLVMConstInt(LLVMInt32Type(), 1, 0),
907 lp_build_loop_begin(builder
, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop
);
909 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
];
910 LLVMValueRef aos_attribs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
] = { { 0 } };
912 const LLVMValueRef (*ptr_aos
)[NUM_CHANNELS
];
914 io_itr
= lp_loop
.counter
;
915 io
= LLVMBuildGEP(builder
, io_ptr
, &io_itr
, 1, "");
917 lp_build_printf(builder
, " --- io %d = %p, loop counter %d\n",
918 io_itr
, io
, lp_loop
.counter
);
920 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
921 LLVMValueRef true_index
= LLVMBuildAdd(
924 LLVMConstInt(LLVMInt32Type(), i
, 0), "");
925 LLVMValueRef fetch_ptr
;
927 /* make sure we're not out of bounds which can happen
928 * if fetch_count % 4 != 0, because on the last iteration
929 * a few of the 4 vertex fetches will be out of bounds */
930 true_index
= lp_build_min(&bld_int
, true_index
, fetch_max
);
932 fetch_ptr
= LLVMBuildGEP(builder
, fetch_elts
,
934 true_index
= LLVMBuildLoad(builder
, fetch_ptr
, "fetch_elt");
935 for (j
= 0; j
< draw
->pt
.nr_vertex_elements
; ++j
) {
936 struct pipe_vertex_element
*velem
= &draw
->pt
.vertex_element
[j
];
937 LLVMValueRef vb_index
= LLVMConstInt(LLVMInt32Type(),
938 velem
->vertex_buffer_index
,
940 LLVMValueRef vb
= LLVMBuildGEP(builder
, vb_ptr
,
942 generate_fetch(builder
, vbuffers_ptr
,
943 &aos_attribs
[j
][i
], velem
, vb
, true_index
,
947 convert_to_soa(builder
, aos_attribs
, inputs
,
948 draw
->pt
.nr_vertex_elements
);
950 ptr_aos
= (const LLVMValueRef (*)[NUM_CHANNELS
]) inputs
;
958 convert_to_aos(builder
, io
, outputs
,
959 draw
->vs
.vertex_shader
->info
.num_outputs
,
962 lp_build_loop_end_cond(builder
, fetch_count
, step
, LLVMIntUGE
, &lp_loop
);
964 sampler
->destroy(sampler
);
966 LLVMBuildRetVoid(builder
);
968 LLVMDisposeBuilder(builder
);
971 * Translate the LLVM IR into machine code.
974 if(LLVMVerifyFunction(variant
->function_elts
, LLVMPrintMessageAction
)) {
975 lp_debug_dump_value(variant
->function_elts
);
980 LLVMRunFunctionPassManager(llvm
->pass
, variant
->function_elts
);
982 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
983 lp_debug_dump_value(variant
->function_elts
);
987 code
= LLVMGetPointerToGlobal(llvm
->draw
->engine
, variant
->function_elts
);
988 variant
->jit_func_elts
= (draw_jit_vert_func_elts
)pointer_to_func(code
);
990 if (gallivm_debug
& GALLIVM_DEBUG_ASM
) {
991 lp_disassemble(code
);
996 draw_llvm_make_variant_key(struct draw_llvm
*llvm
,
997 struct draw_llvm_variant_key
*key
)
1001 memset(key
, 0, sizeof(struct draw_llvm_variant_key
));
1003 key
->nr_vertex_elements
= llvm
->draw
->pt
.nr_vertex_elements
;
1005 memcpy(key
->vertex_element
,
1006 llvm
->draw
->pt
.vertex_element
,
1007 sizeof(struct pipe_vertex_element
) * key
->nr_vertex_elements
);
1010 &llvm
->draw
->vs
.vertex_shader
->state
,
1011 sizeof(struct pipe_shader_state
));
1013 /* if the driver implemented the sampling hooks then
1014 * setup our sampling state */
1015 if (llvm
->draw
->num_sampler_views
&& llvm
->draw
->num_samplers
) {
1016 for(i
= 0; i
< PIPE_MAX_VERTEX_SAMPLERS
; ++i
) {
1017 struct draw_vertex_shader
*shader
= llvm
->draw
->vs
.vertex_shader
;
1018 if(shader
->info
.file_mask
[TGSI_FILE_SAMPLER
] & (1 << i
))
1019 lp_sampler_static_state(&key
->sampler
[i
],
1020 llvm
->draw
->sampler_views
[i
],
1021 llvm
->draw
->samplers
[i
]);
1027 draw_llvm_set_mapped_texture(struct draw_context
*draw
,
1028 unsigned sampler_idx
,
1029 uint32_t width
, uint32_t height
, uint32_t depth
,
1030 uint32_t last_level
,
1031 uint32_t row_stride
[DRAW_MAX_TEXTURE_LEVELS
],
1032 uint32_t img_stride
[DRAW_MAX_TEXTURE_LEVELS
],
1033 const void *data
[DRAW_MAX_TEXTURE_LEVELS
])
1036 struct draw_jit_texture
*jit_tex
;
1038 assert(sampler_idx
< PIPE_MAX_VERTEX_SAMPLERS
);
1041 jit_tex
= &draw
->llvm
->jit_context
.textures
[sampler_idx
];
1043 jit_tex
->width
= width
;
1044 jit_tex
->height
= height
;
1045 jit_tex
->depth
= depth
;
1046 jit_tex
->last_level
= last_level
;
1048 for (j
= 0; j
<= last_level
; j
++) {
1049 jit_tex
->data
[j
] = data
[j
];
1050 jit_tex
->row_stride
[j
] = row_stride
[j
];
1051 jit_tex
->img_stride
[j
] = img_stride
[j
];
1056 draw_llvm_destroy_variant(struct draw_llvm_variant
*variant
)
1058 struct draw_llvm
*llvm
= variant
->llvm
;
1059 struct draw_context
*draw
= llvm
->draw
;
1061 if (variant
->function_elts
) {
1062 if (variant
->function_elts
)
1063 LLVMFreeMachineCodeForFunction(draw
->engine
,
1064 variant
->function_elts
);
1065 LLVMDeleteFunction(variant
->function_elts
);
1068 if (variant
->function
) {
1069 if (variant
->function
)
1070 LLVMFreeMachineCodeForFunction(draw
->engine
,
1072 LLVMDeleteFunction(variant
->function
);
1075 remove_from_list(&variant
->list_item_local
);
1076 variant
->shader
->variants_cached
--;
1077 remove_from_list(&variant
->list_item_global
);
1078 llvm
->nr_variants
--;