1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "draw_llvm.h"
30 #include "draw_context.h"
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_logic.h"
35 #include "gallivm/lp_bld_const.h"
36 #include "gallivm/lp_bld_swizzle.h"
37 #include "gallivm/lp_bld_struct.h"
38 #include "gallivm/lp_bld_type.h"
39 #include "gallivm/lp_bld_flow.h"
40 #include "gallivm/lp_bld_debug.h"
41 #include "gallivm/lp_bld_tgsi.h"
42 #include "gallivm/lp_bld_printf.h"
43 #include "gallivm/lp_bld_intr.h"
44 #include "gallivm/lp_bld_init.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
49 #include "util/u_cpu_detect.h"
50 #include "util/u_pointer.h"
51 #include "util/u_string.h"
53 #include <llvm-c/Transforms/Scalar.h>
57 /* generates the draw jit function */
59 draw_llvm_generate(struct draw_llvm
*llvm
, struct draw_llvm_variant
*var
);
61 draw_llvm_generate_elts(struct draw_llvm
*llvm
, struct draw_llvm_variant
*var
);
64 init_globals(struct draw_llvm
*llvm
)
66 LLVMTypeRef texture_type
;
68 /* struct draw_jit_texture */
70 LLVMTypeRef elem_types
[DRAW_JIT_TEXTURE_NUM_FIELDS
];
72 elem_types
[DRAW_JIT_TEXTURE_WIDTH
] = LLVMInt32Type();
73 elem_types
[DRAW_JIT_TEXTURE_HEIGHT
] = LLVMInt32Type();
74 elem_types
[DRAW_JIT_TEXTURE_DEPTH
] = LLVMInt32Type();
75 elem_types
[DRAW_JIT_TEXTURE_LAST_LEVEL
] = LLVMInt32Type();
76 elem_types
[DRAW_JIT_TEXTURE_ROW_STRIDE
] =
77 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS
);
78 elem_types
[DRAW_JIT_TEXTURE_IMG_STRIDE
] =
79 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS
);
80 elem_types
[DRAW_JIT_TEXTURE_DATA
] =
81 LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
82 DRAW_MAX_TEXTURE_LEVELS
);
83 elem_types
[DRAW_JIT_TEXTURE_MIN_LOD
] = LLVMFloatType();
84 elem_types
[DRAW_JIT_TEXTURE_MAX_LOD
] = LLVMFloatType();
85 elem_types
[DRAW_JIT_TEXTURE_LOD_BIAS
] = LLVMFloatType();
86 elem_types
[DRAW_JIT_TEXTURE_BORDER_COLOR
] =
87 LLVMArrayType(LLVMFloatType(), 4);
89 texture_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
91 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, width
,
92 llvm
->target
, texture_type
,
93 DRAW_JIT_TEXTURE_WIDTH
);
94 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, height
,
95 llvm
->target
, texture_type
,
96 DRAW_JIT_TEXTURE_HEIGHT
);
97 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, depth
,
98 llvm
->target
, texture_type
,
99 DRAW_JIT_TEXTURE_DEPTH
);
100 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, last_level
,
101 llvm
->target
, texture_type
,
102 DRAW_JIT_TEXTURE_LAST_LEVEL
);
103 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, row_stride
,
104 llvm
->target
, texture_type
,
105 DRAW_JIT_TEXTURE_ROW_STRIDE
);
106 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, img_stride
,
107 llvm
->target
, texture_type
,
108 DRAW_JIT_TEXTURE_IMG_STRIDE
);
109 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, data
,
110 llvm
->target
, texture_type
,
111 DRAW_JIT_TEXTURE_DATA
);
112 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, min_lod
,
113 llvm
->target
, texture_type
,
114 DRAW_JIT_TEXTURE_MIN_LOD
);
115 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, max_lod
,
116 llvm
->target
, texture_type
,
117 DRAW_JIT_TEXTURE_MAX_LOD
);
118 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, lod_bias
,
119 llvm
->target
, texture_type
,
120 DRAW_JIT_TEXTURE_LOD_BIAS
);
121 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture
, border_color
,
122 llvm
->target
, texture_type
,
123 DRAW_JIT_TEXTURE_BORDER_COLOR
);
124 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture
,
125 llvm
->target
, texture_type
);
127 LLVMAddTypeName(llvm
->module
, "texture", texture_type
);
131 /* struct draw_jit_context */
133 LLVMTypeRef elem_types
[4];
134 LLVMTypeRef context_type
;
136 elem_types
[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
137 elem_types
[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */
138 elem_types
[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */
139 elem_types
[3] = LLVMArrayType(texture_type
,
140 PIPE_MAX_VERTEX_SAMPLERS
); /* textures */
142 context_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
144 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, vs_constants
,
145 llvm
->target
, context_type
, 0);
146 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, gs_constants
,
147 llvm
->target
, context_type
, 1);
148 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, planes
,
149 llvm
->target
, context_type
, 2);
150 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context
, textures
,
151 llvm
->target
, context_type
,
152 DRAW_JIT_CTX_TEXTURES
);
153 LP_CHECK_STRUCT_SIZE(struct draw_jit_context
,
154 llvm
->target
, context_type
);
156 LLVMAddTypeName(llvm
->module
, "draw_jit_context", context_type
);
158 llvm
->context_ptr_type
= LLVMPointerType(context_type
, 0);
161 LLVMTypeRef buffer_ptr
= LLVMPointerType(LLVMIntType(8), 0);
162 llvm
->buffer_ptr_type
= LLVMPointerType(buffer_ptr
, 0);
164 /* struct pipe_vertex_buffer */
166 LLVMTypeRef elem_types
[4];
169 elem_types
[0] = LLVMInt32Type();
170 elem_types
[1] = LLVMInt32Type();
171 elem_types
[2] = LLVMInt32Type();
172 elem_types
[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
174 vb_type
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
176 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer
, stride
,
177 llvm
->target
, vb_type
, 0);
178 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer
, buffer_offset
,
179 llvm
->target
, vb_type
, 2);
180 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer
,
181 llvm
->target
, vb_type
);
183 LLVMAddTypeName(llvm
->module
, "pipe_vertex_buffer", vb_type
);
185 llvm
->vb_ptr_type
= LLVMPointerType(vb_type
, 0);
190 create_vertex_header(struct draw_llvm
*llvm
, int data_elems
)
192 /* struct vertex_header */
193 LLVMTypeRef elem_types
[3];
194 LLVMTypeRef vertex_header
;
195 char struct_name
[24];
197 util_snprintf(struct_name
, 23, "vertex_header%d", data_elems
);
199 elem_types
[0] = LLVMIntType(32);
200 elem_types
[1] = LLVMArrayType(LLVMFloatType(), 4);
201 elem_types
[2] = LLVMArrayType(elem_types
[1], data_elems
);
203 vertex_header
= LLVMStructType(elem_types
, Elements(elem_types
), 0);
205 /* these are bit-fields and we can't take address of them
206 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
207 llvm->target, vertex_header,
208 DRAW_JIT_VERTEX_CLIPMASK);
209 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
210 llvm->target, vertex_header,
211 DRAW_JIT_VERTEX_EDGEFLAG);
212 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
213 llvm->target, vertex_header,
214 DRAW_JIT_VERTEX_PAD);
215 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
216 llvm->target, vertex_header,
217 DRAW_JIT_VERTEX_VERTEX_ID);
219 LP_CHECK_MEMBER_OFFSET(struct vertex_header
, clip
,
220 llvm
->target
, vertex_header
,
221 DRAW_JIT_VERTEX_CLIP
);
222 LP_CHECK_MEMBER_OFFSET(struct vertex_header
, data
,
223 llvm
->target
, vertex_header
,
224 DRAW_JIT_VERTEX_DATA
);
226 LLVMAddTypeName(llvm
->module
, struct_name
, vertex_header
);
228 return LLVMPointerType(vertex_header
, 0);
232 draw_llvm_create(struct draw_context
*draw
)
234 struct draw_llvm
*llvm
;
236 llvm
= CALLOC_STRUCT( draw_llvm
);
241 llvm
->engine
= draw
->engine
;
243 debug_assert(llvm
->engine
);
245 llvm
->module
= LLVMModuleCreateWithName("draw_llvm");
246 llvm
->provider
= LLVMCreateModuleProviderForExistingModule(llvm
->module
);
248 LLVMAddModuleProvider(llvm
->engine
, llvm
->provider
);
250 llvm
->target
= LLVMGetExecutionEngineTargetData(llvm
->engine
);
252 llvm
->pass
= LLVMCreateFunctionPassManager(llvm
->provider
);
253 LLVMAddTargetData(llvm
->target
, llvm
->pass
);
255 if ((gallivm_debug
& GALLIVM_DEBUG_NO_OPT
) == 0) {
256 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
257 * but there are more on SVN. */
258 /* TODO: Add more passes */
260 LLVMAddCFGSimplificationPass(llvm
->pass
);
262 if (HAVE_LLVM
>= 0x207 && sizeof(void*) == 4) {
263 /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
264 * avoid generating bad code.
265 * Test with piglit glsl-vs-sqrt-zero test.
267 LLVMAddConstantPropagationPass(llvm
->pass
);
268 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
271 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
272 LLVMAddConstantPropagationPass(llvm
->pass
);
275 if(util_cpu_caps
.has_sse4_1
) {
276 /* FIXME: There is a bug in this pass, whereby the combination of fptosi
277 * and sitofp (necessary for trunc/floor/ceil/round implementation)
278 * somehow becomes invalid code.
280 LLVMAddInstructionCombiningPass(llvm
->pass
);
282 LLVMAddGVNPass(llvm
->pass
);
284 /* We need at least this pass to prevent the backends to fail in
287 LLVMAddPromoteMemoryToRegisterPass(llvm
->pass
);
292 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
293 LLVMDumpModule(llvm
->module
);
296 llvm
->nr_variants
= 0;
297 make_empty_list(&llvm
->vs_variants_list
);
303 draw_llvm_destroy(struct draw_llvm
*llvm
)
305 LLVMDisposePassManager(llvm
->pass
);
310 struct draw_llvm_variant
*
311 draw_llvm_create_variant(struct draw_llvm
*llvm
,
313 const struct draw_llvm_variant_key
*key
)
315 struct draw_llvm_variant
*variant
;
316 struct llvm_vertex_shader
*shader
=
317 llvm_vertex_shader(llvm
->draw
->vs
.vertex_shader
);
319 variant
= MALLOC(sizeof *variant
+
320 shader
->variant_key_size
-
321 sizeof variant
->key
);
325 variant
->llvm
= llvm
;
327 memcpy(&variant
->key
, key
, shader
->variant_key_size
);
329 llvm
->vertex_header_ptr_type
= create_vertex_header(llvm
, num_inputs
);
331 draw_llvm_generate(llvm
, variant
);
332 draw_llvm_generate_elts(llvm
, variant
);
334 variant
->shader
= shader
;
335 variant
->list_item_global
.base
= variant
;
336 variant
->list_item_local
.base
= variant
;
337 /*variant->no = */shader
->variants_created
++;
338 variant
->list_item_global
.base
= variant
;
344 generate_vs(struct draw_llvm
*llvm
,
345 LLVMBuilderRef builder
,
346 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
347 const LLVMValueRef (*inputs
)[NUM_CHANNELS
],
348 LLVMValueRef context_ptr
,
349 struct lp_build_sampler_soa
*draw_sampler
)
351 const struct tgsi_token
*tokens
= llvm
->draw
->vs
.vertex_shader
->state
.tokens
;
352 struct lp_type vs_type
;
353 LLVMValueRef consts_ptr
= draw_jit_context_vs_constants(builder
, context_ptr
);
354 struct lp_build_sampler_soa
*sampler
= 0;
356 memset(&vs_type
, 0, sizeof vs_type
);
357 vs_type
.floating
= TRUE
; /* floating point values */
358 vs_type
.sign
= TRUE
; /* values are signed */
359 vs_type
.norm
= FALSE
; /* values are not limited to [0,1] or [-1,1] */
360 vs_type
.width
= 32; /* 32-bit float */
361 vs_type
.length
= 4; /* 4 elements per vector */
363 num_vs
= 4; /* number of vertices per block */
366 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
367 tgsi_dump(tokens
, 0);
370 if (llvm
->draw
->num_sampler_views
&&
371 llvm
->draw
->num_samplers
)
372 sampler
= draw_sampler
;
374 lp_build_tgsi_soa(builder
,
377 NULL
/*struct lp_build_mask_context *mask*/,
383 &llvm
->draw
->vs
.vertex_shader
->info
);
387 static void print_vectorf(LLVMBuilderRef builder
,
391 val
[0] = LLVMBuildExtractElement(builder
, vec
,
392 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
393 val
[1] = LLVMBuildExtractElement(builder
, vec
,
394 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
395 val
[2] = LLVMBuildExtractElement(builder
, vec
,
396 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
397 val
[3] = LLVMBuildExtractElement(builder
, vec
,
398 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
399 lp_build_printf(builder
, "vector = [%f, %f, %f, %f]\n",
400 val
[0], val
[1], val
[2], val
[3]);
405 generate_fetch(LLVMBuilderRef builder
,
406 LLVMValueRef vbuffers_ptr
,
408 struct pipe_vertex_element
*velem
,
411 LLVMValueRef instance_id
)
413 LLVMValueRef indices
= LLVMConstInt(LLVMInt64Type(), velem
->vertex_buffer_index
, 0);
414 LLVMValueRef vbuffer_ptr
= LLVMBuildGEP(builder
, vbuffers_ptr
,
416 LLVMValueRef vb_stride
= draw_jit_vbuffer_stride(builder
, vbuf
);
417 LLVMValueRef vb_max_index
= draw_jit_vbuffer_max_index(builder
, vbuf
);
418 LLVMValueRef vb_buffer_offset
= draw_jit_vbuffer_offset(builder
, vbuf
);
422 if (velem
->instance_divisor
) {
423 /* array index = instance_id / instance_divisor */
424 index
= LLVMBuildUDiv(builder
, instance_id
,
425 LLVMConstInt(LLVMInt32Type(), velem
->instance_divisor
, 0),
429 /* limit index to min(index, vb_max_index) */
430 cond
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, vb_max_index
, "");
431 index
= LLVMBuildSelect(builder
, cond
, index
, vb_max_index
, "");
433 stride
= LLVMBuildMul(builder
, vb_stride
, index
, "");
435 vbuffer_ptr
= LLVMBuildLoad(builder
, vbuffer_ptr
, "vbuffer");
437 stride
= LLVMBuildAdd(builder
, stride
,
440 stride
= LLVMBuildAdd(builder
, stride
,
441 LLVMConstInt(LLVMInt32Type(), velem
->src_offset
, 0),
444 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
445 vbuffer_ptr
= LLVMBuildGEP(builder
, vbuffer_ptr
, &stride
, 1, "");
447 *res
= draw_llvm_translate_from(builder
, vbuffer_ptr
, velem
->src_format
);
451 aos_to_soa(LLVMBuilderRef builder
,
456 LLVMValueRef channel
)
458 LLVMValueRef ex
, res
;
460 ex
= LLVMBuildExtractElement(builder
, val0
,
462 res
= LLVMBuildInsertElement(builder
,
463 LLVMConstNull(LLVMTypeOf(val0
)),
465 LLVMConstInt(LLVMInt32Type(), 0, 0),
468 ex
= LLVMBuildExtractElement(builder
, val1
,
470 res
= LLVMBuildInsertElement(builder
,
472 LLVMConstInt(LLVMInt32Type(), 1, 0),
475 ex
= LLVMBuildExtractElement(builder
, val2
,
477 res
= LLVMBuildInsertElement(builder
,
479 LLVMConstInt(LLVMInt32Type(), 2, 0),
482 ex
= LLVMBuildExtractElement(builder
, val3
,
484 res
= LLVMBuildInsertElement(builder
,
486 LLVMConstInt(LLVMInt32Type(), 3, 0),
493 soa_to_aos(LLVMBuilderRef builder
,
494 LLVMValueRef soa
[NUM_CHANNELS
],
495 LLVMValueRef aos
[NUM_CHANNELS
])
500 debug_assert(NUM_CHANNELS
== 4);
502 aos
[0] = LLVMConstNull(LLVMTypeOf(soa
[0]));
503 aos
[1] = aos
[2] = aos
[3] = aos
[0];
505 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
506 LLVMValueRef channel
= LLVMConstInt(LLVMInt32Type(), i
, 0);
508 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
509 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
510 aos
[0] = LLVMBuildInsertElement(builder
, aos
[0], comp
, channel
, "");
512 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
513 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
514 aos
[1] = LLVMBuildInsertElement(builder
, aos
[1], comp
, channel
, "");
516 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
517 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
518 aos
[2] = LLVMBuildInsertElement(builder
, aos
[2], comp
, channel
, "");
520 comp
= LLVMBuildExtractElement(builder
, soa
[i
],
521 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
522 aos
[3] = LLVMBuildInsertElement(builder
, aos
[3], comp
, channel
, "");
528 convert_to_soa(LLVMBuilderRef builder
,
529 LLVMValueRef (*aos
)[NUM_CHANNELS
],
530 LLVMValueRef (*soa
)[NUM_CHANNELS
],
535 debug_assert(NUM_CHANNELS
== 4);
537 for (i
= 0; i
< num_attribs
; ++i
) {
538 LLVMValueRef val0
= aos
[i
][0];
539 LLVMValueRef val1
= aos
[i
][1];
540 LLVMValueRef val2
= aos
[i
][2];
541 LLVMValueRef val3
= aos
[i
][3];
543 soa
[i
][0] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
544 LLVMConstInt(LLVMInt32Type(), 0, 0));
545 soa
[i
][1] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
546 LLVMConstInt(LLVMInt32Type(), 1, 0));
547 soa
[i
][2] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
548 LLVMConstInt(LLVMInt32Type(), 2, 0));
549 soa
[i
][3] = aos_to_soa(builder
, val0
, val1
, val2
, val3
,
550 LLVMConstInt(LLVMInt32Type(), 3, 0));
555 store_aos(LLVMBuilderRef builder
,
559 LLVMValueRef clipmask
)
561 LLVMValueRef id_ptr
= draw_jit_header_id(builder
, io_ptr
);
562 LLVMValueRef data_ptr
= draw_jit_header_data(builder
, io_ptr
);
563 LLVMValueRef indices
[3];
564 LLVMValueRef val
, shift
;
566 indices
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
568 indices
[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
570 /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
571 val
= LLVMConstInt(LLVMInt32Type(), 0xffff1, 0);
572 shift
= LLVMConstInt(LLVMInt32Type(), 12, 0);
573 val
= LLVMBuildShl(builder
, val
, shift
, "");
574 /* add clipmask:12 */
575 val
= LLVMBuildOr(builder
, val
, clipmask
, "");
577 /* store vertex header */
578 LLVMBuildStore(builder
, val
, id_ptr
);
582 lp_build_printf(builder
, " ---- %p storing attribute %d (io = %p)\n", data_ptr
, index
, io_ptr
);
585 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr);
586 print_vectorf(builder, value);*/
587 data_ptr
= LLVMBuildBitCast(builder
, data_ptr
,
588 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
590 data_ptr
= LLVMBuildGEP(builder
, data_ptr
, indices
, 2, "");
592 LLVMBuildStore(builder
, value
, data_ptr
);
595 LLVMValueRef x
, y
, z
, w
;
596 LLVMValueRef idx0
, idx1
, idx2
, idx3
;
597 LLVMValueRef gep0
, gep1
, gep2
, gep3
;
598 data_ptr
= LLVMBuildGEP(builder
, data_ptr
, indices
, 3, "");
600 idx0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
601 idx1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
602 idx2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
603 idx3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
605 x
= LLVMBuildExtractElement(builder
, value
,
607 y
= LLVMBuildExtractElement(builder
, value
,
609 z
= LLVMBuildExtractElement(builder
, value
,
611 w
= LLVMBuildExtractElement(builder
, value
,
614 gep0
= LLVMBuildGEP(builder
, data_ptr
, &idx0
, 1, "");
615 gep1
= LLVMBuildGEP(builder
, data_ptr
, &idx1
, 1, "");
616 gep2
= LLVMBuildGEP(builder
, data_ptr
, &idx2
, 1, "");
617 gep3
= LLVMBuildGEP(builder
, data_ptr
, &idx3
, 1, "");
619 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
620 x, gep0, y, gep1, z, gep2, w, gep3);*/
621 LLVMBuildStore(builder
, x
, gep0
);
622 LLVMBuildStore(builder
, y
, gep1
);
623 LLVMBuildStore(builder
, z
, gep2
);
624 LLVMBuildStore(builder
, w
, gep3
);
630 store_aos_array(LLVMBuilderRef builder
,
632 LLVMValueRef aos
[NUM_CHANNELS
],
635 LLVMValueRef clipmask
)
637 LLVMValueRef attr_index
= LLVMConstInt(LLVMInt32Type(), attrib
, 0);
638 LLVMValueRef ind0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
639 LLVMValueRef ind1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
640 LLVMValueRef ind2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
641 LLVMValueRef ind3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
642 LLVMValueRef io0_ptr
, io1_ptr
, io2_ptr
, io3_ptr
;
643 LLVMValueRef clipmask0
, clipmask1
, clipmask2
, clipmask3
;
645 debug_assert(NUM_CHANNELS
== 4);
647 io0_ptr
= LLVMBuildGEP(builder
, io_ptr
,
649 io1_ptr
= LLVMBuildGEP(builder
, io_ptr
,
651 io2_ptr
= LLVMBuildGEP(builder
, io_ptr
,
653 io3_ptr
= LLVMBuildGEP(builder
, io_ptr
,
656 clipmask0
= LLVMBuildExtractElement(builder
, clipmask
,
658 clipmask1
= LLVMBuildExtractElement(builder
, clipmask
,
660 clipmask2
= LLVMBuildExtractElement(builder
, clipmask
,
662 clipmask3
= LLVMBuildExtractElement(builder
, clipmask
,
666 lp_build_printf(builder
, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
667 io_ptr
, ind0
, ind1
, ind2
, ind3
, clipmask0
, clipmask1
, clipmask2
, clipmask3
);
669 /* store for each of the 4 vertices */
670 store_aos(builder
, io0_ptr
, attr_index
, aos
[0], clipmask0
);
671 store_aos(builder
, io1_ptr
, attr_index
, aos
[1], clipmask1
);
672 store_aos(builder
, io2_ptr
, attr_index
, aos
[2], clipmask2
);
673 store_aos(builder
, io3_ptr
, attr_index
, aos
[3], clipmask3
);
677 convert_to_aos(LLVMBuilderRef builder
,
679 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
680 LLVMValueRef clipmask
,
684 unsigned chan
, attrib
;
687 lp_build_printf(builder
, " # storing begin\n");
689 for (attrib
= 0; attrib
< num_outputs
; ++attrib
) {
692 for(chan
= 0; chan
< NUM_CHANNELS
; ++chan
) {
693 if(outputs
[attrib
][chan
]) {
694 LLVMValueRef out
= LLVMBuildLoad(builder
, outputs
[attrib
][chan
], "");
695 lp_build_name(out
, "output%u.%c", attrib
, "xyzw"[chan
]);
696 /*lp_build_printf(builder, "output %d : %d ",
697 LLVMConstInt(LLVMInt32Type(), attrib, 0),
698 LLVMConstInt(LLVMInt32Type(), chan, 0));
699 print_vectorf(builder, out);*/
704 soa_to_aos(builder
, soa
, aos
);
705 store_aos_array(builder
,
713 lp_build_printf(builder
, " # storing end\n");
718 * Stores original vertex positions in clip coordinates
719 * There is probably a more efficient way to do this, 4 floats at once
720 * rather than extracting each element one by one.
723 store_clip(LLVMBuilderRef builder
,
725 LLVMValueRef (*outputs
)[NUM_CHANNELS
])
728 LLVMValueRef indices
[2];
729 LLVMValueRef io0_ptr
, io1_ptr
, io2_ptr
, io3_ptr
;
730 LLVMValueRef clip_ptr0
, clip_ptr1
, clip_ptr2
, clip_ptr3
;
731 LLVMValueRef clip0_ptr
, clip1_ptr
, clip2_ptr
, clip3_ptr
;
732 LLVMValueRef out0elem
, out1elem
, out2elem
, out3elem
;
734 LLVMValueRef ind0
= LLVMConstInt(LLVMInt32Type(), 0, 0);
735 LLVMValueRef ind1
= LLVMConstInt(LLVMInt32Type(), 1, 0);
736 LLVMValueRef ind2
= LLVMConstInt(LLVMInt32Type(), 2, 0);
737 LLVMValueRef ind3
= LLVMConstInt(LLVMInt32Type(), 3, 0);
739 indices
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
740 indices
[1] = LLVMConstInt(LLVMInt32Type(), 0, 0);
742 out
[0] = LLVMBuildLoad(builder
, outputs
[0][0], ""); /*x0 x1 x2 x3*/
743 out
[1] = LLVMBuildLoad(builder
, outputs
[0][1], ""); /*y0 y1 y2 y3*/
744 out
[2] = LLVMBuildLoad(builder
, outputs
[0][2], ""); /*z0 z1 z2 z3*/
745 out
[3] = LLVMBuildLoad(builder
, outputs
[0][3], ""); /*w0 w1 w2 w3*/
747 io0_ptr
= LLVMBuildGEP(builder
, io_ptr
, &ind0
, 1, "");
748 io1_ptr
= LLVMBuildGEP(builder
, io_ptr
, &ind1
, 1, "");
749 io2_ptr
= LLVMBuildGEP(builder
, io_ptr
, &ind2
, 1, "");
750 io3_ptr
= LLVMBuildGEP(builder
, io_ptr
, &ind3
, 1, "");
752 clip_ptr0
= draw_jit_header_clip(builder
, io0_ptr
);
753 clip_ptr1
= draw_jit_header_clip(builder
, io1_ptr
);
754 clip_ptr2
= draw_jit_header_clip(builder
, io2_ptr
);
755 clip_ptr3
= draw_jit_header_clip(builder
, io3_ptr
);
757 for (int i
= 0; i
<4; i
++){
758 clip0_ptr
= LLVMBuildGEP(builder
, clip_ptr0
,
759 indices
, 2, ""); //x0
760 clip1_ptr
= LLVMBuildGEP(builder
, clip_ptr1
,
761 indices
, 2, ""); //x1
762 clip2_ptr
= LLVMBuildGEP(builder
, clip_ptr2
,
763 indices
, 2, ""); //x2
764 clip3_ptr
= LLVMBuildGEP(builder
, clip_ptr3
,
765 indices
, 2, ""); //x3
767 out0elem
= LLVMBuildExtractElement(builder
, out
[i
],
769 out1elem
= LLVMBuildExtractElement(builder
, out
[i
],
771 out2elem
= LLVMBuildExtractElement(builder
, out
[i
],
773 out3elem
= LLVMBuildExtractElement(builder
, out
[i
],
776 LLVMBuildStore(builder
, out0elem
, clip0_ptr
);
777 LLVMBuildStore(builder
, out1elem
, clip1_ptr
);
778 LLVMBuildStore(builder
, out2elem
, clip2_ptr
);
779 LLVMBuildStore(builder
, out3elem
, clip3_ptr
);
781 indices
[1]= LLVMBuildAdd(builder
, indices
[1], ind1
, "");
787 * Transforms the outputs for viewport mapping
790 generate_viewport(struct draw_llvm
*llvm
,
791 LLVMBuilderRef builder
,
792 LLVMValueRef (*outputs
)[NUM_CHANNELS
])
795 const float *scaleA
= llvm
->draw
->viewport
.scale
;
796 const float *transA
= llvm
->draw
->viewport
.translate
;
797 struct lp_type f32_type
= lp_type_float_vec(32);
798 LLVMValueRef out3
= LLVMBuildLoad(builder
, outputs
[0][3], ""); /*w0 w1 w2 w3*/
799 LLVMValueRef const1
= lp_build_const_vec(f32_type
, 1.0); /*1.0 1.0 1.0 1.0*/
801 /* for 1/w convention*/
802 out3
= LLVMBuildFDiv(builder
, const1
, out3
, "");
803 LLVMBuildStore(builder
, out3
, outputs
[0][3]);
805 /* Viewport Mapping */
807 LLVMValueRef out
= LLVMBuildLoad(builder
, outputs
[0][i
], ""); /*x0 x1 x2 x3*/
808 LLVMValueRef scale
= lp_build_const_vec(f32_type
, scaleA
[i
]); /*sx sx sx sx*/
809 LLVMValueRef trans
= lp_build_const_vec(f32_type
, transA
[i
]); /*tx tx tx tx*/
812 out
= LLVMBuildMul(builder
, out
, out3
, "");
814 out
= LLVMBuildMul(builder
, out
, scale
, "");
815 /* add translation */
816 out
= LLVMBuildAdd(builder
, out
, trans
, "");
818 /* store transformed outputs */
819 LLVMBuildStore(builder
, out
, outputs
[0][i
]);
824 /* Equivalent of _mm_set1_ps(a)
826 static LLVMValueRef
vec4f_from_scalar(LLVMBuilderRef bld
,
830 LLVMValueRef res
= LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
833 for(i
= 0; i
< 4; ++i
) {
834 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), i
, 0);
835 res
= LLVMBuildInsertElement(bld
, res
, a
, index
, i
== 3 ? name
: "");
842 * Returns clipmask as 4xi32 bitmask for the 4 vertices
845 generate_clipmask(LLVMBuilderRef builder
,
846 LLVMValueRef (*outputs
)[NUM_CHANNELS
],
852 LLVMValueRef context_ptr
)
854 LLVMValueRef mask
; /* stores the <4xi32> clipmasks */
855 LLVMValueRef test
, temp
;
856 LLVMValueRef zero
, shift
;
857 LLVMValueRef pos_x
, pos_y
, pos_z
, pos_w
;
858 LLVMValueRef plane1
, planes
, plane_ptr
, sum
;
862 struct lp_type f32_type
= lp_type_float_vec(32);
864 zero
= lp_build_const_vec(f32_type
, 0); /* 0.0f 0.0f 0.0f 0.0f */
865 shift
= lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */
867 /* Assuming position stored at output[0] */
868 pos_x
= LLVMBuildLoad(builder
, outputs
[0][0], ""); /*x0 x1 x2 x3*/
869 pos_y
= LLVMBuildLoad(builder
, outputs
[0][1], ""); /*y0 y1 y2 y3*/
870 pos_z
= LLVMBuildLoad(builder
, outputs
[0][2], ""); /*z0 z1 z2 z3*/
871 pos_w
= LLVMBuildLoad(builder
, outputs
[0][3], ""); /*w0 w1 w2 w3*/
873 /* Cliptest, for hardwired planes */
876 test
= lp_build_compare(builder
, f32_type
, PIPE_FUNC_GREATER
, pos_x
, pos_w
);
878 test
= LLVMBuildAnd(builder
, test
, temp
, "");
882 test
= LLVMBuildFAdd(builder
, pos_x
, pos_w
, "");
883 test
= lp_build_compare(builder
, f32_type
, PIPE_FUNC_GREATER
, zero
, test
);
884 temp
= LLVMBuildShl(builder
, temp
, shift
, "");
885 test
= LLVMBuildAnd(builder
, test
, temp
, "");
886 mask
= LLVMBuildOr(builder
, mask
, test
, "");
889 test
= lp_build_compare(builder
, f32_type
, PIPE_FUNC_GREATER
, pos_y
, pos_w
);
890 temp
= LLVMBuildShl(builder
, temp
, shift
, "");
891 test
= LLVMBuildAnd(builder
, test
, temp
, "");
892 mask
= LLVMBuildOr(builder
, mask
, test
, "");
895 test
= LLVMBuildFAdd(builder
, pos_y
, pos_w
, "");
896 test
= lp_build_compare(builder
, f32_type
, PIPE_FUNC_GREATER
, zero
, test
);
897 temp
= LLVMBuildShl(builder
, temp
, shift
, "");
898 test
= LLVMBuildAnd(builder
, test
, temp
, "");
899 mask
= LLVMBuildOr(builder
, mask
, test
, "");
905 test
= lp_build_compare(builder
, f32_type
, PIPE_FUNC_GREATER
, zero
, pos_z
);
906 temp
= LLVMBuildShl(builder
, temp
, shift
, "");
907 test
= LLVMBuildAnd(builder
, test
, temp
, "");
908 mask
= LLVMBuildOr(builder
, mask
, test
, "");
912 test
= LLVMBuildFAdd(builder
, pos_z
, pos_w
, "");
913 test
= lp_build_compare(builder
, f32_type
, PIPE_FUNC_GREATER
, zero
, test
);
914 temp
= LLVMBuildShl(builder
, temp
, shift
, "");
915 test
= LLVMBuildAnd(builder
, test
, temp
, "");
916 mask
= LLVMBuildOr(builder
, mask
, test
, "");
919 test
= lp_build_compare(builder
, f32_type
, PIPE_FUNC_GREATER
, pos_z
, pos_w
);
920 temp
= LLVMBuildShl(builder
, temp
, shift
, "");
921 test
= LLVMBuildAnd(builder
, test
, temp
, "");
922 mask
= LLVMBuildOr(builder
, mask
, test
, "");
926 LLVMValueRef planes_ptr
= draw_jit_context_planes(builder
, context_ptr
);
927 LLVMValueRef indices
[3];
929 /* userclip planes */
930 for (i
= 6; i
< nr
; i
++) {
931 indices
[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
932 indices
[1] = LLVMConstInt(LLVMInt32Type(), i
, 0);
934 indices
[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
935 plane_ptr
= LLVMBuildGEP(builder
, planes_ptr
, indices
, 3, "");
936 plane1
= LLVMBuildLoad(builder
, plane_ptr
, "plane_x");
937 planes
= vec4f_from_scalar(builder
, plane1
, "plane4_x");
938 sum
= LLVMBuildMul(builder
, planes
, pos_x
, "");
940 indices
[2] = LLVMConstInt(LLVMInt32Type(), 1, 0);
941 plane_ptr
= LLVMBuildGEP(builder
, planes_ptr
, indices
, 3, "");
942 plane1
= LLVMBuildLoad(builder
, plane_ptr
, "plane_y");
943 planes
= vec4f_from_scalar(builder
, plane1
, "plane4_y");
944 test
= LLVMBuildMul(builder
, planes
, pos_y
, "");
945 sum
= LLVMBuildFAdd(builder
, sum
, test
, "");
947 indices
[2] = LLVMConstInt(LLVMInt32Type(), 2, 0);
948 plane_ptr
= LLVMBuildGEP(builder
, planes_ptr
, indices
, 3, "");
949 plane1
= LLVMBuildLoad(builder
, plane_ptr
, "plane_z");
950 planes
= vec4f_from_scalar(builder
, plane1
, "plane4_z");
951 test
= LLVMBuildMul(builder
, planes
, pos_z
, "");
952 sum
= LLVMBuildFAdd(builder
, sum
, test
, "");
954 indices
[2] = LLVMConstInt(LLVMInt32Type(), 3, 0);
955 plane_ptr
= LLVMBuildGEP(builder
, planes_ptr
, indices
, 3, "");
956 plane1
= LLVMBuildLoad(builder
, plane_ptr
, "plane_w");
957 planes
= vec4f_from_scalar(builder
, plane1
, "plane4_w");
958 test
= LLVMBuildMul(builder
, planes
, pos_w
, "");
959 sum
= LLVMBuildFAdd(builder
, sum
, test
, "");
961 test
= lp_build_compare(builder
, f32_type
, PIPE_FUNC_GREATER
, zero
, sum
);
962 temp
= LLVMBuildShl(builder
, temp
, shift
, "");
963 test
= LLVMBuildAnd(builder
, test
, temp
, "");
964 mask
= LLVMBuildOr(builder
, mask
, test
, "");
971 * Returns boolean if any clipping has occurred
972 * Used zero/non-zero i32 value to represent boolean
975 clipmask_bool(LLVMBuilderRef builder
,
976 LLVMValueRef clipmask
,
977 LLVMValueRef ret_ptr
)
979 LLVMValueRef ret
= LLVMBuildLoad(builder
, ret_ptr
, "");
984 temp
= LLVMBuildExtractElement(builder
, clipmask
,
985 LLVMConstInt(LLVMInt32Type(), i
, 0) , "");
986 ret
= LLVMBuildOr(builder
, ret
, temp
, "");
989 LLVMBuildStore(builder
, ret
, ret_ptr
);
993 draw_llvm_generate(struct draw_llvm
*llvm
, struct draw_llvm_variant
*variant
)
995 LLVMTypeRef arg_types
[8];
996 LLVMTypeRef func_type
;
997 LLVMValueRef context_ptr
;
998 LLVMBasicBlockRef block
;
999 LLVMBuilderRef builder
;
1000 LLVMValueRef start
, end
, count
, stride
, step
, io_itr
;
1001 LLVMValueRef io_ptr
, vbuffers_ptr
, vb_ptr
;
1002 LLVMValueRef instance_id
;
1003 struct draw_context
*draw
= llvm
->draw
;
1005 struct lp_build_context bld
;
1006 struct lp_build_loop_state lp_loop
;
1007 const int max_vertices
= 4;
1008 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][NUM_CHANNELS
];
1010 struct lp_build_sampler_soa
*sampler
= 0;
1011 LLVMValueRef ret
, ret_ptr
;
1012 boolean bypass_viewport
= variant
->key
.bypass_viewport
;
1013 boolean enable_cliptest
= variant
->key
.clip_xy
||
1014 variant
->key
.clip_z
||
1015 variant
->key
.clip_user
;
1017 arg_types
[0] = llvm
->context_ptr_type
; /* context */
1018 arg_types
[1] = llvm
->vertex_header_ptr_type
; /* vertex_header */
1019 arg_types
[2] = llvm
->buffer_ptr_type
; /* vbuffers */
1020 arg_types
[3] = LLVMInt32Type(); /* start */
1021 arg_types
[4] = LLVMInt32Type(); /* count */
1022 arg_types
[5] = LLVMInt32Type(); /* stride */
1023 arg_types
[6] = llvm
->vb_ptr_type
; /* pipe_vertex_buffer's */
1024 arg_types
[7] = LLVMInt32Type(); /* instance_id */
1026 func_type
= LLVMFunctionType(LLVMInt32Type(), arg_types
, Elements(arg_types
), 0);
1028 variant
->function
= LLVMAddFunction(llvm
->module
, "draw_llvm_shader", func_type
);
1029 LLVMSetFunctionCallConv(variant
->function
, LLVMCCallConv
);
1030 for(i
= 0; i
< Elements(arg_types
); ++i
)
1031 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
1032 LLVMAddAttribute(LLVMGetParam(variant
->function
, i
), LLVMNoAliasAttribute
);
1034 context_ptr
= LLVMGetParam(variant
->function
, 0);
1035 io_ptr
= LLVMGetParam(variant
->function
, 1);
1036 vbuffers_ptr
= LLVMGetParam(variant
->function
, 2);
1037 start
= LLVMGetParam(variant
->function
, 3);
1038 count
= LLVMGetParam(variant
->function
, 4);
1039 stride
= LLVMGetParam(variant
->function
, 5);
1040 vb_ptr
= LLVMGetParam(variant
->function
, 6);
1041 instance_id
= LLVMGetParam(variant
->function
, 7);
1043 lp_build_name(context_ptr
, "context");
1044 lp_build_name(io_ptr
, "io");
1045 lp_build_name(vbuffers_ptr
, "vbuffers");
1046 lp_build_name(start
, "start");
1047 lp_build_name(count
, "count");
1048 lp_build_name(stride
, "stride");
1049 lp_build_name(vb_ptr
, "vb");
1050 lp_build_name(instance_id
, "instance_id");
1056 block
= LLVMAppendBasicBlock(variant
->function
, "entry");
1057 builder
= LLVMCreateBuilder();
1058 LLVMPositionBuilderAtEnd(builder
, block
);
1060 lp_build_context_init(&bld
, builder
, lp_type_int(32));
1062 end
= lp_build_add(&bld
, start
, count
);
1064 step
= LLVMConstInt(LLVMInt32Type(), max_vertices
, 0);
1066 /* function will return non-zero i32 value if any clipped vertices */
1067 ret_ptr
= lp_build_alloca(builder
, LLVMInt32Type(), "");
1068 LLVMBuildStore(builder
, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr
);
1070 /* code generated texture sampling */
1071 sampler
= draw_llvm_sampler_soa_create(
1072 draw_llvm_variant_key_samplers(&variant
->key
),
1076 lp_build_printf(builder
, "start = %d, end = %d, step = %d\n",
1079 lp_build_loop_begin(builder
, start
, &lp_loop
);
1081 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
];
1082 LLVMValueRef aos_attribs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
] = { { 0 } };
1084 LLVMValueRef clipmask
; /* holds the clipmask value */
1085 const LLVMValueRef (*ptr_aos
)[NUM_CHANNELS
];
1087 io_itr
= LLVMBuildSub(builder
, lp_loop
.counter
, start
, "");
1088 io
= LLVMBuildGEP(builder
, io_ptr
, &io_itr
, 1, "");
1090 lp_build_printf(builder
, " --- io %d = %p, loop counter %d\n",
1091 io_itr
, io
, lp_loop
.counter
);
1093 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
1094 LLVMValueRef true_index
= LLVMBuildAdd(
1097 LLVMConstInt(LLVMInt32Type(), i
, 0), "");
1098 for (j
= 0; j
< draw
->pt
.nr_vertex_elements
; ++j
) {
1099 struct pipe_vertex_element
*velem
= &draw
->pt
.vertex_element
[j
];
1100 LLVMValueRef vb_index
= LLVMConstInt(LLVMInt32Type(),
1101 velem
->vertex_buffer_index
,
1103 LLVMValueRef vb
= LLVMBuildGEP(builder
, vb_ptr
,
1105 generate_fetch(builder
, vbuffers_ptr
,
1106 &aos_attribs
[j
][i
], velem
, vb
, true_index
,
1110 convert_to_soa(builder
, aos_attribs
, inputs
,
1111 draw
->pt
.nr_vertex_elements
);
1113 ptr_aos
= (const LLVMValueRef (*)[NUM_CHANNELS
]) inputs
;
1121 /* store original positions in clip before further manipulation */
1122 store_clip(builder
, io
, outputs
);
1125 if (enable_cliptest
){
1126 /* allocate clipmask, assign it integer type */
1127 clipmask
= generate_clipmask(builder
, outputs
,
1128 variant
->key
.clip_xy
,
1129 variant
->key
.clip_z
,
1130 variant
->key
.clip_user
,
1131 variant
->key
.clip_halfz
,
1132 variant
->key
.nr_planes
,
1134 /* return clipping boolean value for function */
1135 clipmask_bool(builder
, clipmask
, ret_ptr
);
1138 clipmask
= lp_build_const_int_vec(lp_type_int_vec(32), 0);
1141 /* do viewport mapping */
1142 if (!bypass_viewport
){
1143 generate_viewport(llvm
, builder
, outputs
);
1146 /* store clipmask in vertex header and positions in data */
1147 convert_to_aos(builder
, io
, outputs
, clipmask
,
1148 draw
->vs
.vertex_shader
->info
.num_outputs
,
1152 lp_build_loop_end_cond(builder
, end
, step
, LLVMIntUGE
, &lp_loop
);
1154 sampler
->destroy(sampler
);
1156 #ifdef PIPE_ARCH_X86
1157 /* Avoid corrupting the FPU stack on 32bit OSes. */
1158 lp_build_intrinsic(builder
, "llvm.x86.mmx.emms", LLVMVoidType(), NULL
, 0);
1161 ret
= LLVMBuildLoad(builder
, ret_ptr
,"");
1162 LLVMBuildRet(builder
, ret
);
1164 LLVMDisposeBuilder(builder
);
1167 * Translate the LLVM IR into machine code.
1170 if(LLVMVerifyFunction(variant
->function
, LLVMPrintMessageAction
)) {
1171 lp_debug_dump_value(variant
->function
);
1176 LLVMRunFunctionPassManager(llvm
->pass
, variant
->function
);
1178 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
1179 lp_debug_dump_value(variant
->function
);
1183 code
= LLVMGetPointerToGlobal(llvm
->draw
->engine
, variant
->function
);
1184 variant
->jit_func
= (draw_jit_vert_func
)pointer_to_func(code
);
1186 if (gallivm_debug
& GALLIVM_DEBUG_ASM
) {
1187 lp_disassemble(code
);
1189 lp_func_delete_body(variant
->function
);
1194 draw_llvm_generate_elts(struct draw_llvm
*llvm
, struct draw_llvm_variant
*variant
)
1196 LLVMTypeRef arg_types
[8];
1197 LLVMTypeRef func_type
;
1198 LLVMValueRef context_ptr
;
1199 LLVMBasicBlockRef block
;
1200 LLVMBuilderRef builder
;
1201 LLVMValueRef fetch_elts
, fetch_count
, stride
, step
, io_itr
;
1202 LLVMValueRef io_ptr
, vbuffers_ptr
, vb_ptr
;
1203 LLVMValueRef instance_id
;
1204 struct draw_context
*draw
= llvm
->draw
;
1206 struct lp_build_context bld
;
1207 struct lp_build_loop_state lp_loop
;
1208 const int max_vertices
= 4;
1209 LLVMValueRef outputs
[PIPE_MAX_SHADER_OUTPUTS
][NUM_CHANNELS
];
1210 LLVMValueRef fetch_max
;
1212 struct lp_build_sampler_soa
*sampler
= 0;
1213 LLVMValueRef ret
, ret_ptr
;
1214 boolean bypass_viewport
= variant
->key
.bypass_viewport
;
1215 boolean enable_cliptest
= variant
->key
.clip_xy
||
1216 variant
->key
.clip_z
||
1217 variant
->key
.clip_user
;
1219 arg_types
[0] = llvm
->context_ptr_type
; /* context */
1220 arg_types
[1] = llvm
->vertex_header_ptr_type
; /* vertex_header */
1221 arg_types
[2] = llvm
->buffer_ptr_type
; /* vbuffers */
1222 arg_types
[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
1223 arg_types
[4] = LLVMInt32Type(); /* fetch_count */
1224 arg_types
[5] = LLVMInt32Type(); /* stride */
1225 arg_types
[6] = llvm
->vb_ptr_type
; /* pipe_vertex_buffer's */
1226 arg_types
[7] = LLVMInt32Type(); /* instance_id */
1228 func_type
= LLVMFunctionType(LLVMInt32Type(), arg_types
, Elements(arg_types
), 0);
1230 variant
->function_elts
= LLVMAddFunction(llvm
->module
, "draw_llvm_shader_elts", func_type
);
1231 LLVMSetFunctionCallConv(variant
->function_elts
, LLVMCCallConv
);
1232 for(i
= 0; i
< Elements(arg_types
); ++i
)
1233 if(LLVMGetTypeKind(arg_types
[i
]) == LLVMPointerTypeKind
)
1234 LLVMAddAttribute(LLVMGetParam(variant
->function_elts
, i
),
1235 LLVMNoAliasAttribute
);
1237 context_ptr
= LLVMGetParam(variant
->function_elts
, 0);
1238 io_ptr
= LLVMGetParam(variant
->function_elts
, 1);
1239 vbuffers_ptr
= LLVMGetParam(variant
->function_elts
, 2);
1240 fetch_elts
= LLVMGetParam(variant
->function_elts
, 3);
1241 fetch_count
= LLVMGetParam(variant
->function_elts
, 4);
1242 stride
= LLVMGetParam(variant
->function_elts
, 5);
1243 vb_ptr
= LLVMGetParam(variant
->function_elts
, 6);
1244 instance_id
= LLVMGetParam(variant
->function_elts
, 7);
1246 lp_build_name(context_ptr
, "context");
1247 lp_build_name(io_ptr
, "io");
1248 lp_build_name(vbuffers_ptr
, "vbuffers");
1249 lp_build_name(fetch_elts
, "fetch_elts");
1250 lp_build_name(fetch_count
, "fetch_count");
1251 lp_build_name(stride
, "stride");
1252 lp_build_name(vb_ptr
, "vb");
1253 lp_build_name(instance_id
, "instance_id");
1259 block
= LLVMAppendBasicBlock(variant
->function_elts
, "entry");
1260 builder
= LLVMCreateBuilder();
1261 LLVMPositionBuilderAtEnd(builder
, block
);
1263 lp_build_context_init(&bld
, builder
, lp_type_int(32));
1265 step
= LLVMConstInt(LLVMInt32Type(), max_vertices
, 0);
1267 /* code generated texture sampling */
1268 sampler
= draw_llvm_sampler_soa_create(
1269 draw_llvm_variant_key_samplers(&variant
->key
),
1272 fetch_max
= LLVMBuildSub(builder
, fetch_count
,
1273 LLVMConstInt(LLVMInt32Type(), 1, 0),
1276 /* function returns non-zero i32 value if any clipped vertices */
1277 ret_ptr
= lp_build_alloca(builder
, LLVMInt32Type(), "");
1278 LLVMBuildStore(builder
, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr
);
1280 lp_build_loop_begin(builder
, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop
);
1282 LLVMValueRef inputs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
];
1283 LLVMValueRef aos_attribs
[PIPE_MAX_SHADER_INPUTS
][NUM_CHANNELS
] = { { 0 } };
1285 LLVMValueRef clipmask
; /* holds the clipmask value */
1286 const LLVMValueRef (*ptr_aos
)[NUM_CHANNELS
];
1288 io_itr
= lp_loop
.counter
;
1289 io
= LLVMBuildGEP(builder
, io_ptr
, &io_itr
, 1, "");
1291 lp_build_printf(builder
, " --- io %d = %p, loop counter %d\n",
1292 io_itr
, io
, lp_loop
.counter
);
1294 for (i
= 0; i
< NUM_CHANNELS
; ++i
) {
1295 LLVMValueRef true_index
= LLVMBuildAdd(
1298 LLVMConstInt(LLVMInt32Type(), i
, 0), "");
1299 LLVMValueRef fetch_ptr
;
1301 /* make sure we're not out of bounds which can happen
1302 * if fetch_count % 4 != 0, because on the last iteration
1303 * a few of the 4 vertex fetches will be out of bounds */
1304 true_index
= lp_build_min(&bld
, true_index
, fetch_max
);
1306 fetch_ptr
= LLVMBuildGEP(builder
, fetch_elts
,
1307 &true_index
, 1, "");
1308 true_index
= LLVMBuildLoad(builder
, fetch_ptr
, "fetch_elt");
1309 for (j
= 0; j
< draw
->pt
.nr_vertex_elements
; ++j
) {
1310 struct pipe_vertex_element
*velem
= &draw
->pt
.vertex_element
[j
];
1311 LLVMValueRef vb_index
= LLVMConstInt(LLVMInt32Type(),
1312 velem
->vertex_buffer_index
,
1314 LLVMValueRef vb
= LLVMBuildGEP(builder
, vb_ptr
,
1316 generate_fetch(builder
, vbuffers_ptr
,
1317 &aos_attribs
[j
][i
], velem
, vb
, true_index
,
1321 convert_to_soa(builder
, aos_attribs
, inputs
,
1322 draw
->pt
.nr_vertex_elements
);
1324 ptr_aos
= (const LLVMValueRef (*)[NUM_CHANNELS
]) inputs
;
1332 /* store original positions in clip before further manipulation */
1333 store_clip(builder
, io
, outputs
);
1336 if (enable_cliptest
){
1337 /* allocate clipmask, assign it integer type */
1338 clipmask
= generate_clipmask(builder
, outputs
,
1339 variant
->key
.clip_xy
,
1340 variant
->key
.clip_z
,
1341 variant
->key
.clip_user
,
1342 variant
->key
.clip_halfz
,
1343 variant
->key
.nr_planes
,
1345 /* return clipping boolean value for function */
1346 clipmask_bool(builder
, clipmask
, ret_ptr
);
1349 clipmask
= lp_build_const_int_vec(lp_type_int_vec(32), 0);
1352 /* do viewport mapping */
1353 if (!bypass_viewport
){
1354 generate_viewport(llvm
, builder
, outputs
);
1357 /* store clipmask in vertex header,
1358 * original positions in clip
1359 * and transformed positions in data
1361 convert_to_aos(builder
, io
, outputs
, clipmask
,
1362 draw
->vs
.vertex_shader
->info
.num_outputs
,
1366 lp_build_loop_end_cond(builder
, fetch_count
, step
, LLVMIntUGE
, &lp_loop
);
1368 sampler
->destroy(sampler
);
1370 #ifdef PIPE_ARCH_X86
1371 /* Avoid corrupting the FPU stack on 32bit OSes. */
1372 lp_build_intrinsic(builder
, "llvm.x86.mmx.emms", LLVMVoidType(), NULL
, 0);
1375 ret
= LLVMBuildLoad(builder
, ret_ptr
,"");
1376 LLVMBuildRet(builder
, ret
);
1378 LLVMDisposeBuilder(builder
);
1381 * Translate the LLVM IR into machine code.
1384 if(LLVMVerifyFunction(variant
->function_elts
, LLVMPrintMessageAction
)) {
1385 lp_debug_dump_value(variant
->function_elts
);
1390 LLVMRunFunctionPassManager(llvm
->pass
, variant
->function_elts
);
1392 if (gallivm_debug
& GALLIVM_DEBUG_IR
) {
1393 lp_debug_dump_value(variant
->function_elts
);
1397 code
= LLVMGetPointerToGlobal(llvm
->draw
->engine
, variant
->function_elts
);
1398 variant
->jit_func_elts
= (draw_jit_vert_func_elts
)pointer_to_func(code
);
1400 if (gallivm_debug
& GALLIVM_DEBUG_ASM
) {
1401 lp_disassemble(code
);
1403 lp_func_delete_body(variant
->function_elts
);
1407 struct draw_llvm_variant_key
*
1408 draw_llvm_make_variant_key(struct draw_llvm
*llvm
, char *store
)
1411 struct draw_llvm_variant_key
*key
;
1412 struct lp_sampler_static_state
*sampler
;
1414 key
= (struct draw_llvm_variant_key
*)store
;
1416 /* Presumably all variants of the shader should have the same
1417 * number of vertex elements - ie the number of shader inputs.
1419 key
->nr_vertex_elements
= llvm
->draw
->pt
.nr_vertex_elements
;
1421 /* will have to rig this up properly later */
1422 key
->clip_xy
= llvm
->draw
->clip_xy
;
1423 key
->clip_z
= llvm
->draw
->clip_z
;
1424 key
->clip_user
= llvm
->draw
->clip_user
;
1425 key
->bypass_viewport
= llvm
->draw
->identity_viewport
;
1426 key
->clip_halfz
= !llvm
->draw
->rasterizer
->gl_rasterization_rules
;
1427 key
->need_edgeflags
= (llvm
->draw
->vs
.edgeflag_output
? TRUE
: FALSE
);
1428 key
->nr_planes
= llvm
->draw
->nr_planes
;
1431 /* All variants of this shader will have the same value for
1432 * nr_samplers. Not yet trying to compact away holes in the
1435 key
->nr_samplers
= llvm
->draw
->vs
.vertex_shader
->info
.file_max
[TGSI_FILE_SAMPLER
] + 1;
1437 sampler
= draw_llvm_variant_key_samplers(key
);
1439 memcpy(key
->vertex_element
,
1440 llvm
->draw
->pt
.vertex_element
,
1441 sizeof(struct pipe_vertex_element
) * key
->nr_vertex_elements
);
1443 memset(sampler
, 0, key
->nr_samplers
* sizeof *sampler
);
1445 for (i
= 0 ; i
< key
->nr_samplers
; i
++) {
1446 lp_sampler_static_state(&sampler
[i
],
1447 llvm
->draw
->sampler_views
[i
],
1448 llvm
->draw
->samplers
[i
]);
1455 draw_llvm_set_mapped_texture(struct draw_context
*draw
,
1456 unsigned sampler_idx
,
1457 uint32_t width
, uint32_t height
, uint32_t depth
,
1458 uint32_t last_level
,
1459 uint32_t row_stride
[DRAW_MAX_TEXTURE_LEVELS
],
1460 uint32_t img_stride
[DRAW_MAX_TEXTURE_LEVELS
],
1461 const void *data
[DRAW_MAX_TEXTURE_LEVELS
])
1464 struct draw_jit_texture
*jit_tex
;
1466 assert(sampler_idx
< PIPE_MAX_VERTEX_SAMPLERS
);
1469 jit_tex
= &draw
->llvm
->jit_context
.textures
[sampler_idx
];
1471 jit_tex
->width
= width
;
1472 jit_tex
->height
= height
;
1473 jit_tex
->depth
= depth
;
1474 jit_tex
->last_level
= last_level
;
1476 for (j
= 0; j
<= last_level
; j
++) {
1477 jit_tex
->data
[j
] = data
[j
];
1478 jit_tex
->row_stride
[j
] = row_stride
[j
];
1479 jit_tex
->img_stride
[j
] = img_stride
[j
];
1484 draw_llvm_destroy_variant(struct draw_llvm_variant
*variant
)
1486 struct draw_llvm
*llvm
= variant
->llvm
;
1487 struct draw_context
*draw
= llvm
->draw
;
1489 if (variant
->function_elts
) {
1490 if (variant
->function_elts
)
1491 LLVMFreeMachineCodeForFunction(draw
->engine
,
1492 variant
->function_elts
);
1493 LLVMDeleteFunction(variant
->function_elts
);
1496 if (variant
->function
) {
1497 if (variant
->function
)
1498 LLVMFreeMachineCodeForFunction(draw
->engine
,
1500 LLVMDeleteFunction(variant
->function
);
1503 remove_from_list(&variant
->list_item_local
);
1504 variant
->shader
->variants_cached
--;
1505 remove_from_list(&variant
->list_item_global
);
1506 llvm
->nr_variants
--;