llvmpipe: clean up fields in draw_llvm_variant_key
[mesa.git] / src / gallium / auxiliary / draw / draw_llvm.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include "draw_llvm.h"
29
30 #include "draw_context.h"
31 #include "draw_vs.h"
32
33 #include "gallivm/lp_bld_arit.h"
34 #include "gallivm/lp_bld_logic.h"
35 #include "gallivm/lp_bld_const.h"
36 #include "gallivm/lp_bld_swizzle.h"
37 #include "gallivm/lp_bld_struct.h"
38 #include "gallivm/lp_bld_type.h"
39 #include "gallivm/lp_bld_flow.h"
40 #include "gallivm/lp_bld_debug.h"
41 #include "gallivm/lp_bld_tgsi.h"
42 #include "gallivm/lp_bld_printf.h"
43 #include "gallivm/lp_bld_intr.h"
44 #include "gallivm/lp_bld_init.h"
45
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48
49 #include "util/u_cpu_detect.h"
50 #include "util/u_pointer.h"
51 #include "util/u_string.h"
52
53 #include <llvm-c/Transforms/Scalar.h>
54
55 #define DEBUG_STORE 0
56
/*
 * Forward declarations of the generators for the draw jit functions.
 * Both are called from draw_llvm_create_variant().
 */
static void draw_llvm_generate(struct draw_llvm *llvm,
                               struct draw_llvm_variant *var);
static void draw_llvm_generate_elts(struct draw_llvm *llvm,
                                    struct draw_llvm_variant *var);
62
63 static void
64 init_globals(struct draw_llvm *llvm)
65 {
66 LLVMTypeRef texture_type;
67
68 /* struct draw_jit_texture */
69 {
70 LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
71
72 elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type();
73 elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type();
74 elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type();
75 elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type();
76 elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
77 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
78 elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
79 LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS);
80 elem_types[DRAW_JIT_TEXTURE_DATA] =
81 LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0),
82 DRAW_MAX_TEXTURE_LEVELS);
83 elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType();
84 elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType();
85 elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType();
86 elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] =
87 LLVMArrayType(LLVMFloatType(), 4);
88
89 texture_type = LLVMStructType(elem_types, Elements(elem_types), 0);
90
91 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
92 llvm->target, texture_type,
93 DRAW_JIT_TEXTURE_WIDTH);
94 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
95 llvm->target, texture_type,
96 DRAW_JIT_TEXTURE_HEIGHT);
97 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
98 llvm->target, texture_type,
99 DRAW_JIT_TEXTURE_DEPTH);
100 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
101 llvm->target, texture_type,
102 DRAW_JIT_TEXTURE_LAST_LEVEL);
103 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
104 llvm->target, texture_type,
105 DRAW_JIT_TEXTURE_ROW_STRIDE);
106 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
107 llvm->target, texture_type,
108 DRAW_JIT_TEXTURE_IMG_STRIDE);
109 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data,
110 llvm->target, texture_type,
111 DRAW_JIT_TEXTURE_DATA);
112 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod,
113 llvm->target, texture_type,
114 DRAW_JIT_TEXTURE_MIN_LOD);
115 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod,
116 llvm->target, texture_type,
117 DRAW_JIT_TEXTURE_MAX_LOD);
118 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias,
119 llvm->target, texture_type,
120 DRAW_JIT_TEXTURE_LOD_BIAS);
121 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color,
122 llvm->target, texture_type,
123 DRAW_JIT_TEXTURE_BORDER_COLOR);
124 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture,
125 llvm->target, texture_type);
126
127 LLVMAddTypeName(llvm->module, "texture", texture_type);
128 }
129
130
131 /* struct draw_jit_context */
132 {
133 LLVMTypeRef elem_types[4];
134 LLVMTypeRef context_type;
135
136 elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */
137 elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* gs_constants */
138 elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(LLVMFloatType(), 4), 12), 0); /* planes */
139 elem_types[3] = LLVMArrayType(texture_type,
140 PIPE_MAX_VERTEX_SAMPLERS); /* textures */
141
142 context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
143
144 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
145 llvm->target, context_type, 0);
146 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants,
147 llvm->target, context_type, 1);
148 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
149 llvm->target, context_type, 2);
150 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
151 llvm->target, context_type,
152 DRAW_JIT_CTX_TEXTURES);
153 LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
154 llvm->target, context_type);
155
156 LLVMAddTypeName(llvm->module, "draw_jit_context", context_type);
157
158 llvm->context_ptr_type = LLVMPointerType(context_type, 0);
159 }
160 {
161 LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0);
162 llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0);
163 }
164 /* struct pipe_vertex_buffer */
165 {
166 LLVMTypeRef elem_types[4];
167 LLVMTypeRef vb_type;
168
169 elem_types[0] = LLVMInt32Type();
170 elem_types[1] = LLVMInt32Type();
171 elem_types[2] = LLVMInt32Type();
172 elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */
173
174 vb_type = LLVMStructType(elem_types, Elements(elem_types), 0);
175
176 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
177 llvm->target, vb_type, 0);
178 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
179 llvm->target, vb_type, 2);
180 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer,
181 llvm->target, vb_type);
182
183 LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type);
184
185 llvm->vb_ptr_type = LLVMPointerType(vb_type, 0);
186 }
187 }
188
189 static LLVMTypeRef
190 create_vertex_header(struct draw_llvm *llvm, int data_elems)
191 {
192 /* struct vertex_header */
193 LLVMTypeRef elem_types[3];
194 LLVMTypeRef vertex_header;
195 char struct_name[24];
196
197 util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
198
199 elem_types[0] = LLVMIntType(32);
200 elem_types[1] = LLVMArrayType(LLVMFloatType(), 4);
201 elem_types[2] = LLVMArrayType(elem_types[1], data_elems);
202
203 vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0);
204
205 /* these are bit-fields and we can't take address of them
206 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
207 llvm->target, vertex_header,
208 DRAW_JIT_VERTEX_CLIPMASK);
209 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
210 llvm->target, vertex_header,
211 DRAW_JIT_VERTEX_EDGEFLAG);
212 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
213 llvm->target, vertex_header,
214 DRAW_JIT_VERTEX_PAD);
215 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
216 llvm->target, vertex_header,
217 DRAW_JIT_VERTEX_VERTEX_ID);
218 */
219 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
220 llvm->target, vertex_header,
221 DRAW_JIT_VERTEX_CLIP);
222 LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
223 llvm->target, vertex_header,
224 DRAW_JIT_VERTEX_DATA);
225
226 LLVMAddTypeName(llvm->module, struct_name, vertex_header);
227
228 return LLVMPointerType(vertex_header, 0);
229 }
230
231 struct draw_llvm *
232 draw_llvm_create(struct draw_context *draw)
233 {
234 struct draw_llvm *llvm;
235
236 llvm = CALLOC_STRUCT( draw_llvm );
237 if (!llvm)
238 return NULL;
239
240 llvm->draw = draw;
241 llvm->engine = draw->engine;
242
243 debug_assert(llvm->engine);
244
245 llvm->module = LLVMModuleCreateWithName("draw_llvm");
246 llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module);
247
248 LLVMAddModuleProvider(llvm->engine, llvm->provider);
249
250 llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine);
251
252 llvm->pass = LLVMCreateFunctionPassManager(llvm->provider);
253 LLVMAddTargetData(llvm->target, llvm->pass);
254
255 if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
256 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
257 * but there are more on SVN. */
258 /* TODO: Add more passes */
259
260 LLVMAddCFGSimplificationPass(llvm->pass);
261
262 if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) {
263 /* For LLVM >= 2.7 and 32-bit build, use this order of passes to
264 * avoid generating bad code.
265 * Test with piglit glsl-vs-sqrt-zero test.
266 */
267 LLVMAddConstantPropagationPass(llvm->pass);
268 LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
269 }
270 else {
271 LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
272 LLVMAddConstantPropagationPass(llvm->pass);
273 }
274
275 if(util_cpu_caps.has_sse4_1) {
276 /* FIXME: There is a bug in this pass, whereby the combination of fptosi
277 * and sitofp (necessary for trunc/floor/ceil/round implementation)
278 * somehow becomes invalid code.
279 */
280 LLVMAddInstructionCombiningPass(llvm->pass);
281 }
282 LLVMAddGVNPass(llvm->pass);
283 } else {
284 /* We need at least this pass to prevent the backends to fail in
285 * unexpected ways.
286 */
287 LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
288 }
289
290 init_globals(llvm);
291
292 if (gallivm_debug & GALLIVM_DEBUG_IR) {
293 LLVMDumpModule(llvm->module);
294 }
295
296 llvm->nr_variants = 0;
297 make_empty_list(&llvm->vs_variants_list);
298
299 return llvm;
300 }
301
302 void
303 draw_llvm_destroy(struct draw_llvm *llvm)
304 {
305 LLVMDisposePassManager(llvm->pass);
306
307 FREE(llvm);
308 }
309
310 struct draw_llvm_variant *
311 draw_llvm_create_variant(struct draw_llvm *llvm,
312 unsigned num_inputs,
313 const struct draw_llvm_variant_key *key)
314 {
315 struct draw_llvm_variant *variant;
316 struct llvm_vertex_shader *shader =
317 llvm_vertex_shader(llvm->draw->vs.vertex_shader);
318
319 variant = MALLOC(sizeof *variant +
320 shader->variant_key_size -
321 sizeof variant->key);
322 if (variant == NULL)
323 return NULL;
324
325 variant->llvm = llvm;
326
327 memcpy(&variant->key, key, shader->variant_key_size);
328
329 llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
330
331 draw_llvm_generate(llvm, variant);
332 draw_llvm_generate_elts(llvm, variant);
333
334 variant->shader = shader;
335 variant->list_item_global.base = variant;
336 variant->list_item_local.base = variant;
337 /*variant->no = */shader->variants_created++;
338 variant->list_item_global.base = variant;
339
340 return variant;
341 }
342
343 static void
344 generate_vs(struct draw_llvm *llvm,
345 LLVMBuilderRef builder,
346 LLVMValueRef (*outputs)[NUM_CHANNELS],
347 const LLVMValueRef (*inputs)[NUM_CHANNELS],
348 LLVMValueRef context_ptr,
349 struct lp_build_sampler_soa *draw_sampler)
350 {
351 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
352 struct lp_type vs_type;
353 LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr);
354 struct lp_build_sampler_soa *sampler = 0;
355
356 memset(&vs_type, 0, sizeof vs_type);
357 vs_type.floating = TRUE; /* floating point values */
358 vs_type.sign = TRUE; /* values are signed */
359 vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
360 vs_type.width = 32; /* 32-bit float */
361 vs_type.length = 4; /* 4 elements per vector */
362 #if 0
363 num_vs = 4; /* number of vertices per block */
364 #endif
365
366 if (gallivm_debug & GALLIVM_DEBUG_IR) {
367 tgsi_dump(tokens, 0);
368 }
369
370 if (llvm->draw->num_sampler_views &&
371 llvm->draw->num_samplers)
372 sampler = draw_sampler;
373
374 lp_build_tgsi_soa(builder,
375 tokens,
376 vs_type,
377 NULL /*struct lp_build_mask_context *mask*/,
378 consts_ptr,
379 NULL /*pos*/,
380 inputs,
381 outputs,
382 sampler,
383 &llvm->draw->vs.vertex_shader->info);
384 }
385
#if DEBUG_STORE
/* Debug helper: emit a printf of the four elements of the vector vec. */
static void print_vectorf(LLVMBuilderRef builder,
                          LLVMValueRef vec)
{
   LLVMValueRef elems[4];
   unsigned lane;

   for (lane = 0; lane < 4; ++lane) {
      elems[lane] = LLVMBuildExtractElement(builder, vec,
                                            LLVMConstInt(LLVMInt32Type(), lane, 0),
                                            "");
   }

   lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n",
                   elems[0], elems[1], elems[2], elems[3]);
}
#endif
403
404 static void
405 generate_fetch(LLVMBuilderRef builder,
406 LLVMValueRef vbuffers_ptr,
407 LLVMValueRef *res,
408 struct pipe_vertex_element *velem,
409 LLVMValueRef vbuf,
410 LLVMValueRef index,
411 LLVMValueRef instance_id)
412 {
413 LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0);
414 LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
415 &indices, 1, "");
416 LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
417 LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
418 LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
419 LLVMValueRef cond;
420 LLVMValueRef stride;
421
422 if (velem->instance_divisor) {
423 /* array index = instance_id / instance_divisor */
424 index = LLVMBuildUDiv(builder, instance_id,
425 LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0),
426 "instance_divisor");
427 }
428
429 /* limit index to min(inex, vb_max_index) */
430 cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
431 index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
432
433 stride = LLVMBuildMul(builder, vb_stride, index, "");
434
435 vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
436
437 stride = LLVMBuildAdd(builder, stride,
438 vb_buffer_offset,
439 "");
440 stride = LLVMBuildAdd(builder, stride,
441 LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0),
442 "");
443
444 /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/
445 vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, "");
446
447 *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format);
448 }
449
450 static LLVMValueRef
451 aos_to_soa(LLVMBuilderRef builder,
452 LLVMValueRef val0,
453 LLVMValueRef val1,
454 LLVMValueRef val2,
455 LLVMValueRef val3,
456 LLVMValueRef channel)
457 {
458 LLVMValueRef ex, res;
459
460 ex = LLVMBuildExtractElement(builder, val0,
461 channel, "");
462 res = LLVMBuildInsertElement(builder,
463 LLVMConstNull(LLVMTypeOf(val0)),
464 ex,
465 LLVMConstInt(LLVMInt32Type(), 0, 0),
466 "");
467
468 ex = LLVMBuildExtractElement(builder, val1,
469 channel, "");
470 res = LLVMBuildInsertElement(builder,
471 res, ex,
472 LLVMConstInt(LLVMInt32Type(), 1, 0),
473 "");
474
475 ex = LLVMBuildExtractElement(builder, val2,
476 channel, "");
477 res = LLVMBuildInsertElement(builder,
478 res, ex,
479 LLVMConstInt(LLVMInt32Type(), 2, 0),
480 "");
481
482 ex = LLVMBuildExtractElement(builder, val3,
483 channel, "");
484 res = LLVMBuildInsertElement(builder,
485 res, ex,
486 LLVMConstInt(LLVMInt32Type(), 3, 0),
487 "");
488
489 return res;
490 }
491
492 static void
493 soa_to_aos(LLVMBuilderRef builder,
494 LLVMValueRef soa[NUM_CHANNELS],
495 LLVMValueRef aos[NUM_CHANNELS])
496 {
497 LLVMValueRef comp;
498 int i = 0;
499
500 debug_assert(NUM_CHANNELS == 4);
501
502 aos[0] = LLVMConstNull(LLVMTypeOf(soa[0]));
503 aos[1] = aos[2] = aos[3] = aos[0];
504
505 for (i = 0; i < NUM_CHANNELS; ++i) {
506 LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0);
507
508 comp = LLVMBuildExtractElement(builder, soa[i],
509 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
510 aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, "");
511
512 comp = LLVMBuildExtractElement(builder, soa[i],
513 LLVMConstInt(LLVMInt32Type(), 1, 0), "");
514 aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, "");
515
516 comp = LLVMBuildExtractElement(builder, soa[i],
517 LLVMConstInt(LLVMInt32Type(), 2, 0), "");
518 aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, "");
519
520 comp = LLVMBuildExtractElement(builder, soa[i],
521 LLVMConstInt(LLVMInt32Type(), 3, 0), "");
522 aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, "");
523
524 }
525 }
526
527 static void
528 convert_to_soa(LLVMBuilderRef builder,
529 LLVMValueRef (*aos)[NUM_CHANNELS],
530 LLVMValueRef (*soa)[NUM_CHANNELS],
531 int num_attribs)
532 {
533 int i;
534
535 debug_assert(NUM_CHANNELS == 4);
536
537 for (i = 0; i < num_attribs; ++i) {
538 LLVMValueRef val0 = aos[i][0];
539 LLVMValueRef val1 = aos[i][1];
540 LLVMValueRef val2 = aos[i][2];
541 LLVMValueRef val3 = aos[i][3];
542
543 soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3,
544 LLVMConstInt(LLVMInt32Type(), 0, 0));
545 soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3,
546 LLVMConstInt(LLVMInt32Type(), 1, 0));
547 soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3,
548 LLVMConstInt(LLVMInt32Type(), 2, 0));
549 soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3,
550 LLVMConstInt(LLVMInt32Type(), 3, 0));
551 }
552 }
553
554 static void
555 store_aos(LLVMBuilderRef builder,
556 LLVMValueRef io_ptr,
557 LLVMValueRef index,
558 LLVMValueRef value,
559 LLVMValueRef clipmask)
560 {
561 LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr);
562 LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr);
563 LLVMValueRef indices[3];
564 LLVMValueRef val, shift;
565
566 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
567 indices[1] = index;
568 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
569
570 /* initialize vertex id:16 = 0xffff, pad:3 = 0, edgeflag:1 = 1 */
571 val = LLVMConstInt(LLVMInt32Type(), 0xffff1, 0);
572 shift = LLVMConstInt(LLVMInt32Type(), 12, 0);
573 val = LLVMBuildShl(builder, val, shift, "");
574 /* add clipmask:12 */
575 val = LLVMBuildOr(builder, val, clipmask, "");
576
577 /* store vertex header */
578 LLVMBuildStore(builder, val, id_ptr);
579
580
581 #if DEBUG_STORE
582 lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
583 #endif
584 #if 0
585 /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr);
586 print_vectorf(builder, value);*/
587 data_ptr = LLVMBuildBitCast(builder, data_ptr,
588 LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0),
589 "datavec");
590 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, "");
591
592 LLVMBuildStore(builder, value, data_ptr);
593 #else
594 {
595 LLVMValueRef x, y, z, w;
596 LLVMValueRef idx0, idx1, idx2, idx3;
597 LLVMValueRef gep0, gep1, gep2, gep3;
598 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
599
600 idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
601 idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
602 idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
603 idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
604
605 x = LLVMBuildExtractElement(builder, value,
606 idx0, "");
607 y = LLVMBuildExtractElement(builder, value,
608 idx1, "");
609 z = LLVMBuildExtractElement(builder, value,
610 idx2, "");
611 w = LLVMBuildExtractElement(builder, value,
612 idx3, "");
613
614 gep0 = LLVMBuildGEP(builder, data_ptr, &idx0, 1, "");
615 gep1 = LLVMBuildGEP(builder, data_ptr, &idx1, 1, "");
616 gep2 = LLVMBuildGEP(builder, data_ptr, &idx2, 1, "");
617 gep3 = LLVMBuildGEP(builder, data_ptr, &idx3, 1, "");
618
619 /*lp_build_printf(builder, "##### x = %f (%p), y = %f (%p), z = %f (%p), w = %f (%p)\n",
620 x, gep0, y, gep1, z, gep2, w, gep3);*/
621 LLVMBuildStore(builder, x, gep0);
622 LLVMBuildStore(builder, y, gep1);
623 LLVMBuildStore(builder, z, gep2);
624 LLVMBuildStore(builder, w, gep3);
625 }
626 #endif
627 }
628
629 static void
630 store_aos_array(LLVMBuilderRef builder,
631 LLVMValueRef io_ptr,
632 LLVMValueRef aos[NUM_CHANNELS],
633 int attrib,
634 int num_outputs,
635 LLVMValueRef clipmask)
636 {
637 LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0);
638 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
639 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
640 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
641 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
642 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
643 LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3;
644
645 debug_assert(NUM_CHANNELS == 4);
646
647 io0_ptr = LLVMBuildGEP(builder, io_ptr,
648 &ind0, 1, "");
649 io1_ptr = LLVMBuildGEP(builder, io_ptr,
650 &ind1, 1, "");
651 io2_ptr = LLVMBuildGEP(builder, io_ptr,
652 &ind2, 1, "");
653 io3_ptr = LLVMBuildGEP(builder, io_ptr,
654 &ind3, 1, "");
655
656 clipmask0 = LLVMBuildExtractElement(builder, clipmask,
657 ind0, "");
658 clipmask1 = LLVMBuildExtractElement(builder, clipmask,
659 ind1, "");
660 clipmask2 = LLVMBuildExtractElement(builder, clipmask,
661 ind2, "");
662 clipmask3 = LLVMBuildExtractElement(builder, clipmask,
663 ind3, "");
664
665 #if DEBUG_STORE
666 lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n",
667 io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3);
668 #endif
669 /* store for each of the 4 vertices */
670 store_aos(builder, io0_ptr, attr_index, aos[0], clipmask0);
671 store_aos(builder, io1_ptr, attr_index, aos[1], clipmask1);
672 store_aos(builder, io2_ptr, attr_index, aos[2], clipmask2);
673 store_aos(builder, io3_ptr, attr_index, aos[3], clipmask3);
674 }
675
676 static void
677 convert_to_aos(LLVMBuilderRef builder,
678 LLVMValueRef io,
679 LLVMValueRef (*outputs)[NUM_CHANNELS],
680 LLVMValueRef clipmask,
681 int num_outputs,
682 int max_vertices)
683 {
684 unsigned chan, attrib;
685
686 #if DEBUG_STORE
687 lp_build_printf(builder, " # storing begin\n");
688 #endif
689 for (attrib = 0; attrib < num_outputs; ++attrib) {
690 LLVMValueRef soa[4];
691 LLVMValueRef aos[4];
692 for(chan = 0; chan < NUM_CHANNELS; ++chan) {
693 if(outputs[attrib][chan]) {
694 LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
695 lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
696 /*lp_build_printf(builder, "output %d : %d ",
697 LLVMConstInt(LLVMInt32Type(), attrib, 0),
698 LLVMConstInt(LLVMInt32Type(), chan, 0));
699 print_vectorf(builder, out);*/
700 soa[chan] = out;
701 } else
702 soa[chan] = 0;
703 }
704 soa_to_aos(builder, soa, aos);
705 store_aos_array(builder,
706 io,
707 aos,
708 attrib,
709 num_outputs,
710 clipmask);
711 }
712 #if DEBUG_STORE
713 lp_build_printf(builder, " # storing end\n");
714 #endif
715 }
716
717 /*
718 * Stores original vertex positions in clip coordinates
719 * There is probably a more efficient way to do this, 4 floats at once
720 * rather than extracting each element one by one.
721 */
722 static void
723 store_clip(LLVMBuilderRef builder,
724 LLVMValueRef io_ptr,
725 LLVMValueRef (*outputs)[NUM_CHANNELS])
726 {
727 LLVMValueRef out[4];
728 LLVMValueRef indices[2];
729 LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr;
730 LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3;
731 LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr;
732 LLVMValueRef out0elem, out1elem, out2elem, out3elem;
733
734 LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0);
735 LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0);
736 LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0);
737 LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0);
738
739 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
740 indices[1] = LLVMConstInt(LLVMInt32Type(), 0, 0);
741
742 out[0] = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
743 out[1] = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
744 out[2] = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
745 out[3] = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
746
747 io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, "");
748 io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, "");
749 io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, "");
750 io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, "");
751
752 clip_ptr0 = draw_jit_header_clip(builder, io0_ptr);
753 clip_ptr1 = draw_jit_header_clip(builder, io1_ptr);
754 clip_ptr2 = draw_jit_header_clip(builder, io2_ptr);
755 clip_ptr3 = draw_jit_header_clip(builder, io3_ptr);
756
757 for (int i = 0; i<4; i++){
758 clip0_ptr = LLVMBuildGEP(builder, clip_ptr0,
759 indices, 2, ""); //x0
760 clip1_ptr = LLVMBuildGEP(builder, clip_ptr1,
761 indices, 2, ""); //x1
762 clip2_ptr = LLVMBuildGEP(builder, clip_ptr2,
763 indices, 2, ""); //x2
764 clip3_ptr = LLVMBuildGEP(builder, clip_ptr3,
765 indices, 2, ""); //x3
766
767 out0elem = LLVMBuildExtractElement(builder, out[i],
768 ind0, ""); //x0
769 out1elem = LLVMBuildExtractElement(builder, out[i],
770 ind1, ""); //x1
771 out2elem = LLVMBuildExtractElement(builder, out[i],
772 ind2, ""); //x2
773 out3elem = LLVMBuildExtractElement(builder, out[i],
774 ind3, ""); //x3
775
776 LLVMBuildStore(builder, out0elem, clip0_ptr);
777 LLVMBuildStore(builder, out1elem, clip1_ptr);
778 LLVMBuildStore(builder, out2elem, clip2_ptr);
779 LLVMBuildStore(builder, out3elem, clip3_ptr);
780
781 indices[1]= LLVMBuildAdd(builder, indices[1], ind1, "");
782 }
783
784 }
785
786 /*
787 * Transforms the outputs for viewport mapping
788 */
789 static void
790 generate_viewport(struct draw_llvm *llvm,
791 LLVMBuilderRef builder,
792 LLVMValueRef (*outputs)[NUM_CHANNELS])
793 {
794 int i;
795 const float *scaleA = llvm->draw->viewport.scale;
796 const float *transA = llvm->draw->viewport.translate;
797 struct lp_type f32_type = lp_type_float_vec(32);
798 LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
799 LLVMValueRef const1 = lp_build_const_vec(f32_type, 1.0); /*1.0 1.0 1.0 1.0*/
800
801 /* for 1/w convention*/
802 out3 = LLVMBuildFDiv(builder, const1, out3, "");
803 LLVMBuildStore(builder, out3, outputs[0][3]);
804
805 /* Viewport Mapping */
806 for (i=0; i<3; i++){
807 LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/
808 LLVMValueRef scale = lp_build_const_vec(f32_type, scaleA[i]); /*sx sx sx sx*/
809 LLVMValueRef trans = lp_build_const_vec(f32_type, transA[i]); /*tx tx tx tx*/
810
811 /* divide by w */
812 out = LLVMBuildMul(builder, out, out3, "");
813 /* mult by scale */
814 out = LLVMBuildMul(builder, out, scale, "");
815 /* add translation */
816 out = LLVMBuildAdd(builder, out, trans, "");
817
818 /* store transformed outputs */
819 LLVMBuildStore(builder, out, outputs[0][i]);
820 }
821
822 }
823
824 /* Equivalent of _mm_set1_ps(a)
825 */
826 static LLVMValueRef vec4f_from_scalar(LLVMBuilderRef bld,
827 LLVMValueRef a,
828 const char *name)
829 {
830 LLVMValueRef res = LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
831 int i;
832
833 for(i = 0; i < 4; ++i) {
834 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
835 res = LLVMBuildInsertElement(bld, res, a, index, i == 3 ? name : "");
836 }
837
838 return res;
839 }
840
841 /*
842 * Returns clipmask as 4xi32 bitmask for the 4 vertices
843 */
844 static LLVMValueRef
845 generate_clipmask(LLVMBuilderRef builder,
846 LLVMValueRef (*outputs)[NUM_CHANNELS],
847 boolean clip_xy,
848 boolean clip_z,
849 boolean clip_user,
850 boolean clip_halfz,
851 unsigned nr,
852 LLVMValueRef context_ptr)
853 {
854 LLVMValueRef mask; /* stores the <4xi32> clipmasks */
855 LLVMValueRef test, temp;
856 LLVMValueRef zero, shift;
857 LLVMValueRef pos_x, pos_y, pos_z, pos_w;
858 LLVMValueRef plane1, planes, plane_ptr, sum;
859
860 unsigned i;
861
862 struct lp_type f32_type = lp_type_float_vec(32);
863
864 zero = lp_build_const_vec(f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */
865 shift = lp_build_const_int_vec(lp_type_int_vec(32), 1); /* 1 1 1 1 */
866
867 /* Assuming position stored at output[0] */
868 pos_x = LLVMBuildLoad(builder, outputs[0][0], ""); /*x0 x1 x2 x3*/
869 pos_y = LLVMBuildLoad(builder, outputs[0][1], ""); /*y0 y1 y2 y3*/
870 pos_z = LLVMBuildLoad(builder, outputs[0][2], ""); /*z0 z1 z2 z3*/
871 pos_w = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/
872
873 /* Cliptest, for hardwired planes */
874 if (clip_xy){
875 /* plane 1 */
876 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
877 temp = shift;
878 test = LLVMBuildAnd(builder, test, temp, "");
879 mask = test;
880
881 /* plane 2 */
882 test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
883 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
884 temp = LLVMBuildShl(builder, temp, shift, "");
885 test = LLVMBuildAnd(builder, test, temp, "");
886 mask = LLVMBuildOr(builder, mask, test, "");
887
888 /* plane 3 */
889 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
890 temp = LLVMBuildShl(builder, temp, shift, "");
891 test = LLVMBuildAnd(builder, test, temp, "");
892 mask = LLVMBuildOr(builder, mask, test, "");
893
894 /* plane 4 */
895 test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
896 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
897 temp = LLVMBuildShl(builder, temp, shift, "");
898 test = LLVMBuildAnd(builder, test, temp, "");
899 mask = LLVMBuildOr(builder, mask, test, "");
900 }
901
902 if (clip_z){
903 if (clip_halfz){
904 /* plane 5 */
905 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
906 temp = LLVMBuildShl(builder, temp, shift, "");
907 test = LLVMBuildAnd(builder, test, temp, "");
908 mask = LLVMBuildOr(builder, mask, test, "");
909 }
910 else{
911 /* plane 5 */
912 test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
913 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, test);
914 temp = LLVMBuildShl(builder, temp, shift, "");
915 test = LLVMBuildAnd(builder, test, temp, "");
916 mask = LLVMBuildOr(builder, mask, test, "");
917 }
918 /* plane 6 */
919 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
920 temp = LLVMBuildShl(builder, temp, shift, "");
921 test = LLVMBuildAnd(builder, test, temp, "");
922 mask = LLVMBuildOr(builder, mask, test, "");
923 }
924
925 if (clip_user){
926 LLVMValueRef planes_ptr = draw_jit_context_planes(builder, context_ptr);
927 LLVMValueRef indices[3];
928
929 /* userclip planes */
930 for (i = 6; i < nr; i++) {
931 indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
932 indices[1] = LLVMConstInt(LLVMInt32Type(), i, 0);
933
934 indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0);
935 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
936 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
937 planes = vec4f_from_scalar(builder, plane1, "plane4_x");
938 sum = LLVMBuildMul(builder, planes, pos_x, "");
939
940 indices[2] = LLVMConstInt(LLVMInt32Type(), 1, 0);
941 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
942 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
943 planes = vec4f_from_scalar(builder, plane1, "plane4_y");
944 test = LLVMBuildMul(builder, planes, pos_y, "");
945 sum = LLVMBuildFAdd(builder, sum, test, "");
946
947 indices[2] = LLVMConstInt(LLVMInt32Type(), 2, 0);
948 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
949 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
950 planes = vec4f_from_scalar(builder, plane1, "plane4_z");
951 test = LLVMBuildMul(builder, planes, pos_z, "");
952 sum = LLVMBuildFAdd(builder, sum, test, "");
953
954 indices[2] = LLVMConstInt(LLVMInt32Type(), 3, 0);
955 plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
956 plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
957 planes = vec4f_from_scalar(builder, plane1, "plane4_w");
958 test = LLVMBuildMul(builder, planes, pos_w, "");
959 sum = LLVMBuildFAdd(builder, sum, test, "");
960
961 test = lp_build_compare(builder, f32_type, PIPE_FUNC_GREATER, zero, sum);
962 temp = LLVMBuildShl(builder, temp, shift, "");
963 test = LLVMBuildAnd(builder, test, temp, "");
964 mask = LLVMBuildOr(builder, mask, test, "");
965 }
966 }
967 return mask;
968 }
969
970 /*
971 * Returns boolean if any clipping has occurred
972 * Used zero/non-zero i32 value to represent boolean
973 */
974 static void
975 clipmask_bool(LLVMBuilderRef builder,
976 LLVMValueRef clipmask,
977 LLVMValueRef ret_ptr)
978 {
979 LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, "");
980 LLVMValueRef temp;
981 int i;
982
983 for (i=0; i<4; i++){
984 temp = LLVMBuildExtractElement(builder, clipmask,
985 LLVMConstInt(LLVMInt32Type(), i, 0) , "");
986 ret = LLVMBuildOr(builder, ret, temp, "");
987 }
988
989 LLVMBuildStore(builder, ret, ret_ptr);
990 }
991
992 static void
993 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
994 {
995 LLVMTypeRef arg_types[8];
996 LLVMTypeRef func_type;
997 LLVMValueRef context_ptr;
998 LLVMBasicBlockRef block;
999 LLVMBuilderRef builder;
1000 LLVMValueRef start, end, count, stride, step, io_itr;
1001 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1002 LLVMValueRef instance_id;
1003 struct draw_context *draw = llvm->draw;
1004 unsigned i, j;
1005 struct lp_build_context bld;
1006 struct lp_build_loop_state lp_loop;
1007 const int max_vertices = 4;
1008 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1009 void *code;
1010 struct lp_build_sampler_soa *sampler = 0;
1011 LLVMValueRef ret, ret_ptr;
1012 boolean bypass_viewport = variant->key.bypass_viewport;
1013 boolean enable_cliptest = variant->key.clip_xy ||
1014 variant->key.clip_z ||
1015 variant->key.clip_user;
1016
1017 arg_types[0] = llvm->context_ptr_type; /* context */
1018 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
1019 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
1020 arg_types[3] = LLVMInt32Type(); /* start */
1021 arg_types[4] = LLVMInt32Type(); /* count */
1022 arg_types[5] = LLVMInt32Type(); /* stride */
1023 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
1024 arg_types[7] = LLVMInt32Type(); /* instance_id */
1025
1026 func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
1027
1028 variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type);
1029 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv);
1030 for(i = 0; i < Elements(arg_types); ++i)
1031 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1032 LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute);
1033
1034 context_ptr = LLVMGetParam(variant->function, 0);
1035 io_ptr = LLVMGetParam(variant->function, 1);
1036 vbuffers_ptr = LLVMGetParam(variant->function, 2);
1037 start = LLVMGetParam(variant->function, 3);
1038 count = LLVMGetParam(variant->function, 4);
1039 stride = LLVMGetParam(variant->function, 5);
1040 vb_ptr = LLVMGetParam(variant->function, 6);
1041 instance_id = LLVMGetParam(variant->function, 7);
1042
1043 lp_build_name(context_ptr, "context");
1044 lp_build_name(io_ptr, "io");
1045 lp_build_name(vbuffers_ptr, "vbuffers");
1046 lp_build_name(start, "start");
1047 lp_build_name(count, "count");
1048 lp_build_name(stride, "stride");
1049 lp_build_name(vb_ptr, "vb");
1050 lp_build_name(instance_id, "instance_id");
1051
1052 /*
1053 * Function body
1054 */
1055
1056 block = LLVMAppendBasicBlock(variant->function, "entry");
1057 builder = LLVMCreateBuilder();
1058 LLVMPositionBuilderAtEnd(builder, block);
1059
1060 lp_build_context_init(&bld, builder, lp_type_int(32));
1061
1062 end = lp_build_add(&bld, start, count);
1063
1064 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
1065
1066 /* function will return non-zero i32 value if any clipped vertices */
1067 ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
1068 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
1069
1070 /* code generated texture sampling */
1071 sampler = draw_llvm_sampler_soa_create(
1072 draw_llvm_variant_key_samplers(&variant->key),
1073 context_ptr);
1074
1075 #if DEBUG_STORE
1076 lp_build_printf(builder, "start = %d, end = %d, step = %d\n",
1077 start, end, step);
1078 #endif
1079 lp_build_loop_begin(builder, start, &lp_loop);
1080 {
1081 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1082 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1083 LLVMValueRef io;
1084 LLVMValueRef clipmask; /* holds the clipmask value */
1085 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1086
1087 io_itr = LLVMBuildSub(builder, lp_loop.counter, start, "");
1088 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1089 #if DEBUG_STORE
1090 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1091 io_itr, io, lp_loop.counter);
1092 #endif
1093 for (i = 0; i < NUM_CHANNELS; ++i) {
1094 LLVMValueRef true_index = LLVMBuildAdd(
1095 builder,
1096 lp_loop.counter,
1097 LLVMConstInt(LLVMInt32Type(), i, 0), "");
1098 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1099 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1100 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
1101 velem->vertex_buffer_index,
1102 0);
1103 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1104 &vb_index, 1, "");
1105 generate_fetch(builder, vbuffers_ptr,
1106 &aos_attribs[j][i], velem, vb, true_index,
1107 instance_id);
1108 }
1109 }
1110 convert_to_soa(builder, aos_attribs, inputs,
1111 draw->pt.nr_vertex_elements);
1112
1113 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1114 generate_vs(llvm,
1115 builder,
1116 outputs,
1117 ptr_aos,
1118 context_ptr,
1119 sampler);
1120
1121 /* store original positions in clip before further manipulation */
1122 store_clip(builder, io, outputs);
1123
1124 /* do cliptest */
1125 if (enable_cliptest){
1126 /* allocate clipmask, assign it integer type */
1127 clipmask = generate_clipmask(builder, outputs,
1128 variant->key.clip_xy,
1129 variant->key.clip_z,
1130 variant->key.clip_user,
1131 variant->key.clip_halfz,
1132 variant->key.nr_planes,
1133 context_ptr);
1134 /* return clipping boolean value for function */
1135 clipmask_bool(builder, clipmask, ret_ptr);
1136 }
1137 else{
1138 clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
1139 }
1140
1141 /* do viewport mapping */
1142 if (!bypass_viewport){
1143 generate_viewport(llvm, builder, outputs);
1144 }
1145
1146 /* store clipmask in vertex header and positions in data */
1147 convert_to_aos(builder, io, outputs, clipmask,
1148 draw->vs.vertex_shader->info.num_outputs,
1149 max_vertices);
1150 }
1151
1152 lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop);
1153
1154 sampler->destroy(sampler);
1155
1156 #ifdef PIPE_ARCH_X86
1157 /* Avoid corrupting the FPU stack on 32bit OSes. */
1158 lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
1159 #endif
1160
1161 ret = LLVMBuildLoad(builder, ret_ptr,"");
1162 LLVMBuildRet(builder, ret);
1163
1164 LLVMDisposeBuilder(builder);
1165
1166 /*
1167 * Translate the LLVM IR into machine code.
1168 */
1169 #ifdef DEBUG
1170 if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
1171 lp_debug_dump_value(variant->function);
1172 assert(0);
1173 }
1174 #endif
1175
1176 LLVMRunFunctionPassManager(llvm->pass, variant->function);
1177
1178 if (gallivm_debug & GALLIVM_DEBUG_IR) {
1179 lp_debug_dump_value(variant->function);
1180 debug_printf("\n");
1181 }
1182
1183 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
1184 variant->jit_func = (draw_jit_vert_func)pointer_to_func(code);
1185
1186 if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1187 lp_disassemble(code);
1188 }
1189 lp_func_delete_body(variant->function);
1190 }
1191
1192
1193 static void
1194 draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
1195 {
1196 LLVMTypeRef arg_types[8];
1197 LLVMTypeRef func_type;
1198 LLVMValueRef context_ptr;
1199 LLVMBasicBlockRef block;
1200 LLVMBuilderRef builder;
1201 LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
1202 LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
1203 LLVMValueRef instance_id;
1204 struct draw_context *draw = llvm->draw;
1205 unsigned i, j;
1206 struct lp_build_context bld;
1207 struct lp_build_loop_state lp_loop;
1208 const int max_vertices = 4;
1209 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
1210 LLVMValueRef fetch_max;
1211 void *code;
1212 struct lp_build_sampler_soa *sampler = 0;
1213 LLVMValueRef ret, ret_ptr;
1214 boolean bypass_viewport = variant->key.bypass_viewport;
1215 boolean enable_cliptest = variant->key.clip_xy ||
1216 variant->key.clip_z ||
1217 variant->key.clip_user;
1218
1219 arg_types[0] = llvm->context_ptr_type; /* context */
1220 arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
1221 arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
1222 arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
1223 arg_types[4] = LLVMInt32Type(); /* fetch_count */
1224 arg_types[5] = LLVMInt32Type(); /* stride */
1225 arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
1226 arg_types[7] = LLVMInt32Type(); /* instance_id */
1227
1228 func_type = LLVMFunctionType(LLVMInt32Type(), arg_types, Elements(arg_types), 0);
1229
1230 variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
1231 LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
1232 for(i = 0; i < Elements(arg_types); ++i)
1233 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
1234 LLVMAddAttribute(LLVMGetParam(variant->function_elts, i),
1235 LLVMNoAliasAttribute);
1236
1237 context_ptr = LLVMGetParam(variant->function_elts, 0);
1238 io_ptr = LLVMGetParam(variant->function_elts, 1);
1239 vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
1240 fetch_elts = LLVMGetParam(variant->function_elts, 3);
1241 fetch_count = LLVMGetParam(variant->function_elts, 4);
1242 stride = LLVMGetParam(variant->function_elts, 5);
1243 vb_ptr = LLVMGetParam(variant->function_elts, 6);
1244 instance_id = LLVMGetParam(variant->function_elts, 7);
1245
1246 lp_build_name(context_ptr, "context");
1247 lp_build_name(io_ptr, "io");
1248 lp_build_name(vbuffers_ptr, "vbuffers");
1249 lp_build_name(fetch_elts, "fetch_elts");
1250 lp_build_name(fetch_count, "fetch_count");
1251 lp_build_name(stride, "stride");
1252 lp_build_name(vb_ptr, "vb");
1253 lp_build_name(instance_id, "instance_id");
1254
1255 /*
1256 * Function body
1257 */
1258
1259 block = LLVMAppendBasicBlock(variant->function_elts, "entry");
1260 builder = LLVMCreateBuilder();
1261 LLVMPositionBuilderAtEnd(builder, block);
1262
1263 lp_build_context_init(&bld, builder, lp_type_int(32));
1264
1265 step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
1266
1267 /* code generated texture sampling */
1268 sampler = draw_llvm_sampler_soa_create(
1269 draw_llvm_variant_key_samplers(&variant->key),
1270 context_ptr);
1271
1272 fetch_max = LLVMBuildSub(builder, fetch_count,
1273 LLVMConstInt(LLVMInt32Type(), 1, 0),
1274 "fetch_max");
1275
1276 /* function returns non-zero i32 value if any clipped vertices */
1277 ret_ptr = lp_build_alloca(builder, LLVMInt32Type(), "");
1278 LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), ret_ptr);
1279
1280 lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
1281 {
1282 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
1283 LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
1284 LLVMValueRef io;
1285 LLVMValueRef clipmask; /* holds the clipmask value */
1286 const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
1287
1288 io_itr = lp_loop.counter;
1289 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
1290 #if DEBUG_STORE
1291 lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
1292 io_itr, io, lp_loop.counter);
1293 #endif
1294 for (i = 0; i < NUM_CHANNELS; ++i) {
1295 LLVMValueRef true_index = LLVMBuildAdd(
1296 builder,
1297 lp_loop.counter,
1298 LLVMConstInt(LLVMInt32Type(), i, 0), "");
1299 LLVMValueRef fetch_ptr;
1300
1301 /* make sure we're not out of bounds which can happen
1302 * if fetch_count % 4 != 0, because on the last iteration
1303 * a few of the 4 vertex fetches will be out of bounds */
1304 true_index = lp_build_min(&bld, true_index, fetch_max);
1305
1306 fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
1307 &true_index, 1, "");
1308 true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
1309 for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
1310 struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
1311 LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
1312 velem->vertex_buffer_index,
1313 0);
1314 LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
1315 &vb_index, 1, "");
1316 generate_fetch(builder, vbuffers_ptr,
1317 &aos_attribs[j][i], velem, vb, true_index,
1318 instance_id);
1319 }
1320 }
1321 convert_to_soa(builder, aos_attribs, inputs,
1322 draw->pt.nr_vertex_elements);
1323
1324 ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs;
1325 generate_vs(llvm,
1326 builder,
1327 outputs,
1328 ptr_aos,
1329 context_ptr,
1330 sampler);
1331
1332 /* store original positions in clip before further manipulation */
1333 store_clip(builder, io, outputs);
1334
1335 /* do cliptest */
1336 if (enable_cliptest){
1337 /* allocate clipmask, assign it integer type */
1338 clipmask = generate_clipmask(builder, outputs,
1339 variant->key.clip_xy,
1340 variant->key.clip_z,
1341 variant->key.clip_user,
1342 variant->key.clip_halfz,
1343 variant->key.nr_planes,
1344 context_ptr);
1345 /* return clipping boolean value for function */
1346 clipmask_bool(builder, clipmask, ret_ptr);
1347 }
1348 else{
1349 clipmask = lp_build_const_int_vec(lp_type_int_vec(32), 0);
1350 }
1351
1352 /* do viewport mapping */
1353 if (!bypass_viewport){
1354 generate_viewport(llvm, builder, outputs);
1355 }
1356
1357 /* store clipmask in vertex header,
1358 * original positions in clip
1359 * and transformed positions in data
1360 */
1361 convert_to_aos(builder, io, outputs, clipmask,
1362 draw->vs.vertex_shader->info.num_outputs,
1363 max_vertices);
1364 }
1365
1366 lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop);
1367
1368 sampler->destroy(sampler);
1369
1370 #ifdef PIPE_ARCH_X86
1371 /* Avoid corrupting the FPU stack on 32bit OSes. */
1372 lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0);
1373 #endif
1374
1375 ret = LLVMBuildLoad(builder, ret_ptr,"");
1376 LLVMBuildRet(builder, ret);
1377
1378 LLVMDisposeBuilder(builder);
1379
1380 /*
1381 * Translate the LLVM IR into machine code.
1382 */
1383 #ifdef DEBUG
1384 if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
1385 lp_debug_dump_value(variant->function_elts);
1386 assert(0);
1387 }
1388 #endif
1389
1390 LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
1391
1392 if (gallivm_debug & GALLIVM_DEBUG_IR) {
1393 lp_debug_dump_value(variant->function_elts);
1394 debug_printf("\n");
1395 }
1396
1397 code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts);
1398 variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code);
1399
1400 if (gallivm_debug & GALLIVM_DEBUG_ASM) {
1401 lp_disassemble(code);
1402 }
1403 lp_func_delete_body(variant->function_elts);
1404 }
1405
1406
1407 struct draw_llvm_variant_key *
1408 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
1409 {
1410 unsigned i;
1411 struct draw_llvm_variant_key *key;
1412 struct lp_sampler_static_state *sampler;
1413
1414 key = (struct draw_llvm_variant_key *)store;
1415
1416 /* Presumably all variants of the shader should have the same
1417 * number of vertex elements - ie the number of shader inputs.
1418 */
1419 key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements;
1420
1421 /* will have to rig this up properly later */
1422 key->clip_xy = llvm->draw->clip_xy;
1423 key->clip_z = llvm->draw->clip_z;
1424 key->clip_user = llvm->draw->clip_user;
1425 key->bypass_viewport = llvm->draw->identity_viewport;
1426 key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
1427 key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
1428 key->nr_planes = llvm->draw->nr_planes;
1429 key->pad = 0;
1430
1431 /* All variants of this shader will have the same value for
1432 * nr_samplers. Not yet trying to compact away holes in the
1433 * sampler array.
1434 */
1435 key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
1436
1437 sampler = draw_llvm_variant_key_samplers(key);
1438
1439 memcpy(key->vertex_element,
1440 llvm->draw->pt.vertex_element,
1441 sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
1442
1443 memset(sampler, 0, key->nr_samplers * sizeof *sampler);
1444
1445 for (i = 0 ; i < key->nr_samplers; i++) {
1446 lp_sampler_static_state(&sampler[i],
1447 llvm->draw->sampler_views[i],
1448 llvm->draw->samplers[i]);
1449 }
1450
1451 return key;
1452 }
1453
1454 void
1455 draw_llvm_set_mapped_texture(struct draw_context *draw,
1456 unsigned sampler_idx,
1457 uint32_t width, uint32_t height, uint32_t depth,
1458 uint32_t last_level,
1459 uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS],
1460 uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS],
1461 const void *data[DRAW_MAX_TEXTURE_LEVELS])
1462 {
1463 unsigned j;
1464 struct draw_jit_texture *jit_tex;
1465
1466 assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS);
1467
1468
1469 jit_tex = &draw->llvm->jit_context.textures[sampler_idx];
1470
1471 jit_tex->width = width;
1472 jit_tex->height = height;
1473 jit_tex->depth = depth;
1474 jit_tex->last_level = last_level;
1475
1476 for (j = 0; j <= last_level; j++) {
1477 jit_tex->data[j] = data[j];
1478 jit_tex->row_stride[j] = row_stride[j];
1479 jit_tex->img_stride[j] = img_stride[j];
1480 }
1481 }
1482
1483 void
1484 draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
1485 {
1486 struct draw_llvm *llvm = variant->llvm;
1487 struct draw_context *draw = llvm->draw;
1488
1489 if (variant->function_elts) {
1490 if (variant->function_elts)
1491 LLVMFreeMachineCodeForFunction(draw->engine,
1492 variant->function_elts);
1493 LLVMDeleteFunction(variant->function_elts);
1494 }
1495
1496 if (variant->function) {
1497 if (variant->function)
1498 LLVMFreeMachineCodeForFunction(draw->engine,
1499 variant->function);
1500 LLVMDeleteFunction(variant->function);
1501 }
1502
1503 remove_from_list(&variant->list_item_local);
1504 variant->shader->variants_cached--;
1505 remove_from_list(&variant->list_item_global);
1506 llvm->nr_variants--;
1507 FREE(variant);
1508 }