llvmpipe: silence cast warnings in test programs
[mesa.git] / src / gallium / drivers / llvmpipe / lp_test_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
40 #include "gallivm/lp_bld_type.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "lp_bld_blend.h"
43 #include "lp_test.h"
44
45
/**
 * Memory layout of the vectors passed to the generated blend function.
 */
enum vector_mode
{
   /** Array of structures: one vector per operand, channels interleaved. */
   AoS = 0,
   /** Structure of arrays: four vectors per operand, one per channel. */
   SoA = 1
};
51
52
/**
 * Signature of the JIT-compiled blend routine under test.
 *
 * The routine reads the source, destination and constant color vectors and
 * stores the blended result through @p res.
 */
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);


/**
 * Reinterpret a data pointer as a blend test function pointer.
 *
 * The conversion goes through a union instead of a direct cast so that the
 * compiler does not warn about converting between object and function
 * pointer types.
 *
 * @param p  address of the generated machine code
 * @return   the same address, typed as a callable blend test function
 */
static blend_test_ptr_t
voidptr_to_blend_test_ptr_t(void *p)
{
   union {
      void *data;
      blend_test_ptr_t func;
   } pun;

   pun.data = p;
   return pun.func;
}
66
67
68
/**
 * Write the column header line of the tab-separated results file.
 *
 * The columns are listed in the same order in which write_tsv_row() emits
 * its values.
 *
 * @param fp  output stream; it is flushed before returning
 */
void
write_tsv_header(FILE *fp)
{
   static const char header[] =
      "result\t"
      "cycles_per_channel\t"
      "mode\t"
      "type\t"
      "sep_func\t"
      "sep_src_factor\t"
      "sep_dst_factor\t"
      "rgb_func\t"
      "rgb_src_factor\t"
      "rgb_dst_factor\t"
      "alpha_func\t"
      "alpha_src_factor\t"
      "alpha_dst_factor\n";

   fputs(header, fp);
   fflush(fp);
}
89
90
91 static void
92 write_tsv_row(FILE *fp,
93 const struct pipe_blend_state *blend,
94 enum vector_mode mode,
95 struct lp_type type,
96 double cycles,
97 boolean success)
98 {
99 fprintf(fp, "%s\t", success ? "pass" : "fail");
100
101 if (mode == AoS) {
102 fprintf(fp, "%.1f\t", cycles / type.length);
103 fprintf(fp, "aos\t");
104 }
105
106 if (mode == SoA) {
107 fprintf(fp, "%.1f\t", cycles / (4 * type.length));
108 fprintf(fp, "soa\t");
109 }
110
111 fprintf(fp, "%s%u%sx%u\t",
112 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
113 type.width,
114 type.norm ? "n" : "",
115 type.length);
116
117 fprintf(fp,
118 "%s\t%s\t%s\t",
119 blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
120 blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
121 blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
122
123 fprintf(fp,
124 "%s\t%s\t%s\t%s\t%s\t%s\n",
125 util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
126 util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
127 util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
128 util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
129 util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
130 util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
131
132 fflush(fp);
133 }
134
135
136 static void
137 dump_blend_type(FILE *fp,
138 const struct pipe_blend_state *blend,
139 enum vector_mode mode,
140 struct lp_type type)
141 {
142 fprintf(fp, "%s", mode ? "soa" : "aos");
143
144 fprintf(fp, " type=%s%u%sx%u",
145 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
146 type.width,
147 type.norm ? "n" : "",
148 type.length);
149
150 fprintf(fp,
151 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
152 "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
153 "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
154 "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
155 "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
156 "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
157 "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
158
159 fprintf(fp, " ...\n");
160 fflush(fp);
161 }
162
163
164 static LLVMValueRef
165 add_blend_test(LLVMModuleRef module,
166 const struct pipe_blend_state *blend,
167 enum vector_mode mode,
168 struct lp_type type)
169 {
170 LLVMTypeRef vec_type;
171 LLVMTypeRef args[4];
172 LLVMValueRef func;
173 LLVMValueRef src_ptr;
174 LLVMValueRef dst_ptr;
175 LLVMValueRef const_ptr;
176 LLVMValueRef res_ptr;
177 LLVMBasicBlockRef block;
178 LLVMBuilderRef builder;
179 const unsigned rt = 0;
180
181 vec_type = lp_build_vec_type(type);
182
183 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
184 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
185 LLVMSetFunctionCallConv(func, LLVMCCallConv);
186 src_ptr = LLVMGetParam(func, 0);
187 dst_ptr = LLVMGetParam(func, 1);
188 const_ptr = LLVMGetParam(func, 2);
189 res_ptr = LLVMGetParam(func, 3);
190
191 block = LLVMAppendBasicBlock(func, "entry");
192 builder = LLVMCreateBuilder();
193 LLVMPositionBuilderAtEnd(builder, block);
194
195 if (mode == AoS) {
196 LLVMValueRef src;
197 LLVMValueRef dst;
198 LLVMValueRef con;
199 LLVMValueRef res;
200
201 src = LLVMBuildLoad(builder, src_ptr, "src");
202 dst = LLVMBuildLoad(builder, dst_ptr, "dst");
203 con = LLVMBuildLoad(builder, const_ptr, "const");
204
205 res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3);
206
207 lp_build_name(res, "res");
208
209 LLVMBuildStore(builder, res, res_ptr);
210 }
211
212 if (mode == SoA) {
213 LLVMValueRef src[4];
214 LLVMValueRef dst[4];
215 LLVMValueRef con[4];
216 LLVMValueRef res[4];
217 unsigned i;
218
219 for(i = 0; i < 4; ++i) {
220 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
221 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
222 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
223 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
224 lp_build_name(src[i], "src.%c", "rgba"[i]);
225 lp_build_name(con[i], "con.%c", "rgba"[i]);
226 lp_build_name(dst[i], "dst.%c", "rgba"[i]);
227 }
228
229 lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
230
231 for(i = 0; i < 4; ++i) {
232 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
233 lp_build_name(res[i], "res.%c", "rgba"[i]);
234 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
235 }
236 }
237
238 LLVMBuildRetVoid(builder);;
239
240 LLVMDisposeBuilder(builder);
241 return func;
242 }
243
244
/** Saturating add: store A + B in R, clamped to a ceiling of 1.0 */
#define ADD_SAT(R, A, B) \
   do { \
      (R) = (A) + (B); \
      if ((R) > 1.0f) { \
         (R) = 1.0f; \
      } \
   } while (0)
250
/** Saturating subtract: store A - B in R, clamped to a floor of 0.0 */
#define SUB_SAT(R, A, B) \
   do { \
      (R) = (A) - (B); \
      if ((R) < 0.0f) { \
         (R) = 0.0f; \
      } \
   } while (0)
256
257
258 static void
259 compute_blend_ref_term(unsigned rgb_factor,
260 unsigned alpha_factor,
261 const double *factor,
262 const double *src,
263 const double *dst,
264 const double *con,
265 double *term)
266 {
267 double temp;
268
269 switch (rgb_factor) {
270 case PIPE_BLENDFACTOR_ONE:
271 term[0] = factor[0]; /* R */
272 term[1] = factor[1]; /* G */
273 term[2] = factor[2]; /* B */
274 break;
275 case PIPE_BLENDFACTOR_SRC_COLOR:
276 term[0] = factor[0] * src[0]; /* R */
277 term[1] = factor[1] * src[1]; /* G */
278 term[2] = factor[2] * src[2]; /* B */
279 break;
280 case PIPE_BLENDFACTOR_SRC_ALPHA:
281 term[0] = factor[0] * src[3]; /* R */
282 term[1] = factor[1] * src[3]; /* G */
283 term[2] = factor[2] * src[3]; /* B */
284 break;
285 case PIPE_BLENDFACTOR_DST_COLOR:
286 term[0] = factor[0] * dst[0]; /* R */
287 term[1] = factor[1] * dst[1]; /* G */
288 term[2] = factor[2] * dst[2]; /* B */
289 break;
290 case PIPE_BLENDFACTOR_DST_ALPHA:
291 term[0] = factor[0] * dst[3]; /* R */
292 term[1] = factor[1] * dst[3]; /* G */
293 term[2] = factor[2] * dst[3]; /* B */
294 break;
295 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
296 temp = MIN2(src[3], 1.0f - dst[3]);
297 term[0] = factor[0] * temp; /* R */
298 term[1] = factor[1] * temp; /* G */
299 term[2] = factor[2] * temp; /* B */
300 break;
301 case PIPE_BLENDFACTOR_CONST_COLOR:
302 term[0] = factor[0] * con[0]; /* R */
303 term[1] = factor[1] * con[1]; /* G */
304 term[2] = factor[2] * con[2]; /* B */
305 break;
306 case PIPE_BLENDFACTOR_CONST_ALPHA:
307 term[0] = factor[0] * con[3]; /* R */
308 term[1] = factor[1] * con[3]; /* G */
309 term[2] = factor[2] * con[3]; /* B */
310 break;
311 case PIPE_BLENDFACTOR_SRC1_COLOR:
312 assert(0); /* to do */
313 break;
314 case PIPE_BLENDFACTOR_SRC1_ALPHA:
315 assert(0); /* to do */
316 break;
317 case PIPE_BLENDFACTOR_ZERO:
318 term[0] = 0.0f; /* R */
319 term[1] = 0.0f; /* G */
320 term[2] = 0.0f; /* B */
321 break;
322 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
323 term[0] = factor[0] * (1.0f - src[0]); /* R */
324 term[1] = factor[1] * (1.0f - src[1]); /* G */
325 term[2] = factor[2] * (1.0f - src[2]); /* B */
326 break;
327 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
328 term[0] = factor[0] * (1.0f - src[3]); /* R */
329 term[1] = factor[1] * (1.0f - src[3]); /* G */
330 term[2] = factor[2] * (1.0f - src[3]); /* B */
331 break;
332 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
333 term[0] = factor[0] * (1.0f - dst[3]); /* R */
334 term[1] = factor[1] * (1.0f - dst[3]); /* G */
335 term[2] = factor[2] * (1.0f - dst[3]); /* B */
336 break;
337 case PIPE_BLENDFACTOR_INV_DST_COLOR:
338 term[0] = factor[0] * (1.0f - dst[0]); /* R */
339 term[1] = factor[1] * (1.0f - dst[1]); /* G */
340 term[2] = factor[2] * (1.0f - dst[2]); /* B */
341 break;
342 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
343 term[0] = factor[0] * (1.0f - con[0]); /* R */
344 term[1] = factor[1] * (1.0f - con[1]); /* G */
345 term[2] = factor[2] * (1.0f - con[2]); /* B */
346 break;
347 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
348 term[0] = factor[0] * (1.0f - con[3]); /* R */
349 term[1] = factor[1] * (1.0f - con[3]); /* G */
350 term[2] = factor[2] * (1.0f - con[3]); /* B */
351 break;
352 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
353 assert(0); /* to do */
354 break;
355 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
356 assert(0); /* to do */
357 break;
358 default:
359 assert(0);
360 }
361
362 /*
363 * Compute src/first term A
364 */
365 switch (alpha_factor) {
366 case PIPE_BLENDFACTOR_ONE:
367 term[3] = factor[3]; /* A */
368 break;
369 case PIPE_BLENDFACTOR_SRC_COLOR:
370 case PIPE_BLENDFACTOR_SRC_ALPHA:
371 term[3] = factor[3] * src[3]; /* A */
372 break;
373 case PIPE_BLENDFACTOR_DST_COLOR:
374 case PIPE_BLENDFACTOR_DST_ALPHA:
375 term[3] = factor[3] * dst[3]; /* A */
376 break;
377 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
378 term[3] = src[3]; /* A */
379 break;
380 case PIPE_BLENDFACTOR_CONST_COLOR:
381 case PIPE_BLENDFACTOR_CONST_ALPHA:
382 term[3] = factor[3] * con[3]; /* A */
383 break;
384 case PIPE_BLENDFACTOR_ZERO:
385 term[3] = 0.0f; /* A */
386 break;
387 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
388 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
389 term[3] = factor[3] * (1.0f - src[3]); /* A */
390 break;
391 case PIPE_BLENDFACTOR_INV_DST_COLOR:
392 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
393 term[3] = factor[3] * (1.0f - dst[3]); /* A */
394 break;
395 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
396 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
397 term[3] = factor[3] * (1.0f - con[3]);
398 break;
399 default:
400 assert(0);
401 }
402 }
403
404
405 static void
406 compute_blend_ref(const struct pipe_blend_state *blend,
407 const double *src,
408 const double *dst,
409 const double *con,
410 double *res)
411 {
412 double src_term[4];
413 double dst_term[4];
414
415 compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
416 src, src, dst, con, src_term);
417 compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
418 dst, src, dst, con, dst_term);
419
420 /*
421 * Combine RGB terms
422 */
423 switch (blend->rt[0].rgb_func) {
424 case PIPE_BLEND_ADD:
425 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
426 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
427 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
428 break;
429 case PIPE_BLEND_SUBTRACT:
430 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
431 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
432 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
433 break;
434 case PIPE_BLEND_REVERSE_SUBTRACT:
435 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
436 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
437 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
438 break;
439 case PIPE_BLEND_MIN:
440 res[0] = MIN2(src_term[0], dst_term[0]); /* R */
441 res[1] = MIN2(src_term[1], dst_term[1]); /* G */
442 res[2] = MIN2(src_term[2], dst_term[2]); /* B */
443 break;
444 case PIPE_BLEND_MAX:
445 res[0] = MAX2(src_term[0], dst_term[0]); /* R */
446 res[1] = MAX2(src_term[1], dst_term[1]); /* G */
447 res[2] = MAX2(src_term[2], dst_term[2]); /* B */
448 break;
449 default:
450 assert(0);
451 }
452
453 /*
454 * Combine A terms
455 */
456 switch (blend->rt[0].alpha_func) {
457 case PIPE_BLEND_ADD:
458 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
459 break;
460 case PIPE_BLEND_SUBTRACT:
461 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
462 break;
463 case PIPE_BLEND_REVERSE_SUBTRACT:
464 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
465 break;
466 case PIPE_BLEND_MIN:
467 res[3] = MIN2(src_term[3], dst_term[3]); /* A */
468 break;
469 case PIPE_BLEND_MAX:
470 res[3] = MAX2(src_term[3], dst_term[3]); /* A */
471 break;
472 default:
473 assert(0);
474 }
475 }
476
477
478 PIPE_ALIGN_STACK
479 static boolean
480 test_one(unsigned verbose,
481 FILE *fp,
482 const struct pipe_blend_state *blend,
483 enum vector_mode mode,
484 struct lp_type type)
485 {
486 LLVMModuleRef module = NULL;
487 LLVMValueRef func = NULL;
488 LLVMExecutionEngineRef engine = NULL;
489 LLVMModuleProviderRef provider = NULL;
490 LLVMPassManagerRef pass = NULL;
491 char *error = NULL;
492 blend_test_ptr_t blend_test_ptr;
493 boolean success;
494 const unsigned n = LP_TEST_NUM_SAMPLES;
495 int64_t cycles[LP_TEST_NUM_SAMPLES];
496 double cycles_avg = 0.0;
497 unsigned i, j;
498 void *code;
499
500 if(verbose >= 1)
501 dump_blend_type(stdout, blend, mode, type);
502
503 module = LLVMModuleCreateWithName("test");
504
505 func = add_blend_test(module, blend, mode, type);
506
507 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
508 LLVMDumpModule(module);
509 abort();
510 }
511 LLVMDisposeMessage(error);
512
513 provider = LLVMCreateModuleProviderForExistingModule(module);
514 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
515 if(verbose < 1)
516 dump_blend_type(stderr, blend, mode, type);
517 fprintf(stderr, "%s\n", error);
518 LLVMDisposeMessage(error);
519 abort();
520 }
521
522 #if 0
523 pass = LLVMCreatePassManager();
524 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
525 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
526 * but there are more on SVN. */
527 LLVMAddConstantPropagationPass(pass);
528 LLVMAddInstructionCombiningPass(pass);
529 LLVMAddPromoteMemoryToRegisterPass(pass);
530 LLVMAddGVNPass(pass);
531 LLVMAddCFGSimplificationPass(pass);
532 LLVMRunPassManager(pass, module);
533 #else
534 (void)pass;
535 #endif
536
537 if(verbose >= 2)
538 LLVMDumpModule(module);
539
540 code = LLVMGetPointerToGlobal(engine, func);
541 blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
542
543 if(verbose >= 2)
544 lp_disassemble(code);
545
546 success = TRUE;
547 for(i = 0; i < n && success; ++i) {
548 if(mode == AoS) {
549 PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
550 PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
551 PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
552 PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
553 PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
554 int64_t start_counter = 0;
555 int64_t end_counter = 0;
556
557 random_vec(type, src);
558 random_vec(type, dst);
559 random_vec(type, con);
560
561 {
562 double fsrc[LP_MAX_VECTOR_LENGTH];
563 double fdst[LP_MAX_VECTOR_LENGTH];
564 double fcon[LP_MAX_VECTOR_LENGTH];
565 double fref[LP_MAX_VECTOR_LENGTH];
566
567 read_vec(type, src, fsrc);
568 read_vec(type, dst, fdst);
569 read_vec(type, con, fcon);
570
571 for(j = 0; j < type.length; j += 4)
572 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
573
574 write_vec(type, ref, fref);
575 }
576
577 start_counter = rdtsc();
578 blend_test_ptr(src, dst, con, res);
579 end_counter = rdtsc();
580
581 cycles[i] = end_counter - start_counter;
582
583 if(!compare_vec(type, res, ref)) {
584 success = FALSE;
585
586 if(verbose < 1)
587 dump_blend_type(stderr, blend, mode, type);
588 fprintf(stderr, "MISMATCH\n");
589
590 fprintf(stderr, " Src: ");
591 dump_vec(stderr, type, src);
592 fprintf(stderr, "\n");
593
594 fprintf(stderr, " Dst: ");
595 dump_vec(stderr, type, dst);
596 fprintf(stderr, "\n");
597
598 fprintf(stderr, " Con: ");
599 dump_vec(stderr, type, con);
600 fprintf(stderr, "\n");
601
602 fprintf(stderr, " Res: ");
603 dump_vec(stderr, type, res);
604 fprintf(stderr, "\n");
605
606 fprintf(stderr, " Ref: ");
607 dump_vec(stderr, type, ref);
608 fprintf(stderr, "\n");
609 }
610 }
611
612 if(mode == SoA) {
613 const unsigned stride = type.length*type.width/8;
614 PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
615 PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
616 PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
617 PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
618 PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
619 int64_t start_counter = 0;
620 int64_t end_counter = 0;
621 boolean mismatch;
622
623 for(j = 0; j < 4; ++j) {
624 random_vec(type, src + j*stride);
625 random_vec(type, dst + j*stride);
626 random_vec(type, con + j*stride);
627 }
628
629 {
630 double fsrc[4];
631 double fdst[4];
632 double fcon[4];
633 double fref[4];
634 unsigned k;
635
636 for(k = 0; k < type.length; ++k) {
637 for(j = 0; j < 4; ++j) {
638 fsrc[j] = read_elem(type, src + j*stride, k);
639 fdst[j] = read_elem(type, dst + j*stride, k);
640 fcon[j] = read_elem(type, con + j*stride, k);
641 }
642
643 compute_blend_ref(blend, fsrc, fdst, fcon, fref);
644
645 for(j = 0; j < 4; ++j)
646 write_elem(type, ref + j*stride, k, fref[j]);
647 }
648 }
649
650 start_counter = rdtsc();
651 blend_test_ptr(src, dst, con, res);
652 end_counter = rdtsc();
653
654 cycles[i] = end_counter - start_counter;
655
656 mismatch = FALSE;
657 for (j = 0; j < 4; ++j)
658 if(!compare_vec(type, res + j*stride, ref + j*stride))
659 mismatch = TRUE;
660
661 if (mismatch) {
662 success = FALSE;
663
664 if(verbose < 1)
665 dump_blend_type(stderr, blend, mode, type);
666 fprintf(stderr, "MISMATCH\n");
667 for(j = 0; j < 4; ++j) {
668 char channel = "RGBA"[j];
669 fprintf(stderr, " Src%c: ", channel);
670 dump_vec(stderr, type, src + j*stride);
671 fprintf(stderr, "\n");
672
673 fprintf(stderr, " Dst%c: ", channel);
674 dump_vec(stderr, type, dst + j*stride);
675 fprintf(stderr, "\n");
676
677 fprintf(stderr, " Con%c: ", channel);
678 dump_vec(stderr, type, con + j*stride);
679 fprintf(stderr, "\n");
680
681 fprintf(stderr, " Res%c: ", channel);
682 dump_vec(stderr, type, res + j*stride);
683 fprintf(stderr, "\n");
684
685 fprintf(stderr, " Ref%c: ", channel);
686 dump_vec(stderr, type, ref + j*stride);
687 fprintf(stderr, "\n");
688 }
689 }
690 }
691 }
692
693 /*
694 * Unfortunately the output of cycle counter is not very reliable as it comes
695 * -- sometimes we get outliers (due IRQs perhaps?) which are
696 * better removed to avoid random or biased data.
697 */
698 {
699 double sum = 0.0, sum2 = 0.0;
700 double avg, std;
701 unsigned m;
702
703 for(i = 0; i < n; ++i) {
704 sum += cycles[i];
705 sum2 += cycles[i]*cycles[i];
706 }
707
708 avg = sum/n;
709 std = sqrtf((sum2 - n*avg*avg)/n);
710
711 m = 0;
712 sum = 0.0;
713 for(i = 0; i < n; ++i) {
714 if(fabs(cycles[i] - avg) <= 4.0*std) {
715 sum += cycles[i];
716 ++m;
717 }
718 }
719
720 cycles_avg = sum/m;
721
722 }
723
724 if(fp)
725 write_tsv_row(fp, blend, mode, type, cycles_avg, success);
726
727 if (!success) {
728 if(verbose < 2)
729 LLVMDumpModule(module);
730 LLVMWriteBitcodeToFile(module, "blend.bc");
731 fprintf(stderr, "blend.bc written\n");
732 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
733 abort();
734 }
735
736 LLVMFreeMachineCodeForFunction(engine, func);
737
738 LLVMDisposeExecutionEngine(engine);
739 if(pass)
740 LLVMDisposePassManager(pass);
741
742 return success;
743 }
744
745
746 const unsigned
747 blend_factors[] = {
748 PIPE_BLENDFACTOR_ZERO,
749 PIPE_BLENDFACTOR_ONE,
750 PIPE_BLENDFACTOR_SRC_COLOR,
751 PIPE_BLENDFACTOR_SRC_ALPHA,
752 PIPE_BLENDFACTOR_DST_COLOR,
753 PIPE_BLENDFACTOR_DST_ALPHA,
754 PIPE_BLENDFACTOR_CONST_COLOR,
755 PIPE_BLENDFACTOR_CONST_ALPHA,
756 #if 0
757 PIPE_BLENDFACTOR_SRC1_COLOR,
758 PIPE_BLENDFACTOR_SRC1_ALPHA,
759 #endif
760 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
761 PIPE_BLENDFACTOR_INV_SRC_COLOR,
762 PIPE_BLENDFACTOR_INV_SRC_ALPHA,
763 PIPE_BLENDFACTOR_INV_DST_COLOR,
764 PIPE_BLENDFACTOR_INV_DST_ALPHA,
765 PIPE_BLENDFACTOR_INV_CONST_COLOR,
766 PIPE_BLENDFACTOR_INV_CONST_ALPHA,
767 #if 0
768 PIPE_BLENDFACTOR_INV_SRC1_COLOR,
769 PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
770 #endif
771 };
772
773
774 const unsigned
775 blend_funcs[] = {
776 PIPE_BLEND_ADD,
777 PIPE_BLEND_SUBTRACT,
778 PIPE_BLEND_REVERSE_SUBTRACT,
779 PIPE_BLEND_MIN,
780 PIPE_BLEND_MAX
781 };
782
783
784 const struct lp_type blend_types[] = {
785 /* float, fixed, sign, norm, width, len */
786 { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
787 { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
788 };
789
790
791 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
792 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
793 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
794
795
796 boolean
797 test_all(unsigned verbose, FILE *fp)
798 {
799 const unsigned *rgb_func;
800 const unsigned *rgb_src_factor;
801 const unsigned *rgb_dst_factor;
802 const unsigned *alpha_func;
803 const unsigned *alpha_src_factor;
804 const unsigned *alpha_dst_factor;
805 struct pipe_blend_state blend;
806 enum vector_mode mode;
807 const struct lp_type *type;
808 bool success = TRUE;
809
810 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
811 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
812 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
813 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
814 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
815 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
816 for(mode = 0; mode < 2; ++mode) {
817 for(type = blend_types; type < &blend_types[num_types]; ++type) {
818
819 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
820 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
821 continue;
822
823 memset(&blend, 0, sizeof blend);
824 blend.rt[0].blend_enable = 1;
825 blend.rt[0].rgb_func = *rgb_func;
826 blend.rt[0].rgb_src_factor = *rgb_src_factor;
827 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
828 blend.rt[0].alpha_func = *alpha_func;
829 blend.rt[0].alpha_src_factor = *alpha_src_factor;
830 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
831 blend.rt[0].colormask = PIPE_MASK_RGBA;
832
833 if(!test_one(verbose, fp, &blend, mode, *type))
834 success = FALSE;
835
836 }
837 }
838 }
839 }
840 }
841 }
842 }
843 }
844
845 return success;
846 }
847
848
849 boolean
850 test_some(unsigned verbose, FILE *fp, unsigned long n)
851 {
852 const unsigned *rgb_func;
853 const unsigned *rgb_src_factor;
854 const unsigned *rgb_dst_factor;
855 const unsigned *alpha_func;
856 const unsigned *alpha_src_factor;
857 const unsigned *alpha_dst_factor;
858 struct pipe_blend_state blend;
859 enum vector_mode mode;
860 const struct lp_type *type;
861 unsigned long i;
862 bool success = TRUE;
863
864 for(i = 0; i < n; ++i) {
865 rgb_func = &blend_funcs[rand() % num_funcs];
866 alpha_func = &blend_funcs[rand() % num_funcs];
867 rgb_src_factor = &blend_factors[rand() % num_factors];
868 alpha_src_factor = &blend_factors[rand() % num_factors];
869
870 do {
871 rgb_dst_factor = &blend_factors[rand() % num_factors];
872 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
873
874 do {
875 alpha_dst_factor = &blend_factors[rand() % num_factors];
876 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
877
878 mode = rand() & 1;
879
880 type = &blend_types[rand() % num_types];
881
882 memset(&blend, 0, sizeof blend);
883 blend.rt[0].blend_enable = 1;
884 blend.rt[0].rgb_func = *rgb_func;
885 blend.rt[0].rgb_src_factor = *rgb_src_factor;
886 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
887 blend.rt[0].alpha_func = *alpha_func;
888 blend.rt[0].alpha_src_factor = *alpha_src_factor;
889 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
890 blend.rt[0].colormask = PIPE_MASK_RGBA;
891
892 if(!test_one(verbose, fp, &blend, mode, *type))
893 success = FALSE;
894 }
895
896 return success;
897 }