llvmpipe: don't call LLVMCreateJITCompiler() twice
[mesa.git] / src / gallium / drivers / llvmpipe / lp_test_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
40 #include "gallivm/lp_bld_init.h"
41 #include "gallivm/lp_bld_type.h"
42 #include "gallivm/lp_bld_debug.h"
43 #include "lp_bld_blend.h"
44 #include "lp_test.h"
45
46
/**
 * Layout of the vectors handed to the generated blend function.
 *
 * The numeric values matter: elsewhere in this file the mode is used as a
 * truth value and is produced from integer expressions.
 */
enum vector_mode
{
   /* One vector per operand (src, dst, const, result). */
   AoS = 0,

   /* Four consecutive vectors per operand, one for each of r, g, b, a. */
   SoA = 1
};
52
53
/**
 * Signature of the generated blend test function: three input operands
 * (source, destination, blend constant) and one output.
 */
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);

/**
 * Reinterpret a data pointer as a blend test function pointer.
 *
 * The conversion goes through a union instead of a cast because ISO C does
 * not define a direct conversion between object and function pointers.
 */
static blend_test_ptr_t
voidptr_to_blend_test_ptr_t(void *p)
{
   union {
      void *as_data;
      blend_test_ptr_t as_func;
   } pun;

   pun.as_data = p;
   return pun.as_func;
}
67
68
69
/**
 * Write the column header line of the tab-separated results file.
 *
 * Column names are separated by tabs and the line is terminated by a
 * newline; the stream is flushed afterwards.
 */
void
write_tsv_header(FILE *fp)
{
   static const char *const columns[] = {
      "result",
      "cycles_per_channel",
      "mode",
      "type",
      "sep_func",
      "sep_src_factor",
      "sep_dst_factor",
      "rgb_func",
      "rgb_src_factor",
      "rgb_dst_factor",
      "alpha_func",
      "alpha_src_factor",
      "alpha_dst_factor"
   };
   const unsigned count = sizeof(columns) / sizeof(columns[0]);
   unsigned i;

   for (i = 0; i < count; ++i) {
      /* tab between columns, newline after the last one */
      fprintf(fp, "%s%c", columns[i], i + 1 < count ? '\t' : '\n');
   }

   fflush(fp);
}
90
91
92 static void
93 write_tsv_row(FILE *fp,
94 const struct pipe_blend_state *blend,
95 enum vector_mode mode,
96 struct lp_type type,
97 double cycles,
98 boolean success)
99 {
100 fprintf(fp, "%s\t", success ? "pass" : "fail");
101
102 if (mode == AoS) {
103 fprintf(fp, "%.1f\t", cycles / type.length);
104 fprintf(fp, "aos\t");
105 }
106
107 if (mode == SoA) {
108 fprintf(fp, "%.1f\t", cycles / (4 * type.length));
109 fprintf(fp, "soa\t");
110 }
111
112 fprintf(fp, "%s%u%sx%u\t",
113 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
114 type.width,
115 type.norm ? "n" : "",
116 type.length);
117
118 fprintf(fp,
119 "%s\t%s\t%s\t",
120 blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
121 blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
122 blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
123
124 fprintf(fp,
125 "%s\t%s\t%s\t%s\t%s\t%s\n",
126 util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
127 util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
128 util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
129 util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
130 util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
131 util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
132
133 fflush(fp);
134 }
135
136
137 static void
138 dump_blend_type(FILE *fp,
139 const struct pipe_blend_state *blend,
140 enum vector_mode mode,
141 struct lp_type type)
142 {
143 fprintf(fp, "%s", mode ? "soa" : "aos");
144
145 fprintf(fp, " type=%s%u%sx%u",
146 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
147 type.width,
148 type.norm ? "n" : "",
149 type.length);
150
151 fprintf(fp,
152 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
153 "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
154 "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
155 "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
156 "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
157 "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
158 "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
159
160 fprintf(fp, " ...\n");
161 fflush(fp);
162 }
163
164
165 static LLVMValueRef
166 add_blend_test(LLVMModuleRef module,
167 const struct pipe_blend_state *blend,
168 enum vector_mode mode,
169 struct lp_type type)
170 {
171 LLVMTypeRef vec_type;
172 LLVMTypeRef args[4];
173 LLVMValueRef func;
174 LLVMValueRef src_ptr;
175 LLVMValueRef dst_ptr;
176 LLVMValueRef const_ptr;
177 LLVMValueRef res_ptr;
178 LLVMBasicBlockRef block;
179 LLVMBuilderRef builder;
180 const unsigned rt = 0;
181
182 vec_type = lp_build_vec_type(type);
183
184 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
185 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
186 LLVMSetFunctionCallConv(func, LLVMCCallConv);
187 src_ptr = LLVMGetParam(func, 0);
188 dst_ptr = LLVMGetParam(func, 1);
189 const_ptr = LLVMGetParam(func, 2);
190 res_ptr = LLVMGetParam(func, 3);
191
192 block = LLVMAppendBasicBlock(func, "entry");
193 builder = LLVMCreateBuilder();
194 LLVMPositionBuilderAtEnd(builder, block);
195
196 if (mode == AoS) {
197 LLVMValueRef src;
198 LLVMValueRef dst;
199 LLVMValueRef con;
200 LLVMValueRef res;
201
202 src = LLVMBuildLoad(builder, src_ptr, "src");
203 dst = LLVMBuildLoad(builder, dst_ptr, "dst");
204 con = LLVMBuildLoad(builder, const_ptr, "const");
205
206 res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3);
207
208 lp_build_name(res, "res");
209
210 LLVMBuildStore(builder, res, res_ptr);
211 }
212
213 if (mode == SoA) {
214 LLVMValueRef src[4];
215 LLVMValueRef dst[4];
216 LLVMValueRef con[4];
217 LLVMValueRef res[4];
218 unsigned i;
219
220 for(i = 0; i < 4; ++i) {
221 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
222 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
223 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
224 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
225 lp_build_name(src[i], "src.%c", "rgba"[i]);
226 lp_build_name(con[i], "con.%c", "rgba"[i]);
227 lp_build_name(dst[i], "dst.%c", "rgba"[i]);
228 }
229
230 lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
231
232 for(i = 0; i < 4; ++i) {
233 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
234 lp_build_name(res[i], "res.%c", "rgba"[i]);
235 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
236 }
237 }
238
239 LLVMBuildRetVoid(builder);;
240
241 LLVMDisposeBuilder(builder);
242 return func;
243 }
244
245
/**
 * R = A + B, clamped from above to 1.0 (no lower clamp).
 * R is evaluated more than once; pass a plain lvalue.
 */
#define ADD_SAT(R, A, B) \
   do { \
      (R) = (A) + (B); \
      if ((R) > 1.0f) { \
         (R) = 1.0f; \
      } \
   } while (0)

/**
 * R = A - B, clamped from below to 0.0 (no upper clamp).
 * R is evaluated more than once; pass a plain lvalue.
 */
#define SUB_SAT(R, A, B) \
   do { \
      (R) = (A) - (B); \
      if ((R) < 0.0f) { \
         (R) = 0.0f; \
      } \
   } while (0)
257
258
259 static void
260 compute_blend_ref_term(unsigned rgb_factor,
261 unsigned alpha_factor,
262 const double *factor,
263 const double *src,
264 const double *dst,
265 const double *con,
266 double *term)
267 {
268 double temp;
269
270 switch (rgb_factor) {
271 case PIPE_BLENDFACTOR_ONE:
272 term[0] = factor[0]; /* R */
273 term[1] = factor[1]; /* G */
274 term[2] = factor[2]; /* B */
275 break;
276 case PIPE_BLENDFACTOR_SRC_COLOR:
277 term[0] = factor[0] * src[0]; /* R */
278 term[1] = factor[1] * src[1]; /* G */
279 term[2] = factor[2] * src[2]; /* B */
280 break;
281 case PIPE_BLENDFACTOR_SRC_ALPHA:
282 term[0] = factor[0] * src[3]; /* R */
283 term[1] = factor[1] * src[3]; /* G */
284 term[2] = factor[2] * src[3]; /* B */
285 break;
286 case PIPE_BLENDFACTOR_DST_COLOR:
287 term[0] = factor[0] * dst[0]; /* R */
288 term[1] = factor[1] * dst[1]; /* G */
289 term[2] = factor[2] * dst[2]; /* B */
290 break;
291 case PIPE_BLENDFACTOR_DST_ALPHA:
292 term[0] = factor[0] * dst[3]; /* R */
293 term[1] = factor[1] * dst[3]; /* G */
294 term[2] = factor[2] * dst[3]; /* B */
295 break;
296 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
297 temp = MIN2(src[3], 1.0f - dst[3]);
298 term[0] = factor[0] * temp; /* R */
299 term[1] = factor[1] * temp; /* G */
300 term[2] = factor[2] * temp; /* B */
301 break;
302 case PIPE_BLENDFACTOR_CONST_COLOR:
303 term[0] = factor[0] * con[0]; /* R */
304 term[1] = factor[1] * con[1]; /* G */
305 term[2] = factor[2] * con[2]; /* B */
306 break;
307 case PIPE_BLENDFACTOR_CONST_ALPHA:
308 term[0] = factor[0] * con[3]; /* R */
309 term[1] = factor[1] * con[3]; /* G */
310 term[2] = factor[2] * con[3]; /* B */
311 break;
312 case PIPE_BLENDFACTOR_SRC1_COLOR:
313 assert(0); /* to do */
314 break;
315 case PIPE_BLENDFACTOR_SRC1_ALPHA:
316 assert(0); /* to do */
317 break;
318 case PIPE_BLENDFACTOR_ZERO:
319 term[0] = 0.0f; /* R */
320 term[1] = 0.0f; /* G */
321 term[2] = 0.0f; /* B */
322 break;
323 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
324 term[0] = factor[0] * (1.0f - src[0]); /* R */
325 term[1] = factor[1] * (1.0f - src[1]); /* G */
326 term[2] = factor[2] * (1.0f - src[2]); /* B */
327 break;
328 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
329 term[0] = factor[0] * (1.0f - src[3]); /* R */
330 term[1] = factor[1] * (1.0f - src[3]); /* G */
331 term[2] = factor[2] * (1.0f - src[3]); /* B */
332 break;
333 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
334 term[0] = factor[0] * (1.0f - dst[3]); /* R */
335 term[1] = factor[1] * (1.0f - dst[3]); /* G */
336 term[2] = factor[2] * (1.0f - dst[3]); /* B */
337 break;
338 case PIPE_BLENDFACTOR_INV_DST_COLOR:
339 term[0] = factor[0] * (1.0f - dst[0]); /* R */
340 term[1] = factor[1] * (1.0f - dst[1]); /* G */
341 term[2] = factor[2] * (1.0f - dst[2]); /* B */
342 break;
343 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
344 term[0] = factor[0] * (1.0f - con[0]); /* R */
345 term[1] = factor[1] * (1.0f - con[1]); /* G */
346 term[2] = factor[2] * (1.0f - con[2]); /* B */
347 break;
348 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
349 term[0] = factor[0] * (1.0f - con[3]); /* R */
350 term[1] = factor[1] * (1.0f - con[3]); /* G */
351 term[2] = factor[2] * (1.0f - con[3]); /* B */
352 break;
353 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
354 assert(0); /* to do */
355 break;
356 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
357 assert(0); /* to do */
358 break;
359 default:
360 assert(0);
361 }
362
363 /*
364 * Compute src/first term A
365 */
366 switch (alpha_factor) {
367 case PIPE_BLENDFACTOR_ONE:
368 term[3] = factor[3]; /* A */
369 break;
370 case PIPE_BLENDFACTOR_SRC_COLOR:
371 case PIPE_BLENDFACTOR_SRC_ALPHA:
372 term[3] = factor[3] * src[3]; /* A */
373 break;
374 case PIPE_BLENDFACTOR_DST_COLOR:
375 case PIPE_BLENDFACTOR_DST_ALPHA:
376 term[3] = factor[3] * dst[3]; /* A */
377 break;
378 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
379 term[3] = src[3]; /* A */
380 break;
381 case PIPE_BLENDFACTOR_CONST_COLOR:
382 case PIPE_BLENDFACTOR_CONST_ALPHA:
383 term[3] = factor[3] * con[3]; /* A */
384 break;
385 case PIPE_BLENDFACTOR_ZERO:
386 term[3] = 0.0f; /* A */
387 break;
388 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
389 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
390 term[3] = factor[3] * (1.0f - src[3]); /* A */
391 break;
392 case PIPE_BLENDFACTOR_INV_DST_COLOR:
393 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
394 term[3] = factor[3] * (1.0f - dst[3]); /* A */
395 break;
396 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
397 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
398 term[3] = factor[3] * (1.0f - con[3]);
399 break;
400 default:
401 assert(0);
402 }
403 }
404
405
406 static void
407 compute_blend_ref(const struct pipe_blend_state *blend,
408 const double *src,
409 const double *dst,
410 const double *con,
411 double *res)
412 {
413 double src_term[4];
414 double dst_term[4];
415
416 compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
417 src, src, dst, con, src_term);
418 compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
419 dst, src, dst, con, dst_term);
420
421 /*
422 * Combine RGB terms
423 */
424 switch (blend->rt[0].rgb_func) {
425 case PIPE_BLEND_ADD:
426 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
427 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
428 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
429 break;
430 case PIPE_BLEND_SUBTRACT:
431 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
432 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
433 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
434 break;
435 case PIPE_BLEND_REVERSE_SUBTRACT:
436 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
437 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
438 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
439 break;
440 case PIPE_BLEND_MIN:
441 res[0] = MIN2(src_term[0], dst_term[0]); /* R */
442 res[1] = MIN2(src_term[1], dst_term[1]); /* G */
443 res[2] = MIN2(src_term[2], dst_term[2]); /* B */
444 break;
445 case PIPE_BLEND_MAX:
446 res[0] = MAX2(src_term[0], dst_term[0]); /* R */
447 res[1] = MAX2(src_term[1], dst_term[1]); /* G */
448 res[2] = MAX2(src_term[2], dst_term[2]); /* B */
449 break;
450 default:
451 assert(0);
452 }
453
454 /*
455 * Combine A terms
456 */
457 switch (blend->rt[0].alpha_func) {
458 case PIPE_BLEND_ADD:
459 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
460 break;
461 case PIPE_BLEND_SUBTRACT:
462 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
463 break;
464 case PIPE_BLEND_REVERSE_SUBTRACT:
465 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
466 break;
467 case PIPE_BLEND_MIN:
468 res[3] = MIN2(src_term[3], dst_term[3]); /* A */
469 break;
470 case PIPE_BLEND_MAX:
471 res[3] = MAX2(src_term[3], dst_term[3]); /* A */
472 break;
473 default:
474 assert(0);
475 }
476 }
477
478
479 PIPE_ALIGN_STACK
480 static boolean
481 test_one(unsigned verbose,
482 FILE *fp,
483 const struct pipe_blend_state *blend,
484 enum vector_mode mode,
485 struct lp_type type)
486 {
487 LLVMModuleRef module = NULL;
488 LLVMValueRef func = NULL;
489 LLVMExecutionEngineRef engine = lp_build_engine;
490 LLVMPassManagerRef pass = NULL;
491 char *error = NULL;
492 blend_test_ptr_t blend_test_ptr;
493 boolean success;
494 const unsigned n = LP_TEST_NUM_SAMPLES;
495 int64_t cycles[LP_TEST_NUM_SAMPLES];
496 double cycles_avg = 0.0;
497 unsigned i, j;
498 void *code;
499
500 if(verbose >= 1)
501 dump_blend_type(stdout, blend, mode, type);
502
503 module = LLVMModuleCreateWithName("test");
504
505 func = add_blend_test(module, blend, mode, type);
506
507 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
508 LLVMDumpModule(module);
509 abort();
510 }
511 LLVMDisposeMessage(error);
512
513 #if 0
514 pass = LLVMCreatePassManager();
515 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
516 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
517 * but there are more on SVN. */
518 LLVMAddConstantPropagationPass(pass);
519 LLVMAddInstructionCombiningPass(pass);
520 LLVMAddPromoteMemoryToRegisterPass(pass);
521 LLVMAddGVNPass(pass);
522 LLVMAddCFGSimplificationPass(pass);
523 LLVMRunPassManager(pass, module);
524 #else
525 (void)pass;
526 #endif
527
528 if(verbose >= 2)
529 LLVMDumpModule(module);
530
531 code = LLVMGetPointerToGlobal(engine, func);
532 blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
533
534 if(verbose >= 2)
535 lp_disassemble(code);
536
537 success = TRUE;
538 for(i = 0; i < n && success; ++i) {
539 if(mode == AoS) {
540 PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
541 PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
542 PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
543 PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
544 PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
545 int64_t start_counter = 0;
546 int64_t end_counter = 0;
547
548 random_vec(type, src);
549 random_vec(type, dst);
550 random_vec(type, con);
551
552 {
553 double fsrc[LP_MAX_VECTOR_LENGTH];
554 double fdst[LP_MAX_VECTOR_LENGTH];
555 double fcon[LP_MAX_VECTOR_LENGTH];
556 double fref[LP_MAX_VECTOR_LENGTH];
557
558 read_vec(type, src, fsrc);
559 read_vec(type, dst, fdst);
560 read_vec(type, con, fcon);
561
562 for(j = 0; j < type.length; j += 4)
563 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
564
565 write_vec(type, ref, fref);
566 }
567
568 start_counter = rdtsc();
569 blend_test_ptr(src, dst, con, res);
570 end_counter = rdtsc();
571
572 cycles[i] = end_counter - start_counter;
573
574 if(!compare_vec(type, res, ref)) {
575 success = FALSE;
576
577 if(verbose < 1)
578 dump_blend_type(stderr, blend, mode, type);
579 fprintf(stderr, "MISMATCH\n");
580
581 fprintf(stderr, " Src: ");
582 dump_vec(stderr, type, src);
583 fprintf(stderr, "\n");
584
585 fprintf(stderr, " Dst: ");
586 dump_vec(stderr, type, dst);
587 fprintf(stderr, "\n");
588
589 fprintf(stderr, " Con: ");
590 dump_vec(stderr, type, con);
591 fprintf(stderr, "\n");
592
593 fprintf(stderr, " Res: ");
594 dump_vec(stderr, type, res);
595 fprintf(stderr, "\n");
596
597 fprintf(stderr, " Ref: ");
598 dump_vec(stderr, type, ref);
599 fprintf(stderr, "\n");
600 }
601 }
602
603 if(mode == SoA) {
604 const unsigned stride = type.length*type.width/8;
605 PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
606 PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
607 PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
608 PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
609 PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
610 int64_t start_counter = 0;
611 int64_t end_counter = 0;
612 boolean mismatch;
613
614 for(j = 0; j < 4; ++j) {
615 random_vec(type, src + j*stride);
616 random_vec(type, dst + j*stride);
617 random_vec(type, con + j*stride);
618 }
619
620 {
621 double fsrc[4];
622 double fdst[4];
623 double fcon[4];
624 double fref[4];
625 unsigned k;
626
627 for(k = 0; k < type.length; ++k) {
628 for(j = 0; j < 4; ++j) {
629 fsrc[j] = read_elem(type, src + j*stride, k);
630 fdst[j] = read_elem(type, dst + j*stride, k);
631 fcon[j] = read_elem(type, con + j*stride, k);
632 }
633
634 compute_blend_ref(blend, fsrc, fdst, fcon, fref);
635
636 for(j = 0; j < 4; ++j)
637 write_elem(type, ref + j*stride, k, fref[j]);
638 }
639 }
640
641 start_counter = rdtsc();
642 blend_test_ptr(src, dst, con, res);
643 end_counter = rdtsc();
644
645 cycles[i] = end_counter - start_counter;
646
647 mismatch = FALSE;
648 for (j = 0; j < 4; ++j)
649 if(!compare_vec(type, res + j*stride, ref + j*stride))
650 mismatch = TRUE;
651
652 if (mismatch) {
653 success = FALSE;
654
655 if(verbose < 1)
656 dump_blend_type(stderr, blend, mode, type);
657 fprintf(stderr, "MISMATCH\n");
658 for(j = 0; j < 4; ++j) {
659 char channel = "RGBA"[j];
660 fprintf(stderr, " Src%c: ", channel);
661 dump_vec(stderr, type, src + j*stride);
662 fprintf(stderr, "\n");
663
664 fprintf(stderr, " Dst%c: ", channel);
665 dump_vec(stderr, type, dst + j*stride);
666 fprintf(stderr, "\n");
667
668 fprintf(stderr, " Con%c: ", channel);
669 dump_vec(stderr, type, con + j*stride);
670 fprintf(stderr, "\n");
671
672 fprintf(stderr, " Res%c: ", channel);
673 dump_vec(stderr, type, res + j*stride);
674 fprintf(stderr, "\n");
675
676 fprintf(stderr, " Ref%c: ", channel);
677 dump_vec(stderr, type, ref + j*stride);
678 fprintf(stderr, "\n");
679 }
680 }
681 }
682 }
683
684 /*
685 * Unfortunately the output of cycle counter is not very reliable as it comes
686 * -- sometimes we get outliers (due IRQs perhaps?) which are
687 * better removed to avoid random or biased data.
688 */
689 {
690 double sum = 0.0, sum2 = 0.0;
691 double avg, std;
692 unsigned m;
693
694 for(i = 0; i < n; ++i) {
695 sum += cycles[i];
696 sum2 += cycles[i]*cycles[i];
697 }
698
699 avg = sum/n;
700 std = sqrtf((sum2 - n*avg*avg)/n);
701
702 m = 0;
703 sum = 0.0;
704 for(i = 0; i < n; ++i) {
705 if(fabs(cycles[i] - avg) <= 4.0*std) {
706 sum += cycles[i];
707 ++m;
708 }
709 }
710
711 cycles_avg = sum/m;
712
713 }
714
715 if(fp)
716 write_tsv_row(fp, blend, mode, type, cycles_avg, success);
717
718 if (!success) {
719 if(verbose < 2)
720 LLVMDumpModule(module);
721 LLVMWriteBitcodeToFile(module, "blend.bc");
722 fprintf(stderr, "blend.bc written\n");
723 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
724 abort();
725 }
726
727 LLVMFreeMachineCodeForFunction(engine, func);
728
729 if(pass)
730 LLVMDisposePassManager(pass);
731
732 return success;
733 }
734
735
736 const unsigned
737 blend_factors[] = {
738 PIPE_BLENDFACTOR_ZERO,
739 PIPE_BLENDFACTOR_ONE,
740 PIPE_BLENDFACTOR_SRC_COLOR,
741 PIPE_BLENDFACTOR_SRC_ALPHA,
742 PIPE_BLENDFACTOR_DST_COLOR,
743 PIPE_BLENDFACTOR_DST_ALPHA,
744 PIPE_BLENDFACTOR_CONST_COLOR,
745 PIPE_BLENDFACTOR_CONST_ALPHA,
746 #if 0
747 PIPE_BLENDFACTOR_SRC1_COLOR,
748 PIPE_BLENDFACTOR_SRC1_ALPHA,
749 #endif
750 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
751 PIPE_BLENDFACTOR_INV_SRC_COLOR,
752 PIPE_BLENDFACTOR_INV_SRC_ALPHA,
753 PIPE_BLENDFACTOR_INV_DST_COLOR,
754 PIPE_BLENDFACTOR_INV_DST_ALPHA,
755 PIPE_BLENDFACTOR_INV_CONST_COLOR,
756 PIPE_BLENDFACTOR_INV_CONST_ALPHA,
757 #if 0
758 PIPE_BLENDFACTOR_INV_SRC1_COLOR,
759 PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
760 #endif
761 };
762
763
764 const unsigned
765 blend_funcs[] = {
766 PIPE_BLEND_ADD,
767 PIPE_BLEND_SUBTRACT,
768 PIPE_BLEND_REVERSE_SUBTRACT,
769 PIPE_BLEND_MIN,
770 PIPE_BLEND_MAX
771 };
772
773
774 const struct lp_type blend_types[] = {
775 /* float, fixed, sign, norm, width, len */
776 { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
777 { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
778 };
779
780
781 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
782 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
783 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
784
785
786 boolean
787 test_all(unsigned verbose, FILE *fp)
788 {
789 const unsigned *rgb_func;
790 const unsigned *rgb_src_factor;
791 const unsigned *rgb_dst_factor;
792 const unsigned *alpha_func;
793 const unsigned *alpha_src_factor;
794 const unsigned *alpha_dst_factor;
795 struct pipe_blend_state blend;
796 enum vector_mode mode;
797 const struct lp_type *type;
798 boolean success = TRUE;
799
800 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
801 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
802 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
803 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
804 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
805 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
806 for(mode = 0; mode < 2; ++mode) {
807 for(type = blend_types; type < &blend_types[num_types]; ++type) {
808
809 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
810 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
811 continue;
812
813 memset(&blend, 0, sizeof blend);
814 blend.rt[0].blend_enable = 1;
815 blend.rt[0].rgb_func = *rgb_func;
816 blend.rt[0].rgb_src_factor = *rgb_src_factor;
817 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
818 blend.rt[0].alpha_func = *alpha_func;
819 blend.rt[0].alpha_src_factor = *alpha_src_factor;
820 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
821 blend.rt[0].colormask = PIPE_MASK_RGBA;
822
823 if(!test_one(verbose, fp, &blend, mode, *type))
824 success = FALSE;
825
826 }
827 }
828 }
829 }
830 }
831 }
832 }
833 }
834
835 return success;
836 }
837
838
839 boolean
840 test_some(unsigned verbose, FILE *fp, unsigned long n)
841 {
842 const unsigned *rgb_func;
843 const unsigned *rgb_src_factor;
844 const unsigned *rgb_dst_factor;
845 const unsigned *alpha_func;
846 const unsigned *alpha_src_factor;
847 const unsigned *alpha_dst_factor;
848 struct pipe_blend_state blend;
849 enum vector_mode mode;
850 const struct lp_type *type;
851 unsigned long i;
852 boolean success = TRUE;
853
854 for(i = 0; i < n; ++i) {
855 rgb_func = &blend_funcs[rand() % num_funcs];
856 alpha_func = &blend_funcs[rand() % num_funcs];
857 rgb_src_factor = &blend_factors[rand() % num_factors];
858 alpha_src_factor = &blend_factors[rand() % num_factors];
859
860 do {
861 rgb_dst_factor = &blend_factors[rand() % num_factors];
862 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
863
864 do {
865 alpha_dst_factor = &blend_factors[rand() % num_factors];
866 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
867
868 mode = rand() & 1;
869
870 type = &blend_types[rand() % num_types];
871
872 memset(&blend, 0, sizeof blend);
873 blend.rt[0].blend_enable = 1;
874 blend.rt[0].rgb_func = *rgb_func;
875 blend.rt[0].rgb_src_factor = *rgb_src_factor;
876 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
877 blend.rt[0].alpha_func = *alpha_func;
878 blend.rt[0].alpha_src_factor = *alpha_src_factor;
879 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
880 blend.rt[0].colormask = PIPE_MASK_RGBA;
881
882 if(!test_one(verbose, fp, &blend, mode, *type))
883 success = FALSE;
884 }
885
886 return success;
887 }
888
889
890 boolean
891 test_single(unsigned verbose, FILE *fp)
892 {
893 printf("no test_single()");
894 return TRUE;
895 }