Merge remote branch 'origin/master' into lp-binning
[mesa.git] / src / gallium / drivers / llvmpipe / lp_test_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
40 #include "lp_bld_type.h"
41 #include "lp_bld_arit.h"
42 #include "lp_bld_blend.h"
43 #include "lp_bld_debug.h"
44 #include "lp_test.h"
45
46
/**
 * Memory layout of the pixel data handed to the generated blend function.
 *
 * The numeric values matter: the mode is also used as a loop counter and
 * as the result of `rand() & 1` elsewhere in this file.
 */
enum vector_mode {
   AoS = 0,   /* one vector holding groups of four consecutive channels */
   SoA = 1    /* four vectors, one per channel */
};
52
53
/**
 * Entry point of the JIT-compiled blend routine: reads the source,
 * destination and constant color vectors and stores the blended result.
 */
typedef void
(*blend_test_ptr_t)(const void *src,
                    const void *dst,
                    const void *con,
                    void *res);
55
56
/**
 * Write the column-name line of the tab-separated results table.
 *
 * The columns appear in the same order in which write_tsv_row() emits its
 * fields.  The stream is flushed before returning.
 *
 * @param fp  stream to write to
 */
void
write_tsv_header(FILE *fp)
{
   static const char header[] =
      "result\t"
      "cycles_per_channel\t"
      "mode\t"
      "type\t"
      "sep_func\t"
      "sep_src_factor\t"
      "sep_dst_factor\t"
      "rgb_func\t"
      "rgb_src_factor\t"
      "rgb_dst_factor\t"
      "alpha_func\t"
      "alpha_src_factor\t"
      "alpha_dst_factor\n";

   fputs(header, fp);
   fflush(fp);
}
77
78
79 static void
80 write_tsv_row(FILE *fp,
81 const struct pipe_blend_state *blend,
82 enum vector_mode mode,
83 struct lp_type type,
84 double cycles,
85 boolean success)
86 {
87 fprintf(fp, "%s\t", success ? "pass" : "fail");
88
89 if (mode == AoS) {
90 fprintf(fp, "%.1f\t", cycles / type.length);
91 fprintf(fp, "aos\t");
92 }
93
94 if (mode == SoA) {
95 fprintf(fp, "%.1f\t", cycles / (4 * type.length));
96 fprintf(fp, "soa\t");
97 }
98
99 fprintf(fp, "%s%u%sx%u\t",
100 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
101 type.width,
102 type.norm ? "n" : "",
103 type.length);
104
105 fprintf(fp,
106 "%s\t%s\t%s\t",
107 blend->rgb_func != blend->alpha_func ? "true" : "false",
108 blend->rgb_src_factor != blend->alpha_src_factor ? "true" : "false",
109 blend->rgb_dst_factor != blend->alpha_dst_factor ? "true" : "false");
110
111 fprintf(fp,
112 "%s\t%s\t%s\t%s\t%s\t%s\n",
113 debug_dump_blend_func(blend->rgb_func, TRUE),
114 debug_dump_blend_factor(blend->rgb_src_factor, TRUE),
115 debug_dump_blend_factor(blend->rgb_dst_factor, TRUE),
116 debug_dump_blend_func(blend->alpha_func, TRUE),
117 debug_dump_blend_factor(blend->alpha_src_factor, TRUE),
118 debug_dump_blend_factor(blend->alpha_dst_factor, TRUE));
119
120 fflush(fp);
121 }
122
123
124 static void
125 dump_blend_type(FILE *fp,
126 const struct pipe_blend_state *blend,
127 enum vector_mode mode,
128 struct lp_type type)
129 {
130 fprintf(fp, "%s", mode ? "soa" : "aos");
131
132 fprintf(fp, " type=%s%u%sx%u",
133 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
134 type.width,
135 type.norm ? "n" : "",
136 type.length);
137
138 fprintf(fp,
139 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
140 "rgb_func", debug_dump_blend_func(blend->rgb_func, TRUE),
141 "rgb_src_factor", debug_dump_blend_factor(blend->rgb_src_factor, TRUE),
142 "rgb_dst_factor", debug_dump_blend_factor(blend->rgb_dst_factor, TRUE),
143 "alpha_func", debug_dump_blend_func(blend->alpha_func, TRUE),
144 "alpha_src_factor", debug_dump_blend_factor(blend->alpha_src_factor, TRUE),
145 "alpha_dst_factor", debug_dump_blend_factor(blend->alpha_dst_factor, TRUE));
146
147 fprintf(fp, " ...\n");
148 fflush(fp);
149 }
150
151
152 static LLVMValueRef
153 add_blend_test(LLVMModuleRef module,
154 const struct pipe_blend_state *blend,
155 enum vector_mode mode,
156 struct lp_type type)
157 {
158 LLVMTypeRef ret_type;
159 LLVMTypeRef vec_type;
160 LLVMTypeRef args[4];
161 LLVMValueRef func;
162 LLVMValueRef src_ptr;
163 LLVMValueRef dst_ptr;
164 LLVMValueRef const_ptr;
165 LLVMValueRef res_ptr;
166 LLVMBasicBlockRef block;
167 LLVMBuilderRef builder;
168
169 ret_type = LLVMInt64Type();
170 vec_type = lp_build_vec_type(type);
171
172 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
173 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
174 LLVMSetFunctionCallConv(func, LLVMCCallConv);
175 src_ptr = LLVMGetParam(func, 0);
176 dst_ptr = LLVMGetParam(func, 1);
177 const_ptr = LLVMGetParam(func, 2);
178 res_ptr = LLVMGetParam(func, 3);
179
180 block = LLVMAppendBasicBlock(func, "entry");
181 builder = LLVMCreateBuilder();
182 LLVMPositionBuilderAtEnd(builder, block);
183
184 if (mode == AoS) {
185 LLVMValueRef src;
186 LLVMValueRef dst;
187 LLVMValueRef con;
188 LLVMValueRef res;
189
190 src = LLVMBuildLoad(builder, src_ptr, "src");
191 dst = LLVMBuildLoad(builder, dst_ptr, "dst");
192 con = LLVMBuildLoad(builder, const_ptr, "const");
193
194 res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
195
196 lp_build_name(res, "res");
197
198 LLVMBuildStore(builder, res, res_ptr);
199 }
200
201 if (mode == SoA) {
202 LLVMValueRef src[4];
203 LLVMValueRef dst[4];
204 LLVMValueRef con[4];
205 LLVMValueRef res[4];
206 unsigned i;
207
208 for(i = 0; i < 4; ++i) {
209 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
210 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
211 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
212 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
213 lp_build_name(src[i], "src.%c", "rgba"[i]);
214 lp_build_name(con[i], "con.%c", "rgba"[i]);
215 lp_build_name(dst[i], "dst.%c", "rgba"[i]);
216 }
217
218 lp_build_blend_soa(builder, blend, type, src, dst, con, res);
219
220 for(i = 0; i < 4; ++i) {
221 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
222 lp_build_name(res[i], "res.%c", "rgba"[i]);
223 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
224 }
225 }
226
227 LLVMBuildRetVoid(builder);;
228
229 LLVMDisposeBuilder(builder);
230 return func;
231 }
232
233
/** Store (A) + (B) into R, clamping the sum to an upper bound of 1.0 */
#define ADD_SAT(R, A, B) \
   do { \
      (R) = (A) + (B); \
      if ((R) > 1.0f) \
         (R) = 1.0f; \
   } while (0)
239
/** Store (A) - (B) into R, clamping the difference to a lower bound of 0.0 */
#define SUB_SAT(R, A, B) \
   do { \
      (R) = (A) - (B); \
      if ((R) < 0.0f) \
         (R) = 0.0f; \
   } while (0)
245
246
247 static void
248 compute_blend_ref_term(unsigned rgb_factor,
249 unsigned alpha_factor,
250 const double *factor,
251 const double *src,
252 const double *dst,
253 const double *con,
254 double *term)
255 {
256 double temp;
257
258 switch (rgb_factor) {
259 case PIPE_BLENDFACTOR_ONE:
260 term[0] = factor[0]; /* R */
261 term[1] = factor[1]; /* G */
262 term[2] = factor[2]; /* B */
263 break;
264 case PIPE_BLENDFACTOR_SRC_COLOR:
265 term[0] = factor[0] * src[0]; /* R */
266 term[1] = factor[1] * src[1]; /* G */
267 term[2] = factor[2] * src[2]; /* B */
268 break;
269 case PIPE_BLENDFACTOR_SRC_ALPHA:
270 term[0] = factor[0] * src[3]; /* R */
271 term[1] = factor[1] * src[3]; /* G */
272 term[2] = factor[2] * src[3]; /* B */
273 break;
274 case PIPE_BLENDFACTOR_DST_COLOR:
275 term[0] = factor[0] * dst[0]; /* R */
276 term[1] = factor[1] * dst[1]; /* G */
277 term[2] = factor[2] * dst[2]; /* B */
278 break;
279 case PIPE_BLENDFACTOR_DST_ALPHA:
280 term[0] = factor[0] * dst[3]; /* R */
281 term[1] = factor[1] * dst[3]; /* G */
282 term[2] = factor[2] * dst[3]; /* B */
283 break;
284 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
285 temp = MIN2(src[3], 1.0f - dst[3]);
286 term[0] = factor[0] * temp; /* R */
287 term[1] = factor[1] * temp; /* G */
288 term[2] = factor[2] * temp; /* B */
289 break;
290 case PIPE_BLENDFACTOR_CONST_COLOR:
291 term[0] = factor[0] * con[0]; /* R */
292 term[1] = factor[1] * con[1]; /* G */
293 term[2] = factor[2] * con[2]; /* B */
294 break;
295 case PIPE_BLENDFACTOR_CONST_ALPHA:
296 term[0] = factor[0] * con[3]; /* R */
297 term[1] = factor[1] * con[3]; /* G */
298 term[2] = factor[2] * con[3]; /* B */
299 break;
300 case PIPE_BLENDFACTOR_SRC1_COLOR:
301 assert(0); /* to do */
302 break;
303 case PIPE_BLENDFACTOR_SRC1_ALPHA:
304 assert(0); /* to do */
305 break;
306 case PIPE_BLENDFACTOR_ZERO:
307 term[0] = 0.0f; /* R */
308 term[1] = 0.0f; /* G */
309 term[2] = 0.0f; /* B */
310 break;
311 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
312 term[0] = factor[0] * (1.0f - src[0]); /* R */
313 term[1] = factor[1] * (1.0f - src[1]); /* G */
314 term[2] = factor[2] * (1.0f - src[2]); /* B */
315 break;
316 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
317 term[0] = factor[0] * (1.0f - src[3]); /* R */
318 term[1] = factor[1] * (1.0f - src[3]); /* G */
319 term[2] = factor[2] * (1.0f - src[3]); /* B */
320 break;
321 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
322 term[0] = factor[0] * (1.0f - dst[3]); /* R */
323 term[1] = factor[1] * (1.0f - dst[3]); /* G */
324 term[2] = factor[2] * (1.0f - dst[3]); /* B */
325 break;
326 case PIPE_BLENDFACTOR_INV_DST_COLOR:
327 term[0] = factor[0] * (1.0f - dst[0]); /* R */
328 term[1] = factor[1] * (1.0f - dst[1]); /* G */
329 term[2] = factor[2] * (1.0f - dst[2]); /* B */
330 break;
331 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
332 term[0] = factor[0] * (1.0f - con[0]); /* R */
333 term[1] = factor[1] * (1.0f - con[1]); /* G */
334 term[2] = factor[2] * (1.0f - con[2]); /* B */
335 break;
336 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
337 term[0] = factor[0] * (1.0f - con[3]); /* R */
338 term[1] = factor[1] * (1.0f - con[3]); /* G */
339 term[2] = factor[2] * (1.0f - con[3]); /* B */
340 break;
341 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
342 assert(0); /* to do */
343 break;
344 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
345 assert(0); /* to do */
346 break;
347 default:
348 assert(0);
349 }
350
351 /*
352 * Compute src/first term A
353 */
354 switch (alpha_factor) {
355 case PIPE_BLENDFACTOR_ONE:
356 term[3] = factor[3]; /* A */
357 break;
358 case PIPE_BLENDFACTOR_SRC_COLOR:
359 case PIPE_BLENDFACTOR_SRC_ALPHA:
360 term[3] = factor[3] * src[3]; /* A */
361 break;
362 case PIPE_BLENDFACTOR_DST_COLOR:
363 case PIPE_BLENDFACTOR_DST_ALPHA:
364 term[3] = factor[3] * dst[3]; /* A */
365 break;
366 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
367 term[3] = src[3]; /* A */
368 break;
369 case PIPE_BLENDFACTOR_CONST_COLOR:
370 case PIPE_BLENDFACTOR_CONST_ALPHA:
371 term[3] = factor[3] * con[3]; /* A */
372 break;
373 case PIPE_BLENDFACTOR_ZERO:
374 term[3] = 0.0f; /* A */
375 break;
376 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
377 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
378 term[3] = factor[3] * (1.0f - src[3]); /* A */
379 break;
380 case PIPE_BLENDFACTOR_INV_DST_COLOR:
381 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
382 term[3] = factor[3] * (1.0f - dst[3]); /* A */
383 break;
384 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
385 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
386 term[3] = factor[3] * (1.0f - con[3]);
387 break;
388 default:
389 assert(0);
390 }
391 }
392
393
394 static void
395 compute_blend_ref(const struct pipe_blend_state *blend,
396 const double *src,
397 const double *dst,
398 const double *con,
399 double *res)
400 {
401 double src_term[4];
402 double dst_term[4];
403
404 compute_blend_ref_term(blend->rgb_src_factor, blend->alpha_src_factor, src, src, dst, con, src_term);
405 compute_blend_ref_term(blend->rgb_dst_factor, blend->alpha_dst_factor, dst, src, dst, con, dst_term);
406
407 /*
408 * Combine RGB terms
409 */
410 switch (blend->rgb_func) {
411 case PIPE_BLEND_ADD:
412 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
413 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
414 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
415 break;
416 case PIPE_BLEND_SUBTRACT:
417 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
418 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
419 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
420 break;
421 case PIPE_BLEND_REVERSE_SUBTRACT:
422 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
423 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
424 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
425 break;
426 case PIPE_BLEND_MIN:
427 res[0] = MIN2(src_term[0], dst_term[0]); /* R */
428 res[1] = MIN2(src_term[1], dst_term[1]); /* G */
429 res[2] = MIN2(src_term[2], dst_term[2]); /* B */
430 break;
431 case PIPE_BLEND_MAX:
432 res[0] = MAX2(src_term[0], dst_term[0]); /* R */
433 res[1] = MAX2(src_term[1], dst_term[1]); /* G */
434 res[2] = MAX2(src_term[2], dst_term[2]); /* B */
435 break;
436 default:
437 assert(0);
438 }
439
440 /*
441 * Combine A terms
442 */
443 switch (blend->alpha_func) {
444 case PIPE_BLEND_ADD:
445 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
446 break;
447 case PIPE_BLEND_SUBTRACT:
448 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
449 break;
450 case PIPE_BLEND_REVERSE_SUBTRACT:
451 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
452 break;
453 case PIPE_BLEND_MIN:
454 res[3] = MIN2(src_term[3], dst_term[3]); /* A */
455 break;
456 case PIPE_BLEND_MAX:
457 res[3] = MAX2(src_term[3], dst_term[3]); /* A */
458 break;
459 default:
460 assert(0);
461 }
462 }
463
464
465 ALIGN_STACK
466 static boolean
467 test_one(unsigned verbose,
468 FILE *fp,
469 const struct pipe_blend_state *blend,
470 enum vector_mode mode,
471 struct lp_type type)
472 {
473 LLVMModuleRef module = NULL;
474 LLVMValueRef func = NULL;
475 LLVMExecutionEngineRef engine = NULL;
476 LLVMModuleProviderRef provider = NULL;
477 LLVMPassManagerRef pass = NULL;
478 char *error = NULL;
479 blend_test_ptr_t blend_test_ptr;
480 boolean success;
481 const unsigned n = LP_TEST_NUM_SAMPLES;
482 int64_t cycles[LP_TEST_NUM_SAMPLES];
483 double cycles_avg = 0.0;
484 unsigned i, j;
485
486 if(verbose >= 1)
487 dump_blend_type(stdout, blend, mode, type);
488
489 module = LLVMModuleCreateWithName("test");
490
491 func = add_blend_test(module, blend, mode, type);
492
493 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
494 LLVMDumpModule(module);
495 abort();
496 }
497 LLVMDisposeMessage(error);
498
499 provider = LLVMCreateModuleProviderForExistingModule(module);
500 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
501 if(verbose < 1)
502 dump_blend_type(stderr, blend, mode, type);
503 fprintf(stderr, "%s\n", error);
504 LLVMDisposeMessage(error);
505 abort();
506 }
507
508 #if 0
509 pass = LLVMCreatePassManager();
510 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
511 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
512 * but there are more on SVN. */
513 LLVMAddConstantPropagationPass(pass);
514 LLVMAddInstructionCombiningPass(pass);
515 LLVMAddPromoteMemoryToRegisterPass(pass);
516 LLVMAddGVNPass(pass);
517 LLVMAddCFGSimplificationPass(pass);
518 LLVMRunPassManager(pass, module);
519 #else
520 (void)pass;
521 #endif
522
523 if(verbose >= 2)
524 LLVMDumpModule(module);
525
526 blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
527
528 if(verbose >= 2)
529 lp_disassemble(blend_test_ptr);
530
531 success = TRUE;
532 for(i = 0; i < n && success; ++i) {
533 if(mode == AoS) {
534 ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
535 ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
536 ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
537 ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
538 ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
539 int64_t start_counter = 0;
540 int64_t end_counter = 0;
541
542 random_vec(type, src);
543 random_vec(type, dst);
544 random_vec(type, con);
545
546 {
547 double fsrc[LP_MAX_VECTOR_LENGTH];
548 double fdst[LP_MAX_VECTOR_LENGTH];
549 double fcon[LP_MAX_VECTOR_LENGTH];
550 double fref[LP_MAX_VECTOR_LENGTH];
551
552 read_vec(type, src, fsrc);
553 read_vec(type, dst, fdst);
554 read_vec(type, con, fcon);
555
556 for(j = 0; j < type.length; j += 4)
557 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
558
559 write_vec(type, ref, fref);
560 }
561
562 start_counter = rdtsc();
563 blend_test_ptr(src, dst, con, res);
564 end_counter = rdtsc();
565
566 cycles[i] = end_counter - start_counter;
567
568 if(!compare_vec(type, res, ref)) {
569 success = FALSE;
570
571 if(verbose < 1)
572 dump_blend_type(stderr, blend, mode, type);
573 fprintf(stderr, "MISMATCH\n");
574
575 fprintf(stderr, " Src: ");
576 dump_vec(stderr, type, src);
577 fprintf(stderr, "\n");
578
579 fprintf(stderr, " Dst: ");
580 dump_vec(stderr, type, dst);
581 fprintf(stderr, "\n");
582
583 fprintf(stderr, " Con: ");
584 dump_vec(stderr, type, con);
585 fprintf(stderr, "\n");
586
587 fprintf(stderr, " Res: ");
588 dump_vec(stderr, type, res);
589 fprintf(stderr, "\n");
590
591 fprintf(stderr, " Ref: ");
592 dump_vec(stderr, type, ref);
593 fprintf(stderr, "\n");
594 }
595 }
596
597 if(mode == SoA) {
598 const unsigned stride = type.length*type.width/8;
599 ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
600 ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
601 ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
602 ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
603 ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
604 int64_t start_counter = 0;
605 int64_t end_counter = 0;
606 boolean mismatch;
607
608 for(j = 0; j < 4; ++j) {
609 random_vec(type, src + j*stride);
610 random_vec(type, dst + j*stride);
611 random_vec(type, con + j*stride);
612 }
613
614 {
615 double fsrc[4];
616 double fdst[4];
617 double fcon[4];
618 double fref[4];
619 unsigned k;
620
621 for(k = 0; k < type.length; ++k) {
622 for(j = 0; j < 4; ++j) {
623 fsrc[j] = read_elem(type, src + j*stride, k);
624 fdst[j] = read_elem(type, dst + j*stride, k);
625 fcon[j] = read_elem(type, con + j*stride, k);
626 }
627
628 compute_blend_ref(blend, fsrc, fdst, fcon, fref);
629
630 for(j = 0; j < 4; ++j)
631 write_elem(type, ref + j*stride, k, fref[j]);
632 }
633 }
634
635 start_counter = rdtsc();
636 blend_test_ptr(src, dst, con, res);
637 end_counter = rdtsc();
638
639 cycles[i] = end_counter - start_counter;
640
641 mismatch = FALSE;
642 for (j = 0; j < 4; ++j)
643 if(!compare_vec(type, res + j*stride, ref + j*stride))
644 mismatch = TRUE;
645
646 if (mismatch) {
647 success = FALSE;
648
649 if(verbose < 1)
650 dump_blend_type(stderr, blend, mode, type);
651 fprintf(stderr, "MISMATCH\n");
652 for(j = 0; j < 4; ++j) {
653 char channel = "RGBA"[j];
654 fprintf(stderr, " Src%c: ", channel);
655 dump_vec(stderr, type, src + j*stride);
656 fprintf(stderr, "\n");
657
658 fprintf(stderr, " Dst%c: ", channel);
659 dump_vec(stderr, type, dst + j*stride);
660 fprintf(stderr, "\n");
661
662 fprintf(stderr, " Con%c: ", channel);
663 dump_vec(stderr, type, con + j*stride);
664 fprintf(stderr, "\n");
665
666 fprintf(stderr, " Res%c: ", channel);
667 dump_vec(stderr, type, res + j*stride);
668 fprintf(stderr, "\n");
669
670 fprintf(stderr, " Ref%c: ", channel);
671 dump_vec(stderr, type, ref + j*stride);
672 fprintf(stderr, "\n");
673 }
674 }
675 }
676 }
677
678 /*
679 * Unfortunately the output of cycle counter is not very reliable as it comes
680 * -- sometimes we get outliers (due IRQs perhaps?) which are
681 * better removed to avoid random or biased data.
682 */
683 {
684 double sum = 0.0, sum2 = 0.0;
685 double avg, std;
686 unsigned m;
687
688 for(i = 0; i < n; ++i) {
689 sum += cycles[i];
690 sum2 += cycles[i]*cycles[i];
691 }
692
693 avg = sum/n;
694 std = sqrtf((sum2 - n*avg*avg)/n);
695
696 m = 0;
697 sum = 0.0;
698 for(i = 0; i < n; ++i) {
699 if(fabs(cycles[i] - avg) <= 4.0*std) {
700 sum += cycles[i];
701 ++m;
702 }
703 }
704
705 cycles_avg = sum/m;
706
707 }
708
709 if(fp)
710 write_tsv_row(fp, blend, mode, type, cycles_avg, success);
711
712 if (!success) {
713 if(verbose < 2)
714 LLVMDumpModule(module);
715 LLVMWriteBitcodeToFile(module, "blend.bc");
716 fprintf(stderr, "blend.bc written\n");
717 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
718 abort();
719 }
720
721 LLVMFreeMachineCodeForFunction(engine, func);
722
723 LLVMDisposeExecutionEngine(engine);
724 if(pass)
725 LLVMDisposePassManager(pass);
726
727 return success;
728 }
729
730
731 const unsigned
732 blend_factors[] = {
733 PIPE_BLENDFACTOR_ZERO,
734 PIPE_BLENDFACTOR_ONE,
735 PIPE_BLENDFACTOR_SRC_COLOR,
736 PIPE_BLENDFACTOR_SRC_ALPHA,
737 PIPE_BLENDFACTOR_DST_COLOR,
738 PIPE_BLENDFACTOR_DST_ALPHA,
739 PIPE_BLENDFACTOR_CONST_COLOR,
740 PIPE_BLENDFACTOR_CONST_ALPHA,
741 #if 0
742 PIPE_BLENDFACTOR_SRC1_COLOR,
743 PIPE_BLENDFACTOR_SRC1_ALPHA,
744 #endif
745 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
746 PIPE_BLENDFACTOR_INV_SRC_COLOR,
747 PIPE_BLENDFACTOR_INV_SRC_ALPHA,
748 PIPE_BLENDFACTOR_INV_DST_COLOR,
749 PIPE_BLENDFACTOR_INV_DST_ALPHA,
750 PIPE_BLENDFACTOR_INV_CONST_COLOR,
751 PIPE_BLENDFACTOR_INV_CONST_ALPHA,
752 #if 0
753 PIPE_BLENDFACTOR_INV_SRC1_COLOR,
754 PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
755 #endif
756 };
757
758
759 const unsigned
760 blend_funcs[] = {
761 PIPE_BLEND_ADD,
762 PIPE_BLEND_SUBTRACT,
763 PIPE_BLEND_REVERSE_SUBTRACT,
764 PIPE_BLEND_MIN,
765 PIPE_BLEND_MAX
766 };
767
768
769 const struct lp_type blend_types[] = {
770 /* float, fixed, sign, norm, width, len */
771 { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
772 { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
773 };
774
775
776 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
777 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
778 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
779
780
781 boolean
782 test_all(unsigned verbose, FILE *fp)
783 {
784 const unsigned *rgb_func;
785 const unsigned *rgb_src_factor;
786 const unsigned *rgb_dst_factor;
787 const unsigned *alpha_func;
788 const unsigned *alpha_src_factor;
789 const unsigned *alpha_dst_factor;
790 struct pipe_blend_state blend;
791 enum vector_mode mode;
792 const struct lp_type *type;
793 bool success = TRUE;
794
795 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
796 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
797 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
798 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
799 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
800 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
801 for(mode = 0; mode < 2; ++mode) {
802 for(type = blend_types; type < &blend_types[num_types]; ++type) {
803
804 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
805 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
806 continue;
807
808 memset(&blend, 0, sizeof blend);
809 blend.blend_enable = 1;
810 blend.rgb_func = *rgb_func;
811 blend.rgb_src_factor = *rgb_src_factor;
812 blend.rgb_dst_factor = *rgb_dst_factor;
813 blend.alpha_func = *alpha_func;
814 blend.alpha_src_factor = *alpha_src_factor;
815 blend.alpha_dst_factor = *alpha_dst_factor;
816 blend.colormask = PIPE_MASK_RGBA;
817
818 if(!test_one(verbose, fp, &blend, mode, *type))
819 success = FALSE;
820
821 }
822 }
823 }
824 }
825 }
826 }
827 }
828 }
829
830 return success;
831 }
832
833
834 boolean
835 test_some(unsigned verbose, FILE *fp, unsigned long n)
836 {
837 const unsigned *rgb_func;
838 const unsigned *rgb_src_factor;
839 const unsigned *rgb_dst_factor;
840 const unsigned *alpha_func;
841 const unsigned *alpha_src_factor;
842 const unsigned *alpha_dst_factor;
843 struct pipe_blend_state blend;
844 enum vector_mode mode;
845 const struct lp_type *type;
846 unsigned long i;
847 bool success = TRUE;
848
849 for(i = 0; i < n; ++i) {
850 rgb_func = &blend_funcs[rand() % num_funcs];
851 alpha_func = &blend_funcs[rand() % num_funcs];
852 rgb_src_factor = &blend_factors[rand() % num_factors];
853 alpha_src_factor = &blend_factors[rand() % num_factors];
854
855 do {
856 rgb_dst_factor = &blend_factors[rand() % num_factors];
857 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
858
859 do {
860 alpha_dst_factor = &blend_factors[rand() % num_factors];
861 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
862
863 mode = rand() & 1;
864
865 type = &blend_types[rand() % num_types];
866
867 memset(&blend, 0, sizeof blend);
868 blend.blend_enable = 1;
869 blend.rgb_func = *rgb_func;
870 blend.rgb_src_factor = *rgb_src_factor;
871 blend.rgb_dst_factor = *rgb_dst_factor;
872 blend.alpha_func = *alpha_func;
873 blend.alpha_src_factor = *alpha_src_factor;
874 blend.alpha_dst_factor = *alpha_dst_factor;
875 blend.colormask = PIPE_MASK_RGBA;
876
877 if(!test_one(verbose, fp, &blend, mode, *type))
878 success = FALSE;
879 }
880
881 return success;
882 }