Merge commit 'origin/perrtblend'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_test_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
40 #include "lp_bld_type.h"
41 #include "lp_bld_blend.h"
42 #include "lp_bld_debug.h"
43 #include "lp_test.h"
44
45
/**
 * Layout of the operands handed to the generated blend function.
 *
 * In AoS mode each operand is a single vector that is blended as a whole;
 * in SoA mode each operand is made of four consecutive vectors, one per
 * R, G, B, A channel.
 */
enum vector_mode
{
   AoS,   /* first enumerator, value 0 */
   SoA    /* value 1 */
};
51
52
/**
 * Signature of the JIT-compiled blend test function.
 *
 * The three inputs (source, destination, constant color) are read-only and
 * the blended result is stored through the last pointer.
 */
typedef void
(*blend_test_ptr_t)(const void *src,
                    const void *dst,
                    const void *con,
                    void *res);
54
55
/**
 * Write the column-name row of the tab-separated results file.
 *
 * Column names are separated by tabs, the row is terminated by a newline and
 * the stream is flushed afterwards.
 *
 * @param fp  stream the header is written to
 */
void
write_tsv_header(FILE *fp)
{
   static const char *const columns[] = {
      "result",
      "cycles_per_channel",
      "mode",
      "type",
      "sep_func",
      "sep_src_factor",
      "sep_dst_factor",
      "rgb_func",
      "rgb_src_factor",
      "rgb_dst_factor",
      "alpha_func",
      "alpha_src_factor",
      "alpha_dst_factor"
   };
   const unsigned count = sizeof columns / sizeof columns[0];
   unsigned idx;

   for (idx = 0; idx < count; ++idx) {
      fputs(columns[idx], fp);
      /* tab between columns, newline after the last one */
      fputc(idx + 1 < count ? '\t' : '\n', fp);
   }

   fflush(fp);
}
76
77
78 static void
79 write_tsv_row(FILE *fp,
80 const struct pipe_blend_state *blend,
81 enum vector_mode mode,
82 struct lp_type type,
83 double cycles,
84 boolean success)
85 {
86 fprintf(fp, "%s\t", success ? "pass" : "fail");
87
88 if (mode == AoS) {
89 fprintf(fp, "%.1f\t", cycles / type.length);
90 fprintf(fp, "aos\t");
91 }
92
93 if (mode == SoA) {
94 fprintf(fp, "%.1f\t", cycles / (4 * type.length));
95 fprintf(fp, "soa\t");
96 }
97
98 fprintf(fp, "%s%u%sx%u\t",
99 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
100 type.width,
101 type.norm ? "n" : "",
102 type.length);
103
104 fprintf(fp,
105 "%s\t%s\t%s\t",
106 blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
107 blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
108 blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
109
110 fprintf(fp,
111 "%s\t%s\t%s\t%s\t%s\t%s\n",
112 debug_dump_blend_func(blend->rt[0].rgb_func, TRUE),
113 debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
114 debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
115 debug_dump_blend_func(blend->rt[0].alpha_func, TRUE),
116 debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
117 debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
118
119 fflush(fp);
120 }
121
122
123 static void
124 dump_blend_type(FILE *fp,
125 const struct pipe_blend_state *blend,
126 enum vector_mode mode,
127 struct lp_type type)
128 {
129 fprintf(fp, "%s", mode ? "soa" : "aos");
130
131 fprintf(fp, " type=%s%u%sx%u",
132 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
133 type.width,
134 type.norm ? "n" : "",
135 type.length);
136
137 fprintf(fp,
138 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
139 "rgb_func", debug_dump_blend_func(blend->rt[0].rgb_func, TRUE),
140 "rgb_src_factor", debug_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
141 "rgb_dst_factor", debug_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
142 "alpha_func", debug_dump_blend_func(blend->rt[0].alpha_func, TRUE),
143 "alpha_src_factor", debug_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
144 "alpha_dst_factor", debug_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
145
146 fprintf(fp, " ...\n");
147 fflush(fp);
148 }
149
150
151 static LLVMValueRef
152 add_blend_test(LLVMModuleRef module,
153 const struct pipe_blend_state *blend,
154 enum vector_mode mode,
155 struct lp_type type)
156 {
157 LLVMTypeRef ret_type;
158 LLVMTypeRef vec_type;
159 LLVMTypeRef args[4];
160 LLVMValueRef func;
161 LLVMValueRef src_ptr;
162 LLVMValueRef dst_ptr;
163 LLVMValueRef const_ptr;
164 LLVMValueRef res_ptr;
165 LLVMBasicBlockRef block;
166 LLVMBuilderRef builder;
167
168 ret_type = LLVMInt64Type();
169 vec_type = lp_build_vec_type(type);
170
171 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
172 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
173 LLVMSetFunctionCallConv(func, LLVMCCallConv);
174 src_ptr = LLVMGetParam(func, 0);
175 dst_ptr = LLVMGetParam(func, 1);
176 const_ptr = LLVMGetParam(func, 2);
177 res_ptr = LLVMGetParam(func, 3);
178
179 block = LLVMAppendBasicBlock(func, "entry");
180 builder = LLVMCreateBuilder();
181 LLVMPositionBuilderAtEnd(builder, block);
182
183 if (mode == AoS) {
184 LLVMValueRef src;
185 LLVMValueRef dst;
186 LLVMValueRef con;
187 LLVMValueRef res;
188
189 src = LLVMBuildLoad(builder, src_ptr, "src");
190 dst = LLVMBuildLoad(builder, dst_ptr, "dst");
191 con = LLVMBuildLoad(builder, const_ptr, "const");
192
193 res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
194
195 lp_build_name(res, "res");
196
197 LLVMBuildStore(builder, res, res_ptr);
198 }
199
200 if (mode == SoA) {
201 LLVMValueRef src[4];
202 LLVMValueRef dst[4];
203 LLVMValueRef con[4];
204 LLVMValueRef res[4];
205 unsigned i;
206
207 for(i = 0; i < 4; ++i) {
208 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
209 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
210 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
211 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
212 lp_build_name(src[i], "src.%c", "rgba"[i]);
213 lp_build_name(con[i], "con.%c", "rgba"[i]);
214 lp_build_name(dst[i], "dst.%c", "rgba"[i]);
215 }
216
217 lp_build_blend_soa(builder, blend, type, src, dst, con, res);
218
219 for(i = 0; i < 4; ++i) {
220 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
221 lp_build_name(res[i], "res.%c", "rgba"[i]);
222 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
223 }
224 }
225
226 LLVMBuildRetVoid(builder);;
227
228 LLVMDisposeBuilder(builder);
229 return func;
230 }
231
232
/** Store A + B in R, clamping the sum so that it never exceeds 1.0 */
#define ADD_SAT(R, A, B) \
   do { \
      R = (A) + (B); \
      if (R > 1.0f) { \
         R = 1.0f; \
      } \
   } while (0)
238
/** Store A - B in R, clamping the difference so that it never drops below 0.0 */
#define SUB_SAT(R, A, B) \
   do { \
      R = (A) - (B); \
      if (R < 0.0f) { \
         R = 0.0f; \
      } \
   } while (0)
244
245
246 static void
247 compute_blend_ref_term(unsigned rgb_factor,
248 unsigned alpha_factor,
249 const double *factor,
250 const double *src,
251 const double *dst,
252 const double *con,
253 double *term)
254 {
255 double temp;
256
257 switch (rgb_factor) {
258 case PIPE_BLENDFACTOR_ONE:
259 term[0] = factor[0]; /* R */
260 term[1] = factor[1]; /* G */
261 term[2] = factor[2]; /* B */
262 break;
263 case PIPE_BLENDFACTOR_SRC_COLOR:
264 term[0] = factor[0] * src[0]; /* R */
265 term[1] = factor[1] * src[1]; /* G */
266 term[2] = factor[2] * src[2]; /* B */
267 break;
268 case PIPE_BLENDFACTOR_SRC_ALPHA:
269 term[0] = factor[0] * src[3]; /* R */
270 term[1] = factor[1] * src[3]; /* G */
271 term[2] = factor[2] * src[3]; /* B */
272 break;
273 case PIPE_BLENDFACTOR_DST_COLOR:
274 term[0] = factor[0] * dst[0]; /* R */
275 term[1] = factor[1] * dst[1]; /* G */
276 term[2] = factor[2] * dst[2]; /* B */
277 break;
278 case PIPE_BLENDFACTOR_DST_ALPHA:
279 term[0] = factor[0] * dst[3]; /* R */
280 term[1] = factor[1] * dst[3]; /* G */
281 term[2] = factor[2] * dst[3]; /* B */
282 break;
283 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
284 temp = MIN2(src[3], 1.0f - dst[3]);
285 term[0] = factor[0] * temp; /* R */
286 term[1] = factor[1] * temp; /* G */
287 term[2] = factor[2] * temp; /* B */
288 break;
289 case PIPE_BLENDFACTOR_CONST_COLOR:
290 term[0] = factor[0] * con[0]; /* R */
291 term[1] = factor[1] * con[1]; /* G */
292 term[2] = factor[2] * con[2]; /* B */
293 break;
294 case PIPE_BLENDFACTOR_CONST_ALPHA:
295 term[0] = factor[0] * con[3]; /* R */
296 term[1] = factor[1] * con[3]; /* G */
297 term[2] = factor[2] * con[3]; /* B */
298 break;
299 case PIPE_BLENDFACTOR_SRC1_COLOR:
300 assert(0); /* to do */
301 break;
302 case PIPE_BLENDFACTOR_SRC1_ALPHA:
303 assert(0); /* to do */
304 break;
305 case PIPE_BLENDFACTOR_ZERO:
306 term[0] = 0.0f; /* R */
307 term[1] = 0.0f; /* G */
308 term[2] = 0.0f; /* B */
309 break;
310 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
311 term[0] = factor[0] * (1.0f - src[0]); /* R */
312 term[1] = factor[1] * (1.0f - src[1]); /* G */
313 term[2] = factor[2] * (1.0f - src[2]); /* B */
314 break;
315 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
316 term[0] = factor[0] * (1.0f - src[3]); /* R */
317 term[1] = factor[1] * (1.0f - src[3]); /* G */
318 term[2] = factor[2] * (1.0f - src[3]); /* B */
319 break;
320 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
321 term[0] = factor[0] * (1.0f - dst[3]); /* R */
322 term[1] = factor[1] * (1.0f - dst[3]); /* G */
323 term[2] = factor[2] * (1.0f - dst[3]); /* B */
324 break;
325 case PIPE_BLENDFACTOR_INV_DST_COLOR:
326 term[0] = factor[0] * (1.0f - dst[0]); /* R */
327 term[1] = factor[1] * (1.0f - dst[1]); /* G */
328 term[2] = factor[2] * (1.0f - dst[2]); /* B */
329 break;
330 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
331 term[0] = factor[0] * (1.0f - con[0]); /* R */
332 term[1] = factor[1] * (1.0f - con[1]); /* G */
333 term[2] = factor[2] * (1.0f - con[2]); /* B */
334 break;
335 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
336 term[0] = factor[0] * (1.0f - con[3]); /* R */
337 term[1] = factor[1] * (1.0f - con[3]); /* G */
338 term[2] = factor[2] * (1.0f - con[3]); /* B */
339 break;
340 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
341 assert(0); /* to do */
342 break;
343 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
344 assert(0); /* to do */
345 break;
346 default:
347 assert(0);
348 }
349
350 /*
351 * Compute src/first term A
352 */
353 switch (alpha_factor) {
354 case PIPE_BLENDFACTOR_ONE:
355 term[3] = factor[3]; /* A */
356 break;
357 case PIPE_BLENDFACTOR_SRC_COLOR:
358 case PIPE_BLENDFACTOR_SRC_ALPHA:
359 term[3] = factor[3] * src[3]; /* A */
360 break;
361 case PIPE_BLENDFACTOR_DST_COLOR:
362 case PIPE_BLENDFACTOR_DST_ALPHA:
363 term[3] = factor[3] * dst[3]; /* A */
364 break;
365 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
366 term[3] = src[3]; /* A */
367 break;
368 case PIPE_BLENDFACTOR_CONST_COLOR:
369 case PIPE_BLENDFACTOR_CONST_ALPHA:
370 term[3] = factor[3] * con[3]; /* A */
371 break;
372 case PIPE_BLENDFACTOR_ZERO:
373 term[3] = 0.0f; /* A */
374 break;
375 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
376 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
377 term[3] = factor[3] * (1.0f - src[3]); /* A */
378 break;
379 case PIPE_BLENDFACTOR_INV_DST_COLOR:
380 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
381 term[3] = factor[3] * (1.0f - dst[3]); /* A */
382 break;
383 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
384 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
385 term[3] = factor[3] * (1.0f - con[3]);
386 break;
387 default:
388 assert(0);
389 }
390 }
391
392
393 static void
394 compute_blend_ref(const struct pipe_blend_state *blend,
395 const double *src,
396 const double *dst,
397 const double *con,
398 double *res)
399 {
400 double src_term[4];
401 double dst_term[4];
402
403 compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
404 src, src, dst, con, src_term);
405 compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
406 dst, src, dst, con, dst_term);
407
408 /*
409 * Combine RGB terms
410 */
411 switch (blend->rt[0].rgb_func) {
412 case PIPE_BLEND_ADD:
413 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
414 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
415 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
416 break;
417 case PIPE_BLEND_SUBTRACT:
418 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
419 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
420 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
421 break;
422 case PIPE_BLEND_REVERSE_SUBTRACT:
423 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
424 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
425 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
426 break;
427 case PIPE_BLEND_MIN:
428 res[0] = MIN2(src_term[0], dst_term[0]); /* R */
429 res[1] = MIN2(src_term[1], dst_term[1]); /* G */
430 res[2] = MIN2(src_term[2], dst_term[2]); /* B */
431 break;
432 case PIPE_BLEND_MAX:
433 res[0] = MAX2(src_term[0], dst_term[0]); /* R */
434 res[1] = MAX2(src_term[1], dst_term[1]); /* G */
435 res[2] = MAX2(src_term[2], dst_term[2]); /* B */
436 break;
437 default:
438 assert(0);
439 }
440
441 /*
442 * Combine A terms
443 */
444 switch (blend->rt[0].alpha_func) {
445 case PIPE_BLEND_ADD:
446 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
447 break;
448 case PIPE_BLEND_SUBTRACT:
449 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
450 break;
451 case PIPE_BLEND_REVERSE_SUBTRACT:
452 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
453 break;
454 case PIPE_BLEND_MIN:
455 res[3] = MIN2(src_term[3], dst_term[3]); /* A */
456 break;
457 case PIPE_BLEND_MAX:
458 res[3] = MAX2(src_term[3], dst_term[3]); /* A */
459 break;
460 default:
461 assert(0);
462 }
463 }
464
465
466 PIPE_ALIGN_STACK
467 static boolean
468 test_one(unsigned verbose,
469 FILE *fp,
470 const struct pipe_blend_state *blend,
471 enum vector_mode mode,
472 struct lp_type type)
473 {
474 LLVMModuleRef module = NULL;
475 LLVMValueRef func = NULL;
476 LLVMExecutionEngineRef engine = NULL;
477 LLVMModuleProviderRef provider = NULL;
478 LLVMPassManagerRef pass = NULL;
479 char *error = NULL;
480 blend_test_ptr_t blend_test_ptr;
481 boolean success;
482 const unsigned n = LP_TEST_NUM_SAMPLES;
483 int64_t cycles[LP_TEST_NUM_SAMPLES];
484 double cycles_avg = 0.0;
485 unsigned i, j;
486
487 if(verbose >= 1)
488 dump_blend_type(stdout, blend, mode, type);
489
490 module = LLVMModuleCreateWithName("test");
491
492 func = add_blend_test(module, blend, mode, type);
493
494 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
495 LLVMDumpModule(module);
496 abort();
497 }
498 LLVMDisposeMessage(error);
499
500 provider = LLVMCreateModuleProviderForExistingModule(module);
501 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
502 if(verbose < 1)
503 dump_blend_type(stderr, blend, mode, type);
504 fprintf(stderr, "%s\n", error);
505 LLVMDisposeMessage(error);
506 abort();
507 }
508
509 #if 0
510 pass = LLVMCreatePassManager();
511 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
512 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
513 * but there are more on SVN. */
514 LLVMAddConstantPropagationPass(pass);
515 LLVMAddInstructionCombiningPass(pass);
516 LLVMAddPromoteMemoryToRegisterPass(pass);
517 LLVMAddGVNPass(pass);
518 LLVMAddCFGSimplificationPass(pass);
519 LLVMRunPassManager(pass, module);
520 #else
521 (void)pass;
522 #endif
523
524 if(verbose >= 2)
525 LLVMDumpModule(module);
526
527 blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
528
529 if(verbose >= 2)
530 lp_disassemble(blend_test_ptr);
531
532 success = TRUE;
533 for(i = 0; i < n && success; ++i) {
534 if(mode == AoS) {
535 PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
536 PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
537 PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
538 PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
539 PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
540 int64_t start_counter = 0;
541 int64_t end_counter = 0;
542
543 random_vec(type, src);
544 random_vec(type, dst);
545 random_vec(type, con);
546
547 {
548 double fsrc[LP_MAX_VECTOR_LENGTH];
549 double fdst[LP_MAX_VECTOR_LENGTH];
550 double fcon[LP_MAX_VECTOR_LENGTH];
551 double fref[LP_MAX_VECTOR_LENGTH];
552
553 read_vec(type, src, fsrc);
554 read_vec(type, dst, fdst);
555 read_vec(type, con, fcon);
556
557 for(j = 0; j < type.length; j += 4)
558 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
559
560 write_vec(type, ref, fref);
561 }
562
563 start_counter = rdtsc();
564 blend_test_ptr(src, dst, con, res);
565 end_counter = rdtsc();
566
567 cycles[i] = end_counter - start_counter;
568
569 if(!compare_vec(type, res, ref)) {
570 success = FALSE;
571
572 if(verbose < 1)
573 dump_blend_type(stderr, blend, mode, type);
574 fprintf(stderr, "MISMATCH\n");
575
576 fprintf(stderr, " Src: ");
577 dump_vec(stderr, type, src);
578 fprintf(stderr, "\n");
579
580 fprintf(stderr, " Dst: ");
581 dump_vec(stderr, type, dst);
582 fprintf(stderr, "\n");
583
584 fprintf(stderr, " Con: ");
585 dump_vec(stderr, type, con);
586 fprintf(stderr, "\n");
587
588 fprintf(stderr, " Res: ");
589 dump_vec(stderr, type, res);
590 fprintf(stderr, "\n");
591
592 fprintf(stderr, " Ref: ");
593 dump_vec(stderr, type, ref);
594 fprintf(stderr, "\n");
595 }
596 }
597
598 if(mode == SoA) {
599 const unsigned stride = type.length*type.width/8;
600 PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
601 PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
602 PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
603 PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
604 PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
605 int64_t start_counter = 0;
606 int64_t end_counter = 0;
607 boolean mismatch;
608
609 for(j = 0; j < 4; ++j) {
610 random_vec(type, src + j*stride);
611 random_vec(type, dst + j*stride);
612 random_vec(type, con + j*stride);
613 }
614
615 {
616 double fsrc[4];
617 double fdst[4];
618 double fcon[4];
619 double fref[4];
620 unsigned k;
621
622 for(k = 0; k < type.length; ++k) {
623 for(j = 0; j < 4; ++j) {
624 fsrc[j] = read_elem(type, src + j*stride, k);
625 fdst[j] = read_elem(type, dst + j*stride, k);
626 fcon[j] = read_elem(type, con + j*stride, k);
627 }
628
629 compute_blend_ref(blend, fsrc, fdst, fcon, fref);
630
631 for(j = 0; j < 4; ++j)
632 write_elem(type, ref + j*stride, k, fref[j]);
633 }
634 }
635
636 start_counter = rdtsc();
637 blend_test_ptr(src, dst, con, res);
638 end_counter = rdtsc();
639
640 cycles[i] = end_counter - start_counter;
641
642 mismatch = FALSE;
643 for (j = 0; j < 4; ++j)
644 if(!compare_vec(type, res + j*stride, ref + j*stride))
645 mismatch = TRUE;
646
647 if (mismatch) {
648 success = FALSE;
649
650 if(verbose < 1)
651 dump_blend_type(stderr, blend, mode, type);
652 fprintf(stderr, "MISMATCH\n");
653 for(j = 0; j < 4; ++j) {
654 char channel = "RGBA"[j];
655 fprintf(stderr, " Src%c: ", channel);
656 dump_vec(stderr, type, src + j*stride);
657 fprintf(stderr, "\n");
658
659 fprintf(stderr, " Dst%c: ", channel);
660 dump_vec(stderr, type, dst + j*stride);
661 fprintf(stderr, "\n");
662
663 fprintf(stderr, " Con%c: ", channel);
664 dump_vec(stderr, type, con + j*stride);
665 fprintf(stderr, "\n");
666
667 fprintf(stderr, " Res%c: ", channel);
668 dump_vec(stderr, type, res + j*stride);
669 fprintf(stderr, "\n");
670
671 fprintf(stderr, " Ref%c: ", channel);
672 dump_vec(stderr, type, ref + j*stride);
673 fprintf(stderr, "\n");
674 }
675 }
676 }
677 }
678
679 /*
680 * Unfortunately the output of cycle counter is not very reliable as it comes
681 * -- sometimes we get outliers (due IRQs perhaps?) which are
682 * better removed to avoid random or biased data.
683 */
684 {
685 double sum = 0.0, sum2 = 0.0;
686 double avg, std;
687 unsigned m;
688
689 for(i = 0; i < n; ++i) {
690 sum += cycles[i];
691 sum2 += cycles[i]*cycles[i];
692 }
693
694 avg = sum/n;
695 std = sqrtf((sum2 - n*avg*avg)/n);
696
697 m = 0;
698 sum = 0.0;
699 for(i = 0; i < n; ++i) {
700 if(fabs(cycles[i] - avg) <= 4.0*std) {
701 sum += cycles[i];
702 ++m;
703 }
704 }
705
706 cycles_avg = sum/m;
707
708 }
709
710 if(fp)
711 write_tsv_row(fp, blend, mode, type, cycles_avg, success);
712
713 if (!success) {
714 if(verbose < 2)
715 LLVMDumpModule(module);
716 LLVMWriteBitcodeToFile(module, "blend.bc");
717 fprintf(stderr, "blend.bc written\n");
718 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
719 abort();
720 }
721
722 LLVMFreeMachineCodeForFunction(engine, func);
723
724 LLVMDisposeExecutionEngine(engine);
725 if(pass)
726 LLVMDisposePassManager(pass);
727
728 return success;
729 }
730
731
732 const unsigned
733 blend_factors[] = {
734 PIPE_BLENDFACTOR_ZERO,
735 PIPE_BLENDFACTOR_ONE,
736 PIPE_BLENDFACTOR_SRC_COLOR,
737 PIPE_BLENDFACTOR_SRC_ALPHA,
738 PIPE_BLENDFACTOR_DST_COLOR,
739 PIPE_BLENDFACTOR_DST_ALPHA,
740 PIPE_BLENDFACTOR_CONST_COLOR,
741 PIPE_BLENDFACTOR_CONST_ALPHA,
742 #if 0
743 PIPE_BLENDFACTOR_SRC1_COLOR,
744 PIPE_BLENDFACTOR_SRC1_ALPHA,
745 #endif
746 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
747 PIPE_BLENDFACTOR_INV_SRC_COLOR,
748 PIPE_BLENDFACTOR_INV_SRC_ALPHA,
749 PIPE_BLENDFACTOR_INV_DST_COLOR,
750 PIPE_BLENDFACTOR_INV_DST_ALPHA,
751 PIPE_BLENDFACTOR_INV_CONST_COLOR,
752 PIPE_BLENDFACTOR_INV_CONST_ALPHA,
753 #if 0
754 PIPE_BLENDFACTOR_INV_SRC1_COLOR,
755 PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
756 #endif
757 };
758
759
760 const unsigned
761 blend_funcs[] = {
762 PIPE_BLEND_ADD,
763 PIPE_BLEND_SUBTRACT,
764 PIPE_BLEND_REVERSE_SUBTRACT,
765 PIPE_BLEND_MIN,
766 PIPE_BLEND_MAX
767 };
768
769
770 const struct lp_type blend_types[] = {
771 /* float, fixed, sign, norm, width, len */
772 { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
773 { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
774 };
775
776
777 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
778 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
779 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
780
781
782 boolean
783 test_all(unsigned verbose, FILE *fp)
784 {
785 const unsigned *rgb_func;
786 const unsigned *rgb_src_factor;
787 const unsigned *rgb_dst_factor;
788 const unsigned *alpha_func;
789 const unsigned *alpha_src_factor;
790 const unsigned *alpha_dst_factor;
791 struct pipe_blend_state blend;
792 enum vector_mode mode;
793 const struct lp_type *type;
794 bool success = TRUE;
795
796 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
797 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
798 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
799 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
800 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
801 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
802 for(mode = 0; mode < 2; ++mode) {
803 for(type = blend_types; type < &blend_types[num_types]; ++type) {
804
805 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
806 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
807 continue;
808
809 memset(&blend, 0, sizeof blend);
810 blend.rt[0].blend_enable = 1;
811 blend.rt[0].rgb_func = *rgb_func;
812 blend.rt[0].rgb_src_factor = *rgb_src_factor;
813 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
814 blend.rt[0].alpha_func = *alpha_func;
815 blend.rt[0].alpha_src_factor = *alpha_src_factor;
816 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
817 blend.rt[0].colormask = PIPE_MASK_RGBA;
818
819 if(!test_one(verbose, fp, &blend, mode, *type))
820 success = FALSE;
821
822 }
823 }
824 }
825 }
826 }
827 }
828 }
829 }
830
831 return success;
832 }
833
834
835 boolean
836 test_some(unsigned verbose, FILE *fp, unsigned long n)
837 {
838 const unsigned *rgb_func;
839 const unsigned *rgb_src_factor;
840 const unsigned *rgb_dst_factor;
841 const unsigned *alpha_func;
842 const unsigned *alpha_src_factor;
843 const unsigned *alpha_dst_factor;
844 struct pipe_blend_state blend;
845 enum vector_mode mode;
846 const struct lp_type *type;
847 unsigned long i;
848 bool success = TRUE;
849
850 for(i = 0; i < n; ++i) {
851 rgb_func = &blend_funcs[rand() % num_funcs];
852 alpha_func = &blend_funcs[rand() % num_funcs];
853 rgb_src_factor = &blend_factors[rand() % num_factors];
854 alpha_src_factor = &blend_factors[rand() % num_factors];
855
856 do {
857 rgb_dst_factor = &blend_factors[rand() % num_factors];
858 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
859
860 do {
861 alpha_dst_factor = &blend_factors[rand() % num_factors];
862 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
863
864 mode = rand() & 1;
865
866 type = &blend_types[rand() % num_types];
867
868 memset(&blend, 0, sizeof blend);
869 blend.rt[0].blend_enable = 1;
870 blend.rt[0].rgb_func = *rgb_func;
871 blend.rt[0].rgb_src_factor = *rgb_src_factor;
872 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
873 blend.rt[0].alpha_func = *alpha_func;
874 blend.rt[0].alpha_src_factor = *alpha_src_factor;
875 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
876 blend.rt[0].colormask = PIPE_MASK_RGBA;
877
878 if(!test_one(verbose, fp, &blend, mode, *type))
879 success = FALSE;
880 }
881
882 return success;
883 }