Merge branch '7.8'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_test_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
40 #include "gallivm/lp_bld_type.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "lp_bld_blend.h"
43 #include "lp_test.h"
44
45
/**
 * Layout of the color data handed to the generated blend function.
 *
 * The numeric values matter: the mode is used as a boolean when printing
 * and is picked with a single random bit in the randomized tests.
 */
enum vector_mode
{
   AoS = 0,   /* one vector holding interleaved R,G,B,A channels */
   SoA = 1    /* four consecutive vectors, one per channel */
};
51
52
/**
 * Signature of the JIT-compiled blend test function: takes pointers to the
 * source, destination and constant color data and a pointer that receives
 * the blended result.
 */
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
54
55
/**
 * Emit the column names of the tab-separated results file.
 *
 * The columns match, in order, the fields written for every test by
 * write_tsv_row().  The stream is flushed before returning.
 *
 * @param fp  stream receiving the header line
 */
void
write_tsv_header(FILE *fp)
{
   static const char header[] =
      "result\t"
      "cycles_per_channel\t"
      "mode\t"
      "type\t"
      "sep_func\t"
      "sep_src_factor\t"
      "sep_dst_factor\t"
      "rgb_func\t"
      "rgb_src_factor\t"
      "rgb_dst_factor\t"
      "alpha_func\t"
      "alpha_src_factor\t"
      "alpha_dst_factor\n";

   fputs(header, fp);
   fflush(fp);
}
76
77
78 static void
79 write_tsv_row(FILE *fp,
80 const struct pipe_blend_state *blend,
81 enum vector_mode mode,
82 struct lp_type type,
83 double cycles,
84 boolean success)
85 {
86 fprintf(fp, "%s\t", success ? "pass" : "fail");
87
88 if (mode == AoS) {
89 fprintf(fp, "%.1f\t", cycles / type.length);
90 fprintf(fp, "aos\t");
91 }
92
93 if (mode == SoA) {
94 fprintf(fp, "%.1f\t", cycles / (4 * type.length));
95 fprintf(fp, "soa\t");
96 }
97
98 fprintf(fp, "%s%u%sx%u\t",
99 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
100 type.width,
101 type.norm ? "n" : "",
102 type.length);
103
104 fprintf(fp,
105 "%s\t%s\t%s\t",
106 blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
107 blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
108 blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
109
110 fprintf(fp,
111 "%s\t%s\t%s\t%s\t%s\t%s\n",
112 util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
113 util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
114 util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
115 util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
116 util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
117 util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
118
119 fflush(fp);
120 }
121
122
123 static void
124 dump_blend_type(FILE *fp,
125 const struct pipe_blend_state *blend,
126 enum vector_mode mode,
127 struct lp_type type)
128 {
129 fprintf(fp, "%s", mode ? "soa" : "aos");
130
131 fprintf(fp, " type=%s%u%sx%u",
132 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
133 type.width,
134 type.norm ? "n" : "",
135 type.length);
136
137 fprintf(fp,
138 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
139 "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
140 "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
141 "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
142 "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
143 "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
144 "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
145
146 fprintf(fp, " ...\n");
147 fflush(fp);
148 }
149
150
151 static LLVMValueRef
152 add_blend_test(LLVMModuleRef module,
153 const struct pipe_blend_state *blend,
154 enum vector_mode mode,
155 struct lp_type type)
156 {
157 LLVMTypeRef vec_type;
158 LLVMTypeRef args[4];
159 LLVMValueRef func;
160 LLVMValueRef src_ptr;
161 LLVMValueRef dst_ptr;
162 LLVMValueRef const_ptr;
163 LLVMValueRef res_ptr;
164 LLVMBasicBlockRef block;
165 LLVMBuilderRef builder;
166
167 vec_type = lp_build_vec_type(type);
168
169 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
170 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
171 LLVMSetFunctionCallConv(func, LLVMCCallConv);
172 src_ptr = LLVMGetParam(func, 0);
173 dst_ptr = LLVMGetParam(func, 1);
174 const_ptr = LLVMGetParam(func, 2);
175 res_ptr = LLVMGetParam(func, 3);
176
177 block = LLVMAppendBasicBlock(func, "entry");
178 builder = LLVMCreateBuilder();
179 LLVMPositionBuilderAtEnd(builder, block);
180
181 if (mode == AoS) {
182 LLVMValueRef src;
183 LLVMValueRef dst;
184 LLVMValueRef con;
185 LLVMValueRef res;
186
187 src = LLVMBuildLoad(builder, src_ptr, "src");
188 dst = LLVMBuildLoad(builder, dst_ptr, "dst");
189 con = LLVMBuildLoad(builder, const_ptr, "const");
190
191 res = lp_build_blend_aos(builder, blend, type, src, dst, con, 3);
192
193 lp_build_name(res, "res");
194
195 LLVMBuildStore(builder, res, res_ptr);
196 }
197
198 if (mode == SoA) {
199 LLVMValueRef src[4];
200 LLVMValueRef dst[4];
201 LLVMValueRef con[4];
202 LLVMValueRef res[4];
203 unsigned i;
204
205 for(i = 0; i < 4; ++i) {
206 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
207 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
208 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
209 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
210 lp_build_name(src[i], "src.%c", "rgba"[i]);
211 lp_build_name(con[i], "con.%c", "rgba"[i]);
212 lp_build_name(dst[i], "dst.%c", "rgba"[i]);
213 }
214
215 lp_build_blend_soa(builder, blend, type, src, dst, con, res);
216
217 for(i = 0; i < 4; ++i) {
218 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
219 lp_build_name(res[i], "res.%c", "rgba"[i]);
220 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
221 }
222 }
223
224 LLVMBuildRetVoid(builder);;
225
226 LLVMDisposeBuilder(builder);
227 return func;
228 }
229
230
/**
 * Add and limit result to ceiling of 1.0.
 *
 * R must be a modifiable lvalue; it is evaluated more than once, so it must
 * not have side effects.  A and B are evaluated exactly once.
 */
#define ADD_SAT(R, A, B) \
   do { \
      (R) = (A) + (B); \
      if ((R) > 1.0f) \
         (R) = 1.0f; \
   } while (0)

/**
 * Subtract and limit result to floor of 0.0.
 *
 * R must be a modifiable lvalue; it is evaluated more than once, so it must
 * not have side effects.  A and B are evaluated exactly once.
 */
#define SUB_SAT(R, A, B) \
   do { \
      (R) = (A) - (B); \
      if ((R) < 0.0f) \
         (R) = 0.0f; \
   } while (0)
242
243
244 static void
245 compute_blend_ref_term(unsigned rgb_factor,
246 unsigned alpha_factor,
247 const double *factor,
248 const double *src,
249 const double *dst,
250 const double *con,
251 double *term)
252 {
253 double temp;
254
255 switch (rgb_factor) {
256 case PIPE_BLENDFACTOR_ONE:
257 term[0] = factor[0]; /* R */
258 term[1] = factor[1]; /* G */
259 term[2] = factor[2]; /* B */
260 break;
261 case PIPE_BLENDFACTOR_SRC_COLOR:
262 term[0] = factor[0] * src[0]; /* R */
263 term[1] = factor[1] * src[1]; /* G */
264 term[2] = factor[2] * src[2]; /* B */
265 break;
266 case PIPE_BLENDFACTOR_SRC_ALPHA:
267 term[0] = factor[0] * src[3]; /* R */
268 term[1] = factor[1] * src[3]; /* G */
269 term[2] = factor[2] * src[3]; /* B */
270 break;
271 case PIPE_BLENDFACTOR_DST_COLOR:
272 term[0] = factor[0] * dst[0]; /* R */
273 term[1] = factor[1] * dst[1]; /* G */
274 term[2] = factor[2] * dst[2]; /* B */
275 break;
276 case PIPE_BLENDFACTOR_DST_ALPHA:
277 term[0] = factor[0] * dst[3]; /* R */
278 term[1] = factor[1] * dst[3]; /* G */
279 term[2] = factor[2] * dst[3]; /* B */
280 break;
281 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
282 temp = MIN2(src[3], 1.0f - dst[3]);
283 term[0] = factor[0] * temp; /* R */
284 term[1] = factor[1] * temp; /* G */
285 term[2] = factor[2] * temp; /* B */
286 break;
287 case PIPE_BLENDFACTOR_CONST_COLOR:
288 term[0] = factor[0] * con[0]; /* R */
289 term[1] = factor[1] * con[1]; /* G */
290 term[2] = factor[2] * con[2]; /* B */
291 break;
292 case PIPE_BLENDFACTOR_CONST_ALPHA:
293 term[0] = factor[0] * con[3]; /* R */
294 term[1] = factor[1] * con[3]; /* G */
295 term[2] = factor[2] * con[3]; /* B */
296 break;
297 case PIPE_BLENDFACTOR_SRC1_COLOR:
298 assert(0); /* to do */
299 break;
300 case PIPE_BLENDFACTOR_SRC1_ALPHA:
301 assert(0); /* to do */
302 break;
303 case PIPE_BLENDFACTOR_ZERO:
304 term[0] = 0.0f; /* R */
305 term[1] = 0.0f; /* G */
306 term[2] = 0.0f; /* B */
307 break;
308 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
309 term[0] = factor[0] * (1.0f - src[0]); /* R */
310 term[1] = factor[1] * (1.0f - src[1]); /* G */
311 term[2] = factor[2] * (1.0f - src[2]); /* B */
312 break;
313 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
314 term[0] = factor[0] * (1.0f - src[3]); /* R */
315 term[1] = factor[1] * (1.0f - src[3]); /* G */
316 term[2] = factor[2] * (1.0f - src[3]); /* B */
317 break;
318 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
319 term[0] = factor[0] * (1.0f - dst[3]); /* R */
320 term[1] = factor[1] * (1.0f - dst[3]); /* G */
321 term[2] = factor[2] * (1.0f - dst[3]); /* B */
322 break;
323 case PIPE_BLENDFACTOR_INV_DST_COLOR:
324 term[0] = factor[0] * (1.0f - dst[0]); /* R */
325 term[1] = factor[1] * (1.0f - dst[1]); /* G */
326 term[2] = factor[2] * (1.0f - dst[2]); /* B */
327 break;
328 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
329 term[0] = factor[0] * (1.0f - con[0]); /* R */
330 term[1] = factor[1] * (1.0f - con[1]); /* G */
331 term[2] = factor[2] * (1.0f - con[2]); /* B */
332 break;
333 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
334 term[0] = factor[0] * (1.0f - con[3]); /* R */
335 term[1] = factor[1] * (1.0f - con[3]); /* G */
336 term[2] = factor[2] * (1.0f - con[3]); /* B */
337 break;
338 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
339 assert(0); /* to do */
340 break;
341 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
342 assert(0); /* to do */
343 break;
344 default:
345 assert(0);
346 }
347
348 /*
349 * Compute src/first term A
350 */
351 switch (alpha_factor) {
352 case PIPE_BLENDFACTOR_ONE:
353 term[3] = factor[3]; /* A */
354 break;
355 case PIPE_BLENDFACTOR_SRC_COLOR:
356 case PIPE_BLENDFACTOR_SRC_ALPHA:
357 term[3] = factor[3] * src[3]; /* A */
358 break;
359 case PIPE_BLENDFACTOR_DST_COLOR:
360 case PIPE_BLENDFACTOR_DST_ALPHA:
361 term[3] = factor[3] * dst[3]; /* A */
362 break;
363 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
364 term[3] = src[3]; /* A */
365 break;
366 case PIPE_BLENDFACTOR_CONST_COLOR:
367 case PIPE_BLENDFACTOR_CONST_ALPHA:
368 term[3] = factor[3] * con[3]; /* A */
369 break;
370 case PIPE_BLENDFACTOR_ZERO:
371 term[3] = 0.0f; /* A */
372 break;
373 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
374 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
375 term[3] = factor[3] * (1.0f - src[3]); /* A */
376 break;
377 case PIPE_BLENDFACTOR_INV_DST_COLOR:
378 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
379 term[3] = factor[3] * (1.0f - dst[3]); /* A */
380 break;
381 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
382 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
383 term[3] = factor[3] * (1.0f - con[3]);
384 break;
385 default:
386 assert(0);
387 }
388 }
389
390
391 static void
392 compute_blend_ref(const struct pipe_blend_state *blend,
393 const double *src,
394 const double *dst,
395 const double *con,
396 double *res)
397 {
398 double src_term[4];
399 double dst_term[4];
400
401 compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
402 src, src, dst, con, src_term);
403 compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
404 dst, src, dst, con, dst_term);
405
406 /*
407 * Combine RGB terms
408 */
409 switch (blend->rt[0].rgb_func) {
410 case PIPE_BLEND_ADD:
411 ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
412 ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
413 ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
414 break;
415 case PIPE_BLEND_SUBTRACT:
416 SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
417 SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
418 SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
419 break;
420 case PIPE_BLEND_REVERSE_SUBTRACT:
421 SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
422 SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
423 SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
424 break;
425 case PIPE_BLEND_MIN:
426 res[0] = MIN2(src_term[0], dst_term[0]); /* R */
427 res[1] = MIN2(src_term[1], dst_term[1]); /* G */
428 res[2] = MIN2(src_term[2], dst_term[2]); /* B */
429 break;
430 case PIPE_BLEND_MAX:
431 res[0] = MAX2(src_term[0], dst_term[0]); /* R */
432 res[1] = MAX2(src_term[1], dst_term[1]); /* G */
433 res[2] = MAX2(src_term[2], dst_term[2]); /* B */
434 break;
435 default:
436 assert(0);
437 }
438
439 /*
440 * Combine A terms
441 */
442 switch (blend->rt[0].alpha_func) {
443 case PIPE_BLEND_ADD:
444 ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
445 break;
446 case PIPE_BLEND_SUBTRACT:
447 SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
448 break;
449 case PIPE_BLEND_REVERSE_SUBTRACT:
450 SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
451 break;
452 case PIPE_BLEND_MIN:
453 res[3] = MIN2(src_term[3], dst_term[3]); /* A */
454 break;
455 case PIPE_BLEND_MAX:
456 res[3] = MAX2(src_term[3], dst_term[3]); /* A */
457 break;
458 default:
459 assert(0);
460 }
461 }
462
463
464 PIPE_ALIGN_STACK
465 static boolean
466 test_one(unsigned verbose,
467 FILE *fp,
468 const struct pipe_blend_state *blend,
469 enum vector_mode mode,
470 struct lp_type type)
471 {
472 LLVMModuleRef module = NULL;
473 LLVMValueRef func = NULL;
474 LLVMExecutionEngineRef engine = NULL;
475 LLVMModuleProviderRef provider = NULL;
476 LLVMPassManagerRef pass = NULL;
477 char *error = NULL;
478 blend_test_ptr_t blend_test_ptr;
479 boolean success;
480 const unsigned n = LP_TEST_NUM_SAMPLES;
481 int64_t cycles[LP_TEST_NUM_SAMPLES];
482 double cycles_avg = 0.0;
483 unsigned i, j;
484
485 if(verbose >= 1)
486 dump_blend_type(stdout, blend, mode, type);
487
488 module = LLVMModuleCreateWithName("test");
489
490 func = add_blend_test(module, blend, mode, type);
491
492 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
493 LLVMDumpModule(module);
494 abort();
495 }
496 LLVMDisposeMessage(error);
497
498 provider = LLVMCreateModuleProviderForExistingModule(module);
499 if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
500 if(verbose < 1)
501 dump_blend_type(stderr, blend, mode, type);
502 fprintf(stderr, "%s\n", error);
503 LLVMDisposeMessage(error);
504 abort();
505 }
506
507 #if 0
508 pass = LLVMCreatePassManager();
509 LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
510 /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
511 * but there are more on SVN. */
512 LLVMAddConstantPropagationPass(pass);
513 LLVMAddInstructionCombiningPass(pass);
514 LLVMAddPromoteMemoryToRegisterPass(pass);
515 LLVMAddGVNPass(pass);
516 LLVMAddCFGSimplificationPass(pass);
517 LLVMRunPassManager(pass, module);
518 #else
519 (void)pass;
520 #endif
521
522 if(verbose >= 2)
523 LLVMDumpModule(module);
524
525 blend_test_ptr = (blend_test_ptr_t)LLVMGetPointerToGlobal(engine, func);
526
527 if(verbose >= 2)
528 lp_disassemble(blend_test_ptr);
529
530 success = TRUE;
531 for(i = 0; i < n && success; ++i) {
532 if(mode == AoS) {
533 PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
534 PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
535 PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
536 PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
537 PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
538 int64_t start_counter = 0;
539 int64_t end_counter = 0;
540
541 random_vec(type, src);
542 random_vec(type, dst);
543 random_vec(type, con);
544
545 {
546 double fsrc[LP_MAX_VECTOR_LENGTH];
547 double fdst[LP_MAX_VECTOR_LENGTH];
548 double fcon[LP_MAX_VECTOR_LENGTH];
549 double fref[LP_MAX_VECTOR_LENGTH];
550
551 read_vec(type, src, fsrc);
552 read_vec(type, dst, fdst);
553 read_vec(type, con, fcon);
554
555 for(j = 0; j < type.length; j += 4)
556 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
557
558 write_vec(type, ref, fref);
559 }
560
561 start_counter = rdtsc();
562 blend_test_ptr(src, dst, con, res);
563 end_counter = rdtsc();
564
565 cycles[i] = end_counter - start_counter;
566
567 if(!compare_vec(type, res, ref)) {
568 success = FALSE;
569
570 if(verbose < 1)
571 dump_blend_type(stderr, blend, mode, type);
572 fprintf(stderr, "MISMATCH\n");
573
574 fprintf(stderr, " Src: ");
575 dump_vec(stderr, type, src);
576 fprintf(stderr, "\n");
577
578 fprintf(stderr, " Dst: ");
579 dump_vec(stderr, type, dst);
580 fprintf(stderr, "\n");
581
582 fprintf(stderr, " Con: ");
583 dump_vec(stderr, type, con);
584 fprintf(stderr, "\n");
585
586 fprintf(stderr, " Res: ");
587 dump_vec(stderr, type, res);
588 fprintf(stderr, "\n");
589
590 fprintf(stderr, " Ref: ");
591 dump_vec(stderr, type, ref);
592 fprintf(stderr, "\n");
593 }
594 }
595
596 if(mode == SoA) {
597 const unsigned stride = type.length*type.width/8;
598 PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
599 PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
600 PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
601 PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
602 PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
603 int64_t start_counter = 0;
604 int64_t end_counter = 0;
605 boolean mismatch;
606
607 for(j = 0; j < 4; ++j) {
608 random_vec(type, src + j*stride);
609 random_vec(type, dst + j*stride);
610 random_vec(type, con + j*stride);
611 }
612
613 {
614 double fsrc[4];
615 double fdst[4];
616 double fcon[4];
617 double fref[4];
618 unsigned k;
619
620 for(k = 0; k < type.length; ++k) {
621 for(j = 0; j < 4; ++j) {
622 fsrc[j] = read_elem(type, src + j*stride, k);
623 fdst[j] = read_elem(type, dst + j*stride, k);
624 fcon[j] = read_elem(type, con + j*stride, k);
625 }
626
627 compute_blend_ref(blend, fsrc, fdst, fcon, fref);
628
629 for(j = 0; j < 4; ++j)
630 write_elem(type, ref + j*stride, k, fref[j]);
631 }
632 }
633
634 start_counter = rdtsc();
635 blend_test_ptr(src, dst, con, res);
636 end_counter = rdtsc();
637
638 cycles[i] = end_counter - start_counter;
639
640 mismatch = FALSE;
641 for (j = 0; j < 4; ++j)
642 if(!compare_vec(type, res + j*stride, ref + j*stride))
643 mismatch = TRUE;
644
645 if (mismatch) {
646 success = FALSE;
647
648 if(verbose < 1)
649 dump_blend_type(stderr, blend, mode, type);
650 fprintf(stderr, "MISMATCH\n");
651 for(j = 0; j < 4; ++j) {
652 char channel = "RGBA"[j];
653 fprintf(stderr, " Src%c: ", channel);
654 dump_vec(stderr, type, src + j*stride);
655 fprintf(stderr, "\n");
656
657 fprintf(stderr, " Dst%c: ", channel);
658 dump_vec(stderr, type, dst + j*stride);
659 fprintf(stderr, "\n");
660
661 fprintf(stderr, " Con%c: ", channel);
662 dump_vec(stderr, type, con + j*stride);
663 fprintf(stderr, "\n");
664
665 fprintf(stderr, " Res%c: ", channel);
666 dump_vec(stderr, type, res + j*stride);
667 fprintf(stderr, "\n");
668
669 fprintf(stderr, " Ref%c: ", channel);
670 dump_vec(stderr, type, ref + j*stride);
671 fprintf(stderr, "\n");
672 }
673 }
674 }
675 }
676
677 /*
678 * Unfortunately the output of cycle counter is not very reliable as it comes
679 * -- sometimes we get outliers (due IRQs perhaps?) which are
680 * better removed to avoid random or biased data.
681 */
682 {
683 double sum = 0.0, sum2 = 0.0;
684 double avg, std;
685 unsigned m;
686
687 for(i = 0; i < n; ++i) {
688 sum += cycles[i];
689 sum2 += cycles[i]*cycles[i];
690 }
691
692 avg = sum/n;
693 std = sqrtf((sum2 - n*avg*avg)/n);
694
695 m = 0;
696 sum = 0.0;
697 for(i = 0; i < n; ++i) {
698 if(fabs(cycles[i] - avg) <= 4.0*std) {
699 sum += cycles[i];
700 ++m;
701 }
702 }
703
704 cycles_avg = sum/m;
705
706 }
707
708 if(fp)
709 write_tsv_row(fp, blend, mode, type, cycles_avg, success);
710
711 if (!success) {
712 if(verbose < 2)
713 LLVMDumpModule(module);
714 LLVMWriteBitcodeToFile(module, "blend.bc");
715 fprintf(stderr, "blend.bc written\n");
716 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
717 abort();
718 }
719
720 LLVMFreeMachineCodeForFunction(engine, func);
721
722 LLVMDisposeExecutionEngine(engine);
723 if(pass)
724 LLVMDisposePassManager(pass);
725
726 return success;
727 }
728
729
/**
 * Blend factors exercised by the tests.
 *
 * The order matters: test_all() only pairs a source factor with the
 * destination factors that appear at or before it in this table.  The
 * dual-source (SRC1) factors are compiled out; the reference implementation
 * in this file asserts on them.
 */
const unsigned
blend_factors[] = {
   PIPE_BLENDFACTOR_ZERO,
   PIPE_BLENDFACTOR_ONE,
   PIPE_BLENDFACTOR_SRC_COLOR,
   PIPE_BLENDFACTOR_SRC_ALPHA,
   PIPE_BLENDFACTOR_DST_COLOR,
   PIPE_BLENDFACTOR_DST_ALPHA,
   PIPE_BLENDFACTOR_CONST_COLOR,
   PIPE_BLENDFACTOR_CONST_ALPHA,
#if 0
   PIPE_BLENDFACTOR_SRC1_COLOR,
   PIPE_BLENDFACTOR_SRC1_ALPHA,
#endif
   PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
   PIPE_BLENDFACTOR_INV_SRC_COLOR,
   PIPE_BLENDFACTOR_INV_SRC_ALPHA,
   PIPE_BLENDFACTOR_INV_DST_COLOR,
   PIPE_BLENDFACTOR_INV_DST_ALPHA,
   PIPE_BLENDFACTOR_INV_CONST_COLOR,
   PIPE_BLENDFACTOR_INV_CONST_ALPHA,
#if 0
   PIPE_BLENDFACTOR_INV_SRC1_COLOR,
   PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
#endif
};
756
757
/**
 * Blend functions exercised by the tests, used for both the RGB and the
 * alpha function.
 */
const unsigned
blend_funcs[] = {
   PIPE_BLEND_ADD,
   PIPE_BLEND_SUBTRACT,
   PIPE_BLEND_REVERSE_SUBTRACT,
   PIPE_BLEND_MIN,
   PIPE_BLEND_MAX
};
766
767
/**
 * Vector types exercised by the tests.  Each initializer lists the fields
 * in the order given by the column comment below.
 */
const struct lp_type blend_types[] = {
   /* float, fixed, sign, norm, width, len */
   { TRUE, FALSE, FALSE, TRUE, 32, 4 }, /* f32 x 4 */
   { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
};
773
774
775 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
776 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
777 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
778
779
780 boolean
781 test_all(unsigned verbose, FILE *fp)
782 {
783 const unsigned *rgb_func;
784 const unsigned *rgb_src_factor;
785 const unsigned *rgb_dst_factor;
786 const unsigned *alpha_func;
787 const unsigned *alpha_src_factor;
788 const unsigned *alpha_dst_factor;
789 struct pipe_blend_state blend;
790 enum vector_mode mode;
791 const struct lp_type *type;
792 bool success = TRUE;
793
794 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
795 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
796 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
797 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
798 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
799 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
800 for(mode = 0; mode < 2; ++mode) {
801 for(type = blend_types; type < &blend_types[num_types]; ++type) {
802
803 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
804 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
805 continue;
806
807 memset(&blend, 0, sizeof blend);
808 blend.rt[0].blend_enable = 1;
809 blend.rt[0].rgb_func = *rgb_func;
810 blend.rt[0].rgb_src_factor = *rgb_src_factor;
811 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
812 blend.rt[0].alpha_func = *alpha_func;
813 blend.rt[0].alpha_src_factor = *alpha_src_factor;
814 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
815 blend.rt[0].colormask = PIPE_MASK_RGBA;
816
817 if(!test_one(verbose, fp, &blend, mode, *type))
818 success = FALSE;
819
820 }
821 }
822 }
823 }
824 }
825 }
826 }
827 }
828
829 return success;
830 }
831
832
833 boolean
834 test_some(unsigned verbose, FILE *fp, unsigned long n)
835 {
836 const unsigned *rgb_func;
837 const unsigned *rgb_src_factor;
838 const unsigned *rgb_dst_factor;
839 const unsigned *alpha_func;
840 const unsigned *alpha_src_factor;
841 const unsigned *alpha_dst_factor;
842 struct pipe_blend_state blend;
843 enum vector_mode mode;
844 const struct lp_type *type;
845 unsigned long i;
846 bool success = TRUE;
847
848 for(i = 0; i < n; ++i) {
849 rgb_func = &blend_funcs[rand() % num_funcs];
850 alpha_func = &blend_funcs[rand() % num_funcs];
851 rgb_src_factor = &blend_factors[rand() % num_factors];
852 alpha_src_factor = &blend_factors[rand() % num_factors];
853
854 do {
855 rgb_dst_factor = &blend_factors[rand() % num_factors];
856 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
857
858 do {
859 alpha_dst_factor = &blend_factors[rand() % num_factors];
860 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
861
862 mode = rand() & 1;
863
864 type = &blend_types[rand() % num_types];
865
866 memset(&blend, 0, sizeof blend);
867 blend.rt[0].blend_enable = 1;
868 blend.rt[0].rgb_func = *rgb_func;
869 blend.rt[0].rgb_src_factor = *rgb_src_factor;
870 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
871 blend.rt[0].alpha_func = *alpha_func;
872 blend.rt[0].alpha_src_factor = *alpha_src_factor;
873 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
874 blend.rt[0].colormask = PIPE_MASK_RGBA;
875
876 if(!test_one(verbose, fp, &blend, mode, *type))
877 success = FALSE;
878 }
879
880 return success;
881 }