Merge branch 'gallium-userbuf'
[mesa.git] / src / gallium / drivers / llvmpipe / lp_test_blend.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * @file
31 * Unit tests for blend LLVM IR generation
32 *
33 * @author Jose Fonseca <jfonseca@vmware.com>
34 *
35 * Blend computation code derived from code written by
36 * @author Brian Paul <brian@vmware.com>
37 */
38
39
40 #include "gallivm/lp_bld_init.h"
41 #include "gallivm/lp_bld_type.h"
42 #include "gallivm/lp_bld_debug.h"
43 #include "lp_bld_blend.h"
44 #include "lp_test.h"
45
46
/**
 * Memory layout of the vectors handed to the generated blend function.
 *
 * The numeric values matter: the test drivers iterate over / randomly pick
 * the values 0 and 1, and dump_blend_type() treats the mode as a boolean.
 */
enum vector_mode
{
   /* Array of structures: one vector, each group of four consecutive
    * elements is one RGBA pixel. */
   AoS = 0,

   /* Structure of arrays: four vectors, one per channel (R, G, B, A). */
   SoA = 1
};
52
53
/**
 * Signature of the generated blend test function: it reads the source,
 * destination and constant colour vectors and writes the blended result.
 */
typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);


/**
 * Reinterpret a data pointer as a blend test function pointer.
 *
 * The conversion goes through a union instead of a direct cast between
 * an object pointer and a function pointer.
 */
static blend_test_ptr_t
voidptr_to_blend_test_ptr_t(void *p)
{
   union {
      void *as_data;
      blend_test_ptr_t as_func;
   } pun;

   pun.as_data = p;

   return pun.as_func;
}
67
68
69
/**
 * Write the column header line of the tab-separated results file.
 *
 * The columns match, in order, the fields emitted by write_tsv_row().
 * The stream is flushed before returning.
 */
void
write_tsv_header(FILE *fp)
{
   static const char *const columns[] = {
      "result",
      "cycles_per_channel",
      "mode",
      "type",
      "sep_func",
      "sep_src_factor",
      "sep_dst_factor",
      "rgb_func",
      "rgb_src_factor",
      "rgb_dst_factor",
      "alpha_func",
      "alpha_src_factor",
      "alpha_dst_factor"
   };
   const unsigned num_columns = sizeof(columns)/sizeof(columns[0]);
   unsigned col;

   for (col = 0; col < num_columns; ++col) {
      fputs(columns[col], fp);
      /* tab between columns, newline after the last one */
      fputc(col + 1 < num_columns ? '\t' : '\n', fp);
   }

   fflush(fp);
}
90
91
92 static void
93 write_tsv_row(FILE *fp,
94 const struct pipe_blend_state *blend,
95 enum vector_mode mode,
96 struct lp_type type,
97 double cycles,
98 boolean success)
99 {
100 fprintf(fp, "%s\t", success ? "pass" : "fail");
101
102 if (mode == AoS) {
103 fprintf(fp, "%.1f\t", cycles / type.length);
104 fprintf(fp, "aos\t");
105 }
106
107 if (mode == SoA) {
108 fprintf(fp, "%.1f\t", cycles / (4 * type.length));
109 fprintf(fp, "soa\t");
110 }
111
112 fprintf(fp, "%s%u%sx%u\t",
113 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
114 type.width,
115 type.norm ? "n" : "",
116 type.length);
117
118 fprintf(fp,
119 "%s\t%s\t%s\t",
120 blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
121 blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
122 blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
123
124 fprintf(fp,
125 "%s\t%s\t%s\t%s\t%s\t%s\n",
126 util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
127 util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
128 util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
129 util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
130 util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
131 util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
132
133 fflush(fp);
134 }
135
136
137 static void
138 dump_blend_type(FILE *fp,
139 const struct pipe_blend_state *blend,
140 enum vector_mode mode,
141 struct lp_type type)
142 {
143 fprintf(fp, "%s", mode ? "soa" : "aos");
144
145 fprintf(fp, " type=%s%u%sx%u",
146 type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
147 type.width,
148 type.norm ? "n" : "",
149 type.length);
150
151 fprintf(fp,
152 " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
153 "rgb_func", util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
154 "rgb_src_factor", util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
155 "rgb_dst_factor", util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
156 "alpha_func", util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
157 "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
158 "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
159
160 fprintf(fp, " ...\n");
161 fflush(fp);
162 }
163
164
165 static LLVMValueRef
166 add_blend_test(struct gallivm_state *gallivm,
167 const struct pipe_blend_state *blend,
168 enum vector_mode mode,
169 struct lp_type type)
170 {
171 LLVMModuleRef module = gallivm->module;
172 LLVMContextRef context = gallivm->context;
173 LLVMTypeRef vec_type;
174 LLVMTypeRef args[4];
175 LLVMValueRef func;
176 LLVMValueRef src_ptr;
177 LLVMValueRef dst_ptr;
178 LLVMValueRef const_ptr;
179 LLVMValueRef res_ptr;
180 LLVMBasicBlockRef block;
181 LLVMBuilderRef builder;
182 const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM;
183 const unsigned rt = 0;
184 const unsigned char swizzle[4] = { 0, 1, 2, 3 };
185
186 vec_type = lp_build_vec_type(gallivm, type);
187
188 args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
189 func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidTypeInContext(context), args, 4, 0));
190 LLVMSetFunctionCallConv(func, LLVMCCallConv);
191 src_ptr = LLVMGetParam(func, 0);
192 dst_ptr = LLVMGetParam(func, 1);
193 const_ptr = LLVMGetParam(func, 2);
194 res_ptr = LLVMGetParam(func, 3);
195
196 block = LLVMAppendBasicBlockInContext(context, func, "entry");
197 builder = gallivm->builder;
198 LLVMPositionBuilderAtEnd(builder, block);
199
200 if (mode == AoS) {
201 LLVMValueRef src;
202 LLVMValueRef dst;
203 LLVMValueRef con;
204 LLVMValueRef res;
205
206 src = LLVMBuildLoad(builder, src_ptr, "src");
207 dst = LLVMBuildLoad(builder, dst_ptr, "dst");
208 con = LLVMBuildLoad(builder, const_ptr, "const");
209
210 res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle);
211
212 lp_build_name(res, "res");
213
214 LLVMBuildStore(builder, res, res_ptr);
215 }
216
217 if (mode == SoA) {
218 LLVMValueRef src[4];
219 LLVMValueRef dst[4];
220 LLVMValueRef con[4];
221 LLVMValueRef res[4];
222 unsigned i;
223
224 for(i = 0; i < 4; ++i) {
225 LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
226 src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
227 dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
228 con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
229 lp_build_name(src[i], "src.%c", "rgba"[i]);
230 lp_build_name(con[i], "con.%c", "rgba"[i]);
231 lp_build_name(dst[i], "dst.%c", "rgba"[i]);
232 }
233
234 lp_build_blend_soa(gallivm, blend, type, rt, src, dst, con, res);
235
236 for(i = 0; i < 4; ++i) {
237 LLVMValueRef index = LLVMConstInt(LLVMInt32TypeInContext(context), i, 0);
238 lp_build_name(res[i], "res.%c", "rgba"[i]);
239 LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
240 }
241 }
242
243 LLVMBuildRetVoid(builder);;
244
245 return func;
246 }
247
248
249 static void
250 compute_blend_ref_term(unsigned rgb_factor,
251 unsigned alpha_factor,
252 const double *factor,
253 const double *src,
254 const double *dst,
255 const double *con,
256 double *term)
257 {
258 double temp;
259
260 switch (rgb_factor) {
261 case PIPE_BLENDFACTOR_ONE:
262 term[0] = factor[0]; /* R */
263 term[1] = factor[1]; /* G */
264 term[2] = factor[2]; /* B */
265 break;
266 case PIPE_BLENDFACTOR_SRC_COLOR:
267 term[0] = factor[0] * src[0]; /* R */
268 term[1] = factor[1] * src[1]; /* G */
269 term[2] = factor[2] * src[2]; /* B */
270 break;
271 case PIPE_BLENDFACTOR_SRC_ALPHA:
272 term[0] = factor[0] * src[3]; /* R */
273 term[1] = factor[1] * src[3]; /* G */
274 term[2] = factor[2] * src[3]; /* B */
275 break;
276 case PIPE_BLENDFACTOR_DST_COLOR:
277 term[0] = factor[0] * dst[0]; /* R */
278 term[1] = factor[1] * dst[1]; /* G */
279 term[2] = factor[2] * dst[2]; /* B */
280 break;
281 case PIPE_BLENDFACTOR_DST_ALPHA:
282 term[0] = factor[0] * dst[3]; /* R */
283 term[1] = factor[1] * dst[3]; /* G */
284 term[2] = factor[2] * dst[3]; /* B */
285 break;
286 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
287 temp = MIN2(src[3], 1.0f - dst[3]);
288 term[0] = factor[0] * temp; /* R */
289 term[1] = factor[1] * temp; /* G */
290 term[2] = factor[2] * temp; /* B */
291 break;
292 case PIPE_BLENDFACTOR_CONST_COLOR:
293 term[0] = factor[0] * con[0]; /* R */
294 term[1] = factor[1] * con[1]; /* G */
295 term[2] = factor[2] * con[2]; /* B */
296 break;
297 case PIPE_BLENDFACTOR_CONST_ALPHA:
298 term[0] = factor[0] * con[3]; /* R */
299 term[1] = factor[1] * con[3]; /* G */
300 term[2] = factor[2] * con[3]; /* B */
301 break;
302 case PIPE_BLENDFACTOR_SRC1_COLOR:
303 assert(0); /* to do */
304 break;
305 case PIPE_BLENDFACTOR_SRC1_ALPHA:
306 assert(0); /* to do */
307 break;
308 case PIPE_BLENDFACTOR_ZERO:
309 term[0] = 0.0f; /* R */
310 term[1] = 0.0f; /* G */
311 term[2] = 0.0f; /* B */
312 break;
313 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
314 term[0] = factor[0] * (1.0f - src[0]); /* R */
315 term[1] = factor[1] * (1.0f - src[1]); /* G */
316 term[2] = factor[2] * (1.0f - src[2]); /* B */
317 break;
318 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
319 term[0] = factor[0] * (1.0f - src[3]); /* R */
320 term[1] = factor[1] * (1.0f - src[3]); /* G */
321 term[2] = factor[2] * (1.0f - src[3]); /* B */
322 break;
323 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
324 term[0] = factor[0] * (1.0f - dst[3]); /* R */
325 term[1] = factor[1] * (1.0f - dst[3]); /* G */
326 term[2] = factor[2] * (1.0f - dst[3]); /* B */
327 break;
328 case PIPE_BLENDFACTOR_INV_DST_COLOR:
329 term[0] = factor[0] * (1.0f - dst[0]); /* R */
330 term[1] = factor[1] * (1.0f - dst[1]); /* G */
331 term[2] = factor[2] * (1.0f - dst[2]); /* B */
332 break;
333 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
334 term[0] = factor[0] * (1.0f - con[0]); /* R */
335 term[1] = factor[1] * (1.0f - con[1]); /* G */
336 term[2] = factor[2] * (1.0f - con[2]); /* B */
337 break;
338 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
339 term[0] = factor[0] * (1.0f - con[3]); /* R */
340 term[1] = factor[1] * (1.0f - con[3]); /* G */
341 term[2] = factor[2] * (1.0f - con[3]); /* B */
342 break;
343 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
344 assert(0); /* to do */
345 break;
346 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
347 assert(0); /* to do */
348 break;
349 default:
350 assert(0);
351 }
352
353 /*
354 * Compute src/first term A
355 */
356 switch (alpha_factor) {
357 case PIPE_BLENDFACTOR_ONE:
358 term[3] = factor[3]; /* A */
359 break;
360 case PIPE_BLENDFACTOR_SRC_COLOR:
361 case PIPE_BLENDFACTOR_SRC_ALPHA:
362 term[3] = factor[3] * src[3]; /* A */
363 break;
364 case PIPE_BLENDFACTOR_DST_COLOR:
365 case PIPE_BLENDFACTOR_DST_ALPHA:
366 term[3] = factor[3] * dst[3]; /* A */
367 break;
368 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
369 term[3] = src[3]; /* A */
370 break;
371 case PIPE_BLENDFACTOR_CONST_COLOR:
372 case PIPE_BLENDFACTOR_CONST_ALPHA:
373 term[3] = factor[3] * con[3]; /* A */
374 break;
375 case PIPE_BLENDFACTOR_ZERO:
376 term[3] = 0.0f; /* A */
377 break;
378 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
379 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
380 term[3] = factor[3] * (1.0f - src[3]); /* A */
381 break;
382 case PIPE_BLENDFACTOR_INV_DST_COLOR:
383 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
384 term[3] = factor[3] * (1.0f - dst[3]); /* A */
385 break;
386 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
387 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
388 term[3] = factor[3] * (1.0f - con[3]);
389 break;
390 default:
391 assert(0);
392 }
393 }
394
395
396 static void
397 compute_blend_ref(const struct pipe_blend_state *blend,
398 const double *src,
399 const double *dst,
400 const double *con,
401 double *res)
402 {
403 double src_term[4];
404 double dst_term[4];
405
406 compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
407 src, src, dst, con, src_term);
408 compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
409 dst, src, dst, con, dst_term);
410
411 /*
412 * Combine RGB terms
413 */
414 switch (blend->rt[0].rgb_func) {
415 case PIPE_BLEND_ADD:
416 res[0] = src_term[0] + dst_term[0]; /* R */
417 res[1] = src_term[1] + dst_term[1]; /* G */
418 res[2] = src_term[2] + dst_term[2]; /* B */
419 break;
420 case PIPE_BLEND_SUBTRACT:
421 res[0] = src_term[0] - dst_term[0]; /* R */
422 res[1] = src_term[1] - dst_term[1]; /* G */
423 res[2] = src_term[2] - dst_term[2]; /* B */
424 break;
425 case PIPE_BLEND_REVERSE_SUBTRACT:
426 res[0] = dst_term[0] - src_term[0]; /* R */
427 res[1] = dst_term[1] - src_term[1]; /* G */
428 res[2] = dst_term[2] - src_term[2]; /* B */
429 break;
430 case PIPE_BLEND_MIN:
431 res[0] = MIN2(src_term[0], dst_term[0]); /* R */
432 res[1] = MIN2(src_term[1], dst_term[1]); /* G */
433 res[2] = MIN2(src_term[2], dst_term[2]); /* B */
434 break;
435 case PIPE_BLEND_MAX:
436 res[0] = MAX2(src_term[0], dst_term[0]); /* R */
437 res[1] = MAX2(src_term[1], dst_term[1]); /* G */
438 res[2] = MAX2(src_term[2], dst_term[2]); /* B */
439 break;
440 default:
441 assert(0);
442 }
443
444 /*
445 * Combine A terms
446 */
447 switch (blend->rt[0].alpha_func) {
448 case PIPE_BLEND_ADD:
449 res[3] = src_term[3] + dst_term[3]; /* A */
450 break;
451 case PIPE_BLEND_SUBTRACT:
452 res[3] = src_term[3] - dst_term[3]; /* A */
453 break;
454 case PIPE_BLEND_REVERSE_SUBTRACT:
455 res[3] = dst_term[3] - src_term[3]; /* A */
456 break;
457 case PIPE_BLEND_MIN:
458 res[3] = MIN2(src_term[3], dst_term[3]); /* A */
459 break;
460 case PIPE_BLEND_MAX:
461 res[3] = MAX2(src_term[3], dst_term[3]); /* A */
462 break;
463 default:
464 assert(0);
465 }
466 }
467
468
469 PIPE_ALIGN_STACK
470 static boolean
471 test_one(struct gallivm_state *gallivm,
472 unsigned verbose,
473 FILE *fp,
474 const struct pipe_blend_state *blend,
475 enum vector_mode mode,
476 struct lp_type type)
477 {
478 LLVMModuleRef module = gallivm->module;
479 LLVMValueRef func = NULL;
480 LLVMExecutionEngineRef engine = gallivm->engine;
481 char *error = NULL;
482 blend_test_ptr_t blend_test_ptr;
483 boolean success;
484 const unsigned n = LP_TEST_NUM_SAMPLES;
485 int64_t cycles[LP_TEST_NUM_SAMPLES];
486 double cycles_avg = 0.0;
487 unsigned i, j;
488 void *code;
489
490 if(verbose >= 1)
491 dump_blend_type(stdout, blend, mode, type);
492
493 func = add_blend_test(gallivm, blend, mode, type);
494
495 if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
496 LLVMDumpModule(module);
497 abort();
498 }
499 LLVMDisposeMessage(error);
500
501 code = LLVMGetPointerToGlobal(engine, func);
502 blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
503
504 if(verbose >= 2)
505 lp_disassemble(code);
506
507 success = TRUE;
508 for(i = 0; i < n && success; ++i) {
509 if(mode == AoS) {
510 PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
511 PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
512 PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
513 PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
514 PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
515 int64_t start_counter = 0;
516 int64_t end_counter = 0;
517
518 random_vec(type, src);
519 random_vec(type, dst);
520 random_vec(type, con);
521
522 {
523 double fsrc[LP_MAX_VECTOR_LENGTH];
524 double fdst[LP_MAX_VECTOR_LENGTH];
525 double fcon[LP_MAX_VECTOR_LENGTH];
526 double fref[LP_MAX_VECTOR_LENGTH];
527
528 read_vec(type, src, fsrc);
529 read_vec(type, dst, fdst);
530 read_vec(type, con, fcon);
531
532 for(j = 0; j < type.length; j += 4)
533 compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
534
535 write_vec(type, ref, fref);
536 }
537
538 start_counter = rdtsc();
539 blend_test_ptr(src, dst, con, res);
540 end_counter = rdtsc();
541
542 cycles[i] = end_counter - start_counter;
543
544 if(!compare_vec(type, res, ref)) {
545 success = FALSE;
546
547 if(verbose < 1)
548 dump_blend_type(stderr, blend, mode, type);
549 fprintf(stderr, "MISMATCH\n");
550
551 fprintf(stderr, " Src: ");
552 dump_vec(stderr, type, src);
553 fprintf(stderr, "\n");
554
555 fprintf(stderr, " Dst: ");
556 dump_vec(stderr, type, dst);
557 fprintf(stderr, "\n");
558
559 fprintf(stderr, " Con: ");
560 dump_vec(stderr, type, con);
561 fprintf(stderr, "\n");
562
563 fprintf(stderr, " Res: ");
564 dump_vec(stderr, type, res);
565 fprintf(stderr, "\n");
566
567 fprintf(stderr, " Ref: ");
568 dump_vec(stderr, type, ref);
569 fprintf(stderr, "\n");
570 }
571 }
572
573 if(mode == SoA) {
574 const unsigned stride = type.length*type.width/8;
575 PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
576 PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
577 PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
578 PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
579 PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
580 int64_t start_counter = 0;
581 int64_t end_counter = 0;
582 boolean mismatch;
583
584 for(j = 0; j < 4; ++j) {
585 random_vec(type, src + j*stride);
586 random_vec(type, dst + j*stride);
587 random_vec(type, con + j*stride);
588 }
589
590 {
591 double fsrc[4];
592 double fdst[4];
593 double fcon[4];
594 double fref[4];
595 unsigned k;
596
597 for(k = 0; k < type.length; ++k) {
598 for(j = 0; j < 4; ++j) {
599 fsrc[j] = read_elem(type, src + j*stride, k);
600 fdst[j] = read_elem(type, dst + j*stride, k);
601 fcon[j] = read_elem(type, con + j*stride, k);
602 }
603
604 compute_blend_ref(blend, fsrc, fdst, fcon, fref);
605
606 for(j = 0; j < 4; ++j)
607 write_elem(type, ref + j*stride, k, fref[j]);
608 }
609 }
610
611 start_counter = rdtsc();
612 blend_test_ptr(src, dst, con, res);
613 end_counter = rdtsc();
614
615 cycles[i] = end_counter - start_counter;
616
617 mismatch = FALSE;
618 for (j = 0; j < 4; ++j)
619 if(!compare_vec(type, res + j*stride, ref + j*stride))
620 mismatch = TRUE;
621
622 if (mismatch) {
623 success = FALSE;
624
625 if(verbose < 1)
626 dump_blend_type(stderr, blend, mode, type);
627 fprintf(stderr, "MISMATCH\n");
628 for(j = 0; j < 4; ++j) {
629 char channel = "RGBA"[j];
630 fprintf(stderr, " Src%c: ", channel);
631 dump_vec(stderr, type, src + j*stride);
632 fprintf(stderr, "\n");
633
634 fprintf(stderr, " Dst%c: ", channel);
635 dump_vec(stderr, type, dst + j*stride);
636 fprintf(stderr, "\n");
637
638 fprintf(stderr, " Con%c: ", channel);
639 dump_vec(stderr, type, con + j*stride);
640 fprintf(stderr, "\n");
641
642 fprintf(stderr, " Res%c: ", channel);
643 dump_vec(stderr, type, res + j*stride);
644 fprintf(stderr, "\n");
645
646 fprintf(stderr, " Ref%c: ", channel);
647 dump_vec(stderr, type, ref + j*stride);
648 fprintf(stderr, "\n");
649
650 fprintf(stderr, "\n");
651 }
652 }
653 }
654 }
655
656 /*
657 * Unfortunately the output of cycle counter is not very reliable as it comes
658 * -- sometimes we get outliers (due IRQs perhaps?) which are
659 * better removed to avoid random or biased data.
660 */
661 {
662 double sum = 0.0, sum2 = 0.0;
663 double avg, std;
664 unsigned m;
665
666 for(i = 0; i < n; ++i) {
667 sum += cycles[i];
668 sum2 += cycles[i]*cycles[i];
669 }
670
671 avg = sum/n;
672 std = sqrtf((sum2 - n*avg*avg)/n);
673
674 m = 0;
675 sum = 0.0;
676 for(i = 0; i < n; ++i) {
677 if(fabs(cycles[i] - avg) <= 4.0*std) {
678 sum += cycles[i];
679 ++m;
680 }
681 }
682
683 cycles_avg = sum/m;
684
685 }
686
687 if(fp)
688 write_tsv_row(fp, blend, mode, type, cycles_avg, success);
689
690 if (!success) {
691 if(verbose < 2)
692 LLVMDumpModule(module);
693 LLVMWriteBitcodeToFile(module, "blend.bc");
694 fprintf(stderr, "blend.bc written\n");
695 fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
696 abort();
697 }
698
699 LLVMFreeMachineCodeForFunction(engine, func);
700
701 return success;
702 }
703
704
705 const unsigned
706 blend_factors[] = {
707 PIPE_BLENDFACTOR_ZERO,
708 PIPE_BLENDFACTOR_ONE,
709 PIPE_BLENDFACTOR_SRC_COLOR,
710 PIPE_BLENDFACTOR_SRC_ALPHA,
711 PIPE_BLENDFACTOR_DST_COLOR,
712 PIPE_BLENDFACTOR_DST_ALPHA,
713 PIPE_BLENDFACTOR_CONST_COLOR,
714 PIPE_BLENDFACTOR_CONST_ALPHA,
715 #if 0
716 PIPE_BLENDFACTOR_SRC1_COLOR,
717 PIPE_BLENDFACTOR_SRC1_ALPHA,
718 #endif
719 PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
720 PIPE_BLENDFACTOR_INV_SRC_COLOR,
721 PIPE_BLENDFACTOR_INV_SRC_ALPHA,
722 PIPE_BLENDFACTOR_INV_DST_COLOR,
723 PIPE_BLENDFACTOR_INV_DST_ALPHA,
724 PIPE_BLENDFACTOR_INV_CONST_COLOR,
725 PIPE_BLENDFACTOR_INV_CONST_ALPHA,
726 #if 0
727 PIPE_BLENDFACTOR_INV_SRC1_COLOR,
728 PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
729 #endif
730 };
731
732
733 const unsigned
734 blend_funcs[] = {
735 PIPE_BLEND_ADD,
736 PIPE_BLEND_SUBTRACT,
737 PIPE_BLEND_REVERSE_SUBTRACT,
738 PIPE_BLEND_MIN,
739 PIPE_BLEND_MAX
740 };
741
742
743 const struct lp_type blend_types[] = {
744 /* float, fixed, sign, norm, width, len */
745 { TRUE, FALSE, TRUE, FALSE, 32, 4 }, /* f32 x 4 */
746 { FALSE, FALSE, FALSE, TRUE, 8, 16 }, /* u8n x 16 */
747 };
748
749
750 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
751 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
752 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
753
754
755 boolean
756 test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
757 {
758 const unsigned *rgb_func;
759 const unsigned *rgb_src_factor;
760 const unsigned *rgb_dst_factor;
761 const unsigned *alpha_func;
762 const unsigned *alpha_src_factor;
763 const unsigned *alpha_dst_factor;
764 struct pipe_blend_state blend;
765 enum vector_mode mode;
766 const struct lp_type *type;
767 boolean success = TRUE;
768
769 for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
770 for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
771 for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
772 for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
773 for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
774 for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
775 for(mode = 0; mode < 2; ++mode) {
776 for(type = blend_types; type < &blend_types[num_types]; ++type) {
777
778 if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
779 *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
780 continue;
781
782 memset(&blend, 0, sizeof blend);
783 blend.rt[0].blend_enable = 1;
784 blend.rt[0].rgb_func = *rgb_func;
785 blend.rt[0].rgb_src_factor = *rgb_src_factor;
786 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
787 blend.rt[0].alpha_func = *alpha_func;
788 blend.rt[0].alpha_src_factor = *alpha_src_factor;
789 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
790 blend.rt[0].colormask = PIPE_MASK_RGBA;
791
792 if(!test_one(gallivm, verbose, fp, &blend, mode, *type))
793 success = FALSE;
794
795 }
796 }
797 }
798 }
799 }
800 }
801 }
802 }
803
804 return success;
805 }
806
807
808 boolean
809 test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp,
810 unsigned long n)
811 {
812 const unsigned *rgb_func;
813 const unsigned *rgb_src_factor;
814 const unsigned *rgb_dst_factor;
815 const unsigned *alpha_func;
816 const unsigned *alpha_src_factor;
817 const unsigned *alpha_dst_factor;
818 struct pipe_blend_state blend;
819 enum vector_mode mode;
820 const struct lp_type *type;
821 unsigned long i;
822 boolean success = TRUE;
823
824 for(i = 0; i < n; ++i) {
825 rgb_func = &blend_funcs[rand() % num_funcs];
826 alpha_func = &blend_funcs[rand() % num_funcs];
827 rgb_src_factor = &blend_factors[rand() % num_factors];
828 alpha_src_factor = &blend_factors[rand() % num_factors];
829
830 do {
831 rgb_dst_factor = &blend_factors[rand() % num_factors];
832 } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
833
834 do {
835 alpha_dst_factor = &blend_factors[rand() % num_factors];
836 } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
837
838 mode = rand() & 1;
839
840 type = &blend_types[rand() % num_types];
841
842 memset(&blend, 0, sizeof blend);
843 blend.rt[0].blend_enable = 1;
844 blend.rt[0].rgb_func = *rgb_func;
845 blend.rt[0].rgb_src_factor = *rgb_src_factor;
846 blend.rt[0].rgb_dst_factor = *rgb_dst_factor;
847 blend.rt[0].alpha_func = *alpha_func;
848 blend.rt[0].alpha_src_factor = *alpha_src_factor;
849 blend.rt[0].alpha_dst_factor = *alpha_dst_factor;
850 blend.rt[0].colormask = PIPE_MASK_RGBA;
851
852 if(!test_one(gallivm, verbose, fp, &blend, mode, *type))
853 success = FALSE;
854 }
855
856 return success;
857 }
858
859
860 boolean
861 test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp)
862 {
863 printf("no test_single()");
864 return TRUE;
865 }