src/gallium/drivers/llvmpipe/lp_test_blend.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2009 VMware, Inc.
   4  * All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the
   8  * "Software"), to deal in the Software without restriction, including
   9  * without limitation the rights to use, copy, modify, merge, publish,
  10  * distribute, sub license, and/or sell copies of the Software, and to
  11  * permit persons to whom the Software is furnished to do so, subject to
  12  * the following conditions:
  13  *
  14  * The above copyright notice and this permission notice (including the
  15  * next paragraph) shall be included in all copies or substantial portions
  16  * of the Software.
  17  *
  18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25  *
  26  **************************************************************************/
  27
  28
  29 /**
  30  * @file
  31  * Unit tests for blend LLVM IR generation
  32  *
  33  * @author Jose Fonseca <jfonseca@vmware.com>
  34  *
  35  * Blend computation code derived from code written by
  36  * @author Brian Paul <brian@vmware.com>
  37  */
  38
  39
#include <math.h>

#include "gallivm/lp_bld_init.h"
#include "gallivm/lp_bld_type.h"
#include "gallivm/lp_bld_debug.h"
#include "lp_bld_blend.h"
#include "lp_test.h"
  45
  46
  47 enum vector_mode
  48 {
  49    AoS = 0,
  50    SoA = 1
  51 };
  52
  53
  54 typedef void (*blend_test_ptr_t)(const void *src, const void *dst, const void *con, void *res);
  55
  56 /** cast wrapper */
  57 static blend_test_ptr_t
  58 voidptr_to_blend_test_ptr_t(void *p)
  59 {
  60    union {
  61       void *v;
  62       blend_test_ptr_t f;
  63    } u;
  64    u.v = p;
  65    return u.f;
  66 }
  67
  68
  69
  70 void
  71 write_tsv_header(FILE *fp)
  72 {
  73    fprintf(fp,
  74            "result\t"
  75            "cycles_per_channel\t"
  76            "mode\t"
  77            "type\t"
  78            "sep_func\t"
  79            "sep_src_factor\t"
  80            "sep_dst_factor\t"
  81            "rgb_func\t"
  82            "rgb_src_factor\t"
  83            "rgb_dst_factor\t"
  84            "alpha_func\t"
  85            "alpha_src_factor\t"
  86            "alpha_dst_factor\n");
  87
  88    fflush(fp);
  89 }
  90
  91
  92 static void
  93 write_tsv_row(FILE *fp,
  94               const struct pipe_blend_state *blend,
  95               enum vector_mode mode,
  96               struct lp_type type,
  97               double cycles,
  98               boolean success)
  99 {
 100    fprintf(fp, "%s\t", success ? "pass" : "fail");
 101
 102    if (mode == AoS) {
 103       fprintf(fp, "%.1f\t", cycles / type.length);
 104       fprintf(fp, "aos\t");
 105    }
 106
 107    if (mode == SoA) {
 108       fprintf(fp, "%.1f\t", cycles / (4 * type.length));
 109       fprintf(fp, "soa\t");
 110    }
 111
 112    fprintf(fp, "%s%u%sx%u\t",
 113            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
 114            type.width,
 115            type.norm ? "n" : "",
 116            type.length);
 117
 118    fprintf(fp,
 119            "%s\t%s\t%s\t",
 120            blend->rt[0].rgb_func != blend->rt[0].alpha_func ? "true" : "false",
 121            blend->rt[0].rgb_src_factor != blend->rt[0].alpha_src_factor ? "true" : "false",
 122            blend->rt[0].rgb_dst_factor != blend->rt[0].alpha_dst_factor ? "true" : "false");
 123
 124    fprintf(fp,
 125            "%s\t%s\t%s\t%s\t%s\t%s\n",
 126            util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
 127            util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
 128            util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
 129            util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
 130            util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
 131            util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
 132
 133    fflush(fp);
 134 }
 135
 136
 137 static void
 138 dump_blend_type(FILE *fp,
 139                 const struct pipe_blend_state *blend,
 140                 enum vector_mode mode,
 141                 struct lp_type type)
 142 {
 143    fprintf(fp, "%s", mode ? "soa" : "aos");
 144
 145    fprintf(fp, " type=%s%u%sx%u",
 146            type.floating ? "f" : (type.fixed ? "h" : (type.sign ? "s" : "u")),
 147            type.width,
 148            type.norm ? "n" : "",
 149            type.length);
 150
 151    fprintf(fp,
 152            " %s=%s %s=%s %s=%s %s=%s %s=%s %s=%s",
 153            "rgb_func",         util_dump_blend_func(blend->rt[0].rgb_func, TRUE),
 154            "rgb_src_factor",   util_dump_blend_factor(blend->rt[0].rgb_src_factor, TRUE),
 155            "rgb_dst_factor",   util_dump_blend_factor(blend->rt[0].rgb_dst_factor, TRUE),
 156            "alpha_func",       util_dump_blend_func(blend->rt[0].alpha_func, TRUE),
 157            "alpha_src_factor", util_dump_blend_factor(blend->rt[0].alpha_src_factor, TRUE),
 158            "alpha_dst_factor", util_dump_blend_factor(blend->rt[0].alpha_dst_factor, TRUE));
 159
 160    fprintf(fp, " ...\n");
 161    fflush(fp);
 162 }
 163
 164
 165 static LLVMValueRef
 166 add_blend_test(LLVMModuleRef module,
 167                const struct pipe_blend_state *blend,
 168                enum vector_mode mode,
 169                struct lp_type type)
 170 {
 171    LLVMTypeRef vec_type;
 172    LLVMTypeRef args[4];
 173    LLVMValueRef func;
 174    LLVMValueRef src_ptr;
 175    LLVMValueRef dst_ptr;
 176    LLVMValueRef const_ptr;
 177    LLVMValueRef res_ptr;
 178    LLVMBasicBlockRef block;
 179    LLVMBuilderRef builder;
 180    const unsigned rt = 0;
 181
 182    vec_type = lp_build_vec_type(type);
 183
 184    args[3] = args[2] = args[1] = args[0] = LLVMPointerType(vec_type, 0);
 185    func = LLVMAddFunction(module, "test", LLVMFunctionType(LLVMVoidType(), args, 4, 0));
 186    LLVMSetFunctionCallConv(func, LLVMCCallConv);
 187    src_ptr = LLVMGetParam(func, 0);
 188    dst_ptr = LLVMGetParam(func, 1);
 189    const_ptr = LLVMGetParam(func, 2);
 190    res_ptr = LLVMGetParam(func, 3);
 191
 192    block = LLVMAppendBasicBlock(func, "entry");
 193    builder = LLVMCreateBuilder();
 194    LLVMPositionBuilderAtEnd(builder, block);
 195
 196    if (mode == AoS) {
 197       LLVMValueRef src;
 198       LLVMValueRef dst;
 199       LLVMValueRef con;
 200       LLVMValueRef res;
 201
 202       src = LLVMBuildLoad(builder, src_ptr, "src");
 203       dst = LLVMBuildLoad(builder, dst_ptr, "dst");
 204       con = LLVMBuildLoad(builder, const_ptr, "const");
 205
 206       res = lp_build_blend_aos(builder, blend, type, rt, src, dst, con, 3);
 207
 208       lp_build_name(res, "res");
 209
 210       LLVMBuildStore(builder, res, res_ptr);
 211    }
 212
 213    if (mode == SoA) {
 214       LLVMValueRef src[4];
 215       LLVMValueRef dst[4];
 216       LLVMValueRef con[4];
 217       LLVMValueRef res[4];
 218       unsigned i;
 219
 220       for(i = 0; i < 4; ++i) {
 221          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
 222          src[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, src_ptr, &index, 1, ""), "");
 223          dst[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
 224          con[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
 225          lp_build_name(src[i], "src.%c", "rgba"[i]);
 226          lp_build_name(con[i], "con.%c", "rgba"[i]);
 227          lp_build_name(dst[i], "dst.%c", "rgba"[i]);
 228       }
 229
 230       lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
 231
 232       for(i = 0; i < 4; ++i) {
 233          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
 234          lp_build_name(res[i], "res.%c", "rgba"[i]);
 235          LLVMBuildStore(builder, res[i], LLVMBuildGEP(builder, res_ptr, &index, 1, ""));
 236       }
 237    }
 238
 239    LLVMBuildRetVoid(builder);;
 240
 241    LLVMDisposeBuilder(builder);
 242    return func;
 243 }
 244
 245
 246 /** Add and limit result to ceiling of 1.0 */
 247 #define ADD_SAT(R, A, B) \
 248 do { \
 249    R = (A) + (B);  if (R > 1.0f) R = 1.0f; \
 250 } while (0)
 251
 252 /** Subtract and limit result to floor of 0.0 */
 253 #define SUB_SAT(R, A, B) \
 254 do { \
 255    R = (A) - (B);  if (R < 0.0f) R = 0.0f; \
 256 } while (0)
 257
 258
 259 static void
 260 compute_blend_ref_term(unsigned rgb_factor,
 261                        unsigned alpha_factor,
 262                        const double *factor,
 263                        const double *src,
 264                        const double *dst,
 265                        const double *con,
 266                        double *term)
 267 {
 268    double temp;
 269
 270    switch (rgb_factor) {
 271    case PIPE_BLENDFACTOR_ONE:
 272       term[0] = factor[0]; /* R */
 273       term[1] = factor[1]; /* G */
 274       term[2] = factor[2]; /* B */
 275       break;
 276    case PIPE_BLENDFACTOR_SRC_COLOR:
 277       term[0] = factor[0] * src[0]; /* R */
 278       term[1] = factor[1] * src[1]; /* G */
 279       term[2] = factor[2] * src[2]; /* B */
 280       break;
 281    case PIPE_BLENDFACTOR_SRC_ALPHA:
 282       term[0] = factor[0] * src[3]; /* R */
 283       term[1] = factor[1] * src[3]; /* G */
 284       term[2] = factor[2] * src[3]; /* B */
 285       break;
 286    case PIPE_BLENDFACTOR_DST_COLOR:
 287       term[0] = factor[0] * dst[0]; /* R */
 288       term[1] = factor[1] * dst[1]; /* G */
 289       term[2] = factor[2] * dst[2]; /* B */
 290       break;
 291    case PIPE_BLENDFACTOR_DST_ALPHA:
 292       term[0] = factor[0] * dst[3]; /* R */
 293       term[1] = factor[1] * dst[3]; /* G */
 294       term[2] = factor[2] * dst[3]; /* B */
 295       break;
 296    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
 297       temp = MIN2(src[3], 1.0f - dst[3]);
 298       term[0] = factor[0] * temp; /* R */
 299       term[1] = factor[1] * temp; /* G */
 300       term[2] = factor[2] * temp; /* B */
 301       break;
 302    case PIPE_BLENDFACTOR_CONST_COLOR:
 303       term[0] = factor[0] * con[0]; /* R */
 304       term[1] = factor[1] * con[1]; /* G */
 305       term[2] = factor[2] * con[2]; /* B */
 306       break;
 307    case PIPE_BLENDFACTOR_CONST_ALPHA:
 308       term[0] = factor[0] * con[3]; /* R */
 309       term[1] = factor[1] * con[3]; /* G */
 310       term[2] = factor[2] * con[3]; /* B */
 311       break;
 312    case PIPE_BLENDFACTOR_SRC1_COLOR:
 313       assert(0); /* to do */
 314       break;
 315    case PIPE_BLENDFACTOR_SRC1_ALPHA:
 316       assert(0); /* to do */
 317       break;
 318    case PIPE_BLENDFACTOR_ZERO:
 319       term[0] = 0.0f; /* R */
 320       term[1] = 0.0f; /* G */
 321       term[2] = 0.0f; /* B */
 322       break;
 323    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
 324       term[0] = factor[0] * (1.0f - src[0]); /* R */
 325       term[1] = factor[1] * (1.0f - src[1]); /* G */
 326       term[2] = factor[2] * (1.0f - src[2]); /* B */
 327       break;
 328    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
 329       term[0] = factor[0] * (1.0f - src[3]); /* R */
 330       term[1] = factor[1] * (1.0f - src[3]); /* G */
 331       term[2] = factor[2] * (1.0f - src[3]); /* B */
 332       break;
 333    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
 334       term[0] = factor[0] * (1.0f - dst[3]); /* R */
 335       term[1] = factor[1] * (1.0f - dst[3]); /* G */
 336       term[2] = factor[2] * (1.0f - dst[3]); /* B */
 337       break;
 338    case PIPE_BLENDFACTOR_INV_DST_COLOR:
 339       term[0] = factor[0] * (1.0f - dst[0]); /* R */
 340       term[1] = factor[1] * (1.0f - dst[1]); /* G */
 341       term[2] = factor[2] * (1.0f - dst[2]); /* B */
 342       break;
 343    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
 344       term[0] = factor[0] * (1.0f - con[0]); /* R */
 345       term[1] = factor[1] * (1.0f - con[1]); /* G */
 346       term[2] = factor[2] * (1.0f - con[2]); /* B */
 347       break;
 348    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
 349       term[0] = factor[0] * (1.0f - con[3]); /* R */
 350       term[1] = factor[1] * (1.0f - con[3]); /* G */
 351       term[2] = factor[2] * (1.0f - con[3]); /* B */
 352       break;
 353    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
 354       assert(0); /* to do */
 355       break;
 356    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
 357       assert(0); /* to do */
 358       break;
 359    default:
 360       assert(0);
 361    }
 362
 363    /*
 364     * Compute src/first term A
 365     */
 366    switch (alpha_factor) {
 367    case PIPE_BLENDFACTOR_ONE:
 368       term[3] = factor[3]; /* A */
 369       break;
 370    case PIPE_BLENDFACTOR_SRC_COLOR:
 371    case PIPE_BLENDFACTOR_SRC_ALPHA:
 372       term[3] = factor[3] * src[3]; /* A */
 373       break;
 374    case PIPE_BLENDFACTOR_DST_COLOR:
 375    case PIPE_BLENDFACTOR_DST_ALPHA:
 376       term[3] = factor[3] * dst[3]; /* A */
 377       break;
 378    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
 379       term[3] = src[3]; /* A */
 380       break;
 381    case PIPE_BLENDFACTOR_CONST_COLOR:
 382    case PIPE_BLENDFACTOR_CONST_ALPHA:
 383       term[3] = factor[3] * con[3]; /* A */
 384       break;
 385    case PIPE_BLENDFACTOR_ZERO:
 386       term[3] = 0.0f; /* A */
 387       break;
 388    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
 389    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
 390       term[3] = factor[3] * (1.0f - src[3]); /* A */
 391       break;
 392    case PIPE_BLENDFACTOR_INV_DST_COLOR:
 393    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
 394       term[3] = factor[3] * (1.0f - dst[3]); /* A */
 395       break;
 396    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
 397    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
 398       term[3] = factor[3] * (1.0f - con[3]);
 399       break;
 400    default:
 401       assert(0);
 402    }
 403 }
 404
 405
 406 static void
 407 compute_blend_ref(const struct pipe_blend_state *blend,
 408                   const double *src,
 409                   const double *dst,
 410                   const double *con,
 411                   double *res)
 412 {
 413    double src_term[4];
 414    double dst_term[4];
 415
 416    compute_blend_ref_term(blend->rt[0].rgb_src_factor, blend->rt[0].alpha_src_factor,
 417                           src, src, dst, con, src_term);
 418    compute_blend_ref_term(blend->rt[0].rgb_dst_factor, blend->rt[0].alpha_dst_factor,
 419                           dst, src, dst, con, dst_term);
 420
 421    /*
 422     * Combine RGB terms
 423     */
 424    switch (blend->rt[0].rgb_func) {
 425    case PIPE_BLEND_ADD:
 426       ADD_SAT(res[0], src_term[0], dst_term[0]); /* R */
 427       ADD_SAT(res[1], src_term[1], dst_term[1]); /* G */
 428       ADD_SAT(res[2], src_term[2], dst_term[2]); /* B */
 429       break;
 430    case PIPE_BLEND_SUBTRACT:
 431       SUB_SAT(res[0], src_term[0], dst_term[0]); /* R */
 432       SUB_SAT(res[1], src_term[1], dst_term[1]); /* G */
 433       SUB_SAT(res[2], src_term[2], dst_term[2]); /* B */
 434       break;
 435    case PIPE_BLEND_REVERSE_SUBTRACT:
 436       SUB_SAT(res[0], dst_term[0], src_term[0]); /* R */
 437       SUB_SAT(res[1], dst_term[1], src_term[1]); /* G */
 438       SUB_SAT(res[2], dst_term[2], src_term[2]); /* B */
 439       break;
 440    case PIPE_BLEND_MIN:
 441       res[0] = MIN2(src_term[0], dst_term[0]); /* R */
 442       res[1] = MIN2(src_term[1], dst_term[1]); /* G */
 443       res[2] = MIN2(src_term[2], dst_term[2]); /* B */
 444       break;
 445    case PIPE_BLEND_MAX:
 446       res[0] = MAX2(src_term[0], dst_term[0]); /* R */
 447       res[1] = MAX2(src_term[1], dst_term[1]); /* G */
 448       res[2] = MAX2(src_term[2], dst_term[2]); /* B */
 449       break;
 450    default:
 451       assert(0);
 452    }
 453
 454    /*
 455     * Combine A terms
 456     */
 457    switch (blend->rt[0].alpha_func) {
 458    case PIPE_BLEND_ADD:
 459       ADD_SAT(res[3], src_term[3], dst_term[3]); /* A */
 460       break;
 461    case PIPE_BLEND_SUBTRACT:
 462       SUB_SAT(res[3], src_term[3], dst_term[3]); /* A */
 463       break;
 464    case PIPE_BLEND_REVERSE_SUBTRACT:
 465       SUB_SAT(res[3], dst_term[3], src_term[3]); /* A */
 466       break;
 467    case PIPE_BLEND_MIN:
 468       res[3] = MIN2(src_term[3], dst_term[3]); /* A */
 469       break;
 470    case PIPE_BLEND_MAX:
 471       res[3] = MAX2(src_term[3], dst_term[3]); /* A */
 472       break;
 473    default:
 474       assert(0);
 475    }
 476 }
 477
 478
 479 PIPE_ALIGN_STACK
 480 static boolean
 481 test_one(unsigned verbose,
 482          FILE *fp,
 483          const struct pipe_blend_state *blend,
 484          enum vector_mode mode,
 485          struct lp_type type)
 486 {
 487    LLVMModuleRef module = NULL;
 488    LLVMValueRef func = NULL;
 489    LLVMExecutionEngineRef engine = lp_build_engine;
 490    LLVMPassManagerRef pass = NULL;
 491    char *error = NULL;
 492    blend_test_ptr_t blend_test_ptr;
 493    boolean success;
 494    const unsigned n = LP_TEST_NUM_SAMPLES;
 495    int64_t cycles[LP_TEST_NUM_SAMPLES];
 496    double cycles_avg = 0.0;
 497    unsigned i, j;
 498    void *code;
 499
 500    if(verbose >= 1)
 501       dump_blend_type(stdout, blend, mode, type);
 502
 503    module = LLVMModuleCreateWithName("test");
 504
 505    func = add_blend_test(module, blend, mode, type);
 506
 507    if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
 508       LLVMDumpModule(module);
 509       abort();
 510    }
 511    LLVMDisposeMessage(error);
 512
 513 #if 0
 514    pass = LLVMCreatePassManager();
 515    LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
 516    /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
 517     * but there are more on SVN. */
 518    LLVMAddConstantPropagationPass(pass);
 519    LLVMAddInstructionCombiningPass(pass);
 520    LLVMAddPromoteMemoryToRegisterPass(pass);
 521    LLVMAddGVNPass(pass);
 522    LLVMAddCFGSimplificationPass(pass);
 523    LLVMRunPassManager(pass, module);
 524 #else
 525    (void)pass;
 526 #endif
 527
 528    if(verbose >= 2)
 529       LLVMDumpModule(module);
 530
 531    code = LLVMGetPointerToGlobal(engine, func);
 532    blend_test_ptr = voidptr_to_blend_test_ptr_t(code);
 533
 534    if(verbose >= 2)
 535       lp_disassemble(code);
 536
 537    success = TRUE;
 538    for(i = 0; i < n && success; ++i) {
 539       if(mode == AoS) {
 540          PIPE_ALIGN_VAR(16) uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
 541          PIPE_ALIGN_VAR(16) uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
 542          PIPE_ALIGN_VAR(16) uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
 543          PIPE_ALIGN_VAR(16) uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
 544          PIPE_ALIGN_VAR(16) uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
 545          int64_t start_counter = 0;
 546          int64_t end_counter = 0;
 547
 548          random_vec(type, src);
 549          random_vec(type, dst);
 550          random_vec(type, con);
 551
 552          {
 553             double fsrc[LP_MAX_VECTOR_LENGTH];
 554             double fdst[LP_MAX_VECTOR_LENGTH];
 555             double fcon[LP_MAX_VECTOR_LENGTH];
 556             double fref[LP_MAX_VECTOR_LENGTH];
 557
 558             read_vec(type, src, fsrc);
 559             read_vec(type, dst, fdst);
 560             read_vec(type, con, fcon);
 561
 562             for(j = 0; j < type.length; j += 4)
 563                compute_blend_ref(blend, fsrc + j, fdst + j, fcon + j, fref + j);
 564
 565             write_vec(type, ref, fref);
 566          }
 567
 568          start_counter = rdtsc();
 569          blend_test_ptr(src, dst, con, res);
 570          end_counter = rdtsc();
 571
 572          cycles[i] = end_counter - start_counter;
 573
 574          if(!compare_vec(type, res, ref)) {
 575             success = FALSE;
 576
 577             if(verbose < 1)
 578                dump_blend_type(stderr, blend, mode, type);
 579             fprintf(stderr, "MISMATCH\n");
 580
 581             fprintf(stderr, "  Src: ");
 582             dump_vec(stderr, type, src);
 583             fprintf(stderr, "\n");
 584
 585             fprintf(stderr, "  Dst: ");
 586             dump_vec(stderr, type, dst);
 587             fprintf(stderr, "\n");
 588
 589             fprintf(stderr, "  Con: ");
 590             dump_vec(stderr, type, con);
 591             fprintf(stderr, "\n");
 592
 593             fprintf(stderr, "  Res: ");
 594             dump_vec(stderr, type, res);
 595             fprintf(stderr, "\n");
 596
 597             fprintf(stderr, "  Ref: ");
 598             dump_vec(stderr, type, ref);
 599             fprintf(stderr, "\n");
 600          }
 601       }
 602
 603       if(mode == SoA) {
 604          const unsigned stride = type.length*type.width/8;
 605          PIPE_ALIGN_VAR(16) uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
 606          PIPE_ALIGN_VAR(16) uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
 607          PIPE_ALIGN_VAR(16) uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
 608          PIPE_ALIGN_VAR(16) uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
 609          PIPE_ALIGN_VAR(16) uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
 610          int64_t start_counter = 0;
 611          int64_t end_counter = 0;
 612          boolean mismatch;
 613
 614          for(j = 0; j < 4; ++j) {
 615             random_vec(type, src + j*stride);
 616             random_vec(type, dst + j*stride);
 617             random_vec(type, con + j*stride);
 618          }
 619
 620          {
 621             double fsrc[4];
 622             double fdst[4];
 623             double fcon[4];
 624             double fref[4];
 625             unsigned k;
 626
 627             for(k = 0; k < type.length; ++k) {
 628                for(j = 0; j < 4; ++j) {
 629                   fsrc[j] = read_elem(type, src + j*stride, k);
 630                   fdst[j] = read_elem(type, dst + j*stride, k);
 631                   fcon[j] = read_elem(type, con + j*stride, k);
 632                }
 633
 634                compute_blend_ref(blend, fsrc, fdst, fcon, fref);
 635
 636                for(j = 0; j < 4; ++j)
 637                   write_elem(type, ref + j*stride, k, fref[j]);
 638             }
 639          }
 640
 641          start_counter = rdtsc();
 642          blend_test_ptr(src, dst, con, res);
 643          end_counter = rdtsc();
 644
 645          cycles[i] = end_counter - start_counter;
 646
 647          mismatch = FALSE;
 648          for (j = 0; j < 4; ++j)
 649             if(!compare_vec(type, res + j*stride, ref + j*stride))
 650                mismatch = TRUE;
 651
 652          if (mismatch) {
 653             success = FALSE;
 654
 655             if(verbose < 1)
 656                dump_blend_type(stderr, blend, mode, type);
 657             fprintf(stderr, "MISMATCH\n");
 658             for(j = 0; j < 4; ++j) {
 659                char channel = "RGBA"[j];
 660                fprintf(stderr, "  Src%c: ", channel);
 661                dump_vec(stderr, type, src + j*stride);
 662                fprintf(stderr, "\n");
 663
 664                fprintf(stderr, "  Dst%c: ", channel);
 665                dump_vec(stderr, type, dst + j*stride);
 666                fprintf(stderr, "\n");
 667
 668                fprintf(stderr, "  Con%c: ", channel);
 669                dump_vec(stderr, type, con + j*stride);
 670                fprintf(stderr, "\n");
 671
 672                fprintf(stderr, "  Res%c: ", channel);
 673                dump_vec(stderr, type, res + j*stride);
 674                fprintf(stderr, "\n");
 675
 676                fprintf(stderr, "  Ref%c: ", channel);
 677                dump_vec(stderr, type, ref + j*stride);
 678                fprintf(stderr, "\n");
 679             }
 680          }
 681       }
 682    }
 683
 684    /*
 685     * Unfortunately the output of cycle counter is not very reliable as it comes
 686     * -- sometimes we get outliers (due IRQs perhaps?) which are
 687     * better removed to avoid random or biased data.
 688     */
 689    {
 690       double sum = 0.0, sum2 = 0.0;
 691       double avg, std;
 692       unsigned m;
 693
 694       for(i = 0; i < n; ++i) {
 695          sum += cycles[i];
 696          sum2 += cycles[i]*cycles[i];
 697       }
 698
 699       avg = sum/n;
 700       std = sqrtf((sum2 - n*avg*avg)/n);
 701
 702       m = 0;
 703       sum = 0.0;
 704       for(i = 0; i < n; ++i) {
 705          if(fabs(cycles[i] - avg) <= 4.0*std) {
 706             sum += cycles[i];
 707             ++m;
 708          }
 709       }
 710
 711       cycles_avg = sum/m;
 712
 713    }
 714
 715    if(fp)
 716       write_tsv_row(fp, blend, mode, type, cycles_avg, success);
 717
 718    if (!success) {
 719       if(verbose < 2)
 720          LLVMDumpModule(module);
 721       LLVMWriteBitcodeToFile(module, "blend.bc");
 722       fprintf(stderr, "blend.bc written\n");
 723       fprintf(stderr, "Invoke as \"llc -o - blend.bc\"\n");
 724       abort();
 725    }
 726
 727    LLVMFreeMachineCodeForFunction(engine, func);
 728
 729    if(pass)
 730       LLVMDisposePassManager(pass);
 731
 732    return success;
 733 }
 734
 735
 736 const unsigned
 737 blend_factors[] = {
 738    PIPE_BLENDFACTOR_ZERO,
 739    PIPE_BLENDFACTOR_ONE,
 740    PIPE_BLENDFACTOR_SRC_COLOR,
 741    PIPE_BLENDFACTOR_SRC_ALPHA,
 742    PIPE_BLENDFACTOR_DST_COLOR,
 743    PIPE_BLENDFACTOR_DST_ALPHA,
 744    PIPE_BLENDFACTOR_CONST_COLOR,
 745    PIPE_BLENDFACTOR_CONST_ALPHA,
 746 #if 0
 747    PIPE_BLENDFACTOR_SRC1_COLOR,
 748    PIPE_BLENDFACTOR_SRC1_ALPHA,
 749 #endif
 750    PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE,
 751    PIPE_BLENDFACTOR_INV_SRC_COLOR,
 752    PIPE_BLENDFACTOR_INV_SRC_ALPHA,
 753    PIPE_BLENDFACTOR_INV_DST_COLOR,
 754    PIPE_BLENDFACTOR_INV_DST_ALPHA,
 755    PIPE_BLENDFACTOR_INV_CONST_COLOR,
 756    PIPE_BLENDFACTOR_INV_CONST_ALPHA,
 757 #if 0
 758    PIPE_BLENDFACTOR_INV_SRC1_COLOR,
 759    PIPE_BLENDFACTOR_INV_SRC1_ALPHA,
 760 #endif
 761 };
 762
 763
 764 const unsigned
 765 blend_funcs[] = {
 766    PIPE_BLEND_ADD,
 767    PIPE_BLEND_SUBTRACT,
 768    PIPE_BLEND_REVERSE_SUBTRACT,
 769    PIPE_BLEND_MIN,
 770    PIPE_BLEND_MAX
 771 };
 772
 773
 774 const struct lp_type blend_types[] = {
 775    /* float, fixed,  sign,  norm, width, len */
 776    {   TRUE, FALSE, FALSE,  TRUE,    32,   4 }, /* f32 x 4 */
 777    {  FALSE, FALSE, FALSE,  TRUE,     8,  16 }, /* u8n x 16 */
 778 };
 779
 780
 781 const unsigned num_funcs = sizeof(blend_funcs)/sizeof(blend_funcs[0]);
 782 const unsigned num_factors = sizeof(blend_factors)/sizeof(blend_factors[0]);
 783 const unsigned num_types = sizeof(blend_types)/sizeof(blend_types[0]);
 784
 785
 786 boolean
 787 test_all(unsigned verbose, FILE *fp)
 788 {
 789    const unsigned *rgb_func;
 790    const unsigned *rgb_src_factor;
 791    const unsigned *rgb_dst_factor;
 792    const unsigned *alpha_func;
 793    const unsigned *alpha_src_factor;
 794    const unsigned *alpha_dst_factor;
 795    struct pipe_blend_state blend;
 796    enum vector_mode mode;
 797    const struct lp_type *type;
 798    boolean success = TRUE;
 799
 800    for(rgb_func = blend_funcs; rgb_func < &blend_funcs[num_funcs]; ++rgb_func) {
 801       for(alpha_func = blend_funcs; alpha_func < &blend_funcs[num_funcs]; ++alpha_func) {
 802          for(rgb_src_factor = blend_factors; rgb_src_factor < &blend_factors[num_factors]; ++rgb_src_factor) {
 803             for(rgb_dst_factor = blend_factors; rgb_dst_factor <= rgb_src_factor; ++rgb_dst_factor) {
 804                for(alpha_src_factor = blend_factors; alpha_src_factor < &blend_factors[num_factors]; ++alpha_src_factor) {
 805                   for(alpha_dst_factor = blend_factors; alpha_dst_factor <= alpha_src_factor; ++alpha_dst_factor) {
 806                      for(mode = 0; mode < 2; ++mode) {
 807                         for(type = blend_types; type < &blend_types[num_types]; ++type) {
 808
 809                            if(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
 810                               *alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
 811                               continue;
 812
 813                            memset(&blend, 0, sizeof blend);
 814                            blend.rt[0].blend_enable      = 1;
 815                            blend.rt[0].rgb_func          = *rgb_func;
 816                            blend.rt[0].rgb_src_factor    = *rgb_src_factor;
 817                            blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
 818                            blend.rt[0].alpha_func        = *alpha_func;
 819                            blend.rt[0].alpha_src_factor  = *alpha_src_factor;
 820                            blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
 821                            blend.rt[0].colormask         = PIPE_MASK_RGBA;
 822
 823                            if(!test_one(verbose, fp, &blend, mode, *type))
 824                              success = FALSE;
 825
 826                         }
 827                      }
 828                   }
 829                }
 830             }
 831          }
 832       }
 833    }
 834
 835    return success;
 836 }
 837
 838
 839 boolean
 840 test_some(unsigned verbose, FILE *fp, unsigned long n)
 841 {
 842    const unsigned *rgb_func;
 843    const unsigned *rgb_src_factor;
 844    const unsigned *rgb_dst_factor;
 845    const unsigned *alpha_func;
 846    const unsigned *alpha_src_factor;
 847    const unsigned *alpha_dst_factor;
 848    struct pipe_blend_state blend;
 849    enum vector_mode mode;
 850    const struct lp_type *type;
 851    unsigned long i;
 852    boolean success = TRUE;
 853
 854    for(i = 0; i < n; ++i) {
 855       rgb_func = &blend_funcs[rand() % num_funcs];
 856       alpha_func = &blend_funcs[rand() % num_funcs];
 857       rgb_src_factor = &blend_factors[rand() % num_factors];
 858       alpha_src_factor = &blend_factors[rand() % num_factors];
 859
 860       do {
 861          rgb_dst_factor = &blend_factors[rand() % num_factors];
 862       } while(*rgb_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
 863
 864       do {
 865          alpha_dst_factor = &blend_factors[rand() % num_factors];
 866       } while(*alpha_dst_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE);
 867
 868       mode = rand() & 1;
 869
 870       type = &blend_types[rand() % num_types];
 871
 872       memset(&blend, 0, sizeof blend);
 873       blend.rt[0].blend_enable      = 1;
 874       blend.rt[0].rgb_func          = *rgb_func;
 875       blend.rt[0].rgb_src_factor    = *rgb_src_factor;
 876       blend.rt[0].rgb_dst_factor    = *rgb_dst_factor;
 877       blend.rt[0].alpha_func        = *alpha_func;
 878       blend.rt[0].alpha_src_factor  = *alpha_src_factor;
 879       blend.rt[0].alpha_dst_factor  = *alpha_dst_factor;
 880       blend.rt[0].colormask         = PIPE_MASK_RGBA;
 881
 882       if(!test_one(verbose, fp, &blend, mode, *type))
 883         success = FALSE;
 884    }
 885
 886    return success;
 887 }
 888
 889
 890 boolean
 891 test_single(unsigned verbose, FILE *fp)
 892 {
 893    printf("no test_single()");
 894    return TRUE;
 895 }