X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fllvmpipe%2Flp_test_arit.c;h=184e50089f793346f401418daf68d507133e9ab1;hb=0e30c6b8a7e84211bb417362ec73f24ef134ae34;hp=f0e43e0f9ccd2993d0690519fd52cb82db18b70c;hpb=0a1d49504de4d34b003625ee7c901667afa43dea;p=mesa.git diff --git a/src/gallium/drivers/llvmpipe/lp_test_arit.c b/src/gallium/drivers/llvmpipe/lp_test_arit.c index f0e43e0f9cc..184e50089f7 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_test_arit.c @@ -32,8 +32,11 @@ #include "util/u_pointer.h" #include "util/u_memory.h" +#include "util/u_math.h" +#include "util/u_cpu_detect.h" #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_init.h" #include "gallivm/lp_bld_arit.h" @@ -51,7 +54,7 @@ write_tsv_header(FILE *fp) } -typedef float (*unary_func_t)(float); +typedef void (*unary_func_t)(float *out, const float *in); /** @@ -79,10 +82,56 @@ struct unary_test_t */ const float *values; unsigned num_values; + + /* + * Required precision in bits. + */ + double precision; +}; + + +static float negf(float x) +{ + return -x; +} + + +static float sgnf(float x) +{ + if (x > 0.0f) { + return 1.0f; + } + if (x < 0.0f) { + return -1.0f; + } + return 0.0f; +} + + +const float sgn_values[] = { + -INFINITY, + -60, + -4, + -2, + -1, + -1e-007, + 0, + 1e-007, + 0.01, + 0.1, + 0.9, + 0.99, + 1, + 2, + 4, + 60, + INFINITY, + NAN }; const float exp2_values[] = { + -INFINITY, -60, -4, -2, @@ -90,10 +139,16 @@ const float exp2_values[] = { -1e-007, 0, 1e-007, + 0.01, + 0.1, + 0.9, + 0.99, 1, 2, 4, - 60 + 60, + INFINITY, + NAN }; @@ -105,44 +160,132 @@ const float log2_values[] = { */ 1.4012984643248171e-45, #endif + -INFINITY, + 0, 1e-007, + 0.1, 0.5, + 0.99, 1, + 1.01, + 1.1, + 1.9, + 1.99, 2, 4, 100000, - 1e+018 + 1e+018, + INFINITY, + NAN +}; + + +static float rcpf(float x) +{ + return 1.0/x; +} + + +const float rcp_values[] = { + -0.0, 0.0, + -1.0, 1.0, + -1e-007, 1e-007, + -4.0, 4.0, + -1e+035, -100000, + 100000, 1e+035, + 5.88e-39f, // denormal +#if (__STDC_VERSION__ >= 199901L) + INFINITY, -INFINITY, +#endif }; static float rsqrtf(float x) { - return 1.0/sqrt(x); + return 1.0/(float)sqrt(x); } const float rsqrt_values[] = { - -1, -1e-007, - 1e-007, 1, - -4, -1, - 1, 4, - -1e+035, -100000, + // http://msdn.microsoft.com/en-us/library/windows/desktop/bb147346.aspx + 0.0, // must yield infinity + 1.0, // must yield 1.0 + 1e-007, 4.0, 100000, 1e+035, + 5.88e-39f, // denormal +#if (__STDC_VERSION__ >= 199901L) + INFINITY, +#endif }; const float sincos_values[] = { + -INFINITY, -5*M_PI/4, -4*M_PI/4, -4*M_PI/4, -3*M_PI/4, -2*M_PI/4, -1*M_PI/4, - 1*M_PI/4, - 2*M_PI/4, - 3*M_PI/4, - 4*M_PI/4, - 5*M_PI/4, + 1*M_PI/4, + 2*M_PI/4, + 3*M_PI/4, + 4*M_PI/4, + 5*M_PI/4, + INFINITY, + NAN +}; + +const float round_values[] = { + -10.0, -1, 0.0, 12.0, + -1.49, -0.25, 1.25, 2.51, + -0.99, -0.01, 0.01, 0.99, + -1.5, -0.5, 0.5, 1.5, + 1.401298464324817e-45f, // smallest denormal + -1.401298464324817e-45f, + 1.62981451e-08f, + -1.62981451e-08f, + 1.62981451e15f, // large number not representable as 32bit int + -1.62981451e15f, + FLT_EPSILON, + -FLT_EPSILON, + 1.0f - 0.5f*FLT_EPSILON, + -1.0f + FLT_EPSILON, + FLT_MAX, + -FLT_MAX +}; + +static float fractf(float x) +{ + x -= floorf(x); + if (x >= 1.0f) { + // clamp to the largest number smaller than one + x = 1.0f - 0.5f*FLT_EPSILON; + } + return x; +} + + +const float fract_values[] = { + // http://en.wikipedia.org/wiki/IEEE_754-1985#Examples + 0.0f, + -0.0f, + 1.0f, + -1.0f, + 0.5f, + -0.5f, + 1.401298464324817e-45f, // smallest denormal + -1.401298464324817e-45f, + 5.88e-39f, // middle denormal + 1.18e-38f, // largest denormal + -1.18e-38f, + -1.62981451e-08f, + FLT_EPSILON, + -FLT_EPSILON, + 1.0f - 0.5f*FLT_EPSILON, + -1.0f + FLT_EPSILON, + FLT_MAX, + -FLT_MAX }; @@ -150,14 +293,25 @@ const float sincos_values[] = { * Unary test cases. */ -static const struct unary_test_t unary_tests[] = { - {"exp2", &lp_build_exp2, &exp2f, exp2_values, Elements(exp2_values)}, - {"log2", &lp_build_log2, &log2f, log2_values, Elements(log2_values)}, - {"exp", &lp_build_exp, &expf, exp2_values, Elements(exp2_values)}, - {"log", &lp_build_log, &logf, log2_values, Elements(log2_values)}, - {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, Elements(rsqrt_values)}, - {"sin", &lp_build_sin, &sinf, sincos_values, Elements(sincos_values)}, - {"cos", &lp_build_cos, &cosf, sincos_values, Elements(sincos_values)}, +static const struct unary_test_t +unary_tests[] = { + {"abs", &lp_build_abs, &fabsf, sgn_values, ARRAY_SIZE(sgn_values), 20.0 }, + {"neg", &lp_build_negate, &negf, sgn_values, ARRAY_SIZE(sgn_values), 20.0 }, + {"sgn", &lp_build_sgn, &sgnf, sgn_values, ARRAY_SIZE(sgn_values), 20.0 }, + {"exp2", &lp_build_exp2, &exp2f, exp2_values, ARRAY_SIZE(exp2_values), 18.0 }, + {"log2", &lp_build_log2_safe, &log2f, log2_values, ARRAY_SIZE(log2_values), 20.0 }, + {"exp", &lp_build_exp, &expf, exp2_values, ARRAY_SIZE(exp2_values), 18.0 }, + {"log", &lp_build_log_safe, &logf, log2_values, ARRAY_SIZE(log2_values), 20.0 }, + {"rcp", &lp_build_rcp, &rcpf, rcp_values, ARRAY_SIZE(rcp_values), 20.0 }, + {"rsqrt", &lp_build_rsqrt, &rsqrtf, rsqrt_values, ARRAY_SIZE(rsqrt_values), 20.0 }, + {"sin", &lp_build_sin, &sinf, sincos_values, ARRAY_SIZE(sincos_values), 20.0 }, + {"cos", &lp_build_cos, &cosf, sincos_values, ARRAY_SIZE(sincos_values), 20.0 }, + {"sgn", &lp_build_sgn, &sgnf, sgn_values, ARRAY_SIZE(sgn_values), 20.0 }, + {"round", &lp_build_round, &nearbyintf, round_values, ARRAY_SIZE(round_values), 24.0 }, + {"trunc", &lp_build_trunc, &truncf, round_values, ARRAY_SIZE(round_values), 24.0 }, + {"floor", &lp_build_floor, &floorf, round_values, ARRAY_SIZE(round_values), 24.0 }, + {"ceil", &lp_build_ceil, &ceilf, round_values, ARRAY_SIZE(round_values), 24.0 }, + {"fract", &lp_build_fract_safe, &fractf, fract_values, ARRAY_SIZE(fract_values), 24.0 }, }; @@ -166,108 +320,197 @@ static const struct unary_test_t unary_tests[] = { */ static LLVMValueRef build_unary_test_func(struct gallivm_state *gallivm, - LLVMModuleRef module, - LLVMContextRef context, - const struct unary_test_t *test) + const struct unary_test_t *test, + unsigned length, + const char *test_name) { - LLVMTypeRef i32t = LLVMInt32TypeInContext(context); - LLVMTypeRef f32t = LLVMFloatTypeInContext(context); - LLVMTypeRef v4f32t = LLVMVectorType(f32t, 4); - LLVMTypeRef args[1] = { f32t }; - LLVMValueRef func = LLVMAddFunction(module, test->name, LLVMFunctionType(f32t, args, Elements(args), 0)); - LLVMValueRef arg1 = LLVMGetParam(func, 0); + struct lp_type type = lp_type_float_vec(32, length * 32); + LLVMContextRef context = gallivm->context; + LLVMModuleRef module = gallivm->module; + LLVMTypeRef vf32t = lp_build_vec_type(gallivm, type); + LLVMTypeRef args[2] = { LLVMPointerType(vf32t, 0), LLVMPointerType(vf32t, 0) }; + LLVMValueRef func = LLVMAddFunction(module, test_name, + LLVMFunctionType(LLVMVoidTypeInContext(context), + args, ARRAY_SIZE(args), 0)); + LLVMValueRef arg0 = LLVMGetParam(func, 0); + LLVMValueRef arg1 = LLVMGetParam(func, 1); LLVMBuilderRef builder = gallivm->builder; LLVMBasicBlockRef block = LLVMAppendBasicBlockInContext(context, func, "entry"); - LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); LLVMValueRef ret; struct lp_build_context bld; - lp_build_context_init(&bld, gallivm, lp_float32_vec4_type()); + lp_build_context_init(&bld, gallivm, type); LLVMSetFunctionCallConv(func, LLVMCCallConv); LLVMPositionBuilderAtEnd(builder, block); - /* scalar to vector */ - arg1 = LLVMBuildInsertElement(builder, LLVMGetUndef(v4f32t), arg1, index0, ""); + arg1 = LLVMBuildLoad(builder, arg1, ""); ret = test->builder(&bld, arg1); - /* vector to scalar */ - ret = LLVMBuildExtractElement(builder, ret, index0, ""); + LLVMBuildStore(builder, ret, arg0); + + LLVMBuildRetVoid(builder); + + gallivm_verify_function(gallivm, func); - LLVMBuildRet(builder, ret); return func; } +/* + * Flush denorms to zero. + */ +static float +flush_denorm_to_zero(float val) +{ + /* + * If we have a denorm manually set it to (+-)0. + * This is because the reference may or may not do the right thing + * otherwise because we want the result according to treating all + * denormals as zero (FTZ/DAZ). Not using fpclassify because + * a) some compilers are stuck at c89 (msvc) + * b) not sure it reliably works with non-standard ftz/daz mode + * And, right now we only disable denorms with jited code on x86/sse + * (albeit this should be classified as a bug) so to get results which + * match we must only flush them to zero here in that case too. + */ + union fi fi_val; + + fi_val.f = val; + +#if defined(PIPE_ARCH_SSE) + if (util_cpu_caps.has_sse) { + if ((fi_val.ui & 0x7f800000) == 0) { + fi_val.ui &= 0xff800000; + } + } +#endif + + return fi_val.f; +} + /* * Test one LLVM unary arithmetic builder function. */ static boolean -test_unary(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, const struct unary_test_t *test) +test_unary(unsigned verbose, FILE *fp, const struct unary_test_t *test, unsigned length) { - LLVMModuleRef module = gallivm->module; + char test_name[128]; + snprintf(test_name, sizeof test_name, "%s.v%u", test->name, length); + LLVMContextRef context; + struct gallivm_state *gallivm; LLVMValueRef test_func; - LLVMExecutionEngineRef engine = gallivm->engine; - LLVMContextRef context = gallivm->context; - char *error = NULL; unary_func_t test_func_jit; boolean success = TRUE; - int i; + int i, j; + float *in, *out; - test_func = build_unary_test_func(gallivm, module, context, test); + in = align_malloc(length * 4, length * 4); + out = align_malloc(length * 4, length * 4); - if (LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { - printf("LLVMVerifyModule: %s\n", error); - LLVMDumpModule(module); - abort(); + /* random NaNs or 0s could wreak havoc */ + for (i = 0; i < length; i++) { + in[i] = 1.0; } - LLVMDisposeMessage(error); - test_func_jit = (unary_func_t) pointer_to_func(LLVMGetPointerToGlobal(engine, test_func)); + context = LLVMContextCreate(); + gallivm = gallivm_create("test_module", context); - for (i = 0; i < test->num_values; ++i) { - float value = test->values[i]; - float ref = test->ref(value); - float src = test_func_jit(value); + test_func = build_unary_test_func(gallivm, test, length, test_name); - double error = fabs(src - ref); - double precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG; + gallivm_compile_module(gallivm); - bool pass = precision >= 20.0; + test_func_jit = (unary_func_t) gallivm_jit_function(gallivm, test_func); - if (isnan(ref)) { - continue; - } + gallivm_free_ir(gallivm); + + for (j = 0; j < (test->num_values + length - 1) / length; j++) { + int num_vals = ((j + 1) * length <= test->num_values) ? length : + test->num_values % length; - if (!pass || verbose) { - printf("%s(%.9g): ref = %.9g, src = %.9g, precision = %f bits, %s\n", - test->name, value, ref, src, precision, - pass ? "PASS" : "FAIL"); + for (i = 0; i < num_vals; ++i) { + in[i] = test->values[i+j*length]; } - if (!pass) { - success = FALSE; + test_func_jit(out, in); + for (i = 0; i < num_vals; ++i) { + float testval, ref; + double error, precision; + boolean expected_pass = TRUE; + bool pass; + + testval = flush_denorm_to_zero(in[i]); + ref = flush_denorm_to_zero(test->ref(testval)); + + if (util_inf_sign(ref) && util_inf_sign(out[i]) == util_inf_sign(ref)) { + error = 0; + } else { + error = fabs(out[i] - ref); + } + precision = error ? -log2(error/fabs(ref)) : FLT_MANT_DIG; + + pass = precision >= test->precision; + + if (isnan(ref)) { + continue; + } + + if (!util_cpu_caps.has_neon && + test->ref == &nearbyintf && length == 2 && + ref != roundf(testval)) { + /* FIXME: The generic (non SSE) path in lp_build_iround, which is + * always taken for length==2 regardless of native round support, + * does not round to even. */ + expected_pass = FALSE; + } + + if (test->ref == &expf && util_inf_sign(testval) == -1) { + /* XXX: 64bits MSVCRT's expf(-inf) returns -inf instead of 0 */ +#if defined(_MSC_VER) && defined(_WIN64) + expected_pass = FALSE; +#endif + } + + if (pass != expected_pass || verbose) { + printf("%s(%.9g): ref = %.9g, out = %.9g, precision = %f bits, %s%s\n", + test_name, in[i], ref, out[i], precision, + pass ? "PASS" : "FAIL", + !expected_pass ? (pass ? " (unexpected)" : " (expected)" ): ""); + fflush(stdout); + } + + if (pass != expected_pass) { + success = FALSE; + } } } - LLVMFreeMachineCodeForFunction(engine, test_func); + gallivm_destroy(gallivm); + LLVMContextDispose(context); + + align_free(in); + align_free(out); return success; } boolean -test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_all(unsigned verbose, FILE *fp) { boolean success = TRUE; int i; - for (i = 0; i < Elements(unary_tests); ++i) { - if (!test_unary(gallivm, verbose, fp, &unary_tests[i])) { - success = FALSE; + for (i = 0; i < ARRAY_SIZE(unary_tests); ++i) { + unsigned max_length = lp_native_vector_width / 32; + unsigned length; + for (length = 1; length <= max_length; length *= 2) { + if (!test_unary(verbose, fp, &unary_tests[i], length)) { + success = FALSE; + } } } @@ -276,19 +519,19 @@ test_all(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) boolean -test_some(struct gallivm_state *gallivm, unsigned verbose, FILE *fp, +test_some(unsigned verbose, FILE *fp, unsigned long n) { /* * Not randomly generated test cases, so test all. */ - return test_all(gallivm, verbose, fp); + return test_all(verbose, fp); } boolean -test_single(struct gallivm_state *gallivm, unsigned verbose, FILE *fp) +test_single(unsigned verbose, FILE *fp) { return TRUE; }