From: Markus Trippelsdorf Date: Sat, 16 Dec 2017 04:28:08 +0000 (+0000) Subject: re PR target/83358 (division not converted with Intel tuning since r253934) X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=02308bd3ec458762af1109d0ca6d2be757d555a0;p=gcc.git re PR target/83358 (division not converted with Intel tuning since r253934) 2017-12-15 Markus Trippelsdorf PR target/83358 * config/i386/x86-tune-costs.h (skylake_cost, core_cost): Increase div/mod latencies a bit. PR target/83358 * gcc.target/i386/pr83358-1.c: New test. * gcc.target/i386/pr83358-2.c: New test. From-SVN: r255739 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4528b6db5f2..c2d037a9f8d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2017-12-15 Markus Trippelsdorf + + PR target/83358 + * config/i386/x86-tune-costs.h (skylake_cost, core_cost): Increase + div/mod latencies a bit. + 2017-12-15 Jeff Law PR tree-optimization/36550 diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 312467d9788..64821933830 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -1541,9 +1541,11 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (4), /* DI */ COSTS_N_INSNS (4)}, /* other */ 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (8), /* HI */ - COSTS_N_INSNS (11), /* SI */ + /* Expanding div/mod currently doesn't consider parallelism. So the cost + model is not realistic. We compensate by increasing the latencies a bit. 
*/ + {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (11), /* HI */ + COSTS_N_INSNS (14), /* SI */ COSTS_N_INSNS (76), /* DI */ COSTS_N_INSNS (76)}, /* other */ COSTS_N_INSNS (1), /* cost of movsx */ @@ -2342,11 +2344,11 @@ struct processor_costs core_cost = { COSTS_N_INSNS (4), /* DI */ COSTS_N_INSNS (4)}, /* other */ 0, /* cost of multiply per each bit set */ - {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */ - COSTS_N_INSNS (8), /* HI */ - /* 8-11 */ - COSTS_N_INSNS (11), /* SI */ - /* 24-81 */ + /* Expanding div/mod currently doesn't consider parallelism. So the cost + model is not realistic. We compensate by increasing the latencies a bit. */ + {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (11), /* HI */ + COSTS_N_INSNS (14), /* SI */ COSTS_N_INSNS (81), /* DI */ COSTS_N_INSNS (81)}, /* other */ COSTS_N_INSNS (1), /* cost of movsx */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5c91661c71b..10dc8b2ef19 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2017-12-15 Markus Trippelsdorf + + PR target/83358 + * gcc.target/i386/pr83358-1.c: New test. + * gcc.target/i386/pr83358-2.c: New test. 
+ 2017-12-15 Jeff Law PR tree-optimization/36550 diff --git a/gcc/testsuite/gcc.target/i386/pr83358-1.c b/gcc/testsuite/gcc.target/i386/pr83358-1.c new file mode 100644 index 00000000000..96427b2f56d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr83358-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=core2" } */ + +#include <stdint.h> + +void bin2ascii(uint64_t val, char *dst) { + const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000; + int64_t hix = val / POW10_10; + int64_t lox = val % POW10_10; + int32_t v0 = hix / 100000; + int32_t v1 = hix % 100000; + int32_t v2 = lox / 100000; + int32_t v3 = lox % 100000; + for (int i = 4; i != 0; --i) { + dst[i + 0 * 5] = v0 % 10 + '0'; + v0 /= 10; + dst[i + 1 * 5] = v1 % 10 + '0'; + v1 /= 10; + dst[i + 2 * 5] = v2 % 10 + '0'; + v2 /= 10; + dst[i + 3 * 5] = v3 % 10 + '0'; + v3 /= 10; + } + dst[0 * 5] = v0 + '0'; + dst[1 * 5] = v1 + '0'; + dst[2 * 5] = v2 + '0'; + dst[3 * 5] = v3 + '0'; + dst[4 * 5] = 0; +} + +/* { dg-final { scan-assembler-not "idiv" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr83358-2.c b/gcc/testsuite/gcc.target/i386/pr83358-2.c new file mode 100644 index 00000000000..f6039bf72fe --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr83358-2.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=skylake-avx512" } */ + +#include <stdint.h> + +void bin2ascii(uint64_t val, char *dst) { + const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000; + int64_t hix = val / POW10_10; + int64_t lox = val % POW10_10; + int32_t v0 = hix / 100000; + int32_t v1 = hix % 100000; + int32_t v2 = lox / 100000; + int32_t v3 = lox % 100000; + for (int i = 4; i != 0; --i) { + dst[i + 0 * 5] = v0 % 10 + '0'; + v0 /= 10; + dst[i + 1 * 5] = v1 % 10 + '0'; + v1 /= 10; + dst[i + 2 * 5] = v2 % 10 + '0'; + v2 /= 10; + dst[i + 3 * 5] = v3 % 10 + '0'; + v3 /= 10; + } + dst[0 * 5] = v0 + '0'; + dst[1 * 5] = v1 + '0'; + dst[2 * 5] = v2 + '0'; + dst[3 * 5] = v3 + '0'; + dst[4 * 5] = 0; +} + +/* { 
dg-final { scan-assembler-not "idiv" } } */