+2017-12-15 Markus Trippelsdorf <markus@trippelsdorf.de>
+
+ PR target/83358
+ * config/i386/x86-tune-costs.h (skylake_cost, core_cost): Increase
+ div/mod latencies a bit.
+
2017-12-15 Jeff Law <law@redhat.com>
PR tree-optimization/36550
COSTS_N_INSNS (4), /* DI */
COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (8), /* HI */
- COSTS_N_INSNS (11), /* SI */
+ /* Expanding div/mod currently doesn't consider parallelism. So the cost
+ model is not realistic. We compensate by increasing the latencies a bit. */
+ {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (14), /* SI */
COSTS_N_INSNS (76), /* DI */
COSTS_N_INSNS (76)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
COSTS_N_INSNS (4), /* DI */
COSTS_N_INSNS (4)}, /* other */
0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (8), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (8), /* HI */
- /* 8-11 */
- COSTS_N_INSNS (11), /* SI */
- /* 24-81 */
+ /* Expanding div/mod currently doesn't consider parallelism. So the cost
+ model is not realistic. We compensate by increasing the latencies a bit. */
+ {COSTS_N_INSNS (11), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (14), /* SI */
COSTS_N_INSNS (81), /* DI */
COSTS_N_INSNS (81)}, /* other */
COSTS_N_INSNS (1), /* cost of movsx */
+2017-12-15 Markus Trippelsdorf <markus@trippelsdorf.de>
+
+ PR target/83358
+ * gcc.target/i386/pr83358-1.c: New test.
+ * gcc.target/i386/pr83358-2.c: New test.
+
2017-12-15 Jeff Law <law@redhat.com>
PR tree-optimization/36550
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=core2" } */
+
+#include <stdint.h>
+
+void bin2ascii(uint64_t val, char *dst) { /* Store val as 20 zero-padded decimal digits in dst[0..19], then a 0 byte at dst[20]. */
+ const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000; /* 10^10 */
+ int64_t hix = val / POW10_10; /* upper digits; val is uint64_t, so the division is done unsigned */
+ int64_t lox = val % POW10_10; /* lower 10 decimal digits */
+ int32_t v0 = hix / 100000; /* split each half into two 5-digit groups: v0..v3, most significant first */
+ int32_t v1 = hix % 100000;
+ int32_t v2 = lox / 100000;
+ int32_t v3 = lox % 100000;
+ for (int i = 4; i != 0; --i) { /* fill offsets 4..1 of every group, least significant digit first */
+ dst[i + 0 * 5] = v0 % 10 + '0'; /* group k occupies dst[k*5 .. k*5+4] */
+ v0 /= 10;
+ dst[i + 1 * 5] = v1 % 10 + '0';
+ v1 /= 10;
+ dst[i + 2 * 5] = v2 % 10 + '0';
+ v2 /= 10;
+ dst[i + 3 * 5] = v3 % 10 + '0';
+ v3 /= 10;
+ }
+ dst[0 * 5] = v0 + '0'; /* what is left of each group after four divisions is its leading digit */
+ dst[1 * 5] = v1 + '0';
+ dst[2 * 5] = v2 + '0';
+ dst[3 * 5] = v3 + '0';
+ dst[4 * 5] = 0; /* terminating 0 byte at index 20 */
+}
+
+/* { dg-final { scan-assembler-not "idiv" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=skylake-avx512" } */
+
+#include <stdint.h>
+
+void bin2ascii(uint64_t val, char *dst) { /* Store val as 20 zero-padded decimal digits in dst[0..19], then a 0 byte at dst[20]. */
+ const int64_t POW10_10 = ((int64_t)10) * 1000 * 1000 * 1000; /* 10^10 */
+ int64_t hix = val / POW10_10; /* upper digits; val is uint64_t, so the division is done unsigned */
+ int64_t lox = val % POW10_10; /* lower 10 decimal digits */
+ int32_t v0 = hix / 100000; /* split each half into two 5-digit groups: v0..v3, most significant first */
+ int32_t v1 = hix % 100000;
+ int32_t v2 = lox / 100000;
+ int32_t v3 = lox % 100000;
+ for (int i = 4; i != 0; --i) { /* fill offsets 4..1 of every group, least significant digit first */
+ dst[i + 0 * 5] = v0 % 10 + '0'; /* group k occupies dst[k*5 .. k*5+4] */
+ v0 /= 10;
+ dst[i + 1 * 5] = v1 % 10 + '0';
+ v1 /= 10;
+ dst[i + 2 * 5] = v2 % 10 + '0';
+ v2 /= 10;
+ dst[i + 3 * 5] = v3 % 10 + '0';
+ v3 /= 10;
+ }
+ dst[0 * 5] = v0 + '0'; /* what is left of each group after four divisions is its leading digit */
+ dst[1 * 5] = v1 + '0';
+ dst[2 * 5] = v2 + '0';
+ dst[3 * 5] = v3 + '0';
+ dst[4 * 5] = 0; /* terminating 0 byte at index 20 */
+}
+
+/* { dg-final { scan-assembler-not "idiv" } } */