From c1d56e6a737418b7b48a53b355f588ea14f1b9a9 Mon Sep 17 00:00:00 2001
From: Iain Buclaw
Date: Thu, 4 Feb 2021 22:34:22 +0100
Subject: [PATCH] d: Merge upstream dmd 46133f761, druntime 0fd4364c

D front-end changes:
 - Backported built-in function handling from upstream.
 - Added new intrinsic `byteswap(ushort)`.

Druntime changes:
 - Update intrinsic modules core.bitop, core.checkedint, core.simd,
   core.vararg, and core.volatile.
 - Backport platform-specific fixes for runtime modules core.cpuid,
   core.internal.traits, and rt.lifetime.
 - Backport openbsd fixes for core.stdc.stdio.
 - Backport solaris fixes for core.sys.posix.locale, and
   core.thread.osthread (PR98910).

gcc/d/ChangeLog:

	* dmd/MERGE: Merge upstream dmd 46133f761.
	* d-builtins.cc (d_build_builtins_module): Set builtins as
	BUILTINgcc.
	(maybe_set_builtin_1): Likewise.
	* d-frontend.cc (eval_builtin): Adjust condition for early return.
	* intrinsics.cc (maybe_set_intrinsic): Set intrinsics as BUILTINgcc.
	(maybe_expand_intrinsic): Add case for INTRINSIC_BSWAP16.
	* intrinsics.def (INTRINSIC_BT): Update signature.
	(INTRINSIC_BT64): Likewise.
	(INTRINSIC_BSWAP16): New intrinsic.
	(INTRINSIC_VLOAD8): Update module.
	(INTRINSIC_VLOAD16): Likewise.
	(INTRINSIC_VLOAD32): Likewise.
	(INTRINSIC_VLOAD64): Likewise.
	(INTRINSIC_VSTORE8): Likewise.
	(INTRINSIC_VSTORE16): Likewise.
	(INTRINSIC_VSTORE32): Likewise.
	(INTRINSIC_VSTORE64): Likewise.
	(INTRINSIC_ADDS): Update signature.
	(INTRINSIC_ADDSL): Likewise.
	(INTRINSIC_ADDU): Likewise.
	(INTRINSIC_ADDUL): Likewise.
	(INTRINSIC_SUBS): Likewise.
	(INTRINSIC_SUBSL): Likewise.
	(INTRINSIC_SUBU): Likewise.
	(INTRINSIC_SUBUL): Likewise.
	(INTRINSIC_MULS): Likewise.
	(INTRINSIC_MULSL): Likewise.
	(INTRINSIC_MULU): Likewise.
	(INTRINSIC_MULUI): Likewise.
	(INTRINSIC_MULUL): Likewise.
	(INTRINSIC_NEGS): Likewise.
	(INTRINSIC_NEGSL): Likewise.

libphobos/ChangeLog:

	PR d/98910
	* libdruntime/MERGE: Merge upstream druntime 0fd4364c.
	* libdruntime/Makefile.am (DRUNTIME_DSOURCES): Add core/volatile.d.
	* libdruntime/Makefile.in: Regenerate.
	* testsuite/libphobos.allocations/tls_gc_integration.d: Update test.

gcc/testsuite/ChangeLog:

	* gdc.dg/intrinsics.d: Update test.
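As a quick illustration of the front-end change above, here is a minimal D
sketch (not part of the patch) showing the new 2-byte `byteswap` intrinsic
next to the existing 4- and 8-byte `bswap` overloads in `core.bitop`; the
expected values mirror the unittests added further down in this patch.

    import core.bitop : bswap, byteswap;

    void main()
    {
        // New in this merge: 2-byte swap, lowered to __builtin_bswap16 by GDC.
        ushort h = 0xF234;
        assert(byteswap(h) == 0x34F2);

        // Existing intrinsics: 4- and 8-byte swaps.
        uint w = 0x01020304u;
        assert(bswap(w) == 0x04030201u);
        ulong d = 0x01020304_05060708uL;
        assert(bswap(d) == 0x08070605_04030201uL);
    }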
---
 gcc/d/d-builtins.cc                           |    4 +-
 gcc/d/d-frontend.cc                           |    2 +-
 gcc/d/dmd/MERGE                               |    2 +-
 gcc/d/dmd/declaration.h                       |   40 +-
 gcc/d/dmd/dinterpret.c                        |    2 +-
 gcc/d/dmd/idgen.c                             |   38 +
 gcc/d/dmd/root/ctfloat.h                      |   18 +
 gcc/d/intrinsics.cc                           |    7 +-
 gcc/d/intrinsics.def                          |   53 +-
 gcc/testsuite/gdc.dg/intrinsics.d             |    3 +
 libphobos/libdruntime/MERGE                   |    2 +-
 libphobos/libdruntime/Makefile.am             |   31 +-
 libphobos/libdruntime/Makefile.in             |   48 +-
 libphobos/libdruntime/core/bitop.d            |  145 ++-
 libphobos/libdruntime/core/checkedint.d       |   48 +-
 libphobos/libdruntime/core/cpuid.d            |   69 +-
 libphobos/libdruntime/core/internal/traits.d  |  152 ++-
 libphobos/libdruntime/core/simd.d             | 1096 ++++++++++-------
 libphobos/libdruntime/core/stdc/stdio.d       |    2 +-
 libphobos/libdruntime/core/sys/posix/locale.d |   28 +
 libphobos/libdruntime/core/thread/osthread.d  |    8 -
 libphobos/libdruntime/core/vararg.d           |  122 ++
 libphobos/libdruntime/core/volatile.d         |   67 +
 libphobos/libdruntime/rt/lifetime.d           |  109 +-
 .../tls_gc_integration.d                      |    2 +-
 25 files changed, 1329 insertions(+), 769 deletions(-)
 create mode 100644 libphobos/libdruntime/core/volatile.d

diff --git a/gcc/d/d-builtins.cc b/gcc/d/d-builtins.cc
index 3f1533b592f..c45edc2fa3f 100644
--- a/gcc/d/d-builtins.cc
+++ b/gcc/d/d-builtins.cc
@@ -566,7 +566,7 @@ d_build_builtins_module (Module *m)
 				   STCextern, tf);
       DECL_LANG_SPECIFIC (decl) = build_lang_decl (func);
       func->csym = decl;
-      func->builtin = BUILTINyes;
+      func->builtin = BUILTINgcc;
 
       members->push (func);
     }
@@ -706,7 +706,7 @@ maybe_set_builtin_1 (Dsymbol *d)
 	  /* Found a match, tell the frontend this is a builtin.  */
 	  DECL_LANG_SPECIFIC (t) = build_lang_decl (fd);
 	  fd->csym = t;
-	  fd->builtin = BUILTINyes;
+	  fd->builtin = BUILTINgcc;
 	  return;
 	}
     }
diff --git a/gcc/d/d-frontend.cc b/gcc/d/d-frontend.cc
index 32550ecfd64..84c70f8ee6a 100644
--- a/gcc/d/d-frontend.cc
+++ b/gcc/d/d-frontend.cc
@@ -158,7 +158,7 @@ isBuiltin (FuncDeclaration *fd)
 Expression *
 eval_builtin (Loc loc, FuncDeclaration *fd, Expressions *arguments)
 {
-  if (fd->builtin != BUILTINyes)
+  if (fd->builtin == BUILTINunimp)
     return NULL;
 
   tree decl = get_symbol_decl (fd);
diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 342871f9a1a..89397c836e2 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-5e2a81d9cbcd653d9eed52344d664e72ba1355bc
+46133f76172c26c89e2ebf9cd058cd1f1e8807ed
 
 The first line of this file holds the git revision number of the last merge
 done from the dlang/dmd repository.
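For context on the BUILTINyes -> BUILTINgcc/BUILTINunimp rework shown above:
eval_builtin now bails out only for unimplemented builtins, so recognised
intrinsics stay foldable during compile-time function evaluation (CTFE).
A minimal D sketch of the user-visible behaviour, assuming a GDC build with
this patch applied; the static assert is illustrative and not part of the
test suite.

    import core.math : sqrt;

    // sqrt is flagged as a GCC builtin, so the front end can still
    // evaluate it at compile time after this change.
    static assert(sqrt(4.0) == 2.0);

    void main() {}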
diff --git a/gcc/d/dmd/declaration.h b/gcc/d/dmd/declaration.h index a464f9beb71..6a3ec9e7d5c 100644 --- a/gcc/d/dmd/declaration.h +++ b/gcc/d/dmd/declaration.h @@ -503,9 +503,43 @@ enum ILS enum BUILTIN { - BUILTINunknown = -1, // not known if this is a builtin - BUILTINno, // this is not a builtin - BUILTINyes // this is a builtin + BUILTINunknown = 255, /// not known if this is a builtin + BUILTINunimp = 0, /// this is not a builtin + BUILTINgcc, /// this is a GCC builtin + BUILTINllvm, /// this is an LLVM builtin + BUILTINsin, + BUILTINcos, + BUILTINtan, + BUILTINsqrt, + BUILTINfabs, + BUILTINldexp, + BUILTINlog, + BUILTINlog2, + BUILTINlog10, + BUILTINexp, + BUILTINexpm1, + BUILTINexp2, + BUILTINround, + BUILTINfloor, + BUILTINceil, + BUILTINtrunc, + BUILTINcopysign, + BUILTINpow, + BUILTINfmin, + BUILTINfmax, + BUILTINfma, + BUILTINisnan, + BUILTINisinfinity, + BUILTINisfinite, + BUILTINbsf, + BUILTINbsr, + BUILTINbswap, + BUILTINpopcnt, + BUILTINyl2x, + BUILTINyl2xp1, + BUILTINtoPrecFloat, + BUILTINtoPrecDouble, + BUILTINtoPrecReal }; Expression *eval_builtin(Loc loc, FuncDeclaration *fd, Expressions *arguments); diff --git a/gcc/d/dmd/dinterpret.c b/gcc/d/dmd/dinterpret.c index 74c5b40741f..f868790f9e7 100644 --- a/gcc/d/dmd/dinterpret.c +++ b/gcc/d/dmd/dinterpret.c @@ -6801,7 +6801,7 @@ Expression *evaluateIfBuiltin(UnionExp *pue, InterState *istate, Loc loc, size_t nargs = arguments ? arguments->length : 0; if (!pthis) { - if (isBuiltin(fd) == BUILTINyes) + if (isBuiltin(fd) != BUILTINunimp) { Expressions args; args.setDim(nargs); diff --git a/gcc/d/dmd/idgen.c b/gcc/d/dmd/idgen.c index 09855a05688..322a293cf09 100644 --- a/gcc/d/dmd/idgen.c +++ b/gcc/d/dmd/idgen.c @@ -291,6 +291,8 @@ Msgtable msgtable[] = { "entrypoint", "__entrypoint" }, // varargs implementation + { "stdc", NULL }, + { "stdarg", NULL }, { "va_start", NULL }, // Builtin functions @@ -304,16 +306,52 @@ Msgtable msgtable[] = { "_sqrt", "sqrt" }, { "_pow", "pow" }, { "atan2", NULL }, + { "rint", NULL }, + { "ldexp", NULL }, { "rndtol", NULL }, + { "exp", NULL }, { "expm1", NULL }, { "exp2", NULL }, { "yl2x", NULL }, { "yl2xp1", NULL }, + { "log", NULL }, + { "log2", NULL }, + { "log10", NULL }, + { "round", NULL }, + { "floor", NULL }, + { "trunc", NULL }, + { "fmax", NULL }, + { "fmin", NULL }, + { "fma", NULL }, + { "isnan", NULL }, + { "isInfinity", NULL }, + { "isfinite", NULL }, + { "ceil", NULL }, + { "copysign", NULL }, { "fabs", NULL }, + { "toPrec", NULL }, + { "simd", NULL }, + { "__prefetch", NULL }, + { "__simd_sto", NULL }, + { "__simd", NULL }, + { "__simd_ib", NULL }, { "bitop", NULL }, { "bsf", NULL }, { "bsr", NULL }, + { "btc", NULL }, + { "btr", NULL }, + { "bts", NULL }, { "bswap", NULL }, + { "_volatile", "volatile" }, + { "volatileLoad", NULL }, + { "volatileStore", NULL }, + { "_popcnt", NULL }, + { "inp", NULL }, + { "inpl", NULL }, + { "inpw", NULL }, + { "outp", NULL }, + { "outpl", NULL }, + { "outpw", NULL }, // Traits { "isAbstractClass", NULL }, diff --git a/gcc/d/dmd/root/ctfloat.h b/gcc/d/dmd/root/ctfloat.h index c72b8fc2062..4cdf362e598 100644 --- a/gcc/d/dmd/root/ctfloat.h +++ b/gcc/d/dmd/root/ctfloat.h @@ -29,6 +29,24 @@ struct CTFloat static real_t fabs(real_t x); static real_t ldexp(real_t n, int exp); + static real_t round(real_t x); + static real_t floor(real_t x); + static real_t ceil(real_t x); + static real_t trunc(real_t x); + static real_t log(real_t x); + static real_t log2(real_t x); + static real_t log10(real_t x); + static real_t pow(real_t x, real_t y); + static 
real_t exp(real_t x); + static real_t expm1(real_t x); + static real_t exp2(real_t x); + + static real_t fmin(real_t x, real_t y); + static real_t fmax(real_t x, real_t y); + static real_t copysign(real_t x, real_t s); + + static real_t fma(real_t x, real_t y, real_t z); + static bool isIdentical(real_t a, real_t b); static bool isNaN(real_t r); static bool isSNaN(real_t r); diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc index 70c98500085..7f97c1d1fa8 100644 --- a/gcc/d/intrinsics.cc +++ b/gcc/d/intrinsics.cc @@ -81,7 +81,7 @@ maybe_set_intrinsic (FuncDeclaration *decl) /* The builtin flag is updated only if we can evaluate the intrinsic at compile-time. Such as the math or bitop intrinsics. */ - decl->builtin = BUILTINno; + decl->builtin = BUILTINunimp; /* Check if it's a compiler intrinsic. We only require that any internally recognised intrinsics are declared in a module with @@ -177,12 +177,12 @@ maybe_set_intrinsic (FuncDeclaration *decl) built-in function. It could be `int pow(int, int)'. */ tree rettype = TREE_TYPE (TREE_TYPE (decl->csym)); if (mathfn_built_in (rettype, BUILT_IN_POW) != NULL_TREE) - decl->builtin = BUILTINyes; + decl->builtin = BUILTINgcc; break; } default: - decl->builtin = BUILTINyes; + decl->builtin = BUILTINgcc; break; } @@ -809,6 +809,7 @@ maybe_expand_intrinsic (tree callexp) case INTRINSIC_ROR_TIARG: return expand_intrinsic_rotate (intrinsic, callexp); + case INTRINSIC_BSWAP16: case INTRINSIC_BSWAP32: case INTRINSIC_BSWAP64: case INTRINSIC_CEIL: diff --git a/gcc/d/intrinsics.def b/gcc/d/intrinsics.def index e44843e77ff..dc6b104f6d5 100644 --- a/gcc/d/intrinsics.def +++ b/gcc/d/intrinsics.def @@ -42,17 +42,18 @@ DEF_D_BUILTIN (NONE, NONE, 0, 0, 0) DEF_D_BUILTIN (BSF, NONE, "bsf", "core.bitop", "FNaNbNiNfkZi") DEF_D_BUILTIN (BSR, NONE, "bsr", "core.bitop", "FNaNbNiNfkZi") -DEF_D_BUILTIN (BT, NONE, "bt", "core.bitop", "FNaNbNixPkkZi") +DEF_D_BUILTIN (BT, NONE, "bt", "core.bitop", "FNaNbNiMxPkkZi") DEF_D_BUILTIN (BTC, NONE, "btc", "core.bitop", "FNaNbNiPkkZi") DEF_D_BUILTIN (BTR, NONE, "btr", "core.bitop", "FNaNbNiPkkZi") DEF_D_BUILTIN (BTS, NONE, "bts", "core.bitop", "FNaNbNiPkkZi") DEF_D_BUILTIN (BSF64, NONE, "bsf", "core.bitop", "FNaNbNiNfmZi") DEF_D_BUILTIN (BSR64, NONE, "bsr", "core.bitop", "FNaNbNiNfmZi") -DEF_D_BUILTIN (BT64, NONE, "bt", "core.bitop", "FNaNbNixPmmZi") +DEF_D_BUILTIN (BT64, NONE, "bt", "core.bitop", "FNaNbNiMxPmmZi") DEF_D_BUILTIN (BTC64, NONE, "btc", "core.bitop", "FNaNbNiPmmZi") DEF_D_BUILTIN (BTR64, NONE, "btr", "core.bitop", "FNaNbNiPmmZi") DEF_D_BUILTIN (BTS64, NONE, "bts", "core.bitop", "FNaNbNiPmmZi") +DEF_D_BUILTIN (BSWAP16, BSWAP16, "byteswap", "core.bitop", "FNaNbNiNftZt") DEF_D_BUILTIN (BSWAP32, BSWAP32, "bswap", "core.bitop", "FNaNbNiNfkZk") DEF_D_BUILTIN (BSWAP64, BSWAP64, "bswap", "core.bitop", "FNaNbNiNfmZm") @@ -64,32 +65,34 @@ DEF_D_BUILTIN (ROL_TIARG, NONE, "rol", "core.bitop", "FNaI1TZI1T") DEF_D_BUILTIN (ROR, NONE, "ror", "core.bitop", "FNaI1TkZI1T") DEF_D_BUILTIN (ROR_TIARG, NONE, "ror", "core.bitop", "FNaI1TZI1T") -DEF_D_BUILTIN (VLOAD8, NONE, "volatileLoad", "core.bitop", "FNbNiNfPhZh") -DEF_D_BUILTIN (VLOAD16, NONE, "volatileLoad", "core.bitop", "FNbNiNfPtZt") -DEF_D_BUILTIN (VLOAD32, NONE, "volatileLoad", "core.bitop", "FNbNiNfPkZk") -DEF_D_BUILTIN (VLOAD64, NONE, "volatileLoad", "core.bitop", "FNbNiNfPmZm") -DEF_D_BUILTIN (VSTORE8, NONE, "volatileStore", "core.bitop", "FNbNiNfPhhZv") -DEF_D_BUILTIN (VSTORE16, NONE, "volatileStore", "core.bitop", "FNbNiNfPttZv") -DEF_D_BUILTIN (VSTORE32, NONE, 
"volatileStore", "core.bitop", "FNbNiNfPkkZv") -DEF_D_BUILTIN (VSTORE64, NONE, "volatileStore", "core.bitop", "FNbNiNfPmmZv") +/* core.volatile intrinsics. */ + +DEF_D_BUILTIN (VLOAD8, NONE, "volatileLoad", "core.volatile", "FNbNiNfPhZh") +DEF_D_BUILTIN (VLOAD16, NONE, "volatileLoad", "core.volatile", "FNbNiNfPtZt") +DEF_D_BUILTIN (VLOAD32, NONE, "volatileLoad", "core.volatile", "FNbNiNfPkZk") +DEF_D_BUILTIN (VLOAD64, NONE, "volatileLoad", "core.volatile", "FNbNiNfPmZm") +DEF_D_BUILTIN (VSTORE8, NONE, "volatileStore", "core.volatile", "FNbNiNfPhhZv") +DEF_D_BUILTIN (VSTORE16, NONE, "volatileStore", "core.volatile", "FNbNiNfPttZv") +DEF_D_BUILTIN (VSTORE32, NONE, "volatileStore", "core.volatile", "FNbNiNfPkkZv") +DEF_D_BUILTIN (VSTORE64, NONE, "volatileStore", "core.volatile", "FNbNiNfPmmZv") /* core.checkedint intrinsics. */ -DEF_D_BUILTIN (ADDS, NONE, "adds", "core.checkedint", "FNaNbNiNfiiKbZi") -DEF_D_BUILTIN (ADDSL, NONE, "adds", "core.checkedint", "FNaNbNiNfllKbZl") -DEF_D_BUILTIN (ADDU, NONE, "addu", "core.checkedint", "FNaNbNiNfkkKbZk") -DEF_D_BUILTIN (ADDUL, NONE, "addu", "core.checkedint", "FNaNbNiNfmmKbZm") -DEF_D_BUILTIN (SUBS, NONE, "subs", "core.checkedint", "FNaNbNiNfiiKbZi") -DEF_D_BUILTIN (SUBSL, NONE, "subs", "core.checkedint", "FNaNbNiNfllKbZl") -DEF_D_BUILTIN (SUBU, NONE, "subu", "core.checkedint", "FNaNbNiNfkkKbZk") -DEF_D_BUILTIN (SUBUL, NONE, "subu", "core.checkedint", "FNaNbNiNfmmKbZm") -DEF_D_BUILTIN (MULS, NONE, "muls", "core.checkedint", "FNaNbNiNfiiKbZi") -DEF_D_BUILTIN (MULSL, NONE, "muls", "core.checkedint", "FNaNbNiNfllKbZl") -DEF_D_BUILTIN (MULU, NONE, "mulu", "core.checkedint", "FNaNbNiNfkkKbZk") -DEF_D_BUILTIN (MULUI, NONE, "mulu", "core.checkedint", "FNaNbNiNfmkKbZm") -DEF_D_BUILTIN (MULUL, NONE, "mulu", "core.checkedint", "FNaNbNiNfmmKbZm") -DEF_D_BUILTIN (NEGS, NONE, "negs", "core.checkedint", "FNaNbNiNfiKbZi") -DEF_D_BUILTIN (NEGSL, NONE, "negs", "core.checkedint", "FNaNbNiNflKbZl") +DEF_D_BUILTIN (ADDS, NONE, "adds", "core.checkedint", "FiiKbZi") +DEF_D_BUILTIN (ADDSL, NONE, "adds", "core.checkedint", "FllKbZl") +DEF_D_BUILTIN (ADDU, NONE, "addu", "core.checkedint", "FkkKbZk") +DEF_D_BUILTIN (ADDUL, NONE, "addu", "core.checkedint", "FmmKbZm") +DEF_D_BUILTIN (SUBS, NONE, "subs", "core.checkedint", "FiiKbZi") +DEF_D_BUILTIN (SUBSL, NONE, "subs", "core.checkedint", "FllKbZl") +DEF_D_BUILTIN (SUBU, NONE, "subu", "core.checkedint", "FkkKbZk") +DEF_D_BUILTIN (SUBUL, NONE, "subu", "core.checkedint", "FmmKbZm") +DEF_D_BUILTIN (MULS, NONE, "muls", "core.checkedint", "FiiKbZi") +DEF_D_BUILTIN (MULSL, NONE, "muls", "core.checkedint", "FllKbZl") +DEF_D_BUILTIN (MULU, NONE, "mulu", "core.checkedint", "FkkKbZk") +DEF_D_BUILTIN (MULUI, NONE, "mulu", "core.checkedint", "FmkKbZm") +DEF_D_BUILTIN (MULUL, NONE, "mulu", "core.checkedint", "FmmKbZm") +DEF_D_BUILTIN (NEGS, NONE, "negs", "core.checkedint", "FiKbZi") +DEF_D_BUILTIN (NEGSL, NONE, "negs", "core.checkedint", "FlKbZl") /* core.math intrinsics. 
*/ diff --git a/gcc/testsuite/gdc.dg/intrinsics.d b/gcc/testsuite/gdc.dg/intrinsics.d index a7752370582..d9ccc0ec5ce 100644 --- a/gcc/testsuite/gdc.dg/intrinsics.d +++ b/gcc/testsuite/gdc.dg/intrinsics.d @@ -3,6 +3,7 @@ import core.bitop; import core.checkedint; import core.math; +import core.volatile; import core.stdc.stdarg; ////////////////////////////////////////////////////// @@ -24,6 +25,8 @@ int test_btc(size_t *a, size_t b) { return btc(a, b); } int test_btr(size_t *a, size_t b) { return btr(a, b); } // { dg-final { scan-tree-dump-not " = bts " "original" } } int test_bts(size_t *a, size_t b) { return bts(a, b); } +// { dg-final { scan-tree-dump " __builtin_bswap16 " "original" } } +ushort test_byteswap(ushort a) { return byteswap(a); } // { dg-final { scan-tree-dump " __builtin_bswap32 " "original" } } uint test_bswap(uint a) { return bswap(a); } // { dg-final { scan-tree-dump " __builtin_bswap64 " "original" } } diff --git a/libphobos/libdruntime/MERGE b/libphobos/libdruntime/MERGE index 3485bde1200..a4a9a940baf 100644 --- a/libphobos/libdruntime/MERGE +++ b/libphobos/libdruntime/MERGE @@ -1,4 +1,4 @@ -9d0c8364450064d0b6e68da4384f8acd19eb454f +0fd4364c4a4eb2ce0ebb8f613092c5bed7a63bf9 The first line of this file holds the git revision number of the last merge done from the dlang/druntime repository. diff --git a/libphobos/libdruntime/Makefile.am b/libphobos/libdruntime/Makefile.am index df2c06c3dab..945271e028f 100644 --- a/libphobos/libdruntime/Makefile.am +++ b/libphobos/libdruntime/Makefile.am @@ -182,21 +182,22 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \ core/sync/rwmutex.d core/sync/semaphore.d core/thread/context.d \ core/thread/fiber.d core/thread/osthread.d core/thread/package.d \ core/thread/threadbase.d core/thread/threadgroup.d core/thread/types.d \ - core/time.d core/vararg.d gc/bits.d gc/config.d gc/gcinterface.d \ - gc/impl/conservative/gc.d gc/impl/manual/gc.d gc/os.d gc/pooltable.d \ - gc/proxy.d gcc/attribute.d gcc/backtrace.d gcc/builtins.d gcc/deh.d \ - gcc/emutls.d gcc/gthread.d gcc/sections/android.d \ - gcc/sections/elf_shared.d gcc/sections/osx.d gcc/sections/package.d \ - gcc/sections/win32.d gcc/sections/win64.d gcc/unwind/arm.d \ - gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \ - gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \ - rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycast.d rt/arraycat.d \ - rt/cast_.d rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d \ - rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \ - rt/obj.d rt/qsort.d rt/sections.d rt/switch_.d rt/tlsgc.d \ - rt/util/array.d rt/util/container/array.d rt/util/container/common.d \ - rt/util/container/hashtab.d rt/util/container/treap.d rt/util/random.d \ - rt/util/typeinfo.d rt/util/utf.d + core/time.d core/vararg.d core/volatile.d gc/bits.d gc/config.d \ + gc/gcinterface.d gc/impl/conservative/gc.d gc/impl/manual/gc.d gc/os.d \ + gc/pooltable.d gc/proxy.d gcc/attribute.d gcc/backtrace.d \ + gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \ + gcc/sections/android.d gcc/sections/elf_shared.d gcc/sections/osx.d \ + gcc/sections/package.d gcc/sections/win32.d gcc/sections/win64.d \ + gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \ + gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \ + rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \ + rt/arraycast.d rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d \ + rt/deh.d rt/dmain2.d rt/invariant.d rt/lifetime.d rt/memory.d \ + 
rt/minfo.d rt/monitor_.d rt/obj.d rt/qsort.d rt/sections.d \ + rt/switch_.d rt/tlsgc.d rt/util/array.d rt/util/container/array.d \ + rt/util/container/common.d rt/util/container/hashtab.d \ + rt/util/container/treap.d rt/util/random.d rt/util/typeinfo.d \ + rt/util/utf.d DRUNTIME_DSOURCES_STDCXX = core/stdcpp/exception.d \ core/stdcpp/typeinfo.d diff --git a/libphobos/libdruntime/Makefile.in b/libphobos/libdruntime/Makefile.in index aadc6f3ede0..06c02961c86 100644 --- a/libphobos/libdruntime/Makefile.in +++ b/libphobos/libdruntime/Makefile.in @@ -206,14 +206,14 @@ am__objects_1 = core/atomic.lo core/attribute.lo core/bitop.lo \ core/thread/context.lo core/thread/fiber.lo \ core/thread/osthread.lo core/thread/package.lo \ core/thread/threadbase.lo core/thread/threadgroup.lo \ - core/thread/types.lo core/time.lo core/vararg.lo gc/bits.lo \ - gc/config.lo gc/gcinterface.lo gc/impl/conservative/gc.lo \ - gc/impl/manual/gc.lo gc/os.lo gc/pooltable.lo gc/proxy.lo \ - gcc/attribute.lo gcc/backtrace.lo gcc/builtins.lo gcc/deh.lo \ - gcc/emutls.lo gcc/gthread.lo gcc/sections/android.lo \ - gcc/sections/elf_shared.lo gcc/sections/osx.lo \ - gcc/sections/package.lo gcc/sections/win32.lo \ - gcc/sections/win64.lo gcc/unwind/arm.lo \ + core/thread/types.lo core/time.lo core/vararg.lo \ + core/volatile.lo gc/bits.lo gc/config.lo gc/gcinterface.lo \ + gc/impl/conservative/gc.lo gc/impl/manual/gc.lo gc/os.lo \ + gc/pooltable.lo gc/proxy.lo gcc/attribute.lo gcc/backtrace.lo \ + gcc/builtins.lo gcc/deh.lo gcc/emutls.lo gcc/gthread.lo \ + gcc/sections/android.lo gcc/sections/elf_shared.lo \ + gcc/sections/osx.lo gcc/sections/package.lo \ + gcc/sections/win32.lo gcc/sections/win64.lo gcc/unwind/arm.lo \ gcc/unwind/arm_common.lo gcc/unwind/c6x.lo \ gcc/unwind/generic.lo gcc/unwind/package.lo gcc/unwind/pe.lo \ object.lo rt/aApply.lo rt/aApplyR.lo rt/aaA.lo rt/adi.lo \ @@ -808,21 +808,22 @@ DRUNTIME_DSOURCES = core/atomic.d core/attribute.d core/bitop.d \ core/sync/rwmutex.d core/sync/semaphore.d core/thread/context.d \ core/thread/fiber.d core/thread/osthread.d core/thread/package.d \ core/thread/threadbase.d core/thread/threadgroup.d core/thread/types.d \ - core/time.d core/vararg.d gc/bits.d gc/config.d gc/gcinterface.d \ - gc/impl/conservative/gc.d gc/impl/manual/gc.d gc/os.d gc/pooltable.d \ - gc/proxy.d gcc/attribute.d gcc/backtrace.d gcc/builtins.d gcc/deh.d \ - gcc/emutls.d gcc/gthread.d gcc/sections/android.d \ - gcc/sections/elf_shared.d gcc/sections/osx.d gcc/sections/package.d \ - gcc/sections/win32.d gcc/sections/win64.d gcc/unwind/arm.d \ - gcc/unwind/arm_common.d gcc/unwind/c6x.d gcc/unwind/generic.d \ - gcc/unwind/package.d gcc/unwind/pe.d object.d rt/aApply.d rt/aApplyR.d \ - rt/aaA.d rt/adi.d rt/arrayassign.d rt/arraycast.d rt/arraycat.d \ - rt/cast_.d rt/config.d rt/critical_.d rt/deh.d rt/dmain2.d \ - rt/invariant.d rt/lifetime.d rt/memory.d rt/minfo.d rt/monitor_.d \ - rt/obj.d rt/qsort.d rt/sections.d rt/switch_.d rt/tlsgc.d \ - rt/util/array.d rt/util/container/array.d rt/util/container/common.d \ - rt/util/container/hashtab.d rt/util/container/treap.d rt/util/random.d \ - rt/util/typeinfo.d rt/util/utf.d + core/time.d core/vararg.d core/volatile.d gc/bits.d gc/config.d \ + gc/gcinterface.d gc/impl/conservative/gc.d gc/impl/manual/gc.d gc/os.d \ + gc/pooltable.d gc/proxy.d gcc/attribute.d gcc/backtrace.d \ + gcc/builtins.d gcc/deh.d gcc/emutls.d gcc/gthread.d \ + gcc/sections/android.d gcc/sections/elf_shared.d gcc/sections/osx.d \ + gcc/sections/package.d gcc/sections/win32.d 
gcc/sections/win64.d \ + gcc/unwind/arm.d gcc/unwind/arm_common.d gcc/unwind/c6x.d \ + gcc/unwind/generic.d gcc/unwind/package.d gcc/unwind/pe.d object.d \ + rt/aApply.d rt/aApplyR.d rt/aaA.d rt/adi.d rt/arrayassign.d \ + rt/arraycast.d rt/arraycat.d rt/cast_.d rt/config.d rt/critical_.d \ + rt/deh.d rt/dmain2.d rt/invariant.d rt/lifetime.d rt/memory.d \ + rt/minfo.d rt/monitor_.d rt/obj.d rt/qsort.d rt/sections.d \ + rt/switch_.d rt/tlsgc.d rt/util/array.d rt/util/container/array.d \ + rt/util/container/common.d rt/util/container/hashtab.d \ + rt/util/container/treap.d rt/util/random.d rt/util/typeinfo.d \ + rt/util/utf.d DRUNTIME_DSOURCES_STDCXX = core/stdcpp/exception.d \ core/stdcpp/typeinfo.d @@ -1178,6 +1179,7 @@ core/thread/threadgroup.lo: core/thread/$(am__dirstamp) core/thread/types.lo: core/thread/$(am__dirstamp) core/time.lo: core/$(am__dirstamp) core/vararg.lo: core/$(am__dirstamp) +core/volatile.lo: core/$(am__dirstamp) gc/$(am__dirstamp): @$(MKDIR_P) gc @: > gc/$(am__dirstamp) diff --git a/libphobos/libdruntime/core/bitop.d b/libphobos/libdruntime/core/bitop.d index 0daee55c7e9..25b5cd515b2 100644 --- a/libphobos/libdruntime/core/bitop.d +++ b/libphobos/libdruntime/core/bitop.d @@ -267,7 +267,7 @@ unittest * (No longer an intrisic - the compiler recognizes the patterns * in the body.) */ -int bt(in size_t* p, size_t bitnum) pure @system +int bt(const scope size_t* p, size_t bitnum) pure @system { static if (size_t.sizeof == 8) return ((p[bitnum >> 6] & (1L << (bitnum & 63)))) != 0; @@ -494,6 +494,34 @@ struct BitRange testIt(100, 6, 45, 89, 92, 99); } +/** + * Swaps bytes in a 2 byte ushort. + * Params: + * x = value + * Returns: + * `x` with bytes swapped + */ +pragma(inline, false) +ushort byteswap(ushort x) pure +{ + /* Calling it bswap(ushort) would break existing code that calls bswap(uint). + * + * This pattern is meant to be recognized by the dmd code generator. + * Don't change it without checking that an XCH instruction is still + * used to implement it. + * Inlining may also throw it off. + */ + return cast(ushort) (((x >> 8) & 0xFF) | ((x << 8) & 0xFF00u)); +} + +/// +unittest +{ + assert(byteswap(cast(ushort)0xF234) == 0x34F2); + static ushort xx = 0xF234; + assert(byteswap(xx) == 0x34F2); +} + /** * Swaps bytes in a 4 byte uint end-to-end, i.e. byte 0 becomes * byte 3, byte 1 becomes byte 2, byte 2 becomes byte 1, byte 3 @@ -501,19 +529,27 @@ struct BitRange */ uint bswap(uint v) pure; +/// +unittest +{ + assert(bswap(0x01020304u) == 0x04030201u); + static uint xx = 0x10203040u; + assert(bswap(xx) == 0x40302010u); +} + /** * Swaps bytes in an 8 byte ulong end-to-end, i.e. byte 0 becomes * byte 7, byte 1 becomes byte 6, etc. + * This is meant to be recognized by the compiler as an intrinsic. */ -ulong bswap(ulong v) pure -{ - auto sv = Split64(v); - - const temp = sv.lo; - sv.lo = bswap(sv.hi); - sv.hi = bswap(temp); +ulong bswap(ulong v) pure; - return (cast(ulong) sv.hi << 32) | sv.lo; +/// +unittest +{ + assert(bswap(0x01020304_05060708uL) == 0x08070605_04030201uL); + static ulong xx = 0x10203040_50607080uL; + assert(bswap(xx) == 0x80706050_40302010uL); } version (DigitalMars) version (AnyX86) @system // not pure @@ -722,57 +758,14 @@ version (DigitalMars) version (AnyX86) } -/************************************* - * Read/write value from/to the memory location indicated by ptr. 
- * - * These functions are recognized by the compiler, and calls to them are guaranteed - * to not be removed (as dead assignment elimination or presumed to have no effect) - * or reordered in the same thread. - * - * These reordering guarantees are only made with regards to other - * operations done through these functions; the compiler is free to reorder regular - * loads/stores with regards to loads/stores done through these functions. - * - * This is useful when dealing with memory-mapped I/O (MMIO) where a store can - * have an effect other than just writing a value, or where sequential loads - * with no intervening stores can retrieve - * different values from the same location due to external stores to the location. - * - * These functions will, when possible, do the load/store as a single operation. In - * general, this is possible when the size of the operation is less than or equal to - * $(D (void*).sizeof), although some targets may support larger operations. If the - * load/store cannot be done as a single operation, multiple smaller operations will be used. - * - * These are not to be conflated with atomic operations. They do not guarantee any - * atomicity. This may be provided by coincidence as a result of the instructions - * used on the target, but this should not be relied on for portable programs. - * Further, no memory fences are implied by these functions. - * They should not be used for communication between threads. - * They may be used to guarantee a write or read cycle occurs at a specified address. - */ - -ubyte volatileLoad(ubyte * ptr); -ushort volatileLoad(ushort* ptr); /// ditto -uint volatileLoad(uint * ptr); /// ditto -ulong volatileLoad(ulong * ptr); /// ditto - -void volatileStore(ubyte * ptr, ubyte value); /// ditto -void volatileStore(ushort* ptr, ushort value); /// ditto -void volatileStore(uint * ptr, uint value); /// ditto -void volatileStore(ulong * ptr, ulong value); /// ditto - -@system unittest +deprecated("volatileLoad has been moved to core.volatile. Use core.volatile.volatileLoad instead.") { - alias TT(T...) = T; + public import core.volatile : volatileLoad; +} - foreach (T; TT!(ubyte, ushort, uint, ulong)) - { - T u; - T* p = &u; - volatileStore(p, 1); - T r = volatileLoad(p); - assert(r == u); - } +deprecated("volatileStore has been moved to core.volatile. Use core.volatile.volatileStore instead.") +{ + public import core.volatile : volatileStore; } @@ -954,51 +947,51 @@ version (D_InlineAsm_X86_64) * Bitwise rotate `value` left (`rol`) or right (`ror`) by * `count` bit positions. 
*/ -pure T rol(T)(in T value, in uint count) +pure T rol(T)(const T value, const uint count) if (__traits(isIntegral, T) && __traits(isUnsigned, T)) { assert(count < 8 * T.sizeof); - return cast(T) ((value << count) | (value >> (-count & (T.sizeof * 8 - 1)))); + return cast(T) ((value << count) | (value >> (T.sizeof * 8 - count))); } /// ditto -pure T ror(T)(in T value, in uint count) +pure T ror(T)(const T value, const uint count) if (__traits(isIntegral, T) && __traits(isUnsigned, T)) { assert(count < 8 * T.sizeof); - return cast(T) ((value >> count) | (value << (-count & (T.sizeof * 8 - 1)))); + return cast(T) ((value >> count) | (value << (T.sizeof * 8 - count))); } /// ditto -pure T rol(uint count, T)(in T value) +pure T rol(uint count, T)(const T value) if (__traits(isIntegral, T) && __traits(isUnsigned, T)) { static assert(count < 8 * T.sizeof); - return cast(T) ((value << count) | (value >> (-count & (T.sizeof * 8 - 1)))); + return cast(T) ((value << count) | (value >> (T.sizeof * 8 - count))); } /// ditto -pure T ror(uint count, T)(in T value) +pure T ror(uint count, T)(const T value) if (__traits(isIntegral, T) && __traits(isUnsigned, T)) { static assert(count < 8 * T.sizeof); - return cast(T) ((value >> count) | (value << (-count & (T.sizeof * 8 - 1)))); + return cast(T) ((value >> count) | (value << (T.sizeof * 8 - count))); } /// unittest { - ubyte a = 0b10101010U; - ulong b = ulong.max; + ubyte a = 0b11110000U; + ulong b = ~1UL; - assert(rol(a, 1) == 0b01010101); - assert(ror(a, 1) == 0b01010101); - assert(rol(a, 3) == 0b01010101); - assert(ror(a, 3) == 0b01010101); + assert(rol(a, 1) == 0b11100001); + assert(ror(a, 1) == 0b01111000); + assert(rol(a, 3) == 0b10000111); + assert(ror(a, 3) == 0b00011110); assert(rol(a, 0) == a); assert(ror(a, 0) == a); - assert(rol(b, 63) == ulong.max); - assert(ror(b, 63) == ulong.max); + assert(rol(b, 63) == ~(1UL << 63)); + assert(ror(b, 63) == ~2UL); - assert(rol!3(a) == 0b01010101); - assert(ror!3(a) == 0b01010101); + assert(rol!3(a) == 0b10000111); + assert(ror!3(a) == 0b00011110); } diff --git a/libphobos/libdruntime/core/checkedint.d b/libphobos/libdruntime/core/checkedint.d index 237c8e412cc..57209adcbeb 100644 --- a/libphobos/libdruntime/core/checkedint.d +++ b/libphobos/libdruntime/core/checkedint.d @@ -47,7 +47,7 @@ pure: */ pragma(inline, true) -int adds(int x, int y, ref bool overflow) +int adds()(int x, int y, ref bool overflow) { long r = cast(long)x + cast(long)y; if (r < int.min || r > int.max) @@ -75,7 +75,7 @@ unittest /// ditto pragma(inline, true) -long adds(long x, long y, ref bool overflow) +long adds()(long x, long y, ref bool overflow) { long r = cast(ulong)x + cast(ulong)y; if (x < 0 && y < 0 && r >= 0 || @@ -106,7 +106,7 @@ static if (is(cent)) { /// ditto pragma(inline, true) -cent adds(cent x, cent y, ref bool overflow) +cent adds()(cent x, cent y, ref bool overflow) { cent r = cast(ucent)x + cast(ucent)y; if (x < 0 && y < 0 && r >= 0 || @@ -149,7 +149,7 @@ unittest */ pragma(inline, true) -uint addu(uint x, uint y, ref bool overflow) +uint addu()(uint x, uint y, ref bool overflow) { immutable uint r = x + y; if (r < x || r < y) @@ -177,7 +177,7 @@ unittest /// ditto pragma(inline, true) -ulong addu(ulong x, ulong y, ref bool overflow) +ulong addu()(ulong x, ulong y, ref bool overflow) { immutable ulong r = x + y; if (r < x || r < y) @@ -207,7 +207,7 @@ static if (is(ucent)) { /// ditto pragma(inline, true) -ucent addu(ucent x, ucent y, ref bool overflow) +ucent addu()(ucent x, ucent y, ref bool overflow) { 
immutable ucent r = x + y; if (r < x || r < y) @@ -249,7 +249,7 @@ unittest */ pragma(inline, true) -int subs(int x, int y, ref bool overflow) +int subs()(int x, int y, ref bool overflow) { immutable long r = cast(long)x - cast(long)y; if (r < int.min || r > int.max) @@ -277,7 +277,7 @@ unittest /// ditto pragma(inline, true) -long subs(long x, long y, ref bool overflow) +long subs()(long x, long y, ref bool overflow) { immutable long r = cast(ulong)x - cast(ulong)y; if (x < 0 && y >= 0 && r >= 0 || @@ -310,7 +310,7 @@ static if (is(cent)) { /// ditto pragma(inline, true) -cent subs(cent x, cent y, ref bool overflow) +cent subs()(cent x, cent y, ref bool overflow) { immutable cent r = cast(ucent)x - cast(ucent)y; if (x < 0 && y >= 0 && r >= 0 || @@ -355,7 +355,7 @@ unittest */ pragma(inline, true) -uint subu(uint x, uint y, ref bool overflow) +uint subu()(uint x, uint y, ref bool overflow) { if (x < y) overflow = true; @@ -383,7 +383,7 @@ unittest /// ditto pragma(inline, true) -ulong subu(ulong x, ulong y, ref bool overflow) +ulong subu()(ulong x, ulong y, ref bool overflow) { if (x < y) overflow = true; @@ -412,7 +412,7 @@ static if (is(ucent)) { /// ditto pragma(inline, true) -ucent subu(ucent x, ucent y, ref bool overflow) +ucent subu()(ucent x, ucent y, ref bool overflow) { if (x < y) overflow = true; @@ -450,7 +450,7 @@ unittest */ pragma(inline, true) -int negs(int x, ref bool overflow) +int negs()(int x, ref bool overflow) { if (x == int.min) overflow = true; @@ -474,7 +474,7 @@ unittest /// ditto pragma(inline, true) -long negs(long x, ref bool overflow) +long negs()(long x, ref bool overflow) { if (x == long.min) overflow = true; @@ -500,7 +500,7 @@ static if (is(cent)) { /// ditto pragma(inline, true) -cent negs(cent x, ref bool overflow) +cent negs()(cent x, ref bool overflow) { if (x == cent.min) overflow = true; @@ -538,7 +538,7 @@ unittest */ pragma(inline, true) -int muls(int x, int y, ref bool overflow) +int muls()(int x, int y, ref bool overflow) { long r = cast(long)x * cast(long)y; if (r < int.min || r > int.max) @@ -568,11 +568,13 @@ unittest /// ditto pragma(inline, true) -long muls(long x, long y, ref bool overflow) +long muls()(long x, long y, ref bool overflow) { immutable long r = cast(ulong)x * cast(ulong)y; enum not0or1 = ~1L; - if ((x & not0or1) && ((r == y)? r : (r / x) != y)) + if ((x & not0or1) && + ((r == y) ? 
r != 0 + : (r == 0x8000_0000_0000_0000 && x == -1L) || ((r / x) != y))) overflow = true; return r; } @@ -604,7 +606,7 @@ static if (is(cent)) { /// ditto pragma(inline, true) -cent muls(cent x, cent y, ref bool overflow) +cent muls()(cent x, cent y, ref bool overflow) { immutable cent r = cast(ucent)x * cast(ucent)y; enum not0or1 = ~1L; @@ -652,7 +654,7 @@ unittest */ pragma(inline, true) -uint mulu(uint x, uint y, ref bool overflow) +uint mulu()(uint x, uint y, ref bool overflow) { immutable ulong r = ulong(x) * ulong(y); if (r >> 32) @@ -682,7 +684,7 @@ unittest /// ditto pragma(inline, true) -ulong mulu(ulong x, uint y, ref bool overflow) +ulong mulu()(ulong x, uint y, ref bool overflow) { ulong r = x * y; if (x >> 32 && @@ -693,7 +695,7 @@ ulong mulu(ulong x, uint y, ref bool overflow) /// ditto pragma(inline, true) -ulong mulu(ulong x, ulong y, ref bool overflow) +ulong mulu()(ulong x, ulong y, ref bool overflow) { immutable ulong r = x * y; if ((x | y) >> 32 && @@ -751,7 +753,7 @@ static if (is(ucent)) { /// ditto pragma(inline, true) -ucent mulu(ucent x, ucent y, ref bool overflow) +ucent mulu()(ucent x, ucent y, ref bool overflow) { immutable ucent r = x * y; if (x && (r / x) != y) diff --git a/libphobos/libdruntime/core/cpuid.d b/libphobos/libdruntime/core/cpuid.d index 2ba13b55bf1..e31f776d7ee 100644 --- a/libphobos/libdruntime/core/cpuid.d +++ b/libphobos/libdruntime/core/cpuid.d @@ -56,6 +56,9 @@ module core.cpuid; +version (GNU) version = GNU_OR_LDC; +version (LDC) version = GNU_OR_LDC; + @trusted: nothrow: @nogc: @@ -318,10 +321,10 @@ private: struct CpuFeatures { bool probablyIntel; // true = _probably_ an Intel processor, might be faking - bool probablyAMD; // true = _probably_ an AMD processor + bool probablyAMD; // true = _probably_ an AMD or Hygon processor string processorName; - char [12] vendorID; - char [48] processorNameBuffer; + char [12] vendorID = 0; + char [48] processorNameBuffer = 0; uint features = 0; // mmx, sse, sse2, hyperthreading, etc uint miscfeatures = 0; // sse3, etc. uint extfeatures = 0; // HLE, AVX2, RTM, etc. @@ -426,7 +429,7 @@ CpuFeatures* getCpuFeatures() @nogc nothrow } -version (GNU) { +version (GNU_OR_LDC) { version (X86) enum supportedX86 = true; else version (X86_64) @@ -509,12 +512,12 @@ void getcacheinfoCPUID2() // for old single-core CPUs. uint numinfos = 1; do { - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2); } else asm pure nothrow @nogc { mov EAX, 2; cpuid; - mov a, EAX; + mov a+0, EAX; mov a+4, EBX; mov a+8, ECX; mov a+12, EDX; @@ -553,7 +556,7 @@ void getcacheinfoCPUID4() int cachenum = 0; for (;;) { uint a, b, number_of_sets; - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx"; } else asm pure nothrow @nogc { mov EAX, 4; @@ -593,7 +596,7 @@ void getcacheinfoCPUID4() void getAMDcacheinfo() { uint dummy, c5, c6, d6; - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx"; } else asm pure nothrow @nogc { mov EAX, 0x8000_0005; // L1 cache @@ -612,7 +615,7 @@ void getAMDcacheinfo() // AMD K6-III or K6-2+ or later. 
ubyte numcores = 1; if (max_extended_cpuid >= 0x8000_0008) { - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (dummy), "=c" (numcores) : "a" (0x8000_0008) : "ebx", "edx"; } else asm pure nothrow @nogc { mov EAX, 0x8000_0008; @@ -623,7 +626,7 @@ void getAMDcacheinfo() if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores; } - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx"; } else asm pure nothrow @nogc { mov EAX, 0x8000_0006; // L2/L3 cache @@ -652,7 +655,7 @@ void getCpuInfo0B() int threadsPerCore; uint a, b, c, d; do { - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level); } else asm pure nothrow @nogc { mov EAX, 0x0B; @@ -684,7 +687,7 @@ void cpuidX86() uint a, b, c, d; uint* venptr = cast(uint*)cf.vendorID.ptr; - version (GNU) + version (GNU_OR_LDC) { asm pure nothrow @nogc { "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0); @@ -729,9 +732,9 @@ void cpuidX86() cf.probablyIntel = cf.vendorID == "GenuineIntel"; - cf.probablyAMD = cf.vendorID == "AuthenticAMD"; + cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine"); uint apic = 0; // brand index, apic id - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1); } else { asm pure nothrow @nogc { @@ -754,7 +757,7 @@ void cpuidX86() if (max_cpuid >= 7) { - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx"; } else { uint ext; @@ -770,8 +773,11 @@ void cpuidX86() if (cf.miscfeatures & OSXSAVE_BIT) { - version (GNU) asm pure nothrow @nogc { - "xgetbv" : "=a" (a), "=d" (d) : "c" (0); + version (GNU_OR_LDC) asm pure nothrow @nogc { + /* Old assemblers do not recognize xgetbv, and there is no easy way + * to conditionally compile based on the assembler used, so use the + * raw .byte sequence instead. 
*/ + ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0); } else asm pure nothrow @nogc { mov ECX, 0; xgetbv; @@ -784,7 +790,7 @@ void cpuidX86() cf.amdfeatures = 0; cf.amdmiscfeatures = 0; if (max_extended_cpuid >= 0x8000_0001) { - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx"; } else { asm pure nothrow @nogc { @@ -805,7 +811,7 @@ void cpuidX86() cf.maxCores = 1; if (hyperThreadingBit) { // determine max number of cores for AMD - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx"; } else asm pure nothrow @nogc { mov EAX, 0x8000_0008; @@ -818,7 +824,7 @@ void cpuidX86() if (max_extended_cpuid >= 0x8000_0004) { uint* pnb = cast(uint*)cf.processorNameBuffer.ptr; - version (GNU) + version (GNU_OR_LDC) { asm pure nothrow @nogc { "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002); @@ -950,7 +956,7 @@ void cpuidX86() else cf.maxThreads = cf.maxCores; if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) { - version (GNU) asm pure nothrow @nogc { + version (GNU_OR_LDC) asm pure nothrow @nogc { "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx"; } else { asm pure nothrow @nogc { @@ -974,21 +980,18 @@ bool hasCPUID() else { uint flags; - version (GNU) + version (GNU_OR_LDC) { // http://wiki.osdev.org/CPUID#Checking_CPUID_availability - // ASM template supports both AT&T and Intel syntax. asm nothrow @nogc { " - pushf{l|d} # Save EFLAGS - pushf{l|d} # Store EFLAGS - xor{l $0x00200000, (%%esp)| dword ptr [esp], 0x00200000} - # Invert the ID bit in stored EFLAGS - popf{l|d} # Load stored EFLAGS (with ID bit inverted) - pushf{l|d} # Store EFLAGS again (ID bit may or may not be inverted) - pop {%%}eax # eax = modified EFLAGS (ID bit may or may not be inverted) - xor {(%%esp), %%eax|eax, [esp]} - # eax = whichever bits were changed - popf{l|d} # Restore original EFLAGS + pushfl # Save EFLAGS + pushfl # Store EFLAGS + xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS + popfl # Load stored EFLAGS (with ID bit inverted) + pushfl # Store EFLAGS again (ID bit may or may not be inverted) + popl %%eax # eax = modified EFLAGS (ID bit may or may not be inverted) + xorl (%%esp), %%eax # eax = whichever bits were changed + popfl # Restore original EFLAGS " : "=a" (flags); } } diff --git a/libphobos/libdruntime/core/internal/traits.d b/libphobos/libdruntime/core/internal/traits.d index e56f016c355..9f79dd014b8 100644 --- a/libphobos/libdruntime/core/internal/traits.d +++ b/libphobos/libdruntime/core/internal/traits.d @@ -8,10 +8,16 @@ */ module core.internal.traits; -/// taken from std.typetuple.TypeTuple -template TypeTuple(TList...) +alias AliasSeq(TList...) = TList; + +template Fields(T) { - alias TypeTuple = TList; + static if (is(T == struct) || is(T == union)) + alias Fields = typeof(T.tupleof[0 .. 
$ - __traits(isNested, T)]); + else static if (is(T == class)) + alias Fields = typeof(T.tupleof); + else + alias Fields = AliasSeq!T; } T trustedCast(T, U)(auto ref U u) @trusted pure nothrow @@ -109,17 +115,17 @@ template staticIota(int beg, int end) { static if (beg >= end) { - alias staticIota = TypeTuple!(); + alias staticIota = AliasSeq!(); } else { - alias staticIota = TypeTuple!(+beg); + alias staticIota = AliasSeq!(+beg); } } else { enum mid = beg + (end - beg) / 2; - alias staticIota = TypeTuple!(staticIota!(beg, mid), staticIota!(mid, end)); + alias staticIota = AliasSeq!(staticIota!(beg, mid), staticIota!(mid, end)); } } @@ -235,24 +241,150 @@ template hasElaborateCopyConstructor(T...) enum bool hasElaborateCopyConstructor = false; } +template hasUnsharedIndirections(T) +{ + static if (is(T == immutable)) + enum hasUnsharedIndirections = false; + else static if (is(T == struct) || is(T == union)) + enum hasUnsharedIndirections = anySatisfy!(.hasUnsharedIndirections, Fields!T); + else static if (is(T : E[N], E, size_t N)) + enum hasUnsharedIndirections = is(E == void) ? false : hasUnsharedIndirections!E; + else static if (isFunctionPointer!T) + enum hasUnsharedIndirections = false; + else static if (isPointer!T) + enum hasUnsharedIndirections = !is(T : shared(U)*, U) && !is(T : immutable(U)*, U); + else static if (isDynamicArray!T) + enum hasUnsharedIndirections = !is(T : shared(V)[], V) && !is(T : immutable(V)[], V); + else static if (is(T == class) || is(T == interface)) + enum hasUnsharedIndirections = !is(T : shared(W), W); + else + enum hasUnsharedIndirections = isDelegate!T || __traits(isAssociativeArray, T); // TODO: how to handle these? +} + +unittest +{ + static struct Foo { shared(int)* val; } + + static assert(!hasUnsharedIndirections!(immutable(char)*)); + static assert(!hasUnsharedIndirections!(string)); + + static assert(!hasUnsharedIndirections!(Foo)); + static assert( hasUnsharedIndirections!(Foo*)); + static assert(!hasUnsharedIndirections!(shared(Foo)*)); + static assert(!hasUnsharedIndirections!(immutable(Foo)*)); +} + +enum bool isAggregateType(T) = is(T == struct) || is(T == union) || + is(T == class) || is(T == interface); + +enum bool isPointer(T) = is(T == U*, U) && !isAggregateType!T; + +enum bool isDynamicArray(T) = is(DynamicArrayTypeOf!T) && !isAggregateType!T; + +template OriginalType(T) +{ + template Impl(T) + { + static if (is(T U == enum)) alias Impl = OriginalType!U; + else alias Impl = T; + } + + alias OriginalType = ModifyTypePreservingTQ!(Impl, T); +} + +template DynamicArrayTypeOf(T) +{ + static if (is(AliasThisTypeOf!T AT) && !is(AT[] == AT)) + alias X = DynamicArrayTypeOf!AT; + else + alias X = OriginalType!T; + + static if (is(Unqual!X : E[], E) && !is(typeof({ enum n = X.length; }))) + alias DynamicArrayTypeOf = X; + else + static assert(0, T.stringof ~ " is not a dynamic array"); +} + +private template AliasThisTypeOf(T) + if (isAggregateType!T) +{ + alias members = __traits(getAliasThis, T); + + static if (members.length == 1) + alias AliasThisTypeOf = typeof(__traits(getMember, T.init, members[0])); + else + static assert(0, T.stringof~" does not have alias this type"); +} + +template isFunctionPointer(T...) + if (T.length == 1) +{ + static if (is(T[0] U) || is(typeof(T[0]) U)) + { + static if (is(U F : F*) && is(F == function)) + enum bool isFunctionPointer = true; + else + enum bool isFunctionPointer = false; + } + else + enum bool isFunctionPointer = false; +} + +template isDelegate(T...) 
+ if (T.length == 1) +{ + static if (is(typeof(& T[0]) U : U*) && is(typeof(& T[0]) U == delegate)) + { + // T is a (nested) function symbol. + enum bool isDelegate = true; + } + else static if (is(T[0] W) || is(typeof(T[0]) W)) + { + // T is an expression or a type. Take the type of it and examine. + enum bool isDelegate = is(W == delegate); + } + else + enum bool isDelegate = false; +} + // std.meta.Filter template Filter(alias pred, TList...) { static if (TList.length == 0) { - alias Filter = TypeTuple!(); + alias Filter = AliasSeq!(); } else static if (TList.length == 1) { static if (pred!(TList[0])) - alias Filter = TypeTuple!(TList[0]); + alias Filter = AliasSeq!(TList[0]); else - alias Filter = TypeTuple!(); + alias Filter = AliasSeq!(); + } + /* The next case speeds up compilation by reducing + * the number of Filter instantiations + */ + else static if (TList.length == 2) + { + static if (pred!(TList[0])) + { + static if (pred!(TList[1])) + alias Filter = AliasSeq!(TList[0], TList[1]); + else + alias Filter = AliasSeq!(TList[0]); + } + else + { + static if (pred!(TList[1])) + alias Filter = AliasSeq!(TList[1]); + else + alias Filter = AliasSeq!(); + } } else { alias Filter = - TypeTuple!( + AliasSeq!( Filter!(pred, TList[ 0 .. $/2]), Filter!(pred, TList[$/2 .. $ ])); } diff --git a/libphobos/libdruntime/core/simd.d b/libphobos/libdruntime/core/simd.d index 32e2aaf5cfd..11a47118319 100644 --- a/libphobos/libdruntime/core/simd.d +++ b/libphobos/libdruntime/core/simd.d @@ -5,9 +5,10 @@ * * Source: $(DRUNTIMESRC core/_simd.d) * - * Copyright: Copyright Digital Mars 2012. - * License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0). - * Authors: $(WEB digitalmars.com, Walter Bright), + * Copyright: Copyright Digital Mars 2012-2020 + * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 
+ * Authors: $(HTTP digitalmars.com, Walter Bright), + * Source: $(DRUNTIMESRC core/_simd.d) */ module core.simd; @@ -38,470 +39,523 @@ template Vector(T) /* Handy aliases */ -static if (is(Vector!(void[8]))) alias Vector!(void[8]) void8; /// -static if (is(Vector!(double[1]))) alias Vector!(double[1]) double1; /// -static if (is(Vector!(float[2]))) alias Vector!(float[2]) float2; /// -static if (is(Vector!(byte[8]))) alias Vector!(byte[8]) byte8; /// -static if (is(Vector!(ubyte[8]))) alias Vector!(ubyte[8]) ubyte8; /// -static if (is(Vector!(short[4]))) alias Vector!(short[4]) short4; /// -static if (is(Vector!(ushort[4]))) alias Vector!(ushort[4]) ushort4; /// -static if (is(Vector!(int[2]))) alias Vector!(int[2]) int2; /// -static if (is(Vector!(uint[2]))) alias Vector!(uint[2]) uint2; /// -static if (is(Vector!(long[1]))) alias Vector!(long[1]) long1; /// -static if (is(Vector!(ulong[1]))) alias Vector!(ulong[1]) ulong1; /// - -static if (is(Vector!(void[16]))) alias Vector!(void[16]) void16; /// -static if (is(Vector!(double[2]))) alias Vector!(double[2]) double2; /// -static if (is(Vector!(float[4]))) alias Vector!(float[4]) float4; /// -static if (is(Vector!(byte[16]))) alias Vector!(byte[16]) byte16; /// -static if (is(Vector!(ubyte[16]))) alias Vector!(ubyte[16]) ubyte16; /// -static if (is(Vector!(short[8]))) alias Vector!(short[8]) short8; /// -static if (is(Vector!(ushort[8]))) alias Vector!(ushort[8]) ushort8; /// -static if (is(Vector!(int[4]))) alias Vector!(int[4]) int4; /// -static if (is(Vector!(uint[4]))) alias Vector!(uint[4]) uint4; /// -static if (is(Vector!(long[2]))) alias Vector!(long[2]) long2; /// -static if (is(Vector!(ulong[2]))) alias Vector!(ulong[2]) ulong2; /// - -static if (is(Vector!(void[32]))) alias Vector!(void[32]) void32; /// -static if (is(Vector!(double[4]))) alias Vector!(double[4]) double4; /// -static if (is(Vector!(float[8]))) alias Vector!(float[8]) float8; /// -static if (is(Vector!(byte[32]))) alias Vector!(byte[32]) byte32; /// -static if (is(Vector!(ubyte[32]))) alias Vector!(ubyte[32]) ubyte32; /// -static if (is(Vector!(short[16]))) alias Vector!(short[16]) short16; /// -static if (is(Vector!(ushort[16]))) alias Vector!(ushort[16]) ushort16; /// -static if (is(Vector!(int[8]))) alias Vector!(int[8]) int8; /// -static if (is(Vector!(uint[8]))) alias Vector!(uint[8]) uint8; /// -static if (is(Vector!(long[4]))) alias Vector!(long[4]) long4; /// -static if (is(Vector!(ulong[4]))) alias Vector!(ulong[4]) ulong4; /// +static if (is(Vector!(void[8]))) alias Vector!(void[8]) void8; /// +static if (is(Vector!(double[1]))) alias Vector!(double[1]) double1; /// +static if (is(Vector!(float[2]))) alias Vector!(float[2]) float2; /// +static if (is(Vector!(byte[8]))) alias Vector!(byte[8]) byte8; /// +static if (is(Vector!(ubyte[8]))) alias Vector!(ubyte[8]) ubyte8; /// +static if (is(Vector!(short[4]))) alias Vector!(short[4]) short4; /// +static if (is(Vector!(ushort[4]))) alias Vector!(ushort[4]) ushort4; /// +static if (is(Vector!(int[2]))) alias Vector!(int[2]) int2; /// +static if (is(Vector!(uint[2]))) alias Vector!(uint[2]) uint2; /// +static if (is(Vector!(long[1]))) alias Vector!(long[1]) long1; /// +static if (is(Vector!(ulong[1]))) alias Vector!(ulong[1]) ulong1; /// + +static if (is(Vector!(void[16]))) alias Vector!(void[16]) void16; /// +static if (is(Vector!(double[2]))) alias Vector!(double[2]) double2; /// +static if (is(Vector!(float[4]))) alias Vector!(float[4]) float4; /// +static if (is(Vector!(byte[16]))) alias 
Vector!(byte[16]) byte16; /// +static if (is(Vector!(ubyte[16]))) alias Vector!(ubyte[16]) ubyte16; /// +static if (is(Vector!(short[8]))) alias Vector!(short[8]) short8; /// +static if (is(Vector!(ushort[8]))) alias Vector!(ushort[8]) ushort8; /// +static if (is(Vector!(int[4]))) alias Vector!(int[4]) int4; /// +static if (is(Vector!(uint[4]))) alias Vector!(uint[4]) uint4; /// +static if (is(Vector!(long[2]))) alias Vector!(long[2]) long2; /// +static if (is(Vector!(ulong[2]))) alias Vector!(ulong[2]) ulong2; /// + +static if (is(Vector!(void[32]))) alias Vector!(void[32]) void32; /// +static if (is(Vector!(double[4]))) alias Vector!(double[4]) double4; /// +static if (is(Vector!(float[8]))) alias Vector!(float[8]) float8; /// +static if (is(Vector!(byte[32]))) alias Vector!(byte[32]) byte32; /// +static if (is(Vector!(ubyte[32]))) alias Vector!(ubyte[32]) ubyte32; /// +static if (is(Vector!(short[16]))) alias Vector!(short[16]) short16; /// +static if (is(Vector!(ushort[16]))) alias Vector!(ushort[16]) ushort16; /// +static if (is(Vector!(int[8]))) alias Vector!(int[8]) int8; /// +static if (is(Vector!(uint[8]))) alias Vector!(uint[8]) uint8; /// +static if (is(Vector!(long[4]))) alias Vector!(long[4]) long4; /// +static if (is(Vector!(ulong[4]))) alias Vector!(ulong[4]) ulong4; /// + +static if (is(Vector!(void[64]))) alias Vector!(void[64]) void64; /// +static if (is(Vector!(double[8]))) alias Vector!(double[8]) double8; /// +static if (is(Vector!(float[16]))) alias Vector!(float[16]) float16; /// +static if (is(Vector!(byte[64]))) alias Vector!(byte[64]) byte64; /// +static if (is(Vector!(ubyte[64]))) alias Vector!(ubyte[64]) ubyte64; /// +static if (is(Vector!(short[32]))) alias Vector!(short[32]) short32; /// +static if (is(Vector!(ushort[32]))) alias Vector!(ushort[32]) ushort32; /// +static if (is(Vector!(int[16]))) alias Vector!(int[16]) int16; /// +static if (is(Vector!(uint[16]))) alias Vector!(uint[16]) uint16; /// +static if (is(Vector!(long[8]))) alias Vector!(long[8]) long8; /// +static if (is(Vector!(ulong[8]))) alias Vector!(ulong[8]) ulong8; /// version (D_SIMD) { - /** XMM opcodes that conform to the following: - * - * opcode xmm1,xmm2/mem - * - * and do not have side effects (i.e. do not write to memory). 
- */ - enum XMM - { - ADDSS = 0xF30F58, - ADDSD = 0xF20F58, - ADDPS = 0x000F58, - ADDPD = 0x660F58, - PADDB = 0x660FFC, - PADDW = 0x660FFD, - PADDD = 0x660FFE, - PADDQ = 0x660FD4, - - SUBSS = 0xF30F5C, - SUBSD = 0xF20F5C, - SUBPS = 0x000F5C, - SUBPD = 0x660F5C, - PSUBB = 0x660FF8, - PSUBW = 0x660FF9, - PSUBD = 0x660FFA, - PSUBQ = 0x660FFB, - - MULSS = 0xF30F59, - MULSD = 0xF20F59, - MULPS = 0x000F59, - MULPD = 0x660F59, - PMULLW = 0x660FD5, - - DIVSS = 0xF30F5E, - DIVSD = 0xF20F5E, - DIVPS = 0x000F5E, - DIVPD = 0x660F5E, - - PAND = 0x660FDB, - POR = 0x660FEB, - - UCOMISS = 0x000F2E, - UCOMISD = 0x660F2E, - - XORPS = 0x000F57, - XORPD = 0x660F57, - - // Use STO and LOD instead of MOV to distinguish the direction - STOSS = 0xF30F11, - STOSD = 0xF20F11, - STOAPS = 0x000F29, - STOAPD = 0x660F29, - STODQA = 0x660F7F, - STOD = 0x660F7E, // MOVD reg/mem64, xmm 66 0F 7E /r - STOQ = 0x660FD6, - - LODSS = 0xF30F10, - LODSD = 0xF20F10, - LODAPS = 0x000F28, - LODAPD = 0x660F28, - LODDQA = 0x660F6F, - LODD = 0x660F6E, // MOVD xmm, reg/mem64 66 0F 6E /r - LODQ = 0xF30F7E, - - LODDQU = 0xF30F6F, // MOVDQU xmm1, xmm2/mem128 F3 0F 6F /r - STODQU = 0xF30F7F, // MOVDQU xmm1/mem128, xmm2 F3 0F 7F /r - MOVDQ2Q = 0xF20FD6, // MOVDQ2Q mmx, xmm F2 0F D6 /r - MOVHLPS = 0x0F12, // MOVHLPS xmm1, xmm2 0F 12 /r - LODHPD = 0x660F16, - STOHPD = 0x660F17, // MOVHPD mem64, xmm 66 0F 17 /r - LODHPS = 0x0F16, - STOHPS = 0x0F17, - MOVLHPS = 0x0F16, - LODLPD = 0x660F12, - STOLPD = 0x660F13, - LODLPS = 0x0F12, - STOLPS = 0x0F13, - MOVMSKPD = 0x660F50, - MOVMSKPS = 0x0F50, - MOVNTDQ = 0x660FE7, - MOVNTI = 0x0FC3, - MOVNTPD = 0x660F2B, - MOVNTPS = 0x0F2B, - MOVNTQ = 0x0FE7, - MOVQ2DQ = 0xF30FD6, - LODUPD = 0x660F10, - STOUPD = 0x660F11, - LODUPS = 0x0F10, - STOUPS = 0x0F11, - - PACKSSDW = 0x660F6B, - PACKSSWB = 0x660F63, - PACKUSWB = 0x660F67, - PADDSB = 0x660FEC, - PADDSW = 0x660FED, - PADDUSB = 0x660FDC, - PADDUSW = 0x660FDD, - PANDN = 0x660FDF, - PCMPEQB = 0x660F74, - PCMPEQD = 0x660F76, - PCMPEQW = 0x660F75, - PCMPGTB = 0x660F64, - PCMPGTD = 0x660F66, - PCMPGTW = 0x660F65, - PMADDWD = 0x660FF5, - PSLLW = 0x660FF1, - PSLLD = 0x660FF2, - PSLLQ = 0x660FF3, - PSRAW = 0x660FE1, - PSRAD = 0x660FE2, - PSRLW = 0x660FD1, - PSRLD = 0x660FD2, - PSRLQ = 0x660FD3, - PSUBSB = 0x660FE8, - PSUBSW = 0x660FE9, - PSUBUSB = 0x660FD8, - PSUBUSW = 0x660FD9, - PUNPCKHBW = 0x660F68, - PUNPCKHDQ = 0x660F6A, - PUNPCKHWD = 0x660F69, - PUNPCKLBW = 0x660F60, - PUNPCKLDQ = 0x660F62, - PUNPCKLWD = 0x660F61, - PXOR = 0x660FEF, - ANDPD = 0x660F54, - ANDPS = 0x0F54, - ANDNPD = 0x660F55, - ANDNPS = 0x0F55, - CMPPS = 0x0FC2, - CMPPD = 0x660FC2, - CMPSD = 0xF20FC2, - CMPSS = 0xF30FC2, - COMISD = 0x660F2F, - COMISS = 0x0F2F, - CVTDQ2PD = 0xF30FE6, - CVTDQ2PS = 0x0F5B, - CVTPD2DQ = 0xF20FE6, - CVTPD2PI = 0x660F2D, - CVTPD2PS = 0x660F5A, - CVTPI2PD = 0x660F2A, - CVTPI2PS = 0x0F2A, - CVTPS2DQ = 0x660F5B, - CVTPS2PD = 0x0F5A, - CVTPS2PI = 0x0F2D, - CVTSD2SI = 0xF20F2D, - CVTSD2SS = 0xF20F5A, - CVTSI2SD = 0xF20F2A, - CVTSI2SS = 0xF30F2A, - CVTSS2SD = 0xF30F5A, - CVTSS2SI = 0xF30F2D, - CVTTPD2PI = 0x660F2C, - CVTTPD2DQ = 0x660FE6, - CVTTPS2DQ = 0xF30F5B, - CVTTPS2PI = 0x0F2C, - CVTTSD2SI = 0xF20F2C, - CVTTSS2SI = 0xF30F2C, - MASKMOVDQU = 0x660FF7, - MASKMOVQ = 0x0FF7, - MAXPD = 0x660F5F, - MAXPS = 0x0F5F, - MAXSD = 0xF20F5F, - MAXSS = 0xF30F5F, - MINPD = 0x660F5D, - MINPS = 0x0F5D, - MINSD = 0xF20F5D, - MINSS = 0xF30F5D, - ORPD = 0x660F56, - ORPS = 0x0F56, - PAVGB = 0x660FE0, - PAVGW = 0x660FE3, - PMAXSW = 0x660FEE, - //PINSRW = 0x660FC4, - PMAXUB = 0x660FDE, - PMINSW 
= 0x660FEA, - PMINUB = 0x660FDA, - //PMOVMSKB = 0x660FD7, - PMULHUW = 0x660FE4, - PMULHW = 0x660FE5, - PMULUDQ = 0x660FF4, - PSADBW = 0x660FF6, - PUNPCKHQDQ = 0x660F6D, - PUNPCKLQDQ = 0x660F6C, - RCPPS = 0x0F53, - RCPSS = 0xF30F53, - RSQRTPS = 0x0F52, - RSQRTSS = 0xF30F52, - SQRTPD = 0x660F51, - SHUFPD = 0x660FC6, - SHUFPS = 0x0FC6, - SQRTPS = 0x0F51, - SQRTSD = 0xF20F51, - SQRTSS = 0xF30F51, - UNPCKHPD = 0x660F15, - UNPCKHPS = 0x0F15, - UNPCKLPD = 0x660F14, - UNPCKLPS = 0x0F14, - - PSHUFD = 0x660F70, - PSHUFHW = 0xF30F70, - PSHUFLW = 0xF20F70, - PSHUFW = 0x0F70, - PSLLDQ = 0x07660F73, - PSRLDQ = 0x03660F73, - - //PREFETCH = 0x0F18, - -// SSE3 Pentium 4 (Prescott) - - ADDSUBPD = 0x660FD0, - ADDSUBPS = 0xF20FD0, - HADDPD = 0x660F7C, - HADDPS = 0xF20F7C, - HSUBPD = 0x660F7D, - HSUBPS = 0xF20F7D, - MOVDDUP = 0xF20F12, - MOVSHDUP = 0xF30F16, - MOVSLDUP = 0xF30F12, - LDDQU = 0xF20FF0, - MONITOR = 0x0F01C8, - MWAIT = 0x0F01C9, - -// SSSE3 - PALIGNR = 0x660F3A0F, - PHADDD = 0x660F3802, - PHADDW = 0x660F3801, - PHADDSW = 0x660F3803, - PABSB = 0x660F381C, - PABSD = 0x660F381E, - PABSW = 0x660F381D, - PSIGNB = 0x660F3808, - PSIGND = 0x660F380A, - PSIGNW = 0x660F3809, - PSHUFB = 0x660F3800, - PMADDUBSW = 0x660F3804, - PMULHRSW = 0x660F380B, - PHSUBD = 0x660F3806, - PHSUBW = 0x660F3805, - PHSUBSW = 0x660F3807, - -// SSE4.1 - - BLENDPD = 0x660F3A0D, - BLENDPS = 0x660F3A0C, - BLENDVPD = 0x660F3815, - BLENDVPS = 0x660F3814, - DPPD = 0x660F3A41, - DPPS = 0x660F3A40, - EXTRACTPS = 0x660F3A17, - INSERTPS = 0x660F3A21, - MPSADBW = 0x660F3A42, - PBLENDVB = 0x660F3810, - PBLENDW = 0x660F3A0E, - PEXTRD = 0x660F3A16, - PEXTRQ = 0x660F3A16, - PINSRB = 0x660F3A20, - PINSRD = 0x660F3A22, - PINSRQ = 0x660F3A22, - - MOVNTDQA = 0x660F382A, - PACKUSDW = 0x660F382B, - PCMPEQQ = 0x660F3829, - PEXTRB = 0x660F3A14, - PHMINPOSUW = 0x660F3841, - PMAXSB = 0x660F383C, - PMAXSD = 0x660F383D, - PMAXUD = 0x660F383F, - PMAXUW = 0x660F383E, - PMINSB = 0x660F3838, - PMINSD = 0x660F3839, - PMINUD = 0x660F383B, - PMINUW = 0x660F383A, - PMOVSXBW = 0x660F3820, - PMOVSXBD = 0x660F3821, - PMOVSXBQ = 0x660F3822, - PMOVSXWD = 0x660F3823, - PMOVSXWQ = 0x660F3824, - PMOVSXDQ = 0x660F3825, - PMOVZXBW = 0x660F3830, - PMOVZXBD = 0x660F3831, - PMOVZXBQ = 0x660F3832, - PMOVZXWD = 0x660F3833, - PMOVZXWQ = 0x660F3834, - PMOVZXDQ = 0x660F3835, - PMULDQ = 0x660F3828, - PMULLD = 0x660F3840, - PTEST = 0x660F3817, - - ROUNDPD = 0x660F3A09, - ROUNDPS = 0x660F3A08, - ROUNDSD = 0x660F3A0B, - ROUNDSS = 0x660F3A0A, - -// SSE4.2 - PCMPESTRI = 0x660F3A61, - PCMPESTRM = 0x660F3A60, - PCMPISTRI = 0x660F3A63, - PCMPISTRM = 0x660F3A62, - PCMPGTQ = 0x660F3837, - //CRC32 - -// SSE4a (AMD only) - // EXTRQ,INSERTQ,MOVNTSD,MOVNTSS - -// POPCNT and LZCNT (have their own CPUID bits) - POPCNT = 0xF30FB8, - // LZCNT - } - - /** - * Generate two operand instruction with XMM 128 bit operands. - * - * This is a compiler magic function - it doesn't behave like - * regular D functions. - * - * Parameters: - * opcode any of the XMM opcodes; it must be a compile time constant - * op1 first operand - * op2 second operand - * Returns: - * result of opcode - */ - pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2); - - /** - * Unary SIMD instructions. 
- */ - pure @safe void16 __simd(XMM opcode, void16 op1); - pure @safe void16 __simd(XMM opcode, double d); /// - pure @safe void16 __simd(XMM opcode, float f); /// - - /**** - * For instructions: - * CMPPD, CMPSS, CMPSD, CMPPS, - * PSHUFD, PSHUFHW, PSHUFLW, - * BLENDPD, BLENDPS, DPPD, DPPS, - * MPSADBW, PBLENDW, - * ROUNDPD, ROUNDPS, ROUNDSD, ROUNDSS - * Parameters: - * opcode any of the above XMM opcodes; it must be a compile time constant - * op1 first operand - * op2 second operand - * imm8 third operand; must be a compile time constant - * Returns: - * result of opcode - */ - pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8); - - /*** - * For instructions with the imm8 version: - * PSLLD, PSLLQ, PSLLW, PSRAD, PSRAW, PSRLD, PSRLQ, PSRLW, - * PSRLDQ, PSLLDQ - * Parameters: - * opcode any of the XMM opcodes; it must be a compile time constant - * op1 first operand - * imm8 second operand; must be a compile time constant - * Returns: - * result of opcode - */ - pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8); - - /***** - * For "store" operations of the form: - * op1 op= op2 - * Returns: - * op2 - * These cannot be marked as pure, as semantic() doesn't check them. - */ - @safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2); - @safe void16 __simd_sto(XMM opcode, double op1, void16 op2); /// - @safe void16 __simd_sto(XMM opcode, float op1, void16 op2); /// - - /* The following use overloading to ensure correct typing. - * Compile with inlining on for best performance. - */ - - pure @safe short8 pcmpeq()(short8 v1, short8 v2) - { - return __simd(XMM.PCMPEQW, v1, v2); - } - - pure @safe ushort8 pcmpeq()(ushort8 v1, ushort8 v2) - { - return __simd(XMM.PCMPEQW, v1, v2); - } - - /********************* - * Emit prefetch instruction. - * Params: - * address = address to be prefetched - * writeFetch = true for write fetch, false for read fetch - * locality = 0..3 (0 meaning least local, 3 meaning most local) - * Note: - * The Intel mappings are: - * $(TABLE - * $(THEAD writeFetch, locality, Instruction) - * $(TROW false, 0, prefetchnta) - * $(TROW false, 1, prefetch2) - * $(TROW false, 2, prefetch1) - * $(TROW false, 3, prefetch0) - * $(TROW false, 0, prefetchw) - * $(TROW false, 1, prefetchw) - * $(TROW false, 2, prefetchw) - * $(TROW false, 3, prefetchw) - * ) - */ - void prefetch(bool writeFetch, ubyte locality)(const(void)* address) - { + /** XMM opcodes that conform to the following: + * + * opcode xmm1,xmm2/mem + * + * and do not have side effects (i.e. do not write to memory). 
+ */ + enum XMM + { + ADDSS = 0xF30F58, + ADDSD = 0xF20F58, + ADDPS = 0x000F58, + ADDPD = 0x660F58, + PADDB = 0x660FFC, + PADDW = 0x660FFD, + PADDD = 0x660FFE, + PADDQ = 0x660FD4, + + SUBSS = 0xF30F5C, + SUBSD = 0xF20F5C, + SUBPS = 0x000F5C, + SUBPD = 0x660F5C, + PSUBB = 0x660FF8, + PSUBW = 0x660FF9, + PSUBD = 0x660FFA, + PSUBQ = 0x660FFB, + + MULSS = 0xF30F59, + MULSD = 0xF20F59, + MULPS = 0x000F59, + MULPD = 0x660F59, + PMULLW = 0x660FD5, + + DIVSS = 0xF30F5E, + DIVSD = 0xF20F5E, + DIVPS = 0x000F5E, + DIVPD = 0x660F5E, + + PAND = 0x660FDB, + POR = 0x660FEB, + + UCOMISS = 0x000F2E, + UCOMISD = 0x660F2E, + + XORPS = 0x000F57, + XORPD = 0x660F57, + + // Use STO and LOD instead of MOV to distinguish the direction + // (Destination is first operand, Source is second operand) + STOSS = 0xF30F11, /// MOVSS xmm1/m32, xmm2 + STOSD = 0xF20F11, /// MOVSD xmm1/m64, xmm2 + STOAPS = 0x000F29, /// MOVAPS xmm2/m128, xmm1 + STOAPD = 0x660F29, /// MOVAPD xmm2/m128, xmm1 + STODQA = 0x660F7F, /// MOVDQA xmm2/m128, xmm1 + STOD = 0x660F7E, /// MOVD reg/mem64, xmm 66 0F 7E /r + STOQ = 0x660FD6, /// MOVQ xmm2/m64, xmm1 + + LODSS = 0xF30F10, /// MOVSS xmm1, xmm2/m32 + LODSD = 0xF20F10, /// MOVSD xmm1, xmm2/m64 + LODAPS = 0x000F28, /// MOVAPS xmm1, xmm2/m128 + LODAPD = 0x660F28, /// MOVAPD xmm1, xmm2/m128 + LODDQA = 0x660F6F, /// MOVDQA xmm1, xmm2/m128 + LODD = 0x660F6E, /// MOVD xmm, reg/mem64 66 0F 6E /r + LODQ = 0xF30F7E, /// MOVQ xmm1, xmm2/m64 + + LODDQU = 0xF30F6F, /// MOVDQU xmm1, xmm2/mem128 F3 0F 6F /r + STODQU = 0xF30F7F, /// MOVDQU xmm1/mem128, xmm2 F3 0F 7F /r + MOVDQ2Q = 0xF20FD6, /// MOVDQ2Q mmx, xmm F2 0F D6 /r + MOVHLPS = 0x0F12, /// MOVHLPS xmm1, xmm2 0F 12 /r + LODHPD = 0x660F16, /// MOVHPD xmm1, m64 + STOHPD = 0x660F17, /// MOVHPD mem64, xmm1 66 0F 17 /r + LODHPS = 0x0F16, /// MOVHPS xmm1, m64 + STOHPS = 0x0F17, /// MOVHPS m64, xmm1 + MOVLHPS = 0x0F16, /// MOVLHPS xmm1, xmm2 + LODLPD = 0x660F12, /// MOVLPD xmm1, m64 + STOLPD = 0x660F13, /// MOVLPD m64, xmm1 + LODLPS = 0x0F12, /// MOVLPS xmm1, m64 + STOLPS = 0x0F13, /// MOVLPS m64, xmm1 + MOVMSKPD = 0x660F50, /// MOVMSKPD reg, xmm + MOVMSKPS = 0x0F50, /// MOVMSKPS reg, xmm + MOVNTDQ = 0x660FE7, /// MOVNTDQ m128, xmm1 + MOVNTI = 0x0FC3, /// MOVNTI m32, r32 + MOVNTPD = 0x660F2B, /// MOVNTPD m128, xmm1 + MOVNTPS = 0x0F2B, /// MOVNTPS m128, xmm1 + MOVNTQ = 0x0FE7, /// MOVNTQ m64, mm + MOVQ2DQ = 0xF30FD6, /// MOVQ2DQ + LODUPD = 0x660F10, /// MOVUPD xmm1, xmm2/m128 + STOUPD = 0x660F11, /// MOVUPD xmm2/m128, xmm1 + LODUPS = 0x0F10, /// MOVUPS xmm1, xmm2/m128 + STOUPS = 0x0F11, /// MOVUPS xmm2/m128, xmm1 + + PACKSSDW = 0x660F6B, + PACKSSWB = 0x660F63, + PACKUSWB = 0x660F67, + PADDSB = 0x660FEC, + PADDSW = 0x660FED, + PADDUSB = 0x660FDC, + PADDUSW = 0x660FDD, + PANDN = 0x660FDF, + PCMPEQB = 0x660F74, + PCMPEQD = 0x660F76, + PCMPEQW = 0x660F75, + PCMPGTB = 0x660F64, + PCMPGTD = 0x660F66, + PCMPGTW = 0x660F65, + PMADDWD = 0x660FF5, + PSLLW = 0x660FF1, + PSLLD = 0x660FF2, + PSLLQ = 0x660FF3, + PSRAW = 0x660FE1, + PSRAD = 0x660FE2, + PSRLW = 0x660FD1, + PSRLD = 0x660FD2, + PSRLQ = 0x660FD3, + PSUBSB = 0x660FE8, + PSUBSW = 0x660FE9, + PSUBUSB = 0x660FD8, + PSUBUSW = 0x660FD9, + PUNPCKHBW = 0x660F68, + PUNPCKHDQ = 0x660F6A, + PUNPCKHWD = 0x660F69, + PUNPCKLBW = 0x660F60, + PUNPCKLDQ = 0x660F62, + PUNPCKLWD = 0x660F61, + PXOR = 0x660FEF, + ANDPD = 0x660F54, + ANDPS = 0x0F54, + ANDNPD = 0x660F55, + ANDNPS = 0x0F55, + CMPPS = 0x0FC2, + CMPPD = 0x660FC2, + CMPSD = 0xF20FC2, + CMPSS = 0xF30FC2, + COMISD = 0x660F2F, + COMISS = 0x0F2F, + CVTDQ2PD = 0xF30FE6, + 
CVTDQ2PS = 0x0F5B, + CVTPD2DQ = 0xF20FE6, + CVTPD2PI = 0x660F2D, + CVTPD2PS = 0x660F5A, + CVTPI2PD = 0x660F2A, + CVTPI2PS = 0x0F2A, + CVTPS2DQ = 0x660F5B, + CVTPS2PD = 0x0F5A, + CVTPS2PI = 0x0F2D, + CVTSD2SI = 0xF20F2D, + CVTSD2SS = 0xF20F5A, + CVTSI2SD = 0xF20F2A, + CVTSI2SS = 0xF30F2A, + CVTSS2SD = 0xF30F5A, + CVTSS2SI = 0xF30F2D, + CVTTPD2PI = 0x660F2C, + CVTTPD2DQ = 0x660FE6, + CVTTPS2DQ = 0xF30F5B, + CVTTPS2PI = 0x0F2C, + CVTTSD2SI = 0xF20F2C, + CVTTSS2SI = 0xF30F2C, + MASKMOVDQU = 0x660FF7, + MASKMOVQ = 0x0FF7, + MAXPD = 0x660F5F, + MAXPS = 0x0F5F, + MAXSD = 0xF20F5F, + MAXSS = 0xF30F5F, + MINPD = 0x660F5D, + MINPS = 0x0F5D, + MINSD = 0xF20F5D, + MINSS = 0xF30F5D, + ORPD = 0x660F56, + ORPS = 0x0F56, + PAVGB = 0x660FE0, + PAVGW = 0x660FE3, + PMAXSW = 0x660FEE, + //PINSRW = 0x660FC4, + PMAXUB = 0x660FDE, + PMINSW = 0x660FEA, + PMINUB = 0x660FDA, + //PMOVMSKB = 0x660FD7, + PMULHUW = 0x660FE4, + PMULHW = 0x660FE5, + PMULUDQ = 0x660FF4, + PSADBW = 0x660FF6, + PUNPCKHQDQ = 0x660F6D, + PUNPCKLQDQ = 0x660F6C, + RCPPS = 0x0F53, + RCPSS = 0xF30F53, + RSQRTPS = 0x0F52, + RSQRTSS = 0xF30F52, + SQRTPD = 0x660F51, + SHUFPD = 0x660FC6, + SHUFPS = 0x0FC6, + SQRTPS = 0x0F51, + SQRTSD = 0xF20F51, + SQRTSS = 0xF30F51, + UNPCKHPD = 0x660F15, + UNPCKHPS = 0x0F15, + UNPCKLPD = 0x660F14, + UNPCKLPS = 0x0F14, + + PSHUFD = 0x660F70, + PSHUFHW = 0xF30F70, + PSHUFLW = 0xF20F70, + PSHUFW = 0x0F70, + PSLLDQ = 0x07660F73, + PSRLDQ = 0x03660F73, + + //PREFETCH = 0x0F18, + + // SSE3 Pentium 4 (Prescott) + + ADDSUBPD = 0x660FD0, + ADDSUBPS = 0xF20FD0, + HADDPD = 0x660F7C, + HADDPS = 0xF20F7C, + HSUBPD = 0x660F7D, + HSUBPS = 0xF20F7D, + MOVDDUP = 0xF20F12, + MOVSHDUP = 0xF30F16, + MOVSLDUP = 0xF30F12, + LDDQU = 0xF20FF0, + MONITOR = 0x0F01C8, + MWAIT = 0x0F01C9, + + // SSSE3 + PALIGNR = 0x660F3A0F, + PHADDD = 0x660F3802, + PHADDW = 0x660F3801, + PHADDSW = 0x660F3803, + PABSB = 0x660F381C, + PABSD = 0x660F381E, + PABSW = 0x660F381D, + PSIGNB = 0x660F3808, + PSIGND = 0x660F380A, + PSIGNW = 0x660F3809, + PSHUFB = 0x660F3800, + PMADDUBSW = 0x660F3804, + PMULHRSW = 0x660F380B, + PHSUBD = 0x660F3806, + PHSUBW = 0x660F3805, + PHSUBSW = 0x660F3807, + + // SSE4.1 + + BLENDPD = 0x660F3A0D, + BLENDPS = 0x660F3A0C, + BLENDVPD = 0x660F3815, + BLENDVPS = 0x660F3814, + DPPD = 0x660F3A41, + DPPS = 0x660F3A40, + EXTRACTPS = 0x660F3A17, + INSERTPS = 0x660F3A21, + MPSADBW = 0x660F3A42, + PBLENDVB = 0x660F3810, + PBLENDW = 0x660F3A0E, + PEXTRD = 0x660F3A16, + PEXTRQ = 0x660F3A16, + PINSRB = 0x660F3A20, + PINSRD = 0x660F3A22, + PINSRQ = 0x660F3A22, + + MOVNTDQA = 0x660F382A, + PACKUSDW = 0x660F382B, + PCMPEQQ = 0x660F3829, + PEXTRB = 0x660F3A14, + PHMINPOSUW = 0x660F3841, + PMAXSB = 0x660F383C, + PMAXSD = 0x660F383D, + PMAXUD = 0x660F383F, + PMAXUW = 0x660F383E, + PMINSB = 0x660F3838, + PMINSD = 0x660F3839, + PMINUD = 0x660F383B, + PMINUW = 0x660F383A, + PMOVSXBW = 0x660F3820, + PMOVSXBD = 0x660F3821, + PMOVSXBQ = 0x660F3822, + PMOVSXWD = 0x660F3823, + PMOVSXWQ = 0x660F3824, + PMOVSXDQ = 0x660F3825, + PMOVZXBW = 0x660F3830, + PMOVZXBD = 0x660F3831, + PMOVZXBQ = 0x660F3832, + PMOVZXWD = 0x660F3833, + PMOVZXWQ = 0x660F3834, + PMOVZXDQ = 0x660F3835, + PMULDQ = 0x660F3828, + PMULLD = 0x660F3840, + PTEST = 0x660F3817, + + ROUNDPD = 0x660F3A09, + ROUNDPS = 0x660F3A08, + ROUNDSD = 0x660F3A0B, + ROUNDSS = 0x660F3A0A, + + // SSE4.2 + PCMPESTRI = 0x660F3A61, + PCMPESTRM = 0x660F3A60, + PCMPISTRI = 0x660F3A63, + PCMPISTRM = 0x660F3A62, + PCMPGTQ = 0x660F3837, + //CRC32 + + // SSE4a (AMD only) + // EXTRQ,INSERTQ,MOVNTSD,MOVNTSS + + // POPCNT and 
LZCNT (have their own CPUID bits) + POPCNT = 0xF30FB8, + // LZCNT + } + + /** + * Generate two operand instruction with XMM 128 bit operands. + * + * This is a compiler magic function - it doesn't behave like + * regular D functions. + * + * Parameters: + * opcode = any of the XMM opcodes; it must be a compile time constant + * op1 = first operand + * op2 = second operand + * Returns: + * result of opcode + */ + pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2); + + /// + unittest + { + float4 a; + a = cast(float4)__simd(XMM.PXOR, a, a); + } + + /** + * Unary SIMD instructions. + */ + pure @safe void16 __simd(XMM opcode, void16 op1); + pure @safe void16 __simd(XMM opcode, double d); /// + pure @safe void16 __simd(XMM opcode, float f); /// + + /// + unittest + { + float4 a; + a = cast(float4)__simd(XMM.LODSS, a); + } + + /**** + * For instructions: + * CMPPD, CMPSS, CMPSD, CMPPS, + * PSHUFD, PSHUFHW, PSHUFLW, + * BLENDPD, BLENDPS, DPPD, DPPS, + * MPSADBW, PBLENDW, + * ROUNDPD, ROUNDPS, ROUNDSD, ROUNDSS + * Parameters: + * opcode = any of the above XMM opcodes; it must be a compile time constant + * op1 = first operand + * op2 = second operand + * imm8 = third operand; must be a compile time constant + * Returns: + * result of opcode + */ + pure @safe void16 __simd(XMM opcode, void16 op1, void16 op2, ubyte imm8); + + /// + unittest + { + float4 a; + a = cast(float4)__simd(XMM.CMPPD, a, a, 0x7A); + } + + /*** + * For instructions with the imm8 version: + * PSLLD, PSLLQ, PSLLW, PSRAD, PSRAW, PSRLD, PSRLQ, PSRLW, + * PSRLDQ, PSLLDQ + * Parameters: + * opcode = any of the XMM opcodes; it must be a compile time constant + * op1 = first operand + * imm8 = second operand; must be a compile time constant + * Returns: + * result of opcode + */ + pure @safe void16 __simd_ib(XMM opcode, void16 op1, ubyte imm8); + + /// + unittest + { + float4 a; + a = cast(float4) __simd_ib(XMM.PSRLQ, a, 0x7A); + } + + /***** + * For "store" operations of the form: + * op1 op= op2 + * Returns: + * op2 + * These cannot be marked as pure, as semantic() doesn't check them. + */ + @safe void16 __simd_sto(XMM opcode, void16 op1, void16 op2); + @safe void16 __simd_sto(XMM opcode, double op1, void16 op2); /// + @safe void16 __simd_sto(XMM opcode, float op1, void16 op2); /// + + /// + unittest + { + void16 a; + float f = 1; + double d = 1; + + cast(void)__simd_sto(XMM.STOUPS, a, a); + cast(void)__simd_sto(XMM.STOUPS, f, a); + cast(void)__simd_sto(XMM.STOUPS, d, a); + } + + /* The following use overloading to ensure correct typing. + * Compile with inlining on for best performance. + */ + + pure @safe short8 pcmpeq()(short8 v1, short8 v2) + { + return cast(short8)__simd(XMM.PCMPEQW, v1, v2); + } + + pure @safe ushort8 pcmpeq()(ushort8 v1, ushort8 v2) + { + return cast(ushort8)__simd(XMM.PCMPEQW, v1, v2); + } + + /********************* + * Emit prefetch instruction. 
+ * Params: + * address = address to be prefetched + * writeFetch = true for write fetch, false for read fetch + * locality = 0..3 (0 meaning least local, 3 meaning most local) + * Note: + * The Intel mappings are: + * $(TABLE + * $(THEAD writeFetch, locality, Instruction) + * $(TROW false, 0, prefetchnta) + * $(TROW false, 1, prefetch2) + * $(TROW false, 2, prefetch1) + * $(TROW false, 3, prefetch0) + * $(TROW true, 0, prefetchw) + * $(TROW true, 1, prefetchw) + * $(TROW true, 2, prefetchw) + * $(TROW true, 3, prefetchw) + * ) + */ + void prefetch(bool writeFetch, ubyte locality)(const(void)* address) + { static if (writeFetch) __prefetch(address, 4); else static if (locality < 4) __prefetch(address, 3 - locality); else static assert(0, "0..3 expected for locality"); - } + } - private void __prefetch(const(void*) address, ubyte encoding); + private void __prefetch(const(void*) address, ubyte encoding); - /************************************* - * Load unaligned vector from address. - * This is a compiler intrinsic. - * Params: - * p = pointer to vector - * Returns: - * vector - */ + /************************************* + * Load unaligned vector from address. + * This is a compiler intrinsic. + * Params: + * p = pointer to vector + * Returns: + * vector + */ - V loadUnaligned(V)(const V* p) + V loadUnaligned(V)(const V* p) if (is(V == void16) || is(V == byte16) || is(V == ubyte16) || @@ -510,8 +564,10 @@ version (D_SIMD) is(V == int4) || is(V == uint4) || is(V == long2) || - is(V == ulong2)) - { + is(V == ulong2) || + is(V == double2) || + is(V == float4)) + { pragma(inline, true); static if (is(V == double2)) return cast(V)__simd(XMM.LODUPD, *cast(const void16*)p); @@ -519,19 +575,63 @@ version (D_SIMD) return cast(V)__simd(XMM.LODUPS, *cast(const void16*)p); else return cast(V)__simd(XMM.LODDQU, *cast(const void16*)p); - } - - /************************************* - * Store vector to unaligned address. - * This is a compiler intrinsic. - * Params: - * p = pointer to vector - * value = value to store - * Returns: - * value - */ - - V storeUnaligned(V)(V* p, V value) + } + + @system + unittest + { + // Memory to load into the vector: + // Should have enough data to test all 16-byte alignments, and still + // have room for a 16-byte vector + ubyte[32] data; + foreach (i; 0..data.length) + { + data[i] = cast(ubyte)i; + } + + // to test all alignments from 1 ~ 16 + foreach (i; 0..16) + { + ubyte* d = &data[i]; + + void test(T)() + { + // load the data + T v = loadUnaligned(cast(T*)d); + + // check that the data was loaded correctly + ubyte* ptrToV = cast(ubyte*)&v; + foreach (j; 0..T.sizeof) + { + assert(ptrToV[j] == d[j]); + } + } + + test!void16(); + test!byte16(); + test!ubyte16(); + test!short8(); + test!ushort8(); + test!int4(); + test!uint4(); + test!long2(); + test!ulong2(); + test!double2(); + test!float4(); + } + } + + /************************************* + * Store vector to unaligned address. + * This is a compiler intrinsic. 
+ * Params: + * p = pointer to vector + * value = value to store + * Returns: + * value + */ + + V storeUnaligned(V)(V* p, V value) if (is(V == void16) || is(V == byte16) || is(V == ubyte16) || @@ -540,8 +640,10 @@ version (D_SIMD) is(V == int4) || is(V == uint4) || is(V == long2) || - is(V == ulong2)) - { + is(V == ulong2) || + is(V == double2) || + is(V == float4)) + { pragma(inline, true); static if (is(V == double2)) return cast(V)__simd_sto(XMM.STOUPD, *cast(void16*)p, value); @@ -549,5 +651,53 @@ version (D_SIMD) return cast(V)__simd_sto(XMM.STOUPS, *cast(void16*)p, value); else return cast(V)__simd_sto(XMM.STODQU, *cast(void16*)p, value); - } + } + + @system + unittest + { + // Memory to store the vector to: + // Should have enough data to test all 16-byte alignments, and still + // have room for a 16-byte vector + ubyte[32] data; + + // to test all alignments from 1 ~ 16 + foreach (i; 0..16) + { + ubyte* d = &data[i]; + + void test(T)() + { + T v; + + // populate v` with data + ubyte* ptrToV = cast(ubyte*)&v; + foreach (j; 0..T.sizeof) + { + ptrToV[j] = cast(ubyte)j; + } + + // store `v` to location pointed to by `d` + storeUnaligned(cast(T*)d, v); + + // check that the the data was stored correctly + foreach (j; 0..T.sizeof) + { + assert(ptrToV[j] == d[j]); + } + } + + test!void16(); + test!byte16(); + test!ubyte16(); + test!short8(); + test!ushort8(); + test!int4(); + test!uint4(); + test!long2(); + test!ulong2(); + test!double2(); + test!float4(); + } + } } diff --git a/libphobos/libdruntime/core/stdc/stdio.d b/libphobos/libdruntime/core/stdc/stdio.d index 00efe885289..67011596b1c 100644 --- a/libphobos/libdruntime/core/stdc/stdio.d +++ b/libphobos/libdruntime/core/stdc/stdio.d @@ -1527,7 +1527,7 @@ else version (OpenBSD) { void __sclearerr()(FILE* p) { - p._flags &= ~(__SERR|__SEOF); + p._flags = p._flags & ~(__SERR|__SEOF); } int __sfeof()(FILE* p) diff --git a/libphobos/libdruntime/core/sys/posix/locale.d b/libphobos/libdruntime/core/sys/posix/locale.d index 19c7e209ac7..18558a2696a 100644 --- a/libphobos/libdruntime/core/sys/posix/locale.d +++ b/libphobos/libdruntime/core/sys/posix/locale.d @@ -415,10 +415,38 @@ else version (Solaris) LC_ALL = 6, } + /// + enum + { + LC_CTYPE_MASK = (1 << LC_CTYPE), + LC_NUMERIC_MASK = (1 << LC_NUMERIC), + LC_TIME_MASK = (1 << LC_TIME), + LC_COLLATE_MASK = (1 << LC_COLLATE), + LC_MONETARY_MASK = (1 << LC_MONETARY), + LC_MESSAGES_MASK = (1 << LC_MESSAGES), + LC_ALL_MASK = 0x3f, + } + + private struct _LC_locale_t; + + /// + alias locale_t = _LC_locale_t**; + + /// + enum LC_GLOBAL_LOCALE = (cast(locale_t)-1); + + /// Duplicate existing locale + locale_t duplocale(locale_t locale); + /// Free an allocated locale + void freelocale(locale_t locale); /// Natural language formatting for C lconv* localeconv(); + /// Create a new locale + locale_t newlocale(int mask, const char* locale, locale_t base); /// Set the C library's notion of natural language formatting style char* setlocale(int category, const char* locale); + /// Set the per-thread locale + locale_t uselocale (locale_t locale); } else static assert(false, "unimplemented platform"); diff --git a/libphobos/libdruntime/core/thread/osthread.d b/libphobos/libdruntime/core/thread/osthread.d index d81e0aa0607..defdc9586f1 100644 --- a/libphobos/libdruntime/core/thread/osthread.d +++ b/libphobos/libdruntime/core/thread/osthread.d @@ -1461,14 +1461,6 @@ in (fn) fn(sp); } -version (Solaris) -{ - import core.sys.solaris.sys.priocntl; - import core.sys.solaris.sys.types; - import 
core.sys.posix.sys.wait : idtype_t; -} - - version (Windows) private extern (D) void scanWindowsOnly(scope ScanAllThreadsTypeFn scan, ThreadBase _t) nothrow { diff --git a/libphobos/libdruntime/core/vararg.d b/libphobos/libdruntime/core/vararg.d index a02ffeaea23..935b2bdb287 100644 --- a/libphobos/libdruntime/core/vararg.d +++ b/libphobos/libdruntime/core/vararg.d @@ -17,3 +17,125 @@ module core.vararg; public import core.stdc.stdarg; + + +version (GNU) { /* TypeInfo-based va_arg overload unsupported */ } +else: + +version (ARM) version = ARM_Any; +version (AArch64) version = ARM_Any; +version (MIPS32) version = MIPS_Any; +version (MIPS64) version = MIPS_Any; +version (PPC) version = PPC_Any; +version (PPC64) version = PPC_Any; + +version (ARM_Any) +{ + // Darwin uses a simpler varargs implementation + version (OSX) {} + else version (iOS) {} + else version (TVOS) {} + else version (WatchOS) {} + else: + + version (ARM) version = AAPCS32; + version (AArch64) version = AAPCS64; +} + + +/// +alias va_arg = core.stdc.stdarg.va_arg; + + +/** + * Retrieve and store through parmn the next value that is of TypeInfo ti. + * Used when the static type is not known. + */ +void va_arg()(ref va_list ap, TypeInfo ti, void* parmn) +{ + version (X86) + { + // Wait until everyone updates to get TypeInfo.talign + //auto talign = ti.talign; + //auto p = cast(void*)(cast(size_t)ap + talign - 1) & ~(talign - 1); + auto p = ap; + auto tsize = ti.tsize; + ap = cast(va_list) (p + tsize.alignUp); + parmn[0..tsize] = p[0..tsize]; + } + else version (Win64) + { + version (LDC) enum isLDC = true; + else enum isLDC = false; + + // Wait until everyone updates to get TypeInfo.talign + //auto talign = ti.talign; + //auto p = cast(void*)(cast(size_t)ap + talign - 1) & ~(talign - 1); + auto p = ap; + auto tsize = ti.tsize; + void* q; + if (isLDC && tsize == 16 && cast(TypeInfo_Array) ti) + { + q = p; + ap = cast(va_list) (p + tsize); + } + else + { + q = (tsize > size_t.sizeof || (tsize & (tsize - 1)) != 0) ? 
*cast(void**) p : p; + ap = cast(va_list) (p + size_t.sizeof); + } + parmn[0..tsize] = q[0..tsize]; + } + else version (X86_64) + { + static import core.internal.vararg.sysv_x64; + core.internal.vararg.sysv_x64.va_arg(ap, ti, parmn); + } + else version (AAPCS32) + { + const tsize = ti.tsize; + if (ti.talign >= 8) + ap.__ap = ap.__ap.alignUp!8; + auto p = ap.__ap; + version (BigEndian) + p = adjustForBigEndian(p, tsize); + ap.__ap += tsize.alignUp; + parmn[0..tsize] = p[0..tsize]; + } + else version (AAPCS64) + { + static import core.internal.vararg.aarch64; + core.internal.vararg.aarch64.va_arg(ap, ti, parmn); + } + else version (ARM_Any) + { + const tsize = ti.tsize; + auto p = cast(void*) ap; + version (BigEndian) + p = adjustForBigEndian(p, tsize); + ap += tsize.alignUp; + parmn[0..tsize] = p[0..tsize]; + } + else version (PPC_Any) + { + if (ti.talign >= 8) + ap = ap.alignUp!8; + const tsize = ti.tsize; + auto p = cast(void*) ap; + version (BigEndian) + p = adjustForBigEndian(p, tsize); + ap += tsize.alignUp; + parmn[0..tsize] = p[0..tsize]; + } + else version (MIPS_Any) + { + const tsize = ti.tsize; + auto p = cast(void*) ap; + version (BigEndian) + p = adjustForBigEndian(p, tsize); + ap += tsize.alignUp; + parmn[0..tsize] = p[0..tsize]; + } + else + static assert(0, "Unsupported platform"); +} diff --git a/libphobos/libdruntime/core/volatile.d b/libphobos/libdruntime/core/volatile.d new file mode 100644 index 00000000000..1703450c65f --- /dev/null +++ b/libphobos/libdruntime/core/volatile.d @@ -0,0 +1,67 @@ +/** + * This module declares intrinsics for volatile operations. + * + * Copyright: Copyright © 2019, The D Language Foundation + * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) + * Authors: Walter Bright, Ernesto Castellotti + * Source: $(DRUNTIMESRC core/volatile.d) + */ + +module core.volatile; + +nothrow: +@safe: +@nogc: + +/************************************* + * Read/write value from/to the memory location indicated by ptr. + * + * These functions are recognized by the compiler, and calls to them are guaranteed + * to not be removed (as dead assignment elimination or presumed to have no effect) + * or reordered in the same thread. + * + * These reordering guarantees are only made with regards to other + * operations done through these functions; the compiler is free to reorder regular + * loads/stores with regards to loads/stores done through these functions. + * + * This is useful when dealing with memory-mapped I/O (MMIO) where a store can + * have an effect other than just writing a value, or where sequential loads + * with no intervening stores can retrieve + * different values from the same location due to external stores to the location. + * + * These functions will, when possible, do the load/store as a single operation. In + * general, this is possible when the size of the operation is less than or equal to + * $(D (void*).sizeof), although some targets may support larger operations. If the + * load/store cannot be done as a single operation, multiple smaller operations will be used. + * + * These are not to be conflated with atomic operations. They do not guarantee any + * atomicity. This may be provided by coincidence as a result of the instructions + * used on the target, but this should not be relied on for portable programs. + * Further, no memory fences are implied by these functions. + * They should not be used for communication between threads. + * They may be used to guarantee a write or read cycle occurs at a specified address. 
+ */ + +ubyte volatileLoad(ubyte * ptr); +ushort volatileLoad(ushort* ptr); /// ditto +uint volatileLoad(uint * ptr); /// ditto +ulong volatileLoad(ulong * ptr); /// ditto + +void volatileStore(ubyte * ptr, ubyte value); /// ditto +void volatileStore(ushort* ptr, ushort value); /// ditto +void volatileStore(uint * ptr, uint value); /// ditto +void volatileStore(ulong * ptr, ulong value); /// ditto + +@system unittest +{ + alias TT(T...) = T; + + foreach (T; TT!(ubyte, ushort, uint, ulong)) + { + T u; + T* p = &u; + volatileStore(p, 1); + T r = volatileLoad(p); + assert(r == u); + } +} diff --git a/libphobos/libdruntime/rt/lifetime.d b/libphobos/libdruntime/rt/lifetime.d index 95f0ff57f42..6a6eb50eefa 100644 --- a/libphobos/libdruntime/rt/lifetime.d +++ b/libphobos/libdruntime/rt/lifetime.d @@ -44,17 +44,9 @@ private } } -private immutable bool callStructDtorsDuringGC; - extern (C) void lifetime_init() { // this is run before static ctors, so it is safe to modify immutables - import rt.config; - string s = rt_configOption("callStructDtorsDuringGC"); - if (s != null) - cast() callStructDtorsDuringGC = s[0] == '1' || s[0] == 'y' || s[0] == 'Y'; - else - cast() callStructDtorsDuringGC = true; } /** @@ -214,9 +206,6 @@ inout(TypeInfo) unqualify(inout(TypeInfo) cti) pure nothrow @nogc // size used to store the TypeInfo at the end of an allocation for structs that have a destructor size_t structTypeInfoSize(const TypeInfo ti) pure nothrow @nogc { - if (!callStructDtorsDuringGC) - return 0; - if (ti && typeid(ti) is typeid(TypeInfo_Struct)) // avoid a complete dynamic type cast { auto sti = cast(TypeInfo_Struct)cast(void*)ti; @@ -975,7 +964,7 @@ extern (C) void[] _d_newarrayT(const TypeInfo ti, size_t length) pure nothrow */ extern (C) void[] _d_newarrayiT(const TypeInfo ti, size_t length) pure nothrow { - import core.internal.traits : TypeTuple; + import core.internal.traits : AliasSeq; void[] result = _d_newarrayU(ti, length); auto tinext = unqualify(ti.next); @@ -985,7 +974,7 @@ extern (C) void[] _d_newarrayiT(const TypeInfo ti, size_t length) pure nothrow switch (init.length) { - foreach (T; TypeTuple!(ubyte, ushort, uint, ulong)) + foreach (T; AliasSeq!(ubyte, ushort, uint, ulong)) { case T.sizeof: (cast(T*)result.ptr)[0 .. 
size * length / T.sizeof] = *cast(T*)init.ptr; @@ -2539,33 +2528,30 @@ unittest delete arr1; assert(dtorCount == 7); - if (callStructDtorsDuringGC) - { - dtorCount = 0; - S1* s2 = new S1; - GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); - assert(dtorCount == 1); - GC.free(s2); + dtorCount = 0; + S1* s2 = new S1; + GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); + assert(dtorCount == 1); + GC.free(s2); - dtorCount = 0; - const(S1)* s3 = new const(S1); - GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); - assert(dtorCount == 1); - GC.free(cast(void*)s3); + dtorCount = 0; + const(S1)* s3 = new const(S1); + GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); + assert(dtorCount == 1); + GC.free(cast(void*)s3); - dtorCount = 0; - shared(S1)* s4 = new shared(S1); - GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); - assert(dtorCount == 1); - GC.free(cast(void*)s4); + dtorCount = 0; + shared(S1)* s4 = new shared(S1); + GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); + assert(dtorCount == 1); + GC.free(cast(void*)s4); - dtorCount = 0; - const(S1)[] carr1 = new const(S1)[5]; - BlkInfo blkinf1 = GC.query(carr1.ptr); - GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); - assert(dtorCount == 5); - GC.free(blkinf1.base); - } + dtorCount = 0; + const(S1)[] carr1 = new const(S1)[5]; + BlkInfo blkinf1 = GC.query(carr1.ptr); + GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); + assert(dtorCount == 5); + GC.free(blkinf1.base); dtorCount = 0; S1[] arr2 = new S1[10]; @@ -2573,14 +2559,11 @@ unittest arr2.assumeSafeAppend; assert(dtorCount == 4); // destructors run explicitely? - if (callStructDtorsDuringGC) - { - dtorCount = 0; - BlkInfo blkinf = GC.query(arr2.ptr); - GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); - assert(dtorCount == 6); - GC.free(blkinf.base); - } + dtorCount = 0; + BlkInfo blkinf = GC.query(arr2.ptr); + GC.runFinalizers((cast(char*)(typeid(S1).xdtor))[0..1]); + assert(dtorCount == 6); + GC.free(blkinf.base); // associative arrays import rt.aaA : entryDtor; @@ -2590,36 +2573,27 @@ unittest S1[int] aa1; aa1[0] = S1(0); aa1[1] = S1(1); - if (callStructDtorsDuringGC) - { - dtorCount = 0; - aa1 = null; - GC.runFinalizers((cast(char*)(&entryDtor))[0..1]); - assert(dtorCount == 2); - } + dtorCount = 0; + aa1 = null; + GC.runFinalizers((cast(char*)(&entryDtor))[0..1]); + assert(dtorCount == 2); int[S1] aa2; aa2[S1(0)] = 0; aa2[S1(1)] = 1; aa2[S1(2)] = 2; - if (callStructDtorsDuringGC) - { - dtorCount = 0; - aa2 = null; - GC.runFinalizers((cast(char*)(&entryDtor))[0..1]); - assert(dtorCount == 3); - } + dtorCount = 0; + aa2 = null; + GC.runFinalizers((cast(char*)(&entryDtor))[0..1]); + assert(dtorCount == 3); S1[2][int] aa3; aa3[0] = [S1(0),S1(2)]; aa3[1] = [S1(1),S1(3)]; - if (callStructDtorsDuringGC) - { - dtorCount = 0; - aa3 = null; - GC.runFinalizers((cast(char*)(&entryDtor))[0..1]); - assert(dtorCount == 4); - } + dtorCount = 0; + aa3 = null; + GC.runFinalizers((cast(char*)(&entryDtor))[0..1]); + assert(dtorCount == 4); } // test class finalizers exception handling @@ -2661,9 +2635,6 @@ unittest debug(SENTINEL) {} else unittest { - if (!callStructDtorsDuringGC) - return; - bool test(E)() { import core.exception; diff --git a/libphobos/testsuite/libphobos.allocations/tls_gc_integration.d b/libphobos/testsuite/libphobos.allocations/tls_gc_integration.d index 44eb40c366d..7c084abcaf1 100644 --- a/libphobos/testsuite/libphobos.allocations/tls_gc_integration.d +++ b/libphobos/testsuite/libphobos.allocations/tls_gc_integration.d 
@@ -1,4 +1,4 @@ -import core.memory, core.thread, core.bitop; +import core.memory, core.thread, core.volatile; /* * This test repeatedly performs operations on GC-allocated objects which -- 2.30.2
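
For illustration only, not part of the patch: a minimal D sketch exercising two of the merged additions above — the float4 overloads of loadUnaligned/storeUnaligned in core.simd and the volatileLoad/volatileStore intrinsics in the new core.volatile module. It assumes a compiler carrying this merge and a target where D_SIMD is defined and float4 is available; copyUnaligned is a hypothetical helper, not an API added by the patch.

version (D_SIMD)
{
    // Illustrative sketch only (not from the patch).
    import core.simd : float4, loadUnaligned, storeUnaligned;
    import core.volatile : volatileLoad, volatileStore;

    /// Copy src to dst four floats at a time, with no alignment requirement.
    void copyUnaligned(const(float)[] src, float[] dst)
    {
        assert(dst.length >= src.length);
        size_t i = 0;
        // float4 was not accepted by these templates before this merge;
        // the intrinsics emit unaligned (MOVUPS-style) accesses, so the
        // pointers need not be 16-byte aligned.
        for (; i + 4 <= src.length; i += 4)
        {
            float4 v = loadUnaligned(cast(const(float4)*)(src.ptr + i));
            storeUnaligned(cast(float4*)(dst.ptr + i), v);
        }
        foreach (j; i .. src.length)   // scalar tail
            dst[j] = src[j];
    }

    unittest
    {
        // core.volatile: the store and load below are never elided or
        // reordered relative to each other within this thread.
        uint flag;
        volatileStore(&flag, 1);
        assert(volatileLoad(&flag) == 1);

        // Odd length forces both the vector loop and the scalar tail.
        float[7] a = [1.0f, 2, 3, 4, 5, 6, 7];
        float[7] b;
        copyUnaligned(a[], b[]);
        assert(b == a);
    }
}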