x86: introduce .hfloat directive

[binutils-gdb.git] / gas / doc / c-i386.texi
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi

index 80bbcbe20f5b6a38f76810fe2cef8983a7d87dec..664237c75c9a18e514ca2927a7877b301ced8cad 100644 (file)
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -1,4 +1,4 @@
-@c Copyright (C) 1991-2019 Free Software Foundation, Inc.
+@c Copyright (C) 1991-2021 Free Software Foundation, Inc.
  @c This is part of the GAS manual.
  @c For copying conditions, see the file as.texinfo.
  @c man end
  @c This is part of the GAS manual.
  @c For copying conditions, see the file as.texinfo.
  @c man end
@@ -37,6 +37,7 @@ extending the Intel architecture to 64-bits.
  * i386-TBM::                    AMD's Trailing Bit Manipulation Instructions
  * i386-16bit::                  Writing 16-bit Code
  * i386-Arch::                   Specifying an x86 CPU architecture
  * i386-TBM::                    AMD's Trailing Bit Manipulation Instructions
  * i386-16bit::                  Writing 16-bit Code
  * i386-Arch::                   Specifying an x86 CPU architecture
+* i386-ISA::                    AMD64 ISA vs. Intel64 ISA
  * i386-Bugs::                   AT&T Syntax bugs
  * i386-Notes::                  Notes
  @end menu
  * i386-Bugs::                   AT&T Syntax bugs
  * i386-Notes::                  Notes
  @end menu
@@ -124,6 +125,7 @@ processor names are recognized:
  @code{bdver4},
  @code{znver1},
  @code{znver2},
  @code{bdver4},
  @code{znver1},
  @code{znver2},
+@code{znver3},
  @code{btver1},
  @code{btver2},
  @code{generic32} and
  @code{btver1},
  @code{btver2},
  @code{generic32} and
@@ -150,6 +152,7 @@ accept various extension mnemonics.  For example,
  @code{sse},
  @code{sse2},
  @code{sse3},
  @code{sse},
  @code{sse2},
  @code{sse3},
+@code{sse4a},
  @code{ssse3},
  @code{sse4.1},
  @code{sse4.2},
  @code{ssse3},
  @code{sse4.1},
  @code{sse4.2},
@@ -157,6 +160,7 @@ accept various extension mnemonics.  For example,
  @code{nosse},
  @code{nosse2},
  @code{nosse3},
  @code{nosse},
  @code{nosse2},
  @code{nosse3},
+@code{nosse4a},
  @code{nossse3},
  @code{nosse4.1},
  @code{nosse4.2},
  @code{nossse3},
  @code{nosse4.1},
  @code{nosse4.2},
@@ -184,6 +188,13 @@ accept various extension mnemonics.  For example,
  @code{movdiri},
  @code{movdir64b},
  @code{enqcmd},
  @code{movdiri},
  @code{movdir64b},
  @code{enqcmd},
+@code{serialize},
+@code{tsxldtrk},
+@code{kl},
+@code{nokl},
+@code{widekl},
+@code{nowidekl},
+@code{hreset},
  @code{avx512f},
  @code{avx512cd},
  @code{avx512er},
  @code{avx512f},
  @code{avx512cd},
  @code{avx512er},
@@ -199,7 +210,11 @@ accept various extension mnemonics.  For example,
  @code{avx512_vbmi2},
  @code{avx512_vnni},
  @code{avx512_bitalg},
  @code{avx512_vbmi2},
  @code{avx512_vnni},
  @code{avx512_bitalg},
+@code{avx512_vp2intersect},
+@code{tdx},
  @code{avx512_bf16},
  @code{avx512_bf16},
+@code{avx_vnni},
+@code{avx512_fp16},
  @code{noavx512f},
  @code{noavx512cd},
  @code{noavx512er},
  @code{noavx512f},
  @code{noavx512cd},
  @code{noavx512er},
@@ -216,8 +231,21 @@ accept various extension mnemonics.  For example,
  @code{noavx512_vnni},
  @code{noavx512_bitalg},
  @code{noavx512_vp2intersect},
  @code{noavx512_vnni},
  @code{noavx512_bitalg},
  @code{noavx512_vp2intersect},
+@code{notdx},
  @code{noavx512_bf16},
  @code{noavx512_bf16},
+@code{noavx_vnni},
+@code{noavx512_fp16},
  @code{noenqcmd},
  @code{noenqcmd},
+@code{noserialize},
+@code{notsxldtrk},
+@code{amx_int8},
+@code{noamx_int8},
+@code{amx_bf16},
+@code{noamx_bf16},
+@code{amx_tile},
+@code{noamx_tile},
+@code{nouintr},
+@code{nohreset},
  @code{vmx},
  @code{vmfunc},
  @code{smx},
  @code{vmx},
  @code{vmfunc},
  @code{smx},
@@ -235,6 +263,7 @@ accept various extension mnemonics.  For example,
  @code{movbe},
  @code{ept},
  @code{lzcnt},
  @code{movbe},
  @code{ept},
  @code{lzcnt},
+@code{popcnt},
  @code{hle},
  @code{rtm},
  @code{invpcid},
  @code{hle},
  @code{rtm},
  @code{invpcid},
@@ -244,7 +273,11 @@ accept various extension mnemonics.  For example,
  @code{wbnoinvd},
  @code{pconfig},
  @code{waitpkg},
  @code{wbnoinvd},
  @code{pconfig},
  @code{waitpkg},
+@code{uintr},
  @code{cldemote},
  @code{cldemote},
+@code{rdpru},
+@code{mcommit},
+@code{sev_es},
  @code{lwp},
  @code{fma4},
  @code{xop},
  @code{lwp},
  @code{fma4},
  @code{xop},
@@ -255,8 +288,10 @@ accept various extension mnemonics.  For example,
  @code{3dnowa},
  @code{sse4a},
  @code{sse5},
  @code{3dnowa},
  @code{sse4a},
  @code{sse5},
-@code{svme},
-@code{abm} and
+@code{snp},
+@code{invlpgb},
+@code{tlbsync},
+@code{svme} and
  @code{padlock}.
  Note that rather than extending a basic instruction set, the extension
  mnemonics starting with @code{no} revoke the respective functionality.
  @code{padlock}.
  Note that rather than extending a basic instruction set, the extension
  mnemonics starting with @code{no} revoke the respective functionality.
@@ -380,9 +415,10 @@ with default visibility can be preempted.  The resulting code is
  slightly bigger.  This option only affects the handling of branch
  instructions.
  
  slightly bigger.  This option only affects the handling of branch
  instructions.
  
+@cindex @samp{-mbig-obj} option, i386
  @cindex @samp{-mbig-obj} option, x86-64
  @item -mbig-obj
  @cindex @samp{-mbig-obj} option, x86-64
  @item -mbig-obj
-On x86-64 PE/COFF target this option forces the use of big object file
+On PE/COFF target this option forces the use of big object file
  format, which allows more than 32768 sections.
  
  @cindex @samp{-momit-lock-prefix=} option, i386
  format, which allows more than 32768 sections.
  
  @cindex @samp{-momit-lock-prefix=} option, i386
@@ -421,6 +457,90 @@ R_X86_64_REX_GOTPCRELX, in 64-bit mode.
  relocations.  The default can be controlled by a configure option
  @option{--enable-x86-relax-relocations}.
  
  relocations.  The default can be controlled by a configure option
  @option{--enable-x86-relax-relocations}.
  
+@cindex @samp{-malign-branch-boundary=} option, i386
+@cindex @samp{-malign-branch-boundary=} option, x86-64
+@item -malign-branch-boundary=@var{NUM}
+This option controls how the assembler should align branches with segment
+prefixes or NOP.  @var{NUM} must be a power of 2.  It should be 0 or
+no less than 16.  Branches will be aligned within @var{NUM} byte
+boundary.  @option{-malign-branch-boundary=0}, which is the default,
+doesn't align branches.
+
+@cindex @samp{-malign-branch=} option, i386
+@cindex @samp{-malign-branch=} option, x86-64
+@item -malign-branch=@var{TYPE}[+@var{TYPE}...]
+This option specifies types of branches to align.  @var{TYPE} is a
+combination of @samp{jcc}, which aligns conditional jumps,
+@samp{fused}, which aligns fused conditional jumps, @samp{jmp},
+which aligns unconditional jumps, @samp{call}, which aligns calls,
+@samp{ret}, which aligns rets, @samp{indirect}, which aligns indirect
+jumps and calls.  The default is @option{-malign-branch=jcc+fused+jmp}.
+
+@cindex @samp{-malign-branch-prefix-size=} option, i386
+@cindex @samp{-malign-branch-prefix-size=} option, x86-64
+@item -malign-branch-prefix-size=@var{NUM}
+This option specifies the maximum number of prefixes on an instruction
+to align branches.  @var{NUM} should be between 0 and 5.  The default
+@var{NUM} is 5.
+
+@cindex @samp{-mbranches-within-32B-boundaries} option, i386
+@cindex @samp{-mbranches-within-32B-boundaries} option, x86-64
+@item -mbranches-within-32B-boundaries
+This option aligns conditional jumps, fused conditional jumps and
+unconditional jumps within 32 byte boundary with up to 5 segment prefixes
+on an instruction.  It is equivalent to
+@option{-malign-branch-boundary=32}
+@option{-malign-branch=jcc+fused+jmp}
+@option{-malign-branch-prefix-size=5}.
+The default doesn't align branches.
+
+@cindex @samp{-mlfence-after-load=} option, i386
+@cindex @samp{-mlfence-after-load=} option, x86-64
+@item -mlfence-after-load=@var{no}
+@itemx -mlfence-after-load=@var{yes}
+These options control whether the assembler should generate lfence
+after load instructions.  @option{-mlfence-after-load=@var{yes}} will
+generate lfence.  @option{-mlfence-after-load=@var{no}} will not generate
+lfence, which is the default.
+
+@cindex @samp{-mlfence-before-indirect-branch=} option, i386
+@cindex @samp{-mlfence-before-indirect-branch=} option, x86-64
+@item -mlfence-before-indirect-branch=@var{none}
+@itemx -mlfence-before-indirect-branch=@var{all}
+@itemx -mlfence-before-indirect-branch=@var{register}
+@itemx -mlfence-before-indirect-branch=@var{memory}
+These options control whether the assembler should generate lfence
+before indirect near branch instructions.
+@option{-mlfence-before-indirect-branch=@var{all}} will generate lfence
+before indirect near branch via register and issue a warning before
+indirect near branch via memory.
+It also implicitly sets @option{-mlfence-before-ret=@var{shl}} when
+there's no explicit @option{-mlfence-before-ret=}.
+@option{-mlfence-before-indirect-branch=@var{register}} will generate
+lfence before indirect near branch via register.
+@option{-mlfence-before-indirect-branch=@var{memory}} will issue a
+warning before indirect near branch via memory.
+@option{-mlfence-before-indirect-branch=@var{none}} will not generate
+lfence nor issue warning, which is the default.  Note that lfence won't
+be generated before indirect near branch via register with
+@option{-mlfence-after-load=@var{yes}} since lfence will be generated
+after loading branch target register.
+
+@cindex @samp{-mlfence-before-ret=} option, i386
+@cindex @samp{-mlfence-before-ret=} option, x86-64
+@item -mlfence-before-ret=@var{none}
+@itemx -mlfence-before-ret=@var{shl}
+@itemx -mlfence-before-ret=@var{or}
+@itemx -mlfence-before-ret=@var{yes}
+@itemx -mlfence-before-ret=@var{not}
+These options control whether the assembler should generate lfence
+before ret.  @option{-mlfence-before-ret=@var{or}} will generate
+or instruction with lfence.
+@option{-mlfence-before-ret=@var{shl/yes}} will generate shl instruction
+with lfence. @option{-mlfence-before-ret=@var{not}} will generate not
+instruction with lfence. @option{-mlfence-before-ret=@var{none}} will not
+generate lfence, which is the default.
+
  @cindex @samp{-mx86-used-note=} option, i386
  @cindex @samp{-mx86-used-note=} option, x86-64
  @item -mx86-used-note=@var{no}
  @cindex @samp{-mx86-used-note=} option, i386
  @cindex @samp{-mx86-used-note=} option, x86-64
  @item -mx86-used-note=@var{no}
@@ -448,7 +568,8 @@ with 01, 10 and 11 RC bits, respectively.
  @item -mamd64
  @itemx -mintel64
  This option specifies that the assembler should accept only AMD64 or
  @item -mamd64
  @itemx -mintel64
  This option specifies that the assembler should accept only AMD64 or
-Intel64 ISA in 64-bit mode.  The default is to accept both.
+Intel64 ISA in 64-bit mode.  The default is to accept common, Intel64
+only and AMD64 ISAs.
  
  @cindex @samp{-O0} option, i386
  @cindex @samp{-O0} option, x86-64
  
  @cindex @samp{-O0} option, i386
  @cindex @samp{-O0} option, x86-64
@@ -465,13 +586,21 @@ Optimize instruction encoding with smaller instruction size.  @samp{-O}
  and @samp{-O1} encode 64-bit register load instructions with 64-bit
  immediate as 32-bit register load instructions with 31-bit or 32-bits
  immediates, encode 64-bit register clearing instructions with 32-bit
  and @samp{-O1} encode 64-bit register load instructions with 64-bit
  immediate as 32-bit register load instructions with 31-bit or 32-bits
  immediates, encode 64-bit register clearing instructions with 32-bit
-register clearing instructions and encode 256-bit/512-bit VEX/EVEX
-vector register clearing instructions with 128-bit VEX vector register
-clearing instructions as well as encode 128-bit/256-bit EVEX vector
+register clearing instructions, encode 256-bit/512-bit VEX/EVEX vector
+register clearing instructions with 128-bit VEX vector register
+clearing instructions, encode 128-bit/256-bit EVEX vector
  register load/store instructions with VEX vector register load/store
  register load/store instructions with VEX vector register load/store
-instructions.  @samp{-O2} includes @samp{-O1} optimization plus
-encodes 256-bit/512-bit EVEX vector register clearing instructions with
-128-bit EVEX vector register clearing instructions.
+instructions, and encode 128-bit/256-bit EVEX packed integer logical
+instructions with 128-bit/256-bit VEX packed integer logical instructions.
+
+@samp{-O2} includes @samp{-O1} optimization plus encodes
+256-bit/512-bit EVEX vector register clearing instructions with 128-bit
+EVEX vector register clearing instructions.  In 64-bit mode VEX encoded
+instructions with commutative source operands will also have their
+source operands swapped if this allows using the 2-byte VEX prefix form
+instead of the 3-byte one.  Certain forms of AND as well as OR with the
+same (register) operand specified twice will also be changed to TEST.
+
  @samp{-Os} includes @samp{-O2} optimization plus encodes 16-bit, 32-bit
  and 64-bit register tests with immediate as 8-bit register test with
  immediate.  @samp{-O0} turns off this optimization.
  @samp{-Os} includes @samp{-O2} optimization plus encodes 16-bit, 32-bit
  and 64-bit register tests with immediate as 8-bit register test with
  immediate.  @samp{-O0} turns off this optimization.
@@ -508,6 +637,12 @@ The directive is intended to be used for data which requires a large
  amount of space, and it is only available for ELF based x86_64
  targets.
  
  amount of space, and it is only available for ELF based x86_64
  targets.
  
+@cindex @code{value} directive
+@item .value @var{expression} [, @var{expression}]
+This directive behaves in the same way as the @code{.short} directive,
+taking a series of comma separated expressions and storing them as
+two-byte wide values into the current section.
+
  @c FIXME: Document other x86 specific directives ?  Eg: .code16gcc,
  
  @end table
  @c FIXME: Document other x86 specific directives ?  Eg: .code16gcc,
  
  @end table
@@ -670,21 +805,30 @@ assembler which assumes that a missing mnemonic suffix implies long
  operand size.  (This incompatibility does not affect compiler output
  since compilers always explicitly specify the mnemonic suffix.)
  
  operand size.  (This incompatibility does not affect compiler output
  since compilers always explicitly specify the mnemonic suffix.)
  
-Almost all instructions have the same names in AT&T and Intel format.
-There are a few exceptions.  The sign extend and zero extend
-instructions need two sizes to specify them.  They need a size to
-sign/zero extend @emph{from} and a size to zero extend @emph{to}.  This
-is accomplished by using two instruction mnemonic suffixes in AT&T
-syntax.  Base names for sign extend and zero extend are
-@samp{movs@dots{}} and @samp{movz@dots{}} in AT&T syntax (@samp{movsx}
-and @samp{movzx} in Intel syntax).  The instruction mnemonic suffixes
-are tacked on to this base name, the @emph{from} suffix before the
-@emph{to} suffix.  Thus, @samp{movsbl %al, %edx} is AT&T syntax for
-``move sign extend @emph{from} %al @emph{to} %edx.''  Possible suffixes,
-thus, are @samp{bl} (from byte to long), @samp{bw} (from byte to word),
-@samp{wl} (from word to long), @samp{bq} (from byte to quadruple word),
-@samp{wq} (from word to quadruple word), and @samp{lq} (from long to
-quadruple word).
+When there is no sizing suffix and no (suitable) register operands to
+deduce the size of memory operands, with a few exceptions and where long
+operand size is possible in the first place, operand size will default
+to long in 32- and 64-bit modes.  Similarly it will default to short in
+16-bit mode. Noteworthy exceptions are
+
+@itemize @bullet
+@item
+Instructions with an implicit on-stack operand as well as branches,
+which default to quad in 64-bit mode.
+
+@item
+Sign- and zero-extending moves, which default to byte size source
+operands.
+
+@item
+Floating point insns with integer operands, which default to short (for
+perhaps historical reasons).
+
+@item
+CRC32 with a 64-bit destination, which defaults to a quad source
+operand.
+
+@end itemize
  
  @cindex encoding options, i386
  @cindex encoding options, x86-64
  
  @cindex encoding options, i386
  @cindex encoding options, x86-64
@@ -698,6 +842,9 @@ Different encoding options can be specified via pseudo prefixes:
  @item
  @samp{@{disp32@}} -- prefer 32-bit displacement.
  
  @item
  @samp{@{disp32@}} -- prefer 32-bit displacement.
  
+@item
+@samp{@{disp16@}} -- prefer 16-bit displacement.
+
  @item
  @samp{@{load@}} -- prefer load-form instruction.
  
  @item
  @samp{@{load@}} -- prefer load-form instruction.
  
@@ -705,10 +852,10 @@ Different encoding options can be specified via pseudo prefixes:
  @samp{@{store@}} -- prefer store-form instruction.
  
  @item
  @samp{@{store@}} -- prefer store-form instruction.
  
  @item
-@samp{@{vex2@}} -- prefer 2-byte VEX prefix for VEX instruction.
+@samp{@{vex@}} --  encode with VEX prefix.
  
  @item
  
  @item
-@samp{@{vex3@}} -- prefer 3-byte VEX prefix for VEX instruction.
+@samp{@{vex3@}} -- encode with 3-byte VEX prefix.
  
  @item
  @samp{@{evex@}} --  encode with EVEX prefix.
  
  @item
  @samp{@{evex@}} --  encode with EVEX prefix.
@@ -722,6 +869,10 @@ prefix which generates REX prefix unconditionally.
  @samp{@{nooptimize@}} -- disable instruction size optimization.
  @end itemize
  
  @samp{@{nooptimize@}} -- disable instruction size optimization.
  @end itemize
  
+Mnemonics of Intel VNNI instructions are encoded with the EVEX prefix
+by default.  The pseudo @samp{@{vex@}} prefix can be used to encode
+mnemonics of Intel VNNI instructions with the VEX prefix.
+
  @cindex conversion instructions, i386
  @cindex i386 conversion instructions
  @cindex conversion instructions, x86-64
  @cindex conversion instructions, i386
  @cindex i386 conversion instructions
  @cindex conversion instructions, x86-64
@@ -755,6 +906,59 @@ are called @samp{cbtw}, @samp{cwtl}, @samp{cwtd}, @samp{cltd}, @samp{cltq}, and
  @samp{cqto} in AT&T naming.  @code{@value{AS}} accepts either naming for these
  instructions.
  
  @samp{cqto} in AT&T naming.  @code{@value{AS}} accepts either naming for these
  instructions.
  
+@cindex extension instructions, i386
+@cindex i386 extension instructions
+@cindex extension instructions, x86-64
+@cindex x86-64 extension instructions
+The Intel-syntax extension instructions
+
+@itemize @bullet
+@item
+@samp{movsx} --- sign-extend @samp{reg8/mem8} to @samp{reg16}.
+
+@item
+@samp{movsx} --- sign-extend @samp{reg8/mem8} to @samp{reg32}.
+
+@item
+@samp{movsx} --- sign-extend @samp{reg8/mem8} to @samp{reg64}
+(x86-64 only).
+
+@item
+@samp{movsx} --- sign-extend @samp{reg16/mem16} to @samp{reg32}.
+
+@item
+@samp{movsx} --- sign-extend @samp{reg16/mem16} to @samp{reg64}
+(x86-64 only).
+
+@item
+@samp{movsxd} --- sign-extend @samp{reg32/mem32} to @samp{reg64}
+(x86-64 only).
+
+@item
+@samp{movzx} --- zero-extend @samp{reg8/mem8} to @samp{reg16}.
+
+@item
+@samp{movzx} --- zero-extend @samp{reg8/mem8} to @samp{reg32}.
+
+@item
+@samp{movzx} --- zero-extend @samp{reg8/mem8} to @samp{reg64}
+(x86-64 only).
+
+@item
+@samp{movzx} --- zero-extend @samp{reg16/mem16} to @samp{reg32}.
+
+@item
+@samp{movzx} --- zero-extend @samp{reg16/mem16} to @samp{reg64}
+(x86-64 only).
+@end itemize
+
+@noindent
+are called @samp{movsbw/movsxb/movsx}, @samp{movsbl/movsxb/movsx},
+@samp{movsbq/movsxb/movsx}, @samp{movswl/movsxw}, @samp{movswq/movsxw},
+@samp{movslq/movsxl}, @samp{movzbw/movzxb/movzx},
+@samp{movzbl/movzxb/movzx}, @samp{movzbq/movzxb/movzx},
+@samp{movzwl/movzxw} and @samp{movzwq/movzxw} in AT&T syntax.
+
  @cindex jump instructions, i386
  @cindex call instructions, i386
  @cindex jump instructions, x86-64
  @cindex jump instructions, i386
  @cindex call instructions, i386
  @cindex jump instructions, x86-64
@@ -778,6 +982,12 @@ Several x87 instructions, @samp{fadd}, @samp{fdiv}, @samp{fdivp},
  assembler with different mnemonics from those in Intel IA32 specification.
  @code{@value{GCC}} generates those instructions with AT&T mnemonic.
  
  assembler with different mnemonics from those in Intel IA32 specification.
  @code{@value{GCC}} generates those instructions with AT&T mnemonic.
  
+@itemize @bullet
+@item @samp{movslq} with AT&T mnemonic only accepts 64-bit destination
+register.  @samp{movsxd} should be used to encode 16-bit or 32-bit
+destination register with both AT&T and Intel mnemonics.
+@end itemize
+
  @node i386-Regs
  @section Register Naming
  
  @node i386-Regs
  @section Register Naming
  
@@ -875,16 +1085,6 @@ available in 32-bit mode).  The bottom 128 bits are overlaid with the
  
  @end itemize
  
  
  @end itemize
  
-The AVX2 extensions made in 64-bit mode more registers available:
-
-@itemize @bullet
-
-@item
-the 16 128-bit registers @samp{%xmm16}--@samp{%xmm31} and the 16 256-bit
-registers @samp{%ymm16}--@samp{%ymm31}.
-
-@end itemize
-
  The AVX512 extensions added the following registers:
  
  @itemize @bullet
  The AVX512 extensions added the following registers:
  
  @itemize @bullet
@@ -1113,18 +1313,21 @@ data type.  Constructors build these data types into memory.
  @cindex @code{single} directive, i386
  @cindex @code{double} directive, i386
  @cindex @code{tfloat} directive, i386
  @cindex @code{single} directive, i386
  @cindex @code{double} directive, i386
  @cindex @code{tfloat} directive, i386
+@cindex @code{hfloat} directive, i386
  @cindex @code{float} directive, x86-64
  @cindex @code{single} directive, x86-64
  @cindex @code{double} directive, x86-64
  @cindex @code{tfloat} directive, x86-64
  @cindex @code{float} directive, x86-64
  @cindex @code{single} directive, x86-64
  @cindex @code{double} directive, x86-64
  @cindex @code{tfloat} directive, x86-64
+@cindex @code{hfloat} directive, x86-64
  @itemize @bullet
  @item
  Floating point constructors are @samp{.float} or @samp{.single},
  @itemize @bullet
  @item
  Floating point constructors are @samp{.float} or @samp{.single},
-@samp{.double}, and @samp{.tfloat} for 32-, 64-, and 80-bit formats.
-These correspond to instruction mnemonic suffixes @samp{s}, @samp{l},
-and @samp{t}. @samp{t} stands for 80-bit (ten byte) real.  The 80387
-only supports this format via the @samp{fldt} (load 80-bit real to stack
-top) and @samp{fstpt} (store 80-bit real and pop stack) instructions.
+@samp{.double}, @samp{.tfloat}, and @samp{.hfloat} for 32-, 64-, 80-, and
+16-bit formats respectively.  The first three correspond to instruction
+mnemonic suffixes @samp{s}, @samp{l}, and @samp{t}. @samp{t} stands for
+80-bit (ten byte) real.  The 80387 only supports this format via the
+@samp{fldt} (load 80-bit real to stack top) and @samp{fstpt} (store 80-bit
+real and pop stack) instructions.
  
  @cindex @code{word} directive, i386
  @cindex @code{long} directive, i386
  
  @cindex @code{word} directive, i386
  @cindex @code{long} directive, i386
@@ -1137,7 +1340,7 @@ top) and @samp{fstpt} (store 80-bit real and pop stack) instructions.
  @item
  Integer constructors are @samp{.word}, @samp{.long} or @samp{.int}, and
  @samp{.quad} for the 16-, 32-, and 64-bit integer formats.  The
  @item
  Integer constructors are @samp{.word}, @samp{.long} or @samp{.int}, and
  @samp{.quad} for the 16-, 32-, and 64-bit integer formats.  The
-corresponding instruction mnemonic suffixes are @samp{s} (single),
+corresponding instruction mnemonic suffixes are @samp{s} (short),
  @samp{l} (long), and @samp{q} (quad).  As with the 80-bit real format,
  the 64-bit @samp{q} format is only present in the @samp{fildq} (load
  quad integer to stack top) and @samp{fistpq} (store quad integer and pop
  @samp{l} (long), and @samp{q} (quad).  As with the 80-bit real format,
  the 64-bit @samp{q} format is only present in the @samp{fildq} (load
  quad integer to stack top) and @samp{fistpq} (store quad integer and pop
@@ -1302,16 +1505,17 @@ supported on the CPU specified.  The choices for @var{cpu_type} are:
  @item @samp{corei7} @tab @samp{l1om} @tab @samp{k1om} @tab @samp{iamcu}
  @item @samp{k6} @tab @samp{k6_2} @tab @samp{athlon} @tab @samp{k8}
  @item @samp{amdfam10} @tab @samp{bdver1} @tab @samp{bdver2} @tab @samp{bdver3}
  @item @samp{corei7} @tab @samp{l1om} @tab @samp{k1om} @tab @samp{iamcu}
  @item @samp{k6} @tab @samp{k6_2} @tab @samp{athlon} @tab @samp{k8}
  @item @samp{amdfam10} @tab @samp{bdver1} @tab @samp{bdver2} @tab @samp{bdver3}
-@item @samp{bdver4} @tab @samp{znver1} @tab @samp{znver2} @tab @samp{btver1}
-@item @samp{btver2} @tab @samp{generic32} @tab @samp{generic64}
+@item @samp{bdver4} @tab @samp{znver1} @tab @samp{znver2} @tab @samp{znver3}
+@item @samp{btver1} @tab @samp{btver2} @tab @samp{generic32} @tab @samp{generic64}
  @item @samp{.cmov} @tab @samp{.fxsr} @tab @samp{.mmx}
  @item @samp{.cmov} @tab @samp{.fxsr} @tab @samp{.mmx}
-@item @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3}
+@item @samp{.sse} @tab @samp{.sse2} @tab @samp{.sse3} @tab @samp{.sse4a}
  @item @samp{.ssse3} @tab @samp{.sse4.1} @tab @samp{.sse4.2} @tab @samp{.sse4}
  @item @samp{.avx} @tab @samp{.vmx} @tab @samp{.smx} @tab @samp{.ept}
  @item @samp{.clflush} @tab @samp{.movbe} @tab @samp{.xsave} @tab @samp{.xsaveopt}
  @item @samp{.aes} @tab @samp{.pclmul} @tab @samp{.fma} @tab @samp{.fsgsbase}
  @item @samp{.rdrnd} @tab @samp{.f16c} @tab @samp{.avx2} @tab @samp{.bmi2}
  @item @samp{.ssse3} @tab @samp{.sse4.1} @tab @samp{.sse4.2} @tab @samp{.sse4}
  @item @samp{.avx} @tab @samp{.vmx} @tab @samp{.smx} @tab @samp{.ept}
  @item @samp{.clflush} @tab @samp{.movbe} @tab @samp{.xsave} @tab @samp{.xsaveopt}
  @item @samp{.aes} @tab @samp{.pclmul} @tab @samp{.fma} @tab @samp{.fsgsbase}
  @item @samp{.rdrnd} @tab @samp{.f16c} @tab @samp{.avx2} @tab @samp{.bmi2}
-@item @samp{.lzcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc} @tab @samp{.hle}
+@item @samp{.lzcnt} @tab @samp{.popcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc}
+@item @samp{.hle}
  @item @samp{.rtm} @tab @samp{.adx} @tab @samp{.rdseed} @tab @samp{.prfchw}
  @item @samp{.smap} @tab @samp{.mpx} @tab @samp{.sha} @tab @samp{.prefetchwt1}
  @item @samp{.clflushopt} @tab @samp{.xsavec} @tab @samp{.xsaves} @tab @samp{.se1}
  @item @samp{.rtm} @tab @samp{.adx} @tab @samp{.rdseed} @tab @samp{.prfchw}
  @item @samp{.smap} @tab @samp{.mpx} @tab @samp{.sha} @tab @samp{.prefetchwt1}
  @item @samp{.clflushopt} @tab @samp{.xsavec} @tab @samp{.xsaves} @tab @samp{.se1}
@@ -1320,14 +1524,19 @@ supported on the CPU specified.  The choices for @var{cpu_type} are:
  @item @samp{.avx512vbmi} @tab @samp{.avx512_4fmaps} @tab @samp{.avx512_4vnniw}
  @item @samp{.avx512_vpopcntdq} @tab @samp{.avx512_vbmi2} @tab @samp{.avx512_vnni}
  @item @samp{.avx512_bitalg} @tab @samp{.avx512_bf16} @tab @samp{.avx512_vp2intersect}
  @item @samp{.avx512vbmi} @tab @samp{.avx512_4fmaps} @tab @samp{.avx512_4vnniw}
  @item @samp{.avx512_vpopcntdq} @tab @samp{.avx512_vbmi2} @tab @samp{.avx512_vnni}
  @item @samp{.avx512_bitalg} @tab @samp{.avx512_bf16} @tab @samp{.avx512_vp2intersect}
+@item @samp{.tdx} @tab @samp{.avx_vnni}  @tab @samp{.avx512_fp16}
  @item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @item @samp{.ibt}
  @item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
  @item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
  @item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @item @samp{.ibt}
  @item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
  @item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
-@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd}
+@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
+@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_tile}
+@item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
  @item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
  @item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
-@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme} @tab @samp{.abm}
+@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
  @item @samp{.lwp} @tab @samp{.fma4} @tab @samp{.xop} @tab @samp{.cx16}
  @item @samp{.lwp} @tab @samp{.fma4} @tab @samp{.xop} @tab @samp{.cx16}
-@item @samp{.padlock} @tab @samp{.clzero} @tab @samp{.mwaitx}
+@item @samp{.padlock} @tab @samp{.clzero} @tab @samp{.mwaitx} @tab @samp{.rdpru}
+@item @samp{.mcommit} @tab @samp{.sev_es} @tab @samp{.snp} @tab @samp{.invlpgb}
+@item @samp{.tlbsync}
  @end multitable
  
  Apart from the warning, there are only two other effects on
  @end multitable
  
  Apart from the warning, there are only two other effects on
@@ -1359,6 +1568,29 @@ For example
   .arch i8086,nojumps
  @end smallexample
  
   .arch i8086,nojumps
  @end smallexample
  
+@node i386-ISA
+@section AMD64 ISA vs. Intel64 ISA
+
+There are some discrepancies between AMD64 and Intel64 ISAs.
+
+@itemize @bullet
+@item For @samp{movsxd} with 16-bit destination register, AMD64
+supports 32-bit source operand and Intel64 supports 16-bit source
+operand.
+
+@item For far branches (with explicit memory operand), both ISAs support
+32- and 16-bit operand size.  Intel64 additionally supports 64-bit
+operand size, encoded as @samp{ljmpq} and @samp{lcallq} in AT&T syntax
+and with an explicit @samp{tbyte ptr} operand size specifier in Intel
+syntax.
+
+@item @samp{lfs}, @samp{lgs}, and @samp{lss} similarly allow for 16-
+and 32-bit operand size (32- and 48-bit memory operand) in both ISAs,
+while Intel64 additionally supports 64-bit operand size (80-bit memory
+operands).
+
+@end itemize
+
  @node i386-Bugs
  @section AT&T Syntax bugs
  
  @node i386-Bugs
  @section AT&T Syntax bugs