x86-64: REX.W overrides DATA_PREFIX
/* tc-i386.c -- Assemble code for the Intel 80386
   Copyright (C) 1989-2023 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */

/* Intel 80386 machine specific gas.
   Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
   x86_64 support by Jan Hubicka (jh@suse.cz)
   VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
   Bugs & suggestions are completely welcome.  This is free software.
   Please help us make it better.  */

#include "as.h"
#include "safe-ctype.h"
#include "subsegs.h"
#include "dwarf2dbg.h"
#include "dw2gencfi.h"
#include "gen-sframe.h"
#include "sframe.h"
#include "elf/x86-64.h"
#include "opcodes/i386-init.h"
#include "opcodes/i386-mnem.h"
#include <limits.h>

#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif

#ifndef DEFAULT_ARCH
#define DEFAULT_ARCH "i386"
#endif

#ifndef INLINE
#if __GNUC__ >= 2
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

/* Prefixes will be emitted in the order defined below.
   WAIT_PREFIX must be the first prefix since FWAIT is really an
   instruction, and so must come before any prefixes.
   The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.  */
#define WAIT_PREFIX	0
#define SEG_PREFIX	1
#define ADDR_PREFIX	2
#define DATA_PREFIX	3
#define REP_PREFIX	4
#define HLE_PREFIX	REP_PREFIX
#define BND_PREFIX	REP_PREFIX
#define LOCK_PREFIX	5
#define REX_PREFIX	6	/* must come last.  */
#define MAX_PREFIXES	7	/* max prefixes per opcode */
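
/* Illustration (not taken from the sources): assembling
   "lock addw $1, %fs:(%eax)" in 32-bit mode places 0x64 in the
   SEG_PREFIX slot, 0x66 in the DATA_PREFIX slot and 0xf0 in the
   LOCK_PREFIX slot, so the prefixes come out as 64 66 f0 <opcode...>,
   i.e. in slot order rather than in source order.  */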

/* we define the syntax here (modulo base,index,scale syntax) */
#define REGISTER_PREFIX '%'
#define IMMEDIATE_PREFIX '$'
#define ABSOLUTE_PREFIX '*'

/* these are the instruction mnemonic suffixes in AT&T syntax or
   memory operand size in Intel syntax.  */
#define WORD_MNEM_SUFFIX  'w'
#define BYTE_MNEM_SUFFIX  'b'
#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX  'l'
#define QWORD_MNEM_SUFFIX 'q'

#define END_OF_INSN '\0'

#define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }

/* This matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding

/*
  'templates' is for grouping together 'template' structures for opcodes
  of the same name.  This is only used for storing the insns in the grand
  ole hash table of insns.
  The templates themselves start at START and range up to (but not including)
  END.
  */
typedef struct
{
  const insn_template *start;
  const insn_template *end;
}
templates;

/* 386 operand encoding bytes:  see 386 book for details of this.  */
typedef struct
{
  unsigned int regmem;	/* codes register or memory operand */
  unsigned int reg;	/* codes register operand (or extended opcode) */
  unsigned int mode;	/* how to interpret regmem & reg */
}
modrm_byte;

/* x86-64 extension prefix.  */
typedef int rex_byte;

/* 386 opcode byte to code indirect addressing.  */
typedef struct
{
  unsigned base;
  unsigned index;
  unsigned scale;
}
sib_byte;

/* x86 arch names, types and features */
typedef struct
{
  const char *name;		/* arch name */
  unsigned int len:8;		/* arch string length */
  bool skip:1;			/* show_arch should skip this. */
  enum processor_type type;	/* arch type */
  enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
  i386_cpu_flags enable;	/* cpu feature enable flags */
  i386_cpu_flags disable;	/* cpu feature disable flags */
}
arch_entry;

static void update_code_flag (int, int);
static void s_insn (int);
static void set_code_flag (int);
static void set_16bit_gcc_code_flag (int);
static void set_intel_syntax (int);
static void set_intel_mnemonic (int);
static void set_allow_index_reg (int);
static void set_check (int);
static void set_cpu_arch (int);
#ifdef TE_PE
static void pe_directive_secrel (int);
static void pe_directive_secidx (int);
#endif
static void signed_cons (int);
static char *output_invalid (int c);
static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
				    const char *);
static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
				       const char *);
static int i386_att_operand (char *);
static int i386_intel_operand (char *, int);
static int i386_intel_simplify (expressionS *);
static int i386_intel_parse_name (const char *, expressionS *);
static const reg_entry *parse_register (const char *, char **);
static const char *parse_insn (const char *, char *, bool);
static char *parse_operands (char *, const char *);
static void swap_operands (void);
static void swap_2_operands (unsigned int, unsigned int);
static enum flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static bool optimize_disp (const insn_template *t);
static const insn_template *match_template (char);
static int check_string (void);
static int process_suffix (void);
static int check_byte_reg (void);
static int check_long_reg (void);
static int check_qword_reg (void);
static int check_word_reg (void);
static int finalize_imm (void);
static int process_operands (void);
static const reg_entry *build_modrm_byte (void);
static void output_insn (void);
static void output_imm (fragS *, offsetT);
static void output_disp (fragS *, offsetT);
#ifndef I386COFF
static void s_bss (int);
#endif
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
static void handle_large_common (int small ATTRIBUTE_UNUSED);

/* GNU_PROPERTY_X86_ISA_1_USED.  */
static unsigned int x86_isa_1_used;
/* GNU_PROPERTY_X86_FEATURE_2_USED.  */
static unsigned int x86_feature_2_used;
/* Generate x86 used ISA and feature properties.  */
static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
#endif

static const char *default_arch = DEFAULT_ARCH;

/* parse_register() returns this when a register alias cannot be used.  */
static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
				   { Dw2Inval, Dw2Inval } };

static const reg_entry *reg_eax;
static const reg_entry *reg_ds;
static const reg_entry *reg_es;
static const reg_entry *reg_ss;
static const reg_entry *reg_st0;
static const reg_entry *reg_k0;

/* VEX prefix.  */
typedef struct
{
  /* The VEX prefix is either 2 or 3 bytes long; EVEX is 4 bytes.  */
  unsigned char bytes[4];
  unsigned int length;
  /* Destination or source register specifier.  */
  const reg_entry *register_specifier;
} vex_prefix;

/* 'md_assemble ()' gathers together information and puts it into a
   i386_insn.  */

union i386_op
{
  expressionS *disps;
  expressionS *imms;
  const reg_entry *regs;
};

enum i386_error
{
  no_error, /* Must be first.  */
  operand_size_mismatch,
  operand_type_mismatch,
  register_type_mismatch,
  number_of_operands_mismatch,
  invalid_instruction_suffix,
  bad_imm4,
  unsupported_with_intel_mnemonic,
  unsupported_syntax,
  unsupported,
  unsupported_on_arch,
  unsupported_64bit,
  invalid_sib_address,
  invalid_vsib_address,
  invalid_vector_register_set,
  invalid_tmm_register_set,
  invalid_dest_and_src_register_set,
  unsupported_vector_index_register,
  unsupported_broadcast,
  broadcast_needed,
  unsupported_masking,
  mask_not_on_destination,
  no_default_mask,
  unsupported_rc_sae,
  invalid_register_operand,
};

struct _i386_insn
{
  /* TM holds the template for the insn we're currently assembling.  */
  insn_template tm;

  /* SUFFIX holds the instruction size suffix for byte, word, dword
     or qword, if given.  */
  char suffix;

  /* OPCODE_LENGTH holds the number of base opcode bytes.  */
  unsigned char opcode_length;

  /* OPERANDS gives the number of given operands.  */
  unsigned int operands;

  /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
     of given register, displacement, memory operands and immediate
     operands.  */
  unsigned int reg_operands, disp_operands, mem_operands, imm_operands;

  /* TYPES [i] is the type (see above #defines) which tells us how to
     use OP[i] for the corresponding operand.  */
  i386_operand_type types[MAX_OPERANDS];

  /* Displacement expression, immediate expression, or register for each
     operand.  */
  union i386_op op[MAX_OPERANDS];

  /* Flags for operands.  */
  unsigned int flags[MAX_OPERANDS];
#define Operand_PCrel 1
#define Operand_Mem   2
#define Operand_Signed 4 /* .insn only */

  /* Relocation type for operand */
  enum bfd_reloc_code_real reloc[MAX_OPERANDS];

  /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
     the base index byte below.  */
  const reg_entry *base_reg;
  const reg_entry *index_reg;
  unsigned int log2_scale_factor;

  /* SEG gives the seg_entries of this insn.  They are zero unless
     explicit segment overrides are given.  */
  const reg_entry *seg[2];

  /* PREFIX holds all the given prefix opcodes (usually null).
     PREFIXES is the number of prefix opcodes.  */
  unsigned int prefixes;
  unsigned char prefix[MAX_PREFIXES];

  /* .insn allows for reserved opcode spaces.  */
  unsigned char insn_opcode_space;

  /* .insn also allows (requires) specifying immediate size.  */
  unsigned char imm_bits[MAX_OPERANDS];

  /* Register is in low 3 bits of opcode.  */
  bool short_form;

  /* The operand to a branch insn indicates an absolute branch.  */
  bool jumpabsolute;

  /* The operand to a branch insn indicates a far branch.  */
  bool far_branch;

  /* There is a memory operand of (%dx) which should only be used
     with input/output instructions.  */
  bool input_output_operand;

  /* Extended states.  */
  enum
    {
      /* Use MMX state.  */
      xstate_mmx = 1 << 0,
      /* Use XMM state.  */
      xstate_xmm = 1 << 1,
      /* Use YMM state.  */
      xstate_ymm = 1 << 2 | xstate_xmm,
      /* Use ZMM state.  */
      xstate_zmm = 1 << 3 | xstate_ymm,
      /* Use TMM state.  */
      xstate_tmm = 1 << 4,
      /* Use MASK state.  */
      xstate_mask = 1 << 5
    } xstate;
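
  /* As a worked example, xstate_zmm above evaluates to
     (1 << 3) | (1 << 2) | (1 << 1) == 0xe, so a test such as
     "(i.xstate & xstate_xmm)" also fires for YMM and ZMM uses.  */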

  /* Has GOTPC or TLS relocation.  */
  bool has_gotpc_tls_reloc;

  /* RM and SIB are the modrm byte and the sib byte where the
     addressing modes of this insn are encoded.  */
  modrm_byte rm;
  rex_byte rex;
  rex_byte vrex;
  sib_byte sib;
  vex_prefix vex;

  /* Masking attributes.

     The struct describes masking, applied to OPERAND in the instruction.
     REG is a pointer to the corresponding mask register.  ZEROING tells
     whether merging or zeroing mask is used.  */
  struct Mask_Operation
  {
    const reg_entry *reg;
    unsigned int zeroing;
    /* The operand where this operation is associated.  */
    unsigned int operand;
  } mask;

  /* Rounding control and SAE attributes.  */
  struct RC_Operation
  {
    enum rc_type
      {
	rc_none = -1,
	rne,
	rd,
	ru,
	rz,
	saeonly
      } type;
    /* In Intel syntax the operand modifier form is supposed to be used, but
       we continue to accept the immediate forms as well.  */
    bool modifier;
  } rounding;

  /* Broadcasting attributes.

     The struct describes broadcasting, applied to OPERAND.  TYPE
     expresses the broadcast factor.  */
  struct Broadcast_Operation
  {
    /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
    unsigned int type;

    /* Index of broadcasted operand.  */
    unsigned int operand;

    /* Number of bytes to broadcast.  */
    unsigned int bytes;
  } broadcast;

  /* Compressed disp8*N attribute.  */
  unsigned int memshift;

  /* Prefer load or store in encoding.  */
  enum
    {
      dir_encoding_default = 0,
      dir_encoding_load,
      dir_encoding_store,
      dir_encoding_swap
    } dir_encoding;

  /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
  enum
    {
      disp_encoding_default = 0,
      disp_encoding_8bit,
      disp_encoding_16bit,
      disp_encoding_32bit
    } disp_encoding;

  /* Prefer the REX byte in encoding.  */
  bool rex_encoding;

  /* Disable instruction size optimization.  */
  bool no_optimize;

  /* How to encode vector instructions.  */
  enum
    {
      vex_encoding_default = 0,
      vex_encoding_vex,
      vex_encoding_vex3,
      vex_encoding_evex,
      vex_encoding_error
    } vec_encoding;

  /* REP prefix.  */
  const char *rep_prefix;

  /* HLE prefix.  */
  const char *hle_prefix;

  /* Have BND prefix.  */
  const char *bnd_prefix;

  /* Have NOTRACK prefix.  */
  const char *notrack_prefix;

  /* Error message.  */
  enum i386_error error;
};

typedef struct _i386_insn i386_insn;

/* Link RC type with corresponding string, that'll be looked for in
   asm.  */
struct RC_name
{
  enum rc_type type;
  const char *name;
  unsigned int len;
};

static const struct RC_name RC_NamesTable[] =
{
  { rne, STRING_COMMA_LEN ("rn-sae") },
  { rd, STRING_COMMA_LEN ("rd-sae") },
  { ru, STRING_COMMA_LEN ("ru-sae") },
  { rz, STRING_COMMA_LEN ("rz-sae") },
  { saeonly, STRING_COMMA_LEN ("sae") },
};
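
/* For instance (an assumed example, not from the sources), the AT&T-mode
   operand "{rn-sae}" in "vaddps {rn-sae}, %zmm4, %zmm5, %zmm6" would be
   matched against the spellings above to select the rne rounding type.  */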

/* To be indexed by segment register number.  */
static const unsigned char i386_seg_prefixes[] = {
  ES_PREFIX_OPCODE,
  CS_PREFIX_OPCODE,
  SS_PREFIX_OPCODE,
  DS_PREFIX_OPCODE,
  FS_PREFIX_OPCODE,
  GS_PREFIX_OPCODE
};

/* List of chars besides those in app.c:symbol_chars that can start an
   operand.  Used to prevent the scrubber eating vital white-space.  */
const char extra_symbol_chars[] = "*%-([{}"
#ifdef LEX_AT
	"@"
#endif
#ifdef LEX_QM
	"?"
#endif
	;

#if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
     && !defined (TE_GNU) \
     && !defined (TE_LINUX) \
     && !defined (TE_Haiku) \
     && !defined (TE_FreeBSD) \
     && !defined (TE_DragonFly) \
     && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment.  If the
   pre-processor is disabled, these aren't very useful.  The option
   --divide will remove '/' from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif

/* This array holds the chars that only start a comment at the beginning of
   a line.  If the line seems to have the form '# 123 filename'
   .line and .file directives will appear in the pre-processed output.
   Note that input_file.c hand checks for '#' at the beginning of the
   first line of the input file.  This is because the compiler outputs
   #NO_APP at the beginning of its output.
   Also note that comments started like this one will always work if
   '/' isn't otherwise defined.  */
const char line_comment_chars[] = "#/";

const char line_separator_chars[] = ";";

/* Chars that can be used to separate mant from exp in floating point
   nums.  */
const char EXP_CHARS[] = "eE";

/* Chars that mean this number is a floating point constant
   As in 0f12.456
   or 0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxXhHbB";

/* Tables for lexical analysis.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];

/* Lexical macros.  */
#define is_operand_char(x) (operand_chars[(unsigned char) x])
#define is_register_char(x) (register_chars[(unsigned char) x])
#define is_space_char(x) ((x) == ' ')

/* All non-digit non-letter characters that may occur in an operand and
   which aren't already in extra_symbol_chars[].  */
static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]";

/* md_assemble() always leaves the strings it's passed unaltered.  To
   effect this we maintain a stack of saved characters that we've smashed
   with '\0's (indicating end of strings for various sub-fields of the
   assembler instruction).  */
static char save_stack[32];
static char *save_stack_p;
#define END_STRING_AND_SAVE(s) \
	do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
#define RESTORE_END_STRING(s) \
	do { *(s) = *--save_stack_p; } while (0)
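
/* A minimal usage sketch (hypothetical code, not from the sources):
   NUL-terminate a sub-field in place, process it, then undo the damage.
   Saves and restores must nest, since both macros share the single
   save_stack above.

     char *end = strchr (operand_start, ',');
     END_STRING_AND_SAVE (end);    -- *end pushed, replaced by '\0'
     ... treat operand_start as a complete C string ...
     RESTORE_END_STRING (end);     -- original character popped back
*/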

/* The instruction we're assembling.  */
static i386_insn i;

/* Possible templates for current insn.  */
static const templates *current_templates;

/* Per instruction expressionS buffers: max displacements & immediates.  */
static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];

/* Current operand we are working on.  */
static int this_operand = -1;

/* Are we processing a .insn directive?  */
#define dot_insn() (i.tm.mnem_off == MN__insn)

/* We support four different modes.  FLAG_CODE variable is used to distinguish
   these.  */

enum flag_code {
	CODE_32BIT,
	CODE_16BIT,
	CODE_64BIT };

static enum flag_code flag_code;
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
/* __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;

#if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
     || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
     || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif

#if defined (TE_PE) || defined (TE_PEP)
/* Use big object file format.  */
static int use_big_obj = 0;
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* 1 if generating code for a shared library.  */
static int shared = 0;

unsigned int x86_sframe_cfa_sp_reg;
/* The other CFA base register for SFrame stack trace info.  */
unsigned int x86_sframe_cfa_fp_reg;
unsigned int x86_sframe_cfa_ra_reg;

#endif

/* 1 for intel syntax,
   0 if att syntax.  */
static int intel_syntax = 0;

static enum x86_64_isa
{
  amd64 = 1,	/* AMD64 ISA.  */
  intel64	/* Intel64 ISA.  */
} isa64;

/* 1 for intel mnemonic,
   0 if att mnemonic.  */
static int intel_mnemonic = !SYSV386_COMPAT;

/* 1 if pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 if register prefix % not required.  */
static int allow_naked_reg = 0;

/* 1 if the assembler should add BND prefix for all control-transferring
   instructions supporting it, even if this prefix wasn't specified
   explicitly.  */
static int add_bnd_prefix = 0;

/* 1 if pseudo index register, eiz/riz, is allowed.  */
static int allow_index_reg = 0;

/* 1 if the assembler should ignore LOCK prefix, even if it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 if the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;

/* 1 if lfence should be inserted after every load.  */
static int lfence_after_load = 0;

/* Non-zero if lfence should be inserted before indirect branch.  */
static enum lfence_before_indirect_branch_kind
  {
    lfence_branch_none = 0,
    lfence_branch_register,
    lfence_branch_memory,
    lfence_branch_all
  }
lfence_before_indirect_branch;

/* Non-zero if lfence should be inserted before ret.  */
static enum lfence_before_ret_kind
  {
    lfence_before_ret_none = 0,
    lfence_before_ret_not,
    lfence_before_ret_or,
    lfence_before_ret_shl
  }
lfence_before_ret;

/* The kind of the previous instruction: ordinary, a directive such as
   .byte, or a prefix.  */
static struct
  {
    segT seg;
    const char *file;
    const char *name;
    unsigned int line;
    enum last_insn_kind
      {
	last_insn_other = 0,
	last_insn_directive,
	last_insn_prefix
      } kind;
  } last_insn;

/* 1 if the assembler should generate relax relocations.  */

static int generate_relax_relocations
  = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;

static enum check_kind
  {
    check_none = 0,
    check_warning,
    check_error
  }
sse_check, operand_check = check_warning;

/* Non-zero if branches should be aligned within power of 2 boundary.  */
static int align_branch_power = 0;

/* Types of branches to align.  */
enum align_branch_kind
  {
    align_branch_none = 0,
    align_branch_jcc = 1,
    align_branch_fused = 2,
    align_branch_jmp = 3,
    align_branch_call = 4,
    align_branch_indirect = 5,
    align_branch_ret = 6
  };

/* Type bits of branches to align.  */
enum align_branch_bit
  {
    align_branch_jcc_bit = 1 << align_branch_jcc,
    align_branch_fused_bit = 1 << align_branch_fused,
    align_branch_jmp_bit = 1 << align_branch_jmp,
    align_branch_call_bit = 1 << align_branch_call,
    align_branch_indirect_bit = 1 << align_branch_indirect,
    align_branch_ret_bit = 1 << align_branch_ret
  };

static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);

/* Types of condition jump used by macro-fusion.  */
enum mf_jcc_kind
  {
    mf_jcc_jo = 0,  /* base opcode 0x70  */
    mf_jcc_jc,      /* base opcode 0x72  */
    mf_jcc_je,      /* base opcode 0x74  */
    mf_jcc_jna,     /* base opcode 0x76  */
    mf_jcc_js,      /* base opcode 0x78  */
    mf_jcc_jp,      /* base opcode 0x7a  */
    mf_jcc_jl,      /* base opcode 0x7c  */
    mf_jcc_jle,     /* base opcode 0x7e  */
  };

/* Types of compare flag-modifying instructions used by macro-fusion.  */
enum mf_cmp_kind
  {
    mf_cmp_test_and,  /* test/cmp */
    mf_cmp_alu_cmp,   /* add/sub/cmp */
    mf_cmp_incdec     /* inc/dec */
  };

/* The maximum padding size for fused jcc.  A CMP-like instruction can
   be 9 bytes and a jcc can be 6 bytes.  Leave room just in case for
   prefixes.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* The maximum number of prefixes added for an instruction.  */
static unsigned int align_branch_prefix_size = 5;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
 */
static int optimize = 0;

/* Optimization:
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
   3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".
 */
static int optimize_for_space = 0;

/* Register prefix used for error message.  */
static const char *register_prefix = "%";

/* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
   leave, push, and pop instructions so that gcc has the same stack
   frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;

/* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
static bool pre_386_16bit_warned;

/* CPU name.  */
static const char *cpu_arch_name = NULL;
static char *cpu_sub_arch_name = NULL;

/* CPU feature flags.  */
static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;

/* If we have selected a cpu we are generating instructions for.  */
static int cpu_arch_tune_set = 0;

/* Cpu we are generating instructions for.  */
enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;

/* CPU feature flags of cpu we are generating instructions for.  */
static i386_cpu_flags cpu_arch_tune_flags;

/* CPU instruction set architecture used.  */
enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;

/* CPU feature flags of instruction set architecture used.  */
i386_cpu_flags cpu_arch_isa_flags;

/* If set, conditional jumps are not automatically promoted to handle
   larger than a byte offset.  */
static bool no_cond_jump_promotion = false;

/* This will be set from an expression parser hook if there's any
   applicable operator involved in an expression.  */
static enum {
  expr_operator_none,
  expr_operator_present,
  expr_large_value,
} expr_mode;

/* Encode SSE instructions with VEX prefix.  */
static unsigned int sse2avx;

/* Encode aligned vector move as unaligned vector move.  */
static unsigned int use_unaligned_vector_move;

/* Maximum permitted vector size.  */
#define VSZ_DEFAULT VSZ512
static unsigned int vector_size = VSZ_DEFAULT;

/* Encode scalar AVX instructions with specific vector length.  */
static enum
  {
    vex128 = 0,
    vex256
  } avxscalar;

/* Encode VEX WIG instructions with specific vex.w.  */
static enum
  {
    vexw0 = 0,
    vexw1
  } vexwig;

/* Encode scalar EVEX LIG instructions with specific vector length.  */
static enum
  {
    evexl128 = 0,
    evexl256,
    evexl512
  } evexlig;

/* Encode EVEX WIG instructions with specific evex.w.  */
static enum
  {
    evexw0 = 0,
    evexw1
  } evexwig;

/* Value to encode in EVEX RC bits, for SAE-only instructions.  */
static enum rc_type evexrcig = rne;

/* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
static symbolS *GOT_symbol;

/* The dwarf2 return column, adjusted for 32 or 64 bit.  */
unsigned int x86_dwarf2_return_column;

/* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
int x86_cie_data_alignment;

/* Interface to relax_segment.
   There are 3 major relax states for 386 jump insns because the
   different types of jumps add different sizes to frags when we're
   figuring out what sort of jump to choose to reach a given label.

   BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
   branches which are handled by md_estimate_size_before_relax() and
   i386_generic_table_relax_frag().  */

/* Types.  */
#define UNCOND_JUMP 0
#define COND_JUMP 1
#define COND_JUMP86 2
#define BRANCH_PADDING 3
#define BRANCH_PREFIX 4
#define FUSED_JCC_PADDING 5

/* Sizes.  */
#define CODE16	1
#define SMALL	0
#define SMALL16 (SMALL | CODE16)
#define BIG	2
#define BIG16	(BIG | CODE16)

#ifndef INLINE
#ifdef __GNUC__
#define INLINE __inline__
#else
#define INLINE
#endif
#endif

#define ENCODE_RELAX_STATE(type, size) \
  ((relax_substateT) (((type) << 2) | (size)))
#define TYPE_FROM_RELAX_STATE(s) \
  ((s) >> 2)
#define DISP_SIZE_FROM_RELAX_STATE(s) \
    ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
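
/* Worked example: ENCODE_RELAX_STATE (COND_JUMP, BIG) is
   (1 << 2) | 2 == 6; TYPE_FROM_RELAX_STATE (6) recovers COND_JUMP,
   and DISP_SIZE_FROM_RELAX_STATE (6) yields 4, the width in bytes of
   the displacement that state carries.  */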

/* This table is used by relax_frag to promote short jumps to long
   ones where necessary.  SMALL (short) jumps may be promoted to BIG
   (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
   don't allow a short jump in a 32 bit code segment to be promoted to
   a 16 bit offset jump because it's slower (requires data size
   prefix), and doesn't work, unless the destination is in the bottom
   64k of the code segment (The top 16 bits of eip are zeroed).  */

const relax_typeS md_relax_table[] =
{
  /* The fields are:
     1) most positive reach of this state,
     2) most negative reach of this state,
     3) how many bytes this mode will have in the variable part of the frag
     4) which index into the table to try if we can't fit into this one.  */

  /* UNCOND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
  /* dword jmp adds 4 bytes to frag:
     0 extra opcode bytes, 4 displacement bytes.  */
  {0, 0, 4, 0},
  /* word jmp adds 2 bytes to frag:
     0 extra opcode bytes, 2 displacement bytes.  */
  {0, 0, 2, 0},

  /* COND_JUMP states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 3 bytes to frag:
     1 extra opcode byte, 2 displacement bytes.  */
  {0, 0, 3, 0},

  /* COND_JUMP86 states.  */
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
  {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
  /* dword conditionals add 5 bytes to frag:
     1 extra opcode byte, 4 displacement bytes.  */
  {0, 0, 5, 0},
  /* word conditionals add 4 bytes to frag:
     1 displacement byte and a 3 byte long branch insn.  */
  {0, 0, 4, 0}
};

#define ARCH(n, t, f, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
    CPU_NONE_FLAGS }
#define SUBARCH(n, e, d, s) \
  { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
    CPU_ ## d ## _FLAGS }
#define VECARCH(n, e, d, v) \
  { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
    CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }

static const arch_entry cpu_arch[] =
{
  /* Do not replace the first two entries - i386_target_format() and
     set_cpu_arch() rely on them being there in this order.  */
  ARCH (generic32, GENERIC32, GENERIC32, false),
  ARCH (generic64, GENERIC64, GENERIC64, false),
  ARCH (i8086, UNKNOWN, NONE, false),
  ARCH (i186, UNKNOWN, 186, false),
  ARCH (i286, UNKNOWN, 286, false),
  ARCH (i386, I386, 386, false),
  ARCH (i486, I486, 486, false),
  ARCH (i586, PENTIUM, 586, false),
  ARCH (i686, PENTIUMPRO, 686, false),
  ARCH (pentium, PENTIUM, 586, false),
  ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
  ARCH (pentiumii, PENTIUMPRO, P2, false),
  ARCH (pentiumiii, PENTIUMPRO, P3, false),
  ARCH (pentium4, PENTIUM4, P4, false),
  ARCH (prescott, NOCONA, CORE, false),
  ARCH (nocona, NOCONA, NOCONA, false),
  ARCH (yonah, CORE, CORE, true),
  ARCH (core, CORE, CORE, false),
  ARCH (merom, CORE2, CORE2, true),
  ARCH (core2, CORE2, CORE2, false),
  ARCH (corei7, COREI7, COREI7, false),
  ARCH (iamcu, IAMCU, IAMCU, false),
  ARCH (k6, K6, K6, false),
  ARCH (k6_2, K6, K6_2, false),
  ARCH (athlon, ATHLON, ATHLON, false),
  ARCH (sledgehammer, K8, K8, true),
  ARCH (opteron, K8, K8, false),
  ARCH (k8, K8, K8, false),
  ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
  ARCH (bdver1, BD, BDVER1, false),
  ARCH (bdver2, BD, BDVER2, false),
  ARCH (bdver3, BD, BDVER3, false),
  ARCH (bdver4, BD, BDVER4, false),
  ARCH (znver1, ZNVER, ZNVER1, false),
  ARCH (znver2, ZNVER, ZNVER2, false),
  ARCH (znver3, ZNVER, ZNVER3, false),
  ARCH (znver4, ZNVER, ZNVER4, false),
  ARCH (btver1, BT, BTVER1, false),
  ARCH (btver2, BT, BTVER2, false),

  SUBARCH (8087, 8087, ANY_8087, false),
  SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
  SUBARCH (287, 287, ANY_287, false),
  SUBARCH (387, 387, ANY_387, false),
  SUBARCH (687, 687, ANY_687, false),
  SUBARCH (cmov, CMOV, CMOV, false),
  SUBARCH (fxsr, FXSR, ANY_FXSR, false),
  SUBARCH (mmx, MMX, ANY_MMX, false),
  SUBARCH (sse, SSE, ANY_SSE, false),
  SUBARCH (sse2, SSE2, ANY_SSE2, false),
  SUBARCH (sse3, SSE3, ANY_SSE3, false),
  SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
  SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
  SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
  SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
  SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
  VECARCH (avx, AVX, ANY_AVX, reset),
  VECARCH (avx2, AVX2, ANY_AVX2, reset),
  VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
  VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
  VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
  VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
  VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
  VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
  VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
  SUBARCH (monitor, MONITOR, MONITOR, false),
  SUBARCH (vmx, VMX, ANY_VMX, false),
  SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
  SUBARCH (smx, SMX, SMX, false),
  SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
  SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
  SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
  SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
  SUBARCH (aes, AES, ANY_AES, false),
  SUBARCH (pclmul, PCLMULQDQ, ANY_PCLMULQDQ, false),
  SUBARCH (clmul, PCLMULQDQ, ANY_PCLMULQDQ, true),
  SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
  SUBARCH (rdrnd, RDRND, RDRND, false),
  SUBARCH (f16c, F16C, ANY_F16C, false),
  SUBARCH (bmi2, BMI2, BMI2, false),
  SUBARCH (fma, FMA, ANY_FMA, false),
  SUBARCH (fma4, FMA4, ANY_FMA4, false),
  SUBARCH (xop, XOP, ANY_XOP, false),
  SUBARCH (lwp, LWP, ANY_LWP, false),
  SUBARCH (movbe, MOVBE, MOVBE, false),
  SUBARCH (cx16, CX16, CX16, false),
  SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
  SUBARCH (ept, EPT, ANY_EPT, false),
  SUBARCH (lzcnt, LZCNT, LZCNT, false),
  SUBARCH (popcnt, POPCNT, POPCNT, false),
  SUBARCH (hle, HLE, HLE, false),
  SUBARCH (rtm, RTM, ANY_RTM, false),
  SUBARCH (tsx, TSX, TSX, false),
  SUBARCH (invpcid, INVPCID, INVPCID, false),
  SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
  SUBARCH (nop, NOP, NOP, false),
  SUBARCH (syscall, SYSCALL, SYSCALL, false),
  SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
  SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
  SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
  SUBARCH (padlock, PADLOCK, PADLOCK, false),
  SUBARCH (pacifica, SVME, ANY_SVME, true),
  SUBARCH (svme, SVME, ANY_SVME, false),
  SUBARCH (abm, ABM, ABM, false),
  SUBARCH (bmi, BMI, BMI, false),
  SUBARCH (tbm, TBM, TBM, false),
  SUBARCH (adx, ADX, ADX, false),
  SUBARCH (rdseed, RDSEED, RDSEED, false),
  SUBARCH (prfchw, PRFCHW, PRFCHW, false),
  SUBARCH (smap, SMAP, SMAP, false),
  SUBARCH (mpx, MPX, ANY_MPX, false),
  SUBARCH (sha, SHA, ANY_SHA, false),
  SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
  SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
  SUBARCH (se1, SE1, SE1, false),
  SUBARCH (clwb, CLWB, CLWB, false),
  VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
  VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
  VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
  VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
  VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
  VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
  VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
  VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
  VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
  SUBARCH (clzero, CLZERO, CLZERO, false),
  SUBARCH (mwaitx, MWAITX, MWAITX, false),
  SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
  SUBARCH (rdpid, RDPID, RDPID, false),
  SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
  SUBARCH (ibt, IBT, IBT, false),
  SUBARCH (shstk, SHSTK, SHSTK, false),
  SUBARCH (gfni, GFNI, ANY_GFNI, false),
  VECARCH (vaes, VAES, ANY_VAES, reset),
  VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
  SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
  SUBARCH (pconfig, PCONFIG, PCONFIG, false),
  SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
  SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
  SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
  SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
  SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
  SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
  SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
  SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
  SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
  VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
  VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
	   ANY_AVX512_VP2INTERSECT, reset),
  SUBARCH (tdx, TDX, TDX, false),
  SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
  SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
  SUBARCH (rdpru, RDPRU, RDPRU, false),
  SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
  SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
  SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
  SUBARCH (kl, KL, ANY_KL, false),
  SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
  SUBARCH (uintr, UINTR, UINTR, false),
  SUBARCH (hreset, HRESET, HRESET, false),
  VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
  SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
  VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
  VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
  SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
  SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
  SUBARCH (msrlist, MSRLIST, MSRLIST, false),
  VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
  SUBARCH (rao_int, RAO_INT, RAO_INT, false),
  SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
  SUBARCH (fred, FRED, ANY_FRED, false),
  SUBARCH (lkgs, LKGS, ANY_LKGS, false),
  VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
  VECARCH (sha512, SHA512, ANY_SHA512, reset),
  VECARCH (sm3, SM3, ANY_SM3, reset),
  VECARCH (sm4, SM4, ANY_SM4, reset),
  SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
  VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
};

#undef SUBARCH
#undef ARCH

#ifdef I386COFF
/* Like s_lcomm_internal in gas/read.c but the alignment string
   is allowed to be optional.  */

static symbolS *
pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
{
  addressT align = 0;

  SKIP_WHITESPACE ();

  if (needs_align
      && *input_line_pointer == ',')
    {
      align = parse_align (needs_align - 1);

      if (align == (addressT) -1)
	return NULL;
    }
  else
    {
      if (size >= 8)
	align = 3;
      else if (size >= 4)
	align = 2;
      else if (size >= 2)
	align = 1;
      else
	align = 0;
    }

  bss_alloc (symbolP, size, align);
  return symbolP;
}

static void
pe_lcomm (int needs_align)
{
  s_comm_internal (needs_align * 2, pe_lcomm_internal);
}
#endif

const pseudo_typeS md_pseudo_table[] =
{
#if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
  {"align", s_align_bytes, 0},
#else
  {"align", s_align_ptwo, 0},
#endif
  {"arch", set_cpu_arch, 0},
#ifndef I386COFF
  {"bss", s_bss, 0},
#else
  {"lcomm", pe_lcomm, 1},
#endif
  {"ffloat", float_cons, 'f'},
  {"dfloat", float_cons, 'd'},
  {"tfloat", float_cons, 'x'},
  {"hfloat", float_cons, 'h'},
  {"bfloat16", float_cons, 'b'},
  {"value", cons, 2},
  {"slong", signed_cons, 4},
  {"insn", s_insn, 0},
  {"noopt", s_ignore, 0},
  {"optim", s_ignore, 0},
  {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
  {"code16", set_code_flag, CODE_16BIT},
  {"code32", set_code_flag, CODE_32BIT},
#ifdef BFD64
  {"code64", set_code_flag, CODE_64BIT},
#endif
  {"intel_syntax", set_intel_syntax, 1},
  {"att_syntax", set_intel_syntax, 0},
  {"intel_mnemonic", set_intel_mnemonic, 1},
  {"att_mnemonic", set_intel_mnemonic, 0},
  {"allow_index_reg", set_allow_index_reg, 1},
  {"disallow_index_reg", set_allow_index_reg, 0},
  {"sse_check", set_check, 0},
  {"operand_check", set_check, 1},
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
  {"largecomm", handle_large_common, 0},
#else
  {"file", dwarf2_directive_file, 0},
  {"loc", dwarf2_directive_loc, 0},
  {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
#endif
#ifdef TE_PE
  {"secrel32", pe_directive_secrel, 0},
  {"secidx", pe_directive_secidx, 0},
#endif
  {0, 0, 0}
};

/* For interface with expression ().  */
extern char *input_line_pointer;

/* Hash table for instruction mnemonic lookup.  */
static htab_t op_hash;

/* Hash table for register lookup.  */
static htab_t reg_hash;
\f
/* Various efficient no-op patterns for aligning code labels.
   Note: Don't try to assemble the instructions in the comments.
   0L and 0w are not legal.  */
static const unsigned char f32_1[] =
  {0x90};				/* nop			*/
static const unsigned char f32_2[] =
  {0x66,0x90};				/* xchg %ax,%ax		*/
static const unsigned char f32_3[] =
  {0x8d,0x76,0x00};			/* leal 0(%esi),%esi	*/
static const unsigned char f32_4[] =
  {0x8d,0x74,0x26,0x00};		/* leal 0(%esi,1),%esi	*/
static const unsigned char f32_6[] =
  {0x8d,0xb6,0x00,0x00,0x00,0x00};	/* leal 0L(%esi),%esi	*/
static const unsigned char f32_7[] =
  {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00};	/* leal 0L(%esi,1),%esi */
static const unsigned char f16_3[] =
  {0x8d,0x74,0x00};			/* lea 0(%si),%si	*/
static const unsigned char f16_4[] =
  {0x8d,0xb4,0x00,0x00};		/* lea 0W(%si),%si	*/
static const unsigned char jump_disp8[] =
  {0xeb};				/* jmp disp8	       */
static const unsigned char jump32_disp32[] =
  {0xe9};				/* jmp disp32	       */
static const unsigned char jump16_disp32[] =
  {0x66,0xe9};				/* jmp disp32	       */
/* 32-bit NOPs patterns.  */
static const unsigned char *const f32_patt[] = {
  f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
};
/* 16-bit NOPs patterns.  */
static const unsigned char *const f16_patt[] = {
  f32_1, f32_2, f16_3, f16_4
};
/* nopl (%[re]ax) */
static const unsigned char alt_3[] =
  {0x0f,0x1f,0x00};
/* nopl 0(%[re]ax) */
static const unsigned char alt_4[] =
  {0x0f,0x1f,0x40,0x00};
/* nopl 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_5[] =
  {0x0f,0x1f,0x44,0x00,0x00};
/* nopw 0(%[re]ax,%[re]ax,1) */
static const unsigned char alt_6[] =
  {0x66,0x0f,0x1f,0x44,0x00,0x00};
/* nopl 0L(%[re]ax) */
static const unsigned char alt_7[] =
  {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
/* nopl 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_8[] =
  {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw 0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_9[] =
  {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* nopw %cs:0L(%[re]ax,%[re]ax,1) */
static const unsigned char alt_10[] =
  {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* data16 nopw %cs:0L(%eax,%eax,1) */
static const unsigned char alt_11[] =
  {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
/* 32-bit and 64-bit NOPs patterns.  */
static const unsigned char *const alt_patt[] = {
  f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
  alt_9, alt_10, alt_11
};

/* Generate COUNT bytes of NOPs to WHERE from PATT with the maximum
   size of a single NOP instruction MAX_SINGLE_NOP_SIZE.  */

static void
i386_output_nops (char *where, const unsigned char *const *patt,
		  int count, int max_single_nop_size)

{
  /* Place the longer NOP first.  */
  int last;
  int offset;
  const unsigned char *nops;

  if (max_single_nop_size < 1)
    {
      as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
		max_single_nop_size);
      return;
    }

  nops = patt[max_single_nop_size - 1];

  /* Use the smaller one if the requested one isn't available.  */
  if (nops == NULL)
    {
      max_single_nop_size--;
      nops = patt[max_single_nop_size - 1];
    }

  last = count % max_single_nop_size;

  count -= last;
  for (offset = 0; offset < count; offset += max_single_nop_size)
    memcpy (where + offset, nops, max_single_nop_size);

  if (last)
    {
      nops = patt[last - 1];
      if (nops == NULL)
	{
	  /* Use the smaller one plus one-byte NOP if the needed one
	     isn't available.  */
	  last--;
	  nops = patt[last - 1];
	  memcpy (where + offset, nops, last);
	  where[offset + last] = *patt[0];
	}
      else
	memcpy (where + offset, nops, last);
    }
}
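
/* Worked example (assuming alt_patt with max_single_nop_size == 11):
   for count == 25 the function above emits two 11-byte NOPs (alt_11)
   followed by the 3-byte alt_3, since 25 % 11 == 3.  */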

static INLINE int
fits_in_imm7 (offsetT num)
{
  return (num & 0x7f) == num;
}

static INLINE int
fits_in_imm31 (offsetT num)
{
  return (num & 0x7fffffff) == num;
}
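
/* E.g. fits_in_imm7 (127) and fits_in_imm31 (0x7fffffff) hold, while
   fits_in_imm7 (128) and any negative value do not: masking a negative
   offsetT changes its value, so the comparison fails.  These guard the
   "jmp disp8" and 4-byte-displacement choices in i386_generate_nops
   below.  */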

/* Generate COUNT bytes of NOPs to WHERE with the maximum size of a
   single NOP instruction LIMIT.  */

void
i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
{
  const unsigned char *const *patt = NULL;
  int max_single_nop_size;
  /* Maximum number of NOPs before switching to jump over NOPs.  */
  int max_number_of_nops;

  switch (fragP->fr_type)
    {
    case rs_fill_nop:
    case rs_align_code:
      break;
    case rs_machine_dependent:
      /* Allow NOP padding for jumps and calls.  */
      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
	break;
      /* Fall through.  */
    default:
      return;
    }

  /* We need to decide which NOP sequence to use for 32bit and
     64bit.  When -mtune= is used:

     1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
     PROCESSOR_GENERIC32, f32_patt will be used.
     2. For the rest, alt_patt will be used.

     When -mtune= isn't used, alt_patt will be used if
     cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt will
     be used.

     When -march= or .arch is used, we can't use anything beyond
     cpu_arch_isa_flags.  */

  if (flag_code == CODE_16BIT)
    {
      patt = f16_patt;
      max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
      /* Limit number of NOPs to 2 in 16-bit mode.  */
      max_number_of_nops = 2;
    }
  else
    {
      if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
	{
	  /* PROCESSOR_UNKNOWN means that all ISAs may be used.  */
	  switch (cpu_arch_tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* We use cpu_arch_isa_flags to check if we SHOULD
		 optimize with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	    case PROCESSOR_GENERIC64:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	      patt = alt_patt;
	      break;
	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_GENERIC32:
	      patt = f32_patt;
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}
      else
	{
	  switch (fragP->tc_frag_data.tune)
	    {
	    case PROCESSOR_UNKNOWN:
	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
		 PROCESSOR_UNKNOWN.  */
	      abort ();
	      break;

	    case PROCESSOR_I386:
	    case PROCESSOR_I486:
	    case PROCESSOR_PENTIUM:
	    case PROCESSOR_IAMCU:
	    case PROCESSOR_K6:
	    case PROCESSOR_ATHLON:
	    case PROCESSOR_K8:
	    case PROCESSOR_AMDFAM10:
	    case PROCESSOR_BD:
	    case PROCESSOR_ZNVER:
	    case PROCESSOR_BT:
	    case PROCESSOR_GENERIC32:
	      /* We use cpu_arch_isa_flags to check if we CAN optimize
		 with nops.  */
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_PENTIUMPRO:
	    case PROCESSOR_PENTIUM4:
	    case PROCESSOR_NOCONA:
	    case PROCESSOR_CORE:
	    case PROCESSOR_CORE2:
	    case PROCESSOR_COREI7:
	      if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
		patt = alt_patt;
	      else
		patt = f32_patt;
	      break;
	    case PROCESSOR_GENERIC64:
	      patt = alt_patt;
	      break;
	    case PROCESSOR_NONE:
	      abort ();
	    }
	}

      if (patt == f32_patt)
	{
	  max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
	  /* Limit number of NOPs to 2 for older processors.  */
	  max_number_of_nops = 2;
	}
      else
	{
	  max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
	  /* Limit number of NOPs to 7 for newer processors.  */
	  max_number_of_nops = 7;
	}
    }

  if (limit == 0)
    limit = max_single_nop_size;

  if (fragP->fr_type == rs_fill_nop)
    {
      /* Output NOPs for .nop directive.  */
      if (limit > max_single_nop_size)
	{
	  as_bad_where (fragP->fr_file, fragP->fr_line,
			_("invalid single nop size: %d "
			  "(expect within [0, %d])"),
			limit, max_single_nop_size);
	  return;
	}
    }
  else if (fragP->fr_type != rs_machine_dependent)
    fragP->fr_var = count;

  if ((count / max_single_nop_size) > max_number_of_nops)
    {
      /* Generate jump over NOPs.  */
      offsetT disp = count - 2;
      if (fits_in_imm7 (disp))
	{
	  /* Use "jmp disp8" if possible.  */
	  count = disp;
	  where[0] = jump_disp8[0];
	  where[1] = count;
	  where += 2;
	}
      else
	{
	  unsigned int size_of_jump;

	  if (flag_code == CODE_16BIT)
	    {
	      where[0] = jump16_disp32[0];
	      where[1] = jump16_disp32[1];
	      size_of_jump = 2;
	    }
	  else
	    {
	      where[0] = jump32_disp32[0];
	      size_of_jump = 1;
	    }

	  count -= size_of_jump + 4;
	  if (!fits_in_imm31 (count))
	    {
	      as_bad_where (fragP->fr_file, fragP->fr_line,
			    _("jump over nop padding out of range"));
	      return;
	    }

	  md_number_to_chars (where + size_of_jump, count, 4);
	  where += size_of_jump + 4;
	}
    }

  /* Generate multiple NOPs.  */
  i386_output_nops (where, patt, count, limit);
}

static INLINE int
operand_type_all_zero (const union i386_operand_type *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE void
operand_type_set (union i386_operand_type *x, unsigned int v)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      x->array[2] = v;
      /* Fall through.  */
    case 2:
      x->array[1] = v;
      /* Fall through.  */
    case 1:
      x->array[0] = v;
      /* Fall through.  */
      break;
    default:
      abort ();
    }

  x->bitfield.class = ClassNone;
  x->bitfield.instance = InstanceNone;
}

static INLINE int
operand_type_equal (const union i386_operand_type *x,
		    const union i386_operand_type *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE bool
is_cpu (const insn_template *t, enum i386_cpu cpu)
{
  switch (cpu)
    {
    case Cpu287:      return t->cpu.bitfield.cpu287;
    case Cpu387:      return t->cpu.bitfield.cpu387;
    case Cpu3dnow:    return t->cpu.bitfield.cpu3dnow;
    case Cpu3dnowA:   return t->cpu.bitfield.cpu3dnowa;
    case CpuAVX:      return t->cpu.bitfield.cpuavx;
    case CpuHLE:      return t->cpu.bitfield.cpuhle;
    case CpuAVX512F:  return t->cpu.bitfield.cpuavx512f;
    case CpuAVX512VL: return t->cpu.bitfield.cpuavx512vl;
    case Cpu64:       return t->cpu.bitfield.cpu64;
    case CpuNo64:     return t->cpu.bitfield.cpuno64;
    default:
      gas_assert (cpu < CpuAttrEnums);
    }
  return t->cpu.bitfield.isa == cpu + 1u;
}

static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
{
  const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
  i386_cpu_flags f = { .array[0] = 0 };

  switch (ARRAY_SIZE(a.array))
    {
    case 1:
      f.array[CpuAttrEnums / bps]
	|= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
      if (CpuAttrEnums % bps > CpuIsaBits)
	f.array[CpuAttrEnums / bps + 1]
	  = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
      break;
    default:
      abort ();
    }

  if (a.bitfield.isa)
    f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);

  return f;
}

static INLINE int
cpu_flags_all_zero (const union i386_cpu_flags *x)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return !x->array[0];
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_equal (const union i386_cpu_flags *x,
		 const union i386_cpu_flags *y)
{
  switch (ARRAY_SIZE(x->array))
    {
    case 5:
      if (x->array[4] != y->array[4])
	return 0;
      /* Fall through.  */
    case 4:
      if (x->array[3] != y->array[3])
	return 0;
      /* Fall through.  */
    case 3:
      if (x->array[2] != y->array[2])
	return 0;
      /* Fall through.  */
    case 2:
      if (x->array[1] != y->array[1])
	return 0;
      /* Fall through.  */
    case 1:
      return x->array[0] == y->array[0];
      break;
    default:
      abort ();
    }
}

static INLINE int
cpu_flags_check_cpu64 (const insn_template *t)
{
  return flag_code == CODE_64BIT
	 ? !t->cpu.bitfield.cpuno64
	 : !t->cpu.bitfield.cpu64;
}

static INLINE i386_cpu_flags
cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] &= y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] &= y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] &= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_cpu_flags
cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] |= y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] |= y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] |= y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] |= y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] |= y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static INLINE i386_cpu_flags
cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
{
  switch (ARRAY_SIZE (x.array))
    {
    case 5:
      x.array [4] &= ~y.array [4];
      /* Fall through.  */
    case 4:
      x.array [3] &= ~y.array [3];
      /* Fall through.  */
    case 3:
      x.array [2] &= ~y.array [2];
      /* Fall through.  */
    case 2:
      x.array [1] &= ~y.array [1];
      /* Fall through.  */
    case 1:
      x.array [0] &= ~y.array [0];
      break;
    default:
      abort ();
    }
  return x;
}

static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;

#define CPU_FLAGS_ARCH_MATCH		0x1
#define CPU_FLAGS_64BIT_MATCH		0x2

#define CPU_FLAGS_PERFECT_MATCH \
  (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1880
1881 /* Return CPU flags match bits. */
1882
1883 static int
1884 cpu_flags_match (const insn_template *t)
1885 {
1886 i386_cpu_flags x = cpu_flags_from_attr (t->cpu);
1887 int match = cpu_flags_check_cpu64 (t) ? CPU_FLAGS_64BIT_MATCH : 0;
1888
1889 x.bitfield.cpu64 = 0;
1890 x.bitfield.cpuno64 = 0;
1891
1892 if (cpu_flags_all_zero (&x))
1893 {
1894 /* This instruction is available on all archs. */
1895 match |= CPU_FLAGS_ARCH_MATCH;
1896 }
1897 else
1898 {
1899 /* This instruction is available only on some archs. */
1900 i386_cpu_flags cpu = cpu_arch_flags;
1901
1902 /* AVX512VL is not a standalone feature - match it and then strip it. */
1903 if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1904 return match;
1905 x.bitfield.cpuavx512vl = 0;
1906
1907 /* AVX and AVX2 present at the same time express an operand size
1908 dependency - strip AVX2 for the purposes here. The operand size
1909 dependent check occurs in check_vecOperands(). */
1910 if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1911 x.bitfield.cpuavx2 = 0;
1912
1913 cpu = cpu_flags_and (x, cpu);
1914 if (!cpu_flags_all_zero (&cpu))
1915 {
1916 if (x.bitfield.cpuavx)
1917 {
1918 /* We need to check a few extra flags with AVX. */
1919 if (cpu.bitfield.cpuavx
1920 && (!t->opcode_modifier.sse2avx
1921 || (sse2avx && !i.prefix[DATA_PREFIX]))
1922 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1923 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1924 && (!x.bitfield.cpupclmulqdq || cpu.bitfield.cpupclmulqdq))
1925 match |= CPU_FLAGS_ARCH_MATCH;
1926 }
1927 else if (x.bitfield.cpuavx512f)
1928 {
1929 /* We need to check a few extra flags with AVX512F. */
1930 if (cpu.bitfield.cpuavx512f
1931 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1932 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1933 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1934 match |= CPU_FLAGS_ARCH_MATCH;
1935 }
1936 else
1937 match |= CPU_FLAGS_ARCH_MATCH;
1938 }
1939 }
1940 return match;
1941 }
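
/* Illustrative interpretation of the result (not actual caller code):

     if (cpu_flags_match (t) == CPU_FLAGS_PERFECT_MATCH)
       ...template is usable as-is...

   A result lacking CPU_FLAGS_ARCH_MATCH means a required ISA extension
   isn't enabled, while one lacking CPU_FLAGS_64BIT_MATCH means the
   template's Cpu64/CpuNo64 marking conflicts with the current code
   size.  */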
1942
1943 static INLINE i386_operand_type
1944 operand_type_and (i386_operand_type x, i386_operand_type y)
1945 {
1946 if (x.bitfield.class != y.bitfield.class)
1947 x.bitfield.class = ClassNone;
1948 if (x.bitfield.instance != y.bitfield.instance)
1949 x.bitfield.instance = InstanceNone;
1950
1951 switch (ARRAY_SIZE (x.array))
1952 {
1953 case 3:
1954 x.array [2] &= y.array [2];
1955 /* Fall through. */
1956 case 2:
1957 x.array [1] &= y.array [1];
1958 /* Fall through. */
1959 case 1:
1960 x.array [0] &= y.array [0];
1961 break;
1962 default:
1963 abort ();
1964 }
1965 return x;
1966 }
1967
1968 static INLINE i386_operand_type
1969 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1970 {
1971 gas_assert (y.bitfield.class == ClassNone);
1972 gas_assert (y.bitfield.instance == InstanceNone);
1973
1974 switch (ARRAY_SIZE (x.array))
1975 {
1976 case 3:
1977 x.array [2] &= ~y.array [2];
1978 /* Fall through. */
1979 case 2:
1980 x.array [1] &= ~y.array [1];
1981 /* Fall through. */
1982 case 1:
1983 x.array [0] &= ~y.array [0];
1984 break;
1985 default:
1986 abort ();
1987 }
1988 return x;
1989 }
1990
1991 static INLINE i386_operand_type
1992 operand_type_or (i386_operand_type x, i386_operand_type y)
1993 {
1994 gas_assert (x.bitfield.class == ClassNone ||
1995 y.bitfield.class == ClassNone ||
1996 x.bitfield.class == y.bitfield.class);
1997 gas_assert (x.bitfield.instance == InstanceNone ||
1998 y.bitfield.instance == InstanceNone ||
1999 x.bitfield.instance == y.bitfield.instance);
2000
2001 switch (ARRAY_SIZE (x.array))
2002 {
2003 case 3:
2004 x.array [2] |= y.array [2];
2005 /* Fall through. */
2006 case 2:
2007 x.array [1] |= y.array [1];
2008 /* Fall through. */
2009 case 1:
2010 x.array [0] |= y.array [0];
2011 break;
2012 default:
2013 abort ();
2014 }
2015 return x;
2016 }
2017
2018 static INLINE i386_operand_type
2019 operand_type_xor (i386_operand_type x, i386_operand_type y)
2020 {
2021 gas_assert (y.bitfield.class == ClassNone);
2022 gas_assert (y.bitfield.instance == InstanceNone);
2023
2024 switch (ARRAY_SIZE (x.array))
2025 {
2026 case 3:
2027 x.array [2] ^= y.array [2];
2028 /* Fall through. */
2029 case 2:
2030 x.array [1] ^= y.array [1];
2031 /* Fall through. */
2032 case 1:
2033 x.array [0] ^= y.array [0];
2034 break;
2035 default:
2036 abort ();
2037 }
2038 return x;
2039 }
2040
2041 static const i386_operand_type anydisp = {
2042 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
2043 };
2044
2045 enum operand_type
2046 {
2047 reg,
2048 imm,
2049 disp,
2050 anymem
2051 };
2052
2053 static INLINE int
2054 operand_type_check (i386_operand_type t, enum operand_type c)
2055 {
2056 switch (c)
2057 {
2058 case reg:
2059 return t.bitfield.class == Reg;
2060
2061 case imm:
2062 return (t.bitfield.imm8
2063 || t.bitfield.imm8s
2064 || t.bitfield.imm16
2065 || t.bitfield.imm32
2066 || t.bitfield.imm32s
2067 || t.bitfield.imm64);
2068
2069 case disp:
2070 return (t.bitfield.disp8
2071 || t.bitfield.disp16
2072 || t.bitfield.disp32
2073 || t.bitfield.disp64);
2074
2075 case anymem:
2076 return (t.bitfield.disp8
2077 || t.bitfield.disp16
2078 || t.bitfield.disp32
2079 || t.bitfield.disp64
2080 || t.bitfield.baseindex);
2081
2082 default:
2083 abort ();
2084 }
2085
2086 return 0;
2087 }
2088
2089 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2090 between operand GIVEN and operand WANTED for instruction template T. */
2091
2092 static INLINE int
2093 match_operand_size (const insn_template *t, unsigned int wanted,
2094 unsigned int given)
2095 {
2096 return !((i.types[given].bitfield.byte
2097 && !t->operand_types[wanted].bitfield.byte)
2098 || (i.types[given].bitfield.word
2099 && !t->operand_types[wanted].bitfield.word)
2100 || (i.types[given].bitfield.dword
2101 && !t->operand_types[wanted].bitfield.dword)
2102 || (i.types[given].bitfield.qword
2103 && (!t->operand_types[wanted].bitfield.qword
2104 /* Don't allow 64-bit (memory) operands outside of 64-bit
2105 mode, when they're used where a 64-bit GPR could also
2106 be used. Checking is needed for Intel Syntax only. */
2107 || (intel_syntax
2108 && flag_code != CODE_64BIT
2109 && (t->operand_types[wanted].bitfield.class == Reg
2110 || t->operand_types[wanted].bitfield.class == Accum
2111 || t->opcode_modifier.isstring))))
2112 || (i.types[given].bitfield.tbyte
2113 && !t->operand_types[wanted].bitfield.tbyte));
2114 }
2115
2116 /* Return 1 if there is no conflict in SIMD register between operand
2117 GIVEN and operand WANTED for instruction template T. */
2118
2119 static INLINE int
2120 match_simd_size (const insn_template *t, unsigned int wanted,
2121 unsigned int given)
2122 {
2123 return !((i.types[given].bitfield.xmmword
2124 && !t->operand_types[wanted].bitfield.xmmword)
2125 || (i.types[given].bitfield.ymmword
2126 && !t->operand_types[wanted].bitfield.ymmword)
2127 || (i.types[given].bitfield.zmmword
2128 && !t->operand_types[wanted].bitfield.zmmword)
2129 || (i.types[given].bitfield.tmmword
2130 && !t->operand_types[wanted].bitfield.tmmword));
2131 }
2132
2133 /* Return 1 if there is no conflict in any size between operand GIVEN
2134 and operand WANTED for instruction template T. */
2135
2136 static INLINE int
2137 match_mem_size (const insn_template *t, unsigned int wanted,
2138 unsigned int given)
2139 {
2140 return (match_operand_size (t, wanted, given)
2141 && !((i.types[given].bitfield.unspecified
2142 && !i.broadcast.type
2143 && !i.broadcast.bytes
2144 && !t->operand_types[wanted].bitfield.unspecified)
2145 || (i.types[given].bitfield.fword
2146 && !t->operand_types[wanted].bitfield.fword)
2147 /* For scalar opcode templates to allow register and memory
2148 operands at the same time, some special casing is needed
2149 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2150 down-conversion vpmov*. */
2151 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2152 && t->operand_types[wanted].bitfield.byte
2153 + t->operand_types[wanted].bitfield.word
2154 + t->operand_types[wanted].bitfield.dword
2155 + t->operand_types[wanted].bitfield.qword
2156 > !!t->opcode_modifier.broadcast)
2157 ? (i.types[given].bitfield.xmmword
2158 || i.types[given].bitfield.ymmword
2159 || i.types[given].bitfield.zmmword)
2160 : !match_simd_size(t, wanted, given))));
2161 }
2162
2163 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2164 operands for instruction template T, and it has MATCH_REVERSE set if there
2165 is no size conflict on any operands for the template with operands reversed
2166 (and the template allows for reversing in the first place). */
2167
2168 #define MATCH_STRAIGHT 1
2169 #define MATCH_REVERSE 2
2170
2171 static INLINE unsigned int
2172 operand_size_match (const insn_template *t)
2173 {
2174 unsigned int j, match = MATCH_STRAIGHT;
2175
2176 /* Don't check non-absolute jump instructions. */
2177 if (t->opcode_modifier.jump
2178 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2179 return match;
2180
2181 /* Check memory and accumulator operand size. */
2182 for (j = 0; j < i.operands; j++)
2183 {
2184 if (i.types[j].bitfield.class != Reg
2185 && i.types[j].bitfield.class != RegSIMD
2186 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2187 continue;
2188
2189 if (t->operand_types[j].bitfield.class == Reg
2190 && !match_operand_size (t, j, j))
2191 {
2192 match = 0;
2193 break;
2194 }
2195
2196 if (t->operand_types[j].bitfield.class == RegSIMD
2197 && !match_simd_size (t, j, j))
2198 {
2199 match = 0;
2200 break;
2201 }
2202
2203 if (t->operand_types[j].bitfield.instance == Accum
2204 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2205 {
2206 match = 0;
2207 break;
2208 }
2209
2210 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2211 {
2212 match = 0;
2213 break;
2214 }
2215 }
2216
2217 if (!t->opcode_modifier.d)
2218 return match;
2219
2220 /* Check reverse. */
2221 gas_assert (i.operands >= 2);
2222
2223 for (j = 0; j < i.operands; j++)
2224 {
2225 unsigned int given = i.operands - j - 1;
2226
2227 /* For FMA4 and XOP insns VEX.W controls just the first two
2228 register operands. */
2229 if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
2230 given = j < 2 ? 1 - j : j;
2231
2232 if (t->operand_types[j].bitfield.class == Reg
2233 && !match_operand_size (t, j, given))
2234 return match;
2235
2236 if (t->operand_types[j].bitfield.class == RegSIMD
2237 && !match_simd_size (t, j, given))
2238 return match;
2239
2240 if (t->operand_types[j].bitfield.instance == Accum
2241 && (!match_operand_size (t, j, given)
2242 || !match_simd_size (t, j, given)))
2243 return match;
2244
2245 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2246 return match;
2247 }
2248
2249 return match | MATCH_REVERSE;
2250 }
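
/* Illustrative use (not actual caller code): for a template with the D
   modifier a caller can test

     unsigned int m = operand_size_match (t);
     if (m & MATCH_REVERSE)
       ...sizes also fit with the operand order reversed...

   so both operand orders of a reversible insn can be considered.  */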
2251
2252 static INLINE int
2253 operand_type_match (i386_operand_type overlap,
2254 i386_operand_type given)
2255 {
2256 i386_operand_type temp = overlap;
2257
2258 temp.bitfield.unspecified = 0;
2259 temp.bitfield.byte = 0;
2260 temp.bitfield.word = 0;
2261 temp.bitfield.dword = 0;
2262 temp.bitfield.fword = 0;
2263 temp.bitfield.qword = 0;
2264 temp.bitfield.tbyte = 0;
2265 temp.bitfield.xmmword = 0;
2266 temp.bitfield.ymmword = 0;
2267 temp.bitfield.zmmword = 0;
2268 temp.bitfield.tmmword = 0;
2269 if (operand_type_all_zero (&temp))
2270 goto mismatch;
2271
2272 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2273 return 1;
2274
2275 mismatch:
2276 i.error = operand_type_mismatch;
2277 return 0;
2278 }
2279
2280 /* If given types g0 and g1 are registers they must be of the same type
2281 unless the expected operand type register overlap is null.
2282 Intel syntax sized memory operands are also checked here. */
2283
2284 static INLINE int
2285 operand_type_register_match (i386_operand_type g0,
2286 i386_operand_type t0,
2287 i386_operand_type g1,
2288 i386_operand_type t1)
2289 {
2290 if (g0.bitfield.class != Reg
2291 && g0.bitfield.class != RegSIMD
2292 && (g0.bitfield.unspecified
2293 || !operand_type_check (g0, anymem)))
2294 return 1;
2295
2296 if (g1.bitfield.class != Reg
2297 && g1.bitfield.class != RegSIMD
2298 && (g1.bitfield.unspecified
2299 || !operand_type_check (g1, anymem)))
2300 return 1;
2301
2302 if (g0.bitfield.byte == g1.bitfield.byte
2303 && g0.bitfield.word == g1.bitfield.word
2304 && g0.bitfield.dword == g1.bitfield.dword
2305 && g0.bitfield.qword == g1.bitfield.qword
2306 && g0.bitfield.xmmword == g1.bitfield.xmmword
2307 && g0.bitfield.ymmword == g1.bitfield.ymmword
2308 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2309 return 1;
2310
2311 /* If expectations overlap in no more than a single size, all is fine. */
2312 g0 = operand_type_and (t0, t1);
2313 if (g0.bitfield.byte
2314 + g0.bitfield.word
2315 + g0.bitfield.dword
2316 + g0.bitfield.qword
2317 + g0.bitfield.xmmword
2318 + g0.bitfield.ymmword
2319 + g0.bitfield.zmmword <= 1)
2320 return 1;
2321
2322 i.error = register_type_mismatch;
2323
2324 return 0;
2325 }
2326
2327 static INLINE unsigned int
2328 register_number (const reg_entry *r)
2329 {
2330 unsigned int nr = r->reg_num;
2331
2332 if (r->reg_flags & RegRex)
2333 nr += 8;
2334
2335 if (r->reg_flags & RegVRex)
2336 nr += 16;
2337
2338 return nr;
2339 }
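
/* Worked examples (based on how i386-reg.tbl encodes registers; purely
   illustrative): %r12 is reg_num 4 with RegRex set, giving 4 + 8 == 12,
   and %xmm31 is reg_num 7 with both RegRex and RegVRex set, giving
   7 + 8 + 16 == 31.  */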
2340
2341 static INLINE unsigned int
2342 mode_from_disp_size (i386_operand_type t)
2343 {
2344 if (t.bitfield.disp8)
2345 return 1;
2346 else if (t.bitfield.disp16
2347 || t.bitfield.disp32)
2348 return 2;
2349 else
2350 return 0;
2351 }
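
/* Illustrative summary: the value returned is the ModRM.mod field to use
   for a register-based memory operand when building the ModRM byte -
   1 selects a disp8, 2 a disp16/disp32, and 0 means no displacement.  */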
2352
2353 static INLINE int
2354 fits_in_signed_byte (addressT num)
2355 {
2356 return num + 0x80 <= 0xff;
2357 }
2358
2359 static INLINE int
2360 fits_in_unsigned_byte (addressT num)
2361 {
2362 return num <= 0xff;
2363 }
2364
2365 static INLINE int
2366 fits_in_unsigned_word (addressT num)
2367 {
2368 return num <= 0xffff;
2369 }
2370
2371 static INLINE int
2372 fits_in_signed_word (addressT num)
2373 {
2374 return num + 0x8000 <= 0xffff;
2375 }
2376
2377 static INLINE int
2378 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2379 {
2380 #ifndef BFD64
2381 return 1;
2382 #else
2383 return num + 0x80000000 <= 0xffffffff;
2384 #endif
2385 } /* fits_in_signed_long() */
2386
2387 static INLINE int
2388 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2389 {
2390 #ifndef BFD64
2391 return 1;
2392 #else
2393 return num <= 0xffffffff;
2394 #endif
2395 } /* fits_in_unsigned_long() */
2396
2397 static INLINE valueT extend_to_32bit_address (addressT num)
2398 {
2399 #ifdef BFD64
2400 if (fits_in_unsigned_long(num))
2401 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2402
2403 if (!fits_in_signed_long (num))
2404 return num & 0xffffffff;
2405 #endif
2406
2407 return num;
2408 }
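
/* Worked examples (illustrative only), with BFD64:
     extend_to_32bit_address (0x00000000fffffffc) == 0xfffffffffffffffc
       (bit 31 gets sign-extended),
     extend_to_32bit_address (0xffffffff80000000) is returned unchanged
       (already a sign-extended 32-bit value), and
     extend_to_32bit_address (0x0000000123456789) == 0x23456789
       (neither representation fits, so the address is truncated).  */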
2409
2410 static INLINE int
2411 fits_in_disp8 (offsetT num)
2412 {
2413 int shift = i.memshift;
2414 unsigned int mask;
2415
2416 if (shift == -1)
2417 abort ();
2418
2419 mask = (1 << shift) - 1;
2420
2421 /* Return 0 if NUM isn't properly aligned. */
2422 if ((num & mask))
2423 return 0;
2424
2425 /* Check if NUM will fit in 8bit after shift. */
2426 return fits_in_signed_byte (num >> shift);
2427 }
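
/* Worked example (illustrative only): with i.memshift == 6, as for a
   64-byte EVEX vector memory operand using disp8*N scaling,
     fits_in_disp8 (0x1c0) != 0   (64-byte aligned, 0x1c0 >> 6 == 7)
     fits_in_disp8 (0x1c1) == 0   (not a multiple of 64)
     fits_in_disp8 (0x4000) == 0  (0x4000 >> 6 == 256, outside signed byte range) */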
2428
2429 static INLINE int
2430 fits_in_imm4 (offsetT num)
2431 {
2432 /* Despite the name, check for imm3 if we're dealing with EVEX. */
2433 return (num & (i.vec_encoding != vex_encoding_evex ? 0xf : 7)) == num;
2434 }
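
/* For example (illustrative only): fits_in_imm4 (9) is true under VEX
   encoding, where the register-in-immediate field has 4 bits, but false
   under EVEX, where only 3 bits (values 0..7) are available.  */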
2435
2436 static i386_operand_type
2437 smallest_imm_type (offsetT num)
2438 {
2439 i386_operand_type t;
2440
2441 operand_type_set (&t, 0);
2442 t.bitfield.imm64 = 1;
2443
2444 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2445 {
2446 /* This code is disabled on the 486 because all the Imm1 forms
2447 in the opcode table are slower on the i486. They're the
2448 versions with the implicitly specified single-position
2449 displacement, which has another syntax if you really want to
2450 use that form. */
2451 t.bitfield.imm1 = 1;
2452 t.bitfield.imm8 = 1;
2453 t.bitfield.imm8s = 1;
2454 t.bitfield.imm16 = 1;
2455 t.bitfield.imm32 = 1;
2456 t.bitfield.imm32s = 1;
2457 }
2458 else if (fits_in_signed_byte (num))
2459 {
2460 if (fits_in_unsigned_byte (num))
2461 t.bitfield.imm8 = 1;
2462 t.bitfield.imm8s = 1;
2463 t.bitfield.imm16 = 1;
2464 t.bitfield.imm32 = 1;
2465 t.bitfield.imm32s = 1;
2466 }
2467 else if (fits_in_unsigned_byte (num))
2468 {
2469 t.bitfield.imm8 = 1;
2470 t.bitfield.imm16 = 1;
2471 t.bitfield.imm32 = 1;
2472 t.bitfield.imm32s = 1;
2473 }
2474 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2475 {
2476 t.bitfield.imm16 = 1;
2477 t.bitfield.imm32 = 1;
2478 t.bitfield.imm32s = 1;
2479 }
2480 else if (fits_in_signed_long (num))
2481 {
2482 t.bitfield.imm32 = 1;
2483 t.bitfield.imm32s = 1;
2484 }
2485 else if (fits_in_unsigned_long (num))
2486 t.bitfield.imm32 = 1;
2487
2488 return t;
2489 }
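
/* Worked examples (illustrative only; imm64 is always set):
     smallest_imm_type (-1)   -> imm8s | imm16 | imm32 | imm32s | imm64
     smallest_imm_type (0x90) -> imm8 | imm16 | imm32 | imm32s | imm64
     smallest_imm_type (1)    -> imm1 | imm8 | imm8s | imm16 | imm32
                                 | imm32s | imm64 (imm1 omitted when
                                 tuning for the i486).  */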
2490
2491 static offsetT
2492 offset_in_range (offsetT val, int size)
2493 {
2494 addressT mask;
2495
2496 switch (size)
2497 {
2498 case 1: mask = ((addressT) 1 << 8) - 1; break;
2499 case 2: mask = ((addressT) 1 << 16) - 1; break;
2500 #ifdef BFD64
2501 case 4: mask = ((addressT) 1 << 32) - 1; break;
2502 #endif
2503 case sizeof (val): return val;
2504 default: abort ();
2505 }
2506
2507 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2508 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2509 (uint64_t) val, (uint64_t) (val & mask));
2510
2511 return val & mask;
2512 }
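
/* Worked example (illustrative only): offset_in_range (0x1234, 1) warns
   "0x1234 shortened to 0x34" and returns 0x34, whereas
   offset_in_range (-1, 1) returns 0xff silently, since -1 is merely the
   sign-extended form of 0xff.  */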
2513
2514 static INLINE const char *insn_name (const insn_template *t)
2515 {
2516 return &i386_mnemonics[t->mnem_off];
2517 }
2518
2519 enum PREFIX_GROUP
2520 {
2521 PREFIX_EXIST = 0,
2522 PREFIX_LOCK,
2523 PREFIX_REP,
2524 PREFIX_DS,
2525 PREFIX_OTHER
2526 };
2527
2528 /* Returns
2529 a. PREFIX_EXIST if attempting to add a prefix where one from the
2530 same class already exists.
2531 b. PREFIX_LOCK if lock prefix is added.
2532 c. PREFIX_REP if rep/repne prefix is added.
2533 d. PREFIX_DS if ds prefix is added.
2534 e. PREFIX_OTHER if other prefix is added.
2535 */
2536
2537 static enum PREFIX_GROUP
2538 add_prefix (unsigned int prefix)
2539 {
2540 enum PREFIX_GROUP ret = PREFIX_OTHER;
2541 unsigned int q;
2542
2543 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2544 && flag_code == CODE_64BIT)
2545 {
2546 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2547 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2548 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2549 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2550 ret = PREFIX_EXIST;
2551 q = REX_PREFIX;
2552 }
2553 else
2554 {
2555 switch (prefix)
2556 {
2557 default:
2558 abort ();
2559
2560 case DS_PREFIX_OPCODE:
2561 ret = PREFIX_DS;
2562 /* Fall through. */
2563 case CS_PREFIX_OPCODE:
2564 case ES_PREFIX_OPCODE:
2565 case FS_PREFIX_OPCODE:
2566 case GS_PREFIX_OPCODE:
2567 case SS_PREFIX_OPCODE:
2568 q = SEG_PREFIX;
2569 break;
2570
2571 case REPNE_PREFIX_OPCODE:
2572 case REPE_PREFIX_OPCODE:
2573 q = REP_PREFIX;
2574 ret = PREFIX_REP;
2575 break;
2576
2577 case LOCK_PREFIX_OPCODE:
2578 q = LOCK_PREFIX;
2579 ret = PREFIX_LOCK;
2580 break;
2581
2582 case FWAIT_OPCODE:
2583 q = WAIT_PREFIX;
2584 break;
2585
2586 case ADDR_PREFIX_OPCODE:
2587 q = ADDR_PREFIX;
2588 break;
2589
2590 case DATA_PREFIX_OPCODE:
2591 q = DATA_PREFIX;
2592 break;
2593 }
2594 if (i.prefix[q] != 0)
2595 ret = PREFIX_EXIST;
2596 }
2597
2598 if (ret)
2599 {
2600 if (!i.prefix[q])
2601 ++i.prefixes;
2602 i.prefix[q] |= prefix;
2603 }
2604 else
2605 as_bad (_("same type of prefix used twice"));
2606
2607 return ret;
2608 }
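
/* Illustrative call sequence (not actual caller code): calling
   add_prefix (LOCK_PREFIX_OPCODE) twice yields PREFIX_LOCK first, then
   PREFIX_EXIST together with the "same type of prefix used twice"
   diagnostic.  Distinct REX bits, by contrast, are OR-ed together in
   64-bit mode: add_prefix (REX_OPCODE | REX_B) after
   add_prefix (REX_OPCODE | REX_W) succeeds and returns PREFIX_OTHER.  */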
2609
2610 static void
2611 update_code_flag (int value, int check)
2612 {
2613 PRINTF_LIKE ((*as_error)) = check ? as_fatal : as_bad;
2614
2615 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpu64)
2616 {
2617 as_error (_("64bit mode not supported on `%s'."),
2618 cpu_arch_name ? cpu_arch_name : default_arch);
2619 return;
2620 }
2621
2622 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2623 {
2624 as_error (_("32bit mode not supported on `%s'."),
2625 cpu_arch_name ? cpu_arch_name : default_arch);
2626 return;
2627 }
2628
2629 flag_code = (enum flag_code) value;
2630
2631 stackop_size = '\0';
2632 }
2633
2634 static void
2635 set_code_flag (int value)
2636 {
2637 update_code_flag (value, 0);
2638 }
2639
2640 static void
2641 set_16bit_gcc_code_flag (int new_code_flag)
2642 {
2643 flag_code = (enum flag_code) new_code_flag;
2644 if (flag_code != CODE_16BIT)
2645 abort ();
2646 stackop_size = LONG_MNEM_SUFFIX;
2647 }
2648
2649 static void
2650 set_intel_syntax (int syntax_flag)
2651 {
2652 /* Find out if register prefixing is specified. */
2653 int ask_naked_reg = 0;
2654
2655 SKIP_WHITESPACE ();
2656 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2657 {
2658 char *string;
2659 int e = get_symbol_name (&string);
2660
2661 if (strcmp (string, "prefix") == 0)
2662 ask_naked_reg = 1;
2663 else if (strcmp (string, "noprefix") == 0)
2664 ask_naked_reg = -1;
2665 else
2666 as_bad (_("bad argument to syntax directive."));
2667 (void) restore_line_pointer (e);
2668 }
2669 demand_empty_rest_of_line ();
2670
2671 intel_syntax = syntax_flag;
2672
2673 if (ask_naked_reg == 0)
2674 allow_naked_reg = (intel_syntax
2675 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2676 else
2677 allow_naked_reg = (ask_naked_reg < 0);
2678
2679 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2680
2681 register_prefix = allow_naked_reg ? "" : "%";
2682 }
2683
2684 static void
2685 set_intel_mnemonic (int mnemonic_flag)
2686 {
2687 intel_mnemonic = mnemonic_flag;
2688 }
2689
2690 static void
2691 set_allow_index_reg (int flag)
2692 {
2693 allow_index_reg = flag;
2694 }
2695
2696 static void
2697 set_check (int what)
2698 {
2699 enum check_kind *kind;
2700 const char *str;
2701
2702 if (what)
2703 {
2704 kind = &operand_check;
2705 str = "operand";
2706 }
2707 else
2708 {
2709 kind = &sse_check;
2710 str = "sse";
2711 }
2712
2713 SKIP_WHITESPACE ();
2714
2715 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2716 {
2717 char *string;
2718 int e = get_symbol_name (&string);
2719
2720 if (strcmp (string, "none") == 0)
2721 *kind = check_none;
2722 else if (strcmp (string, "warning") == 0)
2723 *kind = check_warning;
2724 else if (strcmp (string, "error") == 0)
2725 *kind = check_error;
2726 else
2727 as_bad (_("bad argument to %s_check directive."), str);
2728 (void) restore_line_pointer (e);
2729 }
2730 else
2731 as_bad (_("missing argument for %s_check directive"), str);
2732
2733 demand_empty_rest_of_line ();
2734 }
2735
2736 static void
2737 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2738 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2739 {
2740 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2741 static const char *arch;
2742
2743 /* Intel MCU is only supported on ELF. */
2744 if (!IS_ELF)
2745 return;
2746
2747 if (!arch)
2748 {
2749 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2750 use default_arch. */
2751 arch = cpu_arch_name;
2752 if (!arch)
2753 arch = default_arch;
2754 }
2755
2756 /* If we are targeting Intel MCU, we must enable it. */
2757 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2758 == new_flag.bitfield.cpuiamcu)
2759 return;
2760
2761 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2762 #endif
2763 }
2764
2765 static void
2766 extend_cpu_sub_arch_name (const char *name)
2767 {
2768 if (cpu_sub_arch_name)
2769 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2770 ".", name, (const char *) NULL);
2771 else
2772 cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2773 }
2774
2775 static void
2776 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2777 {
2778 typedef struct arch_stack_entry
2779 {
2780 const struct arch_stack_entry *prev;
2781 const char *name;
2782 char *sub_name;
2783 i386_cpu_flags flags;
2784 i386_cpu_flags isa_flags;
2785 enum processor_type isa;
2786 enum flag_code flag_code;
2787 unsigned int vector_size;
2788 char stackop_size;
2789 bool no_cond_jump_promotion;
2790 } arch_stack_entry;
2791 static const arch_stack_entry *arch_stack_top;
2792 char *s;
2793 int e;
2794 const char *string;
2795 unsigned int j = 0;
2796 i386_cpu_flags flags;
2797
2798 SKIP_WHITESPACE ();
2799
2800 if (is_end_of_line[(unsigned char) *input_line_pointer])
2801 {
2802 as_bad (_("missing cpu architecture"));
2803 input_line_pointer++;
2804 return;
2805 }
2806
2807 e = get_symbol_name (&s);
2808 string = s;
2809
2810 if (strcmp (string, "push") == 0)
2811 {
2812 arch_stack_entry *top = XNEW (arch_stack_entry);
2813
2814 top->name = cpu_arch_name;
2815 if (cpu_sub_arch_name)
2816 top->sub_name = xstrdup (cpu_sub_arch_name);
2817 else
2818 top->sub_name = NULL;
2819 top->flags = cpu_arch_flags;
2820 top->isa = cpu_arch_isa;
2821 top->isa_flags = cpu_arch_isa_flags;
2822 top->flag_code = flag_code;
2823 top->vector_size = vector_size;
2824 top->stackop_size = stackop_size;
2825 top->no_cond_jump_promotion = no_cond_jump_promotion;
2826
2827 top->prev = arch_stack_top;
2828 arch_stack_top = top;
2829
2830 (void) restore_line_pointer (e);
2831 demand_empty_rest_of_line ();
2832 return;
2833 }
2834
2835 if (strcmp (string, "pop") == 0)
2836 {
2837 const arch_stack_entry *top = arch_stack_top;
2838
2839 if (!top)
2840 as_bad (_(".arch stack is empty"));
2841 else if (top->flag_code != flag_code
2842 || top->stackop_size != stackop_size)
2843 {
2844 static const unsigned int bits[] = {
2845 [CODE_16BIT] = 16,
2846 [CODE_32BIT] = 32,
2847 [CODE_64BIT] = 64,
2848 };
2849
2850 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2851 bits[top->flag_code],
2852 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2853 }
2854 else
2855 {
2856 arch_stack_top = top->prev;
2857
2858 cpu_arch_name = top->name;
2859 free (cpu_sub_arch_name);
2860 cpu_sub_arch_name = top->sub_name;
2861 cpu_arch_flags = top->flags;
2862 cpu_arch_isa = top->isa;
2863 cpu_arch_isa_flags = top->isa_flags;
2864 vector_size = top->vector_size;
2865 no_cond_jump_promotion = top->no_cond_jump_promotion;
2866
2867 XDELETE (top);
2868 }
2869
2870 (void) restore_line_pointer (e);
2871 demand_empty_rest_of_line ();
2872 return;
2873 }
2874
2875 if (strcmp (string, "default") == 0)
2876 {
2877 if (strcmp (default_arch, "iamcu") == 0)
2878 string = default_arch;
2879 else
2880 {
2881 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2882
2883 cpu_arch_name = NULL;
2884 free (cpu_sub_arch_name);
2885 cpu_sub_arch_name = NULL;
2886 cpu_arch_flags = cpu_unknown_flags;
2887 cpu_arch_isa = PROCESSOR_UNKNOWN;
2888 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2889 if (!cpu_arch_tune_set)
2890 {
2891 cpu_arch_tune = cpu_arch_isa;
2892 cpu_arch_tune_flags = cpu_arch_isa_flags;
2893 }
2894
2895 vector_size = VSZ_DEFAULT;
2896
2897 j = ARRAY_SIZE (cpu_arch) + 1;
2898 }
2899 }
2900
2901 for (; j < ARRAY_SIZE (cpu_arch); j++)
2902 {
2903 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2904 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2905 {
2906 if (*string != '.')
2907 {
2908 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2909
2910 if (flag_code == CODE_64BIT && !cpu_arch[j].enable.bitfield.cpu64)
2911 {
2912 as_bad (_("64bit mode not supported on `%s'."),
2913 cpu_arch[j].name);
2914 (void) restore_line_pointer (e);
2915 ignore_rest_of_line ();
2916 return;
2917 }
2918
2919 if (flag_code == CODE_32BIT && !cpu_arch[j].enable.bitfield.cpui386)
2920 {
2921 as_bad (_("32bit mode not supported on `%s'."),
2922 cpu_arch[j].name);
2923 (void) restore_line_pointer (e);
2924 ignore_rest_of_line ();
2925 return;
2926 }
2927
2928 cpu_arch_name = cpu_arch[j].name;
2929 free (cpu_sub_arch_name);
2930 cpu_sub_arch_name = NULL;
2931 cpu_arch_flags = cpu_arch[j].enable;
2932 cpu_arch_isa = cpu_arch[j].type;
2933 cpu_arch_isa_flags = cpu_arch[j].enable;
2934 if (!cpu_arch_tune_set)
2935 {
2936 cpu_arch_tune = cpu_arch_isa;
2937 cpu_arch_tune_flags = cpu_arch_isa_flags;
2938 }
2939
2940 vector_size = VSZ_DEFAULT;
2941
2942 pre_386_16bit_warned = false;
2943 break;
2944 }
2945
2946 if (cpu_flags_all_zero (&cpu_arch[j].enable))
2947 continue;
2948
2949 flags = cpu_flags_or (cpu_arch_flags, cpu_arch[j].enable);
2950
2951 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2952 {
2953 extend_cpu_sub_arch_name (string + 1);
2954 cpu_arch_flags = flags;
2955 cpu_arch_isa_flags = flags;
2956 }
2957 else
2958 cpu_arch_isa_flags
2959 = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[j].enable);
2960
2961 (void) restore_line_pointer (e);
2962
2963 switch (cpu_arch[j].vsz)
2964 {
2965 default:
2966 break;
2967
2968 case vsz_set:
2969 #ifdef SVR4_COMMENT_CHARS
2970 if (*input_line_pointer == ':' || *input_line_pointer == '/')
2971 #else
2972 if (*input_line_pointer == '/')
2973 #endif
2974 {
2975 ++input_line_pointer;
2976 switch (get_absolute_expression ())
2977 {
2978 case 512: vector_size = VSZ512; break;
2979 case 256: vector_size = VSZ256; break;
2980 case 128: vector_size = VSZ128; break;
2981 default:
2982 as_bad (_("Unrecognized vector size specifier"));
2983 ignore_rest_of_line ();
2984 return;
2985 }
2986 break;
2987 }
2988 /* Fall through. */
2989 case vsz_reset:
2990 vector_size = VSZ_DEFAULT;
2991 break;
2992 }
2993
2994 demand_empty_rest_of_line ();
2995 return;
2996 }
2997 }
2998
2999 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
3000 {
3001 /* Disable an ISA extension. */
3002 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
3003 if (cpu_arch[j].type == PROCESSOR_NONE
3004 && strcmp (string + 3, cpu_arch[j].name) == 0)
3005 {
3006 flags = cpu_flags_and_not (cpu_arch_flags, cpu_arch[j].disable);
3007 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
3008 {
3009 extend_cpu_sub_arch_name (string + 1);
3010 cpu_arch_flags = flags;
3011 cpu_arch_isa_flags = flags;
3012 }
3013
3014 if (cpu_arch[j].vsz == vsz_set)
3015 vector_size = VSZ_DEFAULT;
3016
3017 (void) restore_line_pointer (e);
3018 demand_empty_rest_of_line ();
3019 return;
3020 }
3021 }
3022
3023 if (j == ARRAY_SIZE (cpu_arch))
3024 as_bad (_("no such architecture: `%s'"), string);
3025
3026 *input_line_pointer = e;
3027
3028 no_cond_jump_promotion = 0;
3029 if (*input_line_pointer == ','
3030 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
3031 {
3032 ++input_line_pointer;
3033 e = get_symbol_name (&s);
3034 string = s;
3035
3036 if (strcmp (string, "nojumps") == 0)
3037 no_cond_jump_promotion = 1;
3038 else if (strcmp (string, "jumps") == 0)
3039 ;
3040 else
3041 as_bad (_("no such architecture modifier: `%s'"), string);
3042
3043 (void) restore_line_pointer (e);
3044 }
3045
3046 demand_empty_rest_of_line ();
3047 }
3048
3049 enum bfd_architecture
3050 i386_arch (void)
3051 {
3052 if (cpu_arch_isa == PROCESSOR_IAMCU)
3053 {
3054 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3055 || flag_code == CODE_64BIT)
3056 as_fatal (_("Intel MCU is 32bit ELF only"));
3057 return bfd_arch_iamcu;
3058 }
3059 else
3060 return bfd_arch_i386;
3061 }
3062
3063 unsigned long
3064 i386_mach (void)
3065 {
3066 if (startswith (default_arch, "x86_64"))
3067 {
3068 if (default_arch[6] == '\0')
3069 return bfd_mach_x86_64;
3070 else
3071 return bfd_mach_x64_32;
3072 }
3073 else if (!strcmp (default_arch, "i386")
3074 || !strcmp (default_arch, "iamcu"))
3075 {
3076 if (cpu_arch_isa == PROCESSOR_IAMCU)
3077 {
3078 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
3079 as_fatal (_("Intel MCU is 32bit ELF only"));
3080 return bfd_mach_i386_iamcu;
3081 }
3082 else
3083 return bfd_mach_i386_i386;
3084 }
3085 else
3086 as_fatal (_("unknown architecture"));
3087 }
3088 \f
3089 #include "opcodes/i386-tbl.h"
3090
3091 void
3092 md_begin (void)
3093 {
3094 /* Support pseudo prefixes like {disp32}. */
3095 lex_type ['{'] = LEX_BEGIN_NAME;
3096
3097 /* Initialize op_hash hash table. */
3098 op_hash = str_htab_create ();
3099
3100 {
3101 const insn_template *const *sets = i386_op_sets;
3102 const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;
3103
3104 /* Type checks to compensate for the conversion through void * which
3105 occurs during hash table insertion / lookup. */
3106 (void) sizeof (sets == &current_templates->start);
3107 (void) sizeof (end == &current_templates->end);
3108 for (; sets < end; ++sets)
3109 if (str_hash_insert (op_hash, insn_name (*sets), sets, 0))
3110 as_fatal (_("duplicate %s"), insn_name (*sets));
3111 }
3112
3113 /* Initialize reg_hash hash table. */
3114 reg_hash = str_htab_create ();
3115 {
3116 const reg_entry *regtab;
3117 unsigned int regtab_size = i386_regtab_size;
3118
3119 for (regtab = i386_regtab; regtab_size--; regtab++)
3120 {
3121 switch (regtab->reg_type.bitfield.class)
3122 {
3123 case Reg:
3124 if (regtab->reg_type.bitfield.dword)
3125 {
3126 if (regtab->reg_type.bitfield.instance == Accum)
3127 reg_eax = regtab;
3128 }
3129 else if (regtab->reg_type.bitfield.tbyte)
3130 {
3131 /* There's no point inserting st(<N>) in the hash table, as
3132 parentheses aren't included in register_chars[] anyway. */
3133 if (regtab->reg_type.bitfield.instance != Accum)
3134 continue;
3135 reg_st0 = regtab;
3136 }
3137 break;
3138
3139 case SReg:
3140 switch (regtab->reg_num)
3141 {
3142 case 0: reg_es = regtab; break;
3143 case 2: reg_ss = regtab; break;
3144 case 3: reg_ds = regtab; break;
3145 }
3146 break;
3147
3148 case RegMask:
3149 if (!regtab->reg_num)
3150 reg_k0 = regtab;
3151 break;
3152 }
3153
3154 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3155 as_fatal (_("duplicate %s"), regtab->reg_name);
3156 }
3157 }
3158
3159 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3160 {
3161 int c;
3162 const char *p;
3163
3164 for (c = 0; c < 256; c++)
3165 {
3166 if (ISDIGIT (c) || ISLOWER (c))
3167 {
3168 mnemonic_chars[c] = c;
3169 register_chars[c] = c;
3170 operand_chars[c] = c;
3171 }
3172 else if (ISUPPER (c))
3173 {
3174 mnemonic_chars[c] = TOLOWER (c);
3175 register_chars[c] = mnemonic_chars[c];
3176 operand_chars[c] = c;
3177 }
3178 #ifdef SVR4_COMMENT_CHARS
3179 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3180 operand_chars[c] = c;
3181 #endif
3182
3183 if (c >= 128)
3184 operand_chars[c] = c;
3185 }
3186
3187 mnemonic_chars['_'] = '_';
3188 mnemonic_chars['-'] = '-';
3189 mnemonic_chars['.'] = '.';
3190
3191 for (p = extra_symbol_chars; *p != '\0'; p++)
3192 operand_chars[(unsigned char) *p] = *p;
3193 for (p = operand_special_chars; *p != '\0'; p++)
3194 operand_chars[(unsigned char) *p] = *p;
3195 }
3196
3197 if (flag_code == CODE_64BIT)
3198 {
3199 #if defined (OBJ_COFF) && defined (TE_PE)
3200 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3201 ? 32 : 16);
3202 #else
3203 x86_dwarf2_return_column = 16;
3204 #endif
3205 x86_cie_data_alignment = -8;
3206 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3207 x86_sframe_cfa_sp_reg = 7;
3208 x86_sframe_cfa_fp_reg = 6;
3209 #endif
3210 }
3211 else
3212 {
3213 x86_dwarf2_return_column = 8;
3214 x86_cie_data_alignment = -4;
3215 }
3216
3217 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3218 can be turned into BRANCH_PREFIX frag. */
3219 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3220 abort ();
3221 }
3222
3223 void
3224 i386_print_statistics (FILE *file)
3225 {
3226 htab_print_statistics (file, "i386 opcode", op_hash);
3227 htab_print_statistics (file, "i386 register", reg_hash);
3228 }
3229
3230 void
3231 i386_md_end (void)
3232 {
3233 htab_delete (op_hash);
3234 htab_delete (reg_hash);
3235 }
3236 \f
3237 #ifdef DEBUG386
3238
3239 /* Debugging routines for md_assemble. */
3240 static void pte (insn_template *);
3241 static void pt (i386_operand_type);
3242 static void pe (expressionS *);
3243 static void ps (symbolS *);
3244
3245 static void
3246 pi (const char *line, i386_insn *x)
3247 {
3248 unsigned int j;
3249
3250 fprintf (stdout, "%s: template ", line);
3251 pte (&x->tm);
3252 fprintf (stdout, " address: base %s index %s scale %x\n",
3253 x->base_reg ? x->base_reg->reg_name : "none",
3254 x->index_reg ? x->index_reg->reg_name : "none",
3255 x->log2_scale_factor);
3256 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3257 x->rm.mode, x->rm.reg, x->rm.regmem);
3258 fprintf (stdout, " sib: base %x index %x scale %x\n",
3259 x->sib.base, x->sib.index, x->sib.scale);
3260 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3261 (x->rex & REX_W) != 0,
3262 (x->rex & REX_R) != 0,
3263 (x->rex & REX_X) != 0,
3264 (x->rex & REX_B) != 0);
3265 for (j = 0; j < x->operands; j++)
3266 {
3267 fprintf (stdout, " #%d: ", j + 1);
3268 pt (x->types[j]);
3269 fprintf (stdout, "\n");
3270 if (x->types[j].bitfield.class == Reg
3271 || x->types[j].bitfield.class == RegMMX
3272 || x->types[j].bitfield.class == RegSIMD
3273 || x->types[j].bitfield.class == RegMask
3274 || x->types[j].bitfield.class == SReg
3275 || x->types[j].bitfield.class == RegCR
3276 || x->types[j].bitfield.class == RegDR
3277 || x->types[j].bitfield.class == RegTR
3278 || x->types[j].bitfield.class == RegBND)
3279 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3280 if (operand_type_check (x->types[j], imm))
3281 pe (x->op[j].imms);
3282 if (operand_type_check (x->types[j], disp))
3283 pe (x->op[j].disps);
3284 }
3285 }
3286
3287 static void
3288 pte (insn_template *t)
3289 {
3290 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3291 static const char *const opc_spc[] = {
3292 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3293 "XOP08", "XOP09", "XOP0A",
3294 };
3295 unsigned int j;
3296
3297 fprintf (stdout, " %d operands ", t->operands);
3298 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3299 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3300 if (opc_spc[t->opcode_space])
3301 fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3302 fprintf (stdout, "opcode %x ", t->base_opcode);
3303 if (t->extension_opcode != None)
3304 fprintf (stdout, "ext %x ", t->extension_opcode);
3305 if (t->opcode_modifier.d)
3306 fprintf (stdout, "D");
3307 if (t->opcode_modifier.w)
3308 fprintf (stdout, "W");
3309 fprintf (stdout, "\n");
3310 for (j = 0; j < t->operands; j++)
3311 {
3312 fprintf (stdout, " #%d type ", j + 1);
3313 pt (t->operand_types[j]);
3314 fprintf (stdout, "\n");
3315 }
3316 }
3317
3318 static void
3319 pe (expressionS *e)
3320 {
3321 fprintf (stdout, " operation %d\n", e->X_op);
3322 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3323 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3324 if (e->X_add_symbol)
3325 {
3326 fprintf (stdout, " add_symbol ");
3327 ps (e->X_add_symbol);
3328 fprintf (stdout, "\n");
3329 }
3330 if (e->X_op_symbol)
3331 {
3332 fprintf (stdout, " op_symbol ");
3333 ps (e->X_op_symbol);
3334 fprintf (stdout, "\n");
3335 }
3336 }
3337
3338 static void
3339 ps (symbolS *s)
3340 {
3341 fprintf (stdout, "%s type %s%s",
3342 S_GET_NAME (s),
3343 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3344 segment_name (S_GET_SEGMENT (s)));
3345 }
3346
3347 static struct type_name
3348 {
3349 i386_operand_type mask;
3350 const char *name;
3351 }
3352 const type_names[] =
3353 {
3354 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3355 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3356 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3357 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3358 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3359 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3360 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3361 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3362 { { .bitfield = { .imm8 = 1 } }, "i8" },
3363 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3364 { { .bitfield = { .imm16 = 1 } }, "i16" },
3365 { { .bitfield = { .imm32 = 1 } }, "i32" },
3366 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3367 { { .bitfield = { .imm64 = 1 } }, "i64" },
3368 { { .bitfield = { .imm1 = 1 } }, "i1" },
3369 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3370 { { .bitfield = { .disp8 = 1 } }, "d8" },
3371 { { .bitfield = { .disp16 = 1 } }, "d16" },
3372 { { .bitfield = { .disp32 = 1 } }, "d32" },
3373 { { .bitfield = { .disp64 = 1 } }, "d64" },
3374 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3375 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3376 { { .bitfield = { .class = RegCR } }, "control reg" },
3377 { { .bitfield = { .class = RegTR } }, "test reg" },
3378 { { .bitfield = { .class = RegDR } }, "debug reg" },
3379 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3380 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3381 { { .bitfield = { .class = SReg } }, "SReg" },
3382 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3383 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3384 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3385 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3386 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3387 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3388 };
3389
3390 static void
3391 pt (i386_operand_type t)
3392 {
3393 unsigned int j;
3394 i386_operand_type a;
3395
3396 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3397 {
3398 a = operand_type_and (t, type_names[j].mask);
3399 if (operand_type_equal (&a, &type_names[j].mask))
3400 fprintf (stdout, "%s, ", type_names[j].name);
3401 }
3402 fflush (stdout);
3403 }
3404
3405 #endif /* DEBUG386 */
3406 \f
3407 static bfd_reloc_code_real_type
3408 reloc (unsigned int size,
3409 int pcrel,
3410 int sign,
3411 bfd_reloc_code_real_type other)
3412 {
3413 if (other != NO_RELOC)
3414 {
3415 reloc_howto_type *rel;
3416
3417 if (size == 8)
3418 switch (other)
3419 {
3420 case BFD_RELOC_X86_64_GOT32:
3421 return BFD_RELOC_X86_64_GOT64;
3423 case BFD_RELOC_X86_64_GOTPLT64:
3424 return BFD_RELOC_X86_64_GOTPLT64;
3426 case BFD_RELOC_X86_64_PLTOFF64:
3427 return BFD_RELOC_X86_64_PLTOFF64;
3429 case BFD_RELOC_X86_64_GOTPC32:
3430 other = BFD_RELOC_X86_64_GOTPC64;
3431 break;
3432 case BFD_RELOC_X86_64_GOTPCREL:
3433 other = BFD_RELOC_X86_64_GOTPCREL64;
3434 break;
3435 case BFD_RELOC_X86_64_TPOFF32:
3436 other = BFD_RELOC_X86_64_TPOFF64;
3437 break;
3438 case BFD_RELOC_X86_64_DTPOFF32:
3439 other = BFD_RELOC_X86_64_DTPOFF64;
3440 break;
3441 default:
3442 break;
3443 }
3444
3445 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3446 if (other == BFD_RELOC_SIZE32)
3447 {
3448 if (size == 8)
3449 other = BFD_RELOC_SIZE64;
3450 if (pcrel)
3451 {
3452 as_bad (_("there are no pc-relative size relocations"));
3453 return NO_RELOC;
3454 }
3455 }
3456 #endif
3457
3458 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3459 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3460 sign = -1;
3461
3462 rel = bfd_reloc_type_lookup (stdoutput, other);
3463 if (!rel)
3464 as_bad (_("unknown relocation (%u)"), other);
3465 else if (size != bfd_get_reloc_size (rel))
3466 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3467 bfd_get_reloc_size (rel),
3468 size);
3469 else if (pcrel && !rel->pc_relative)
3470 as_bad (_("non-pc-relative relocation for pc-relative field"));
3471 else if ((rel->complain_on_overflow == complain_overflow_signed
3472 && !sign)
3473 || (rel->complain_on_overflow == complain_overflow_unsigned
3474 && sign > 0))
3475 as_bad (_("relocated field and relocation type differ in signedness"));
3476 else
3477 return other;
3478 return NO_RELOC;
3479 }
3480
3481 if (pcrel)
3482 {
3483 if (!sign)
3484 as_bad (_("there are no unsigned pc-relative relocations"));
3485 switch (size)
3486 {
3487 case 1: return BFD_RELOC_8_PCREL;
3488 case 2: return BFD_RELOC_16_PCREL;
3489 case 4: return BFD_RELOC_32_PCREL;
3490 case 8: return BFD_RELOC_64_PCREL;
3491 }
3492 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3493 }
3494 else
3495 {
3496 if (sign > 0)
3497 switch (size)
3498 {
3499 case 4: return BFD_RELOC_X86_64_32S;
3500 }
3501 else
3502 switch (size)
3503 {
3504 case 1: return BFD_RELOC_8;
3505 case 2: return BFD_RELOC_16;
3506 case 4: return BFD_RELOC_32;
3507 case 8: return BFD_RELOC_64;
3508 }
3509 as_bad (_("cannot do %s %u byte relocation"),
3510 sign > 0 ? "signed" : "unsigned", size);
3511 }
3512
3513 return NO_RELOC;
3514 }
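
/* Illustrative uses (not actual caller code):
     reloc (4, 1, 1, NO_RELOC)                -> BFD_RELOC_32_PCREL
     reloc (4, 0, 1, NO_RELOC)                -> BFD_RELOC_X86_64_32S
     reloc (8, 0, 0, BFD_RELOC_X86_64_GOT32)  -> BFD_RELOC_X86_64_GOT64
   The last case shows the size-8 widening of an explicitly requested
   relocation type.  */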
3515
3516 /* Here we decide which fixups can be adjusted to make them relative to
3517 the beginning of the section instead of the symbol. Basically we need
3518 to make sure that the dynamic relocations are done correctly, so in
3519 some cases we force the original symbol to be used. */
3520
3521 int
3522 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3523 {
3524 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3525 if (!IS_ELF)
3526 return 1;
3527
3528 /* Don't adjust pc-relative references to merge sections in 64-bit
3529 mode. */
3530 if (use_rela_relocations
3531 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3532 && fixP->fx_pcrel)
3533 return 0;
3534
3535 /* The x86_64 GOTPCREL relocations are represented as 32bit PCrel
3536 ones and changed later by validate_fix. */
3537 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3538 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3539 return 0;
3540
3541 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3542 for size relocations. */
3543 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3544 || fixP->fx_r_type == BFD_RELOC_SIZE64
3545 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3546 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3547 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3548 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3549 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3550 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3551 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3552 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3553 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3554 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3555 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3556 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3557 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3558 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3559 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3560 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3561 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3562 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3563 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3564 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3565 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3566 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3567 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3568 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3569 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3570 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3571 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3572 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3573 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3574 return 0;
3575 #endif
3576 return 1;
3577 }
3578
3579 static INLINE bool
3580 want_disp32 (const insn_template *t)
3581 {
3582 return flag_code != CODE_64BIT
3583 || i.prefix[ADDR_PREFIX]
3584 || (t->mnem_off == MN_lea
3585 && (!i.types[1].bitfield.qword
3586 || t->opcode_modifier.size == SIZE32));
3587 }
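
/* For example (illustrative only): want_disp32 returns true outside
   64-bit mode, and in 64-bit mode only when an address-size (0x67)
   prefix is in use or for lea forms whose destination is not a 64-bit
   register (e.g. leal), where only the low 32 address bits matter.  */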
3588
3589 static int
3590 intel_float_operand (const char *mnemonic)
3591 {
3592 /* Note that the value returned is meaningful only for opcodes with (memory)
3593 operands, hence the code here is free to improperly handle opcodes that
3594 have no operands (for better performance and smaller code). */
3595
3596 if (mnemonic[0] != 'f')
3597 return 0; /* non-math */
3598
3599 switch (mnemonic[1])
3600 {
3601 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3602 the fs segment override prefix are not handled here, because no
3603 call path can reach this point for opcodes without operands.  */
3604 case 'i':
3605 return 2 /* integer op */;
3606 case 'l':
3607 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3608 return 3; /* fldcw/fldenv */
3609 break;
3610 case 'n':
3611 if (mnemonic[2] != 'o' /* fnop */)
3612 return 3; /* non-waiting control op */
3613 break;
3614 case 'r':
3615 if (mnemonic[2] == 's')
3616 return 3; /* frstor/frstpm */
3617 break;
3618 case 's':
3619 if (mnemonic[2] == 'a')
3620 return 3; /* fsave */
3621 if (mnemonic[2] == 't')
3622 {
3623 switch (mnemonic[3])
3624 {
3625 case 'c': /* fstcw */
3626 case 'd': /* fstdw */
3627 case 'e': /* fstenv */
3628 case 's': /* fsts[gw] */
3629 return 3;
3630 }
3631 }
3632 break;
3633 case 'x':
3634 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3635 return 0; /* fxsave/fxrstor are not really math ops */
3636 break;
3637 }
3638
3639 return 1;
3640 }
3641
3642 static INLINE void
3643 install_template (const insn_template *t)
3644 {
3645 unsigned int l;
3646
3647 i.tm = *t;
3648
3649 /* Note that for pseudo prefixes this produces a length of 1. But for them
3650 the length isn't interesting at all. */
3651 for (l = 1; l < 4; ++l)
3652 if (!(t->base_opcode >> (8 * l)))
3653 break;
3654
3655 i.opcode_length = l;
3656 }
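
/* For example (illustrative only): a one-byte base_opcode such as 0xc3
   gives i.opcode_length == 1, while a two-byte value such as 0xdbe3
   (as used by some x87 encodings) gives i.opcode_length == 2.  The
   opcode space (0f, 0f38, ...) is tracked separately and doesn't
   contribute here.  */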
3657
3658 /* Build the VEX prefix. */
3659
3660 static void
3661 build_vex_prefix (const insn_template *t)
3662 {
3663 unsigned int register_specifier;
3664 unsigned int vector_length;
3665 unsigned int w;
3666
3667 /* Check register specifier. */
3668 if (i.vex.register_specifier)
3669 {
3670 register_specifier =
3671 ~register_number (i.vex.register_specifier) & 0xf;
3672 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3673 }
3674 else
3675 register_specifier = 0xf;
3676
3677 /* Use 2-byte VEX prefix by swapping destination and source operand
3678 if there is more than one register operand. */
3679 if (i.reg_operands > 1
3680 && i.vec_encoding != vex_encoding_vex3
3681 && i.dir_encoding == dir_encoding_default
3682 && i.operands == i.reg_operands
3683 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3684 && i.tm.opcode_space == SPACE_0F
3685 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3686 && i.rex == REX_B)
3687 {
3688 unsigned int xchg;
3689
3690 swap_2_operands (0, i.operands - 1);
3691
3692 gas_assert (i.rm.mode == 3);
3693
3694 i.rex = REX_R;
3695 xchg = i.rm.regmem;
3696 i.rm.regmem = i.rm.reg;
3697 i.rm.reg = xchg;
3698
3699 if (i.tm.opcode_modifier.d)
3700 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3701 ? Opcode_ExtD : Opcode_SIMD_IntD;
3702 else /* Use the next insn. */
3703 install_template (&t[1]);
3704 }
3705
3706 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3707 are no memory operands and at least 3 register ones. */
3708 if (i.reg_operands >= 3
3709 && i.vec_encoding != vex_encoding_vex3
3710 && i.reg_operands == i.operands - i.imm_operands
3711 && i.tm.opcode_modifier.vex
3712 && i.tm.opcode_modifier.commutative
3713 && (i.tm.opcode_modifier.sse2avx
3714 || (optimize > 1 && !i.no_optimize))
3715 && i.rex == REX_B
3716 && i.vex.register_specifier
3717 && !(i.vex.register_specifier->reg_flags & RegRex))
3718 {
3719 unsigned int xchg = i.operands - i.reg_operands;
3720
3721 gas_assert (i.tm.opcode_space == SPACE_0F);
3722 gas_assert (!i.tm.opcode_modifier.sae);
3723 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3724 &i.types[i.operands - 3]));
3725 gas_assert (i.rm.mode == 3);
3726
3727 swap_2_operands (xchg, xchg + 1);
3728
3729 i.rex = 0;
3730 xchg = i.rm.regmem | 8;
3731 i.rm.regmem = ~register_specifier & 0xf;
3732 gas_assert (!(i.rm.regmem & 8));
3733 i.vex.register_specifier += xchg - i.rm.regmem;
3734 register_specifier = ~xchg & 0xf;
3735 }
3736
3737 if (i.tm.opcode_modifier.vex == VEXScalar)
3738 vector_length = avxscalar;
3739 else if (i.tm.opcode_modifier.vex == VEX256)
3740 vector_length = 1;
3741 else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
3742 vector_length = 0;
3743 else
3744 {
3745 unsigned int op;
3746
3747 /* Determine vector length from the last multi-length vector
3748 operand. */
3749 vector_length = 0;
3750 for (op = t->operands; op--;)
3751 if (t->operand_types[op].bitfield.xmmword
3752 && t->operand_types[op].bitfield.ymmword
3753 && i.types[op].bitfield.ymmword)
3754 {
3755 vector_length = 1;
3756 break;
3757 }
3758 }
3759
3760 /* Check the REX.W bit and VEXW. */
3761 if (i.tm.opcode_modifier.vexw == VEXWIG)
3762 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3763 else if (i.tm.opcode_modifier.vexw)
3764 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3765 else
3766 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3767
3768 /* Use 2-byte VEX prefix if possible. */
3769 if (w == 0
3770 && i.vec_encoding != vex_encoding_vex3
3771 && i.tm.opcode_space == SPACE_0F
3772 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3773 {
3774 /* 2-byte VEX prefix. */
3775 unsigned int r;
3776
3777 i.vex.length = 2;
3778 i.vex.bytes[0] = 0xc5;
3779
3780 /* Check the REX.R bit. */
3781 r = (i.rex & REX_R) ? 0 : 1;
3782 i.vex.bytes[1] = (r << 7
3783 | register_specifier << 3
3784 | vector_length << 2
3785 | i.tm.opcode_modifier.opcodeprefix);
3786 }
3787 else
3788 {
3789 /* 3-byte VEX prefix. */
3790 i.vex.length = 3;
3791
3792 switch (i.tm.opcode_space)
3793 {
3794 case SPACE_0F:
3795 case SPACE_0F38:
3796 case SPACE_0F3A:
3797 i.vex.bytes[0] = 0xc4;
3798 break;
3799 case SPACE_XOP08:
3800 case SPACE_XOP09:
3801 case SPACE_XOP0A:
3802 i.vex.bytes[0] = 0x8f;
3803 break;
3804 default:
3805 abort ();
3806 }
3807
3808 /* The high 3 bits of the second VEX byte are 1's complement
3809 of RXB bits from REX. */
3810 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3811 | (!dot_insn () ? i.tm.opcode_space
3812 : i.insn_opcode_space);
3813
3814 i.vex.bytes[2] = (w << 7
3815 | register_specifier << 3
3816 | vector_length << 2
3817 | i.tm.opcode_modifier.opcodeprefix);
3818 }
3819 }
3820
3821 static INLINE bool
3822 is_evex_encoding (const insn_template *t)
3823 {
3824 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3825 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3826 || t->opcode_modifier.sae;
3827 }
3828
3829 static INLINE bool
3830 is_any_vex_encoding (const insn_template *t)
3831 {
3832 return t->opcode_modifier.vex || is_evex_encoding (t);
3833 }
3834
3835 static unsigned int
3836 get_broadcast_bytes (const insn_template *t, bool diag)
3837 {
3838 unsigned int op, bytes;
3839 const i386_operand_type *types;
3840
3841 if (i.broadcast.type)
3842 return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
3843
3844 gas_assert (intel_syntax);
3845
3846 for (op = 0; op < t->operands; ++op)
3847 if (t->operand_types[op].bitfield.baseindex)
3848 break;
3849
3850 gas_assert (op < t->operands);
3851
3852 if (t->opcode_modifier.evex
3853 && t->opcode_modifier.evex != EVEXDYN)
3854 switch (i.broadcast.bytes)
3855 {
3856 case 1:
3857 if (t->operand_types[op].bitfield.word)
3858 return 2;
3859 /* Fall through. */
3860 case 2:
3861 if (t->operand_types[op].bitfield.dword)
3862 return 4;
3863 /* Fall through. */
3864 case 4:
3865 if (t->operand_types[op].bitfield.qword)
3866 return 8;
3867 /* Fall through. */
3868 case 8:
3869 if (t->operand_types[op].bitfield.xmmword)
3870 return 16;
3871 if (t->operand_types[op].bitfield.ymmword)
3872 return 32;
3873 if (t->operand_types[op].bitfield.zmmword)
3874 return 64;
3875 /* Fall through. */
3876 default:
3877 abort ();
3878 }
3879
3880 gas_assert (op + 1 < t->operands);
3881
3882 if (t->operand_types[op + 1].bitfield.xmmword
3883 + t->operand_types[op + 1].bitfield.ymmword
3884 + t->operand_types[op + 1].bitfield.zmmword > 1)
3885 {
3886 types = &i.types[op + 1];
3887 diag = false;
3888 }
3889 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
3890 types = &t->operand_types[op];
3891
3892 if (types->bitfield.zmmword)
3893 bytes = 64;
3894 else if (types->bitfield.ymmword)
3895 bytes = 32;
3896 else
3897 bytes = 16;
3898
3899 if (diag)
3900 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3901 insn_name (t), bytes * 8);
3902
3903 return bytes;
3904 }
3905
3906 /* Build the EVEX prefix. */
3907
3908 static void
3909 build_evex_prefix (void)
3910 {
3911 unsigned int register_specifier, w;
3912 rex_byte vrex_used = 0;
3913
3914 /* Check register specifier. */
3915 if (i.vex.register_specifier)
3916 {
3917 gas_assert ((i.vrex & REX_X) == 0);
3918
3919 register_specifier = i.vex.register_specifier->reg_num;
3920 if ((i.vex.register_specifier->reg_flags & RegRex))
3921 register_specifier += 8;
3922 /* The upper 16 registers are encoded in the fourth byte of the
3923 EVEX prefix. */
3924 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3925 i.vex.bytes[3] = 0x8;
3926 register_specifier = ~register_specifier & 0xf;
3927 }
3928 else
3929 {
3930 register_specifier = 0xf;
3931
3932 /* Encode upper 16 vector index register in the fourth byte of
3933 the EVEX prefix. */
3934 if (!(i.vrex & REX_X))
3935 i.vex.bytes[3] = 0x8;
3936 else
3937 vrex_used |= REX_X;
3938 }
3939
3940 /* 4 byte EVEX prefix. */
3941 i.vex.length = 4;
3942 i.vex.bytes[0] = 0x62;
3943
3944 /* The high 3 bits of the second EVEX byte are the one's complement
3945 of the RXB bits from REX. */
3946 gas_assert (i.tm.opcode_space >= SPACE_0F);
3947 gas_assert (i.tm.opcode_space <= SPACE_EVEXMAP6);
3948 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3949 | (!dot_insn () ? i.tm.opcode_space
3950 : i.insn_opcode_space);
3951
3952 /* The fifth bit of the second EVEX byte is the one's complement of
3953 the REX_R bit in VREX. */
3954 if (!(i.vrex & REX_R))
3955 i.vex.bytes[1] |= 0x10;
3956 else
3957 vrex_used |= REX_R;
3958
3959 if ((i.reg_operands + i.imm_operands) == i.operands)
3960 {
3961 /* When all operands are registers, the REX_X bit in REX is not
3962 used. We reuse it to encode the upper 16 registers, which is
3963 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3964 as its one's complement. */
3965 if ((i.vrex & REX_B))
3966 {
3967 vrex_used |= REX_B;
3968 i.vex.bytes[1] &= ~0x40;
3969 }
3970 }
3971
3972 /* EVEX instructions shouldn't need the REX prefix. */
3973 i.vrex &= ~vrex_used;
3974 gas_assert (i.vrex == 0);
3975
3976 /* Check the REX.W bit and VEXW. */
3977 if (i.tm.opcode_modifier.vexw == VEXWIG)
3978 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
3979 else if (i.tm.opcode_modifier.vexw)
3980 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3981 else
3982 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
3983
3984 /* The third byte of the EVEX prefix. */
3985 i.vex.bytes[2] = ((w << 7)
3986 | (register_specifier << 3)
3987 | 4 /* Encode the U bit. */
3988 | i.tm.opcode_modifier.opcodeprefix);
3989
3990 /* The fourth byte of the EVEX prefix. */
3991 /* The zeroing-masking bit. */
3992 if (i.mask.reg && i.mask.zeroing)
3993 i.vex.bytes[3] |= 0x80;
3994
3995 /* Don't always set the broadcast bit if there is no RC. */
3996 if (i.rounding.type == rc_none)
3997 {
3998 /* Encode the vector length. */
3999 unsigned int vec_length;
4000
4001 if (!i.tm.opcode_modifier.evex
4002 || i.tm.opcode_modifier.evex == EVEXDYN)
4003 {
4004 unsigned int op;
4005
4006 /* Determine vector length from the last multi-length vector
4007 operand. */
4008 for (op = i.operands; op--;)
4009 if (i.tm.operand_types[op].bitfield.xmmword
4010 + i.tm.operand_types[op].bitfield.ymmword
4011 + i.tm.operand_types[op].bitfield.zmmword > 1)
4012 {
4013 if (i.types[op].bitfield.zmmword)
4014 {
4015 i.tm.opcode_modifier.evex = EVEX512;
4016 break;
4017 }
4018 else if (i.types[op].bitfield.ymmword)
4019 {
4020 i.tm.opcode_modifier.evex = EVEX256;
4021 break;
4022 }
4023 else if (i.types[op].bitfield.xmmword)
4024 {
4025 i.tm.opcode_modifier.evex = EVEX128;
4026 break;
4027 }
4028 else if ((i.broadcast.type || i.broadcast.bytes)
4029 && op == i.broadcast.operand)
4030 {
4031 switch (get_broadcast_bytes (&i.tm, true))
4032 {
4033 case 64:
4034 i.tm.opcode_modifier.evex = EVEX512;
4035 break;
4036 case 32:
4037 i.tm.opcode_modifier.evex = EVEX256;
4038 break;
4039 case 16:
4040 i.tm.opcode_modifier.evex = EVEX128;
4041 break;
4042 default:
4043 abort ();
4044 }
4045 break;
4046 }
4047 }
4048
4049 if (op >= MAX_OPERANDS)
4050 abort ();
4051 }
4052
4053 switch (i.tm.opcode_modifier.evex)
4054 {
4055 case EVEXLIG: /* LL' is ignored */
4056 vec_length = evexlig << 5;
4057 break;
4058 case EVEX128:
4059 vec_length = 0 << 5;
4060 break;
4061 case EVEX256:
4062 vec_length = 1 << 5;
4063 break;
4064 case EVEX512:
4065 vec_length = 2 << 5;
4066 break;
4067 case EVEX_L3:
4068 if (dot_insn ())
4069 {
4070 vec_length = 3 << 5;
4071 break;
4072 }
4073 /* Fall through. */
4074 default:
4075 abort ();
4076 break;
4077 }
4078 i.vex.bytes[3] |= vec_length;
4079 /* Encode the broadcast bit. */
4080 if (i.broadcast.type || i.broadcast.bytes)
4081 i.vex.bytes[3] |= 0x10;
4082 }
4083 else if (i.rounding.type != saeonly)
4084 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
4085 else
4086 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
4087
4088 if (i.mask.reg)
4089 i.vex.bytes[3] |= i.mask.reg->reg_num;
4090 }
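
/* Worked example (illustrative, encodings assumed from the Intel SDM):
   "vaddps %zmm2, %zmm1, %zmm0{%k1}{z}" encodes as 62 f1 74 c9 58 c2;
   the fourth byte 0xc9 is z (0x80) | L'L = 10 (0x40) | ~V' (0x08)
   | aaa = 1, exactly the pieces OR'ed together above.  A minimal
   sketch of that packing (helper name made up here, not used by the
   assembler):  */

static unsigned char
evex_fourth_byte (unsigned int zeroing, unsigned int ll, unsigned int b,
                  unsigned int v_hi, unsigned int aaa)
{
  /* z in bit 7, L'L in bits 6:5, b in bit 4, ~V' in bit 3, aaa in
     bits 2:0.  */
  return ((zeroing & 1) << 7)
         | ((ll & 3) << 5)
         | ((b & 1) << 4)
         | (v_hi ? 0 : 0x08)
         | (aaa & 7);
}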
4091
4092 static void
4093 process_immext (void)
4094 {
4095 expressionS *exp;
4096
4097 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4098 which is coded in the same place as an 8-bit immediate field
4099 would be. Here we fake an 8-bit immediate operand from the
4100 opcode suffix stored in tm.extension_opcode.
4101
4102 AVX instructions also use this encoding for some
4103 3-argument instructions. */
4104
4105 gas_assert (i.imm_operands <= 1
4106 && (i.operands <= 2
4107 || (is_any_vex_encoding (&i.tm)
4108 && i.operands <= 4)));
4109
4110 exp = &im_expressions[i.imm_operands++];
4111 i.op[i.operands].imms = exp;
4112 i.types[i.operands].bitfield.imm8 = 1;
4113 i.operands++;
4114 exp->X_op = O_constant;
4115 exp->X_add_number = i.tm.extension_opcode;
4116 i.tm.extension_opcode = None;
4117 }
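
/* Example (illustrative): 3DNow! "pfadd %mm1, %mm0" is 0f 0f /r with
   opcode suffix 0x9e; the suffix sits in the template's
   extension_opcode and is re-materialized here as a trailing fake
   imm8, so generic immediate emission outputs it after the ModRM
   byte.  */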
4118
4119
4120 static int
4121 check_hle (void)
4122 {
4123 switch (i.tm.opcode_modifier.prefixok)
4124 {
4125 default:
4126 abort ();
4127 case PrefixLock:
4128 case PrefixNone:
4129 case PrefixNoTrack:
4130 case PrefixRep:
4131 as_bad (_("invalid instruction `%s' after `%s'"),
4132 insn_name (&i.tm), i.hle_prefix);
4133 return 0;
4134 case PrefixHLELock:
4135 if (i.prefix[LOCK_PREFIX])
4136 return 1;
4137 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4138 return 0;
4139 case PrefixHLEAny:
4140 return 1;
4141 case PrefixHLERelease:
4142 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4143 {
4144 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4145 insn_name (&i.tm));
4146 return 0;
4147 }
4148 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4149 {
4150 as_bad (_("memory destination needed for instruction `%s'"
4151 " after `xrelease'"), insn_name (&i.tm));
4152 return 0;
4153 }
4154 return 1;
4155 }
4156 }
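
/* Examples (illustrative) of the checks above:
     xacquire lock incl (%rax)   - accepted (PrefixHLELock, lock present)
     xrelease movl %eax, (%rbx)  - accepted (PrefixHLERelease with a
				   memory destination)
     xacquire movl %eax, (%rbx)  - rejected, mov permits xrelease only
     xrelease movl (%rbx), %eax  - rejected, memory destination needed  */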
4157
4158 /* Encode aligned vector move as unaligned vector move. */
4159
4160 static void
4161 encode_with_unaligned_vector_move (void)
4162 {
4163 switch (i.tm.base_opcode)
4164 {
4165 case 0x28: /* Load instructions. */
4166 case 0x29: /* Store instructions. */
4167 /* movaps/movapd/vmovaps/vmovapd. */
4168 if (i.tm.opcode_space == SPACE_0F
4169 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4170 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4171 break;
4172 case 0x6f: /* Load instructions. */
4173 case 0x7f: /* Store instructions. */
4174 /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
4175 if (i.tm.opcode_space == SPACE_0F
4176 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4177 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4178 break;
4179 default:
4180 break;
4181 }
4182 }
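
/* Example (illustrative): with -muse-unaligned-vector-move,
   "movaps %xmm1, (%rax)" (0f 29) is emitted as movups (0f 11), and
   "vmovdqa %xmm1, (%rax)" (66 0f 7f) as vmovdqu (f3 0f 7f); the
   encoding length is unchanged, only the alignment trap goes away.  */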
4183
4184 /* Try the shortest encoding by shortening operand size. */
4185
4186 static void
4187 optimize_encoding (void)
4188 {
4189 unsigned int j;
4190
4191 if (i.tm.mnem_off == MN_lea)
4192 {
4193 /* Optimize: -O:
4194 lea symbol, %rN -> mov $symbol, %rN
4195 lea (%rM), %rN -> mov %rM, %rN
4196 lea (,%rM,1), %rN -> mov %rM, %rN
4197
4198 and in 32-bit mode for 16-bit addressing
4199
4200 lea (%rM), %rN -> movzx %rM, %rN
4201
4202 and in 64-bit mode zap 32-bit addressing in favor of using a
4203 32-bit (or less) destination.
4204 */
4205 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4206 {
4207 if (!i.op[1].regs->reg_type.bitfield.word)
4208 i.tm.opcode_modifier.size = SIZE32;
4209 i.prefix[ADDR_PREFIX] = 0;
4210 }
4211
4212 if (!i.index_reg && !i.base_reg)
4213 {
4214 /* Handle:
4215 lea symbol, %rN -> mov $symbol, %rN
4216 */
4217 if (flag_code == CODE_64BIT)
4218 {
4219 /* Don't transform a relocation to a 16-bit one. */
4220 if (i.op[0].disps
4221 && i.op[0].disps->X_op != O_constant
4222 && i.op[1].regs->reg_type.bitfield.word)
4223 return;
4224
4225 if (!i.op[1].regs->reg_type.bitfield.qword
4226 || i.tm.opcode_modifier.size == SIZE32)
4227 {
4228 i.tm.base_opcode = 0xb8;
4229 i.tm.opcode_modifier.modrm = 0;
4230 if (!i.op[1].regs->reg_type.bitfield.word)
4231 i.types[0].bitfield.imm32 = 1;
4232 else
4233 {
4234 i.tm.opcode_modifier.size = SIZE16;
4235 i.types[0].bitfield.imm16 = 1;
4236 }
4237 }
4238 else
4239 {
4240 /* Subject to further optimization below. */
4241 i.tm.base_opcode = 0xc7;
4242 i.tm.extension_opcode = 0;
4243 i.types[0].bitfield.imm32s = 1;
4244 i.types[0].bitfield.baseindex = 0;
4245 }
4246 }
4247 /* Outside of 64-bit mode, address and operand sizes have to match if
4248 a relocation is involved, as otherwise we currently wouldn't (or
4249 possibly even couldn't) express the relocation correctly. */
4250 else if (i.op[0].disps
4251 && i.op[0].disps->X_op != O_constant
4252 && ((!i.prefix[ADDR_PREFIX])
4253 != (flag_code == CODE_32BIT
4254 ? i.op[1].regs->reg_type.bitfield.dword
4255 : i.op[1].regs->reg_type.bitfield.word)))
4256 return;
4257 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4258 destination is going to grow encoding size. */
4259 else if (flag_code == CODE_16BIT
4260 && (optimize <= 1 || optimize_for_space)
4261 && !i.prefix[ADDR_PREFIX]
4262 && i.op[1].regs->reg_type.bitfield.dword)
4263 return;
4264 else
4265 {
4266 i.tm.base_opcode = 0xb8;
4267 i.tm.opcode_modifier.modrm = 0;
4268 if (i.op[1].regs->reg_type.bitfield.dword)
4269 i.types[0].bitfield.imm32 = 1;
4270 else
4271 i.types[0].bitfield.imm16 = 1;
4272
4273 if (i.op[0].disps
4274 && i.op[0].disps->X_op == O_constant
4275 && i.op[1].regs->reg_type.bitfield.dword
4276 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4277 GCC 5. */
4278 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4279 i.op[0].disps->X_add_number &= 0xffff;
4280 }
4281
4282 i.tm.operand_types[0] = i.types[0];
4283 i.imm_operands = 1;
4284 if (!i.op[0].imms)
4285 {
4286 i.op[0].imms = &im_expressions[0];
4287 i.op[0].imms->X_op = O_absent;
4288 }
4289 }
4290 else if (i.op[0].disps
4291 && (i.op[0].disps->X_op != O_constant
4292 || i.op[0].disps->X_add_number))
4293 return;
4294 else
4295 {
4296 /* Handle:
4297 lea (%rM), %rN -> mov %rM, %rN
4298 lea (,%rM,1), %rN -> mov %rM, %rN
4299 lea (%rM), %rN -> movzx %rM, %rN
4300 */
4301 const reg_entry *addr_reg;
4302
4303 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4304 addr_reg = i.base_reg;
4305 else if (!i.base_reg
4306 && i.index_reg->reg_num != RegIZ
4307 && !i.log2_scale_factor)
4308 addr_reg = i.index_reg;
4309 else
4310 return;
4311
4312 if (addr_reg->reg_type.bitfield.word
4313 && i.op[1].regs->reg_type.bitfield.dword)
4314 {
4315 if (flag_code != CODE_32BIT)
4316 return;
4317 i.tm.opcode_space = SPACE_0F;
4318 i.tm.base_opcode = 0xb7;
4319 }
4320 else
4321 i.tm.base_opcode = 0x8b;
4322
4323 if (addr_reg->reg_type.bitfield.dword
4324 && i.op[1].regs->reg_type.bitfield.qword)
4325 i.tm.opcode_modifier.size = SIZE32;
4326
4327 i.op[0].regs = addr_reg;
4328 i.reg_operands = 2;
4329 }
4330
4331 i.mem_operands = 0;
4332 i.disp_operands = 0;
4333 i.prefix[ADDR_PREFIX] = 0;
4334 i.prefix[SEG_PREFIX] = 0;
4335 i.seg[0] = NULL;
4336 }
4337
4338 if (optimize_for_space
4339 && i.tm.mnem_off == MN_test
4340 && i.reg_operands == 1
4341 && i.imm_operands == 1
4342 && !i.types[1].bitfield.byte
4343 && i.op[0].imms->X_op == O_constant
4344 && fits_in_imm7 (i.op[0].imms->X_add_number))
4345 {
4346 /* Optimize: -Os:
4347 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4348 */
4349 unsigned int base_regnum = i.op[1].regs->reg_num;
4350 if (flag_code == CODE_64BIT || base_regnum < 4)
4351 {
4352 i.types[1].bitfield.byte = 1;
4353 /* Ignore the suffix. */
4354 i.suffix = 0;
4355 /* Convert to byte registers. */
4356 if (i.types[1].bitfield.word)
4357 j = 16;
4358 else if (i.types[1].bitfield.dword)
4359 j = 32;
4360 else
4361 j = 48;
4362 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4363 j += 8;
4364 i.op[1].regs -= j;
4365 }
4366 }
4367 else if (flag_code == CODE_64BIT
4368 && i.tm.opcode_space == SPACE_BASE
4369 && ((i.types[1].bitfield.qword
4370 && i.reg_operands == 1
4371 && i.imm_operands == 1
4372 && i.op[0].imms->X_op == O_constant
4373 && ((i.tm.base_opcode == 0xb8
4374 && i.tm.extension_opcode == None
4375 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4376 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4377 && (i.tm.base_opcode == 0x24
4378 || (i.tm.base_opcode == 0x80
4379 && i.tm.extension_opcode == 0x4)
4380 || i.tm.mnem_off == MN_test
4381 || ((i.tm.base_opcode | 1) == 0xc7
4382 && i.tm.extension_opcode == 0x0)))
4383 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4384 && i.tm.base_opcode == 0x83
4385 && i.tm.extension_opcode == 0x4)))
4386 || (i.types[0].bitfield.qword
4387 && ((i.reg_operands == 2
4388 && i.op[0].regs == i.op[1].regs
4389 && (i.tm.mnem_off == MN_xor
4390 || i.tm.mnem_off == MN_sub))
4391 || i.tm.mnem_off == MN_clr))))
4392 {
4393 /* Optimize: -O:
4394 andq $imm31, %r64 -> andl $imm31, %r32
4395 andq $imm7, %r64 -> andl $imm7, %r32
4396 testq $imm31, %r64 -> testl $imm31, %r32
4397 xorq %r64, %r64 -> xorl %r32, %r32
4398 subq %r64, %r64 -> subl %r32, %r32
4399 movq $imm31, %r64 -> movl $imm31, %r32
4400 movq $imm32, %r64 -> movl $imm32, %r32
4401 */
4402 i.tm.opcode_modifier.size = SIZE32;
4403 if (i.imm_operands)
4404 {
4405 i.types[0].bitfield.imm32 = 1;
4406 i.types[0].bitfield.imm32s = 0;
4407 i.types[0].bitfield.imm64 = 0;
4408 }
4409 else
4410 {
4411 i.types[0].bitfield.dword = 1;
4412 i.types[0].bitfield.qword = 0;
4413 }
4414 i.types[1].bitfield.dword = 1;
4415 i.types[1].bitfield.qword = 0;
4416 if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
4417 {
4418 /* Handle
4419 movq $imm31, %r64 -> movl $imm31, %r32
4420 movq $imm32, %r64 -> movl $imm32, %r32
4421 */
4422 i.tm.operand_types[0].bitfield.imm32 = 1;
4423 i.tm.operand_types[0].bitfield.imm32s = 0;
4424 i.tm.operand_types[0].bitfield.imm64 = 0;
4425 if ((i.tm.base_opcode | 1) == 0xc7)
4426 {
4427 /* Handle
4428 movq $imm31, %r64 -> movl $imm31, %r32
4429 */
4430 i.tm.base_opcode = 0xb8;
4431 i.tm.extension_opcode = None;
4432 i.tm.opcode_modifier.w = 0;
4433 i.tm.opcode_modifier.modrm = 0;
4434 }
4435 }
4436 }
4437 else if (optimize > 1
4438 && !optimize_for_space
4439 && i.reg_operands == 2
4440 && i.op[0].regs == i.op[1].regs
4441 && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
4442 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4443 {
4444 /* Optimize: -O2:
4445 andb %rN, %rN -> testb %rN, %rN
4446 andw %rN, %rN -> testw %rN, %rN
4447 andq %rN, %rN -> testq %rN, %rN
4448 orb %rN, %rN -> testb %rN, %rN
4449 orw %rN, %rN -> testw %rN, %rN
4450 orq %rN, %rN -> testq %rN, %rN
4451
4452 and outside of 64-bit mode
4453
4454 andl %rN, %rN -> testl %rN, %rN
4455 orl %rN, %rN -> testl %rN, %rN
4456 */
4457 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4458 }
4459 else if (i.tm.base_opcode == 0xba
4460 && i.tm.opcode_space == SPACE_0F
4461 && i.reg_operands == 1
4462 && i.op[0].imms->X_op == O_constant
4463 && i.op[0].imms->X_add_number >= 0)
4464 {
4465 /* Optimize: -O:
4466 btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
4467 btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
4468 btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4469
4470 With <BT> one of bts, btr, and btc also:
4471 <BT>w $n, %rN -> <BT>l $n, %rN (in 32-bit mode, n < 16)
4472 <BT>l $n, %rN -> <BT>w $n, %rN (in 16-bit mode, n < 16)
4473 */
4474 switch (flag_code)
4475 {
4476 case CODE_64BIT:
4477 if (i.tm.extension_opcode != 4)
4478 break;
4479 if (i.types[1].bitfield.qword
4480 && i.op[0].imms->X_add_number < 32
4481 && !(i.op[1].regs->reg_flags & RegRex))
4482 i.tm.opcode_modifier.size = SIZE32;
4483 /* Fall through. */
4484 case CODE_32BIT:
4485 if (i.types[1].bitfield.word
4486 && i.op[0].imms->X_add_number < 16)
4487 i.tm.opcode_modifier.size = SIZE32;
4488 break;
4489 case CODE_16BIT:
4490 if (i.op[0].imms->X_add_number < 16)
4491 i.tm.opcode_modifier.size = SIZE16;
4492 break;
4493 }
4494 }
4495 else if (i.reg_operands == 3
4496 && i.op[0].regs == i.op[1].regs
4497 && !i.types[2].bitfield.xmmword
4498 && (i.tm.opcode_modifier.vex
4499 || ((!i.mask.reg || i.mask.zeroing)
4500 && is_evex_encoding (&i.tm)
4501 && (i.vec_encoding != vex_encoding_evex
4502 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4503 || is_cpu (&i.tm, CpuAVX512VL)
4504 || (i.tm.operand_types[2].bitfield.zmmword
4505 && i.types[2].bitfield.ymmword))))
4506 && i.tm.opcode_space == SPACE_0F
4507 && ((i.tm.base_opcode | 2) == 0x57
4508 || i.tm.base_opcode == 0xdf
4509 || i.tm.base_opcode == 0xef
4510 || (i.tm.base_opcode | 3) == 0xfb
4511 || i.tm.base_opcode == 0x42
4512 || i.tm.base_opcode == 0x47))
4513 {
4514 /* Optimize: -O1:
4515 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4516 vpsubq and vpsubw:
4517 EVEX VOP %zmmM, %zmmM, %zmmN
4518 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4519 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4520 EVEX VOP %ymmM, %ymmM, %ymmN
4521 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4522 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4523 VEX VOP %ymmM, %ymmM, %ymmN
4524 -> VEX VOP %xmmM, %xmmM, %xmmN
4525 VOP, one of vpandn and vpxor:
4526 VEX VOP %ymmM, %ymmM, %ymmN
4527 -> VEX VOP %xmmM, %xmmM, %xmmN
4528 VOP, one of vpandnd and vpandnq:
4529 EVEX VOP %zmmM, %zmmM, %zmmN
4530 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4531 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4532 EVEX VOP %ymmM, %ymmM, %ymmN
4533 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4534 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4535 VOP, one of vpxord and vpxorq:
4536 EVEX VOP %zmmM, %zmmM, %zmmN
4537 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4538 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4539 EVEX VOP %ymmM, %ymmM, %ymmN
4540 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4541 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4542 VOP, one of kxord and kxorq:
4543 VEX VOP %kM, %kM, %kN
4544 -> VEX kxorw %kM, %kM, %kN
4545 VOP, one of kandnd and kandnq:
4546 VEX VOP %kM, %kM, %kN
4547 -> VEX kandnw %kM, %kM, %kN
4548 */
4549 if (is_evex_encoding (&i.tm))
4550 {
4551 if (i.vec_encoding != vex_encoding_evex)
4552 {
4553 i.tm.opcode_modifier.vex = VEX128;
4554 i.tm.opcode_modifier.vexw = VEXW0;
4555 i.tm.opcode_modifier.evex = 0;
4556 }
4557 else if (optimize > 1)
4558 i.tm.opcode_modifier.evex = EVEX128;
4559 else
4560 return;
4561 }
4562 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4563 {
4564 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4565 i.tm.opcode_modifier.vexw = VEXW0;
4566 }
4567 else
4568 i.tm.opcode_modifier.vex = VEX128;
4569
4570 if (i.tm.opcode_modifier.vex)
4571 for (j = 0; j < 3; j++)
4572 {
4573 i.types[j].bitfield.xmmword = 1;
4574 i.types[j].bitfield.ymmword = 0;
4575 }
4576 }
4577 else if (i.vec_encoding != vex_encoding_evex
4578 && !i.types[0].bitfield.zmmword
4579 && !i.types[1].bitfield.zmmword
4580 && !i.mask.reg
4581 && !i.broadcast.type
4582 && !i.broadcast.bytes
4583 && is_evex_encoding (&i.tm)
4584 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4585 || (i.tm.base_opcode & ~4) == 0xdb
4586 || (i.tm.base_opcode & ~4) == 0xeb)
4587 && i.tm.extension_opcode == None)
4588 {
4589 /* Optimize: -O1:
4590 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4591 vmovdqu32 and vmovdqu64:
4592 EVEX VOP %xmmM, %xmmN
4593 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4594 EVEX VOP %ymmM, %ymmN
4595 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4596 EVEX VOP %xmmM, mem
4597 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4598 EVEX VOP %ymmM, mem
4599 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4600 EVEX VOP mem, %xmmN
4601 -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4602 EVEX VOP mem, %ymmN
4603 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4604 VOP, one of vpand, vpandn, vpor, vpxor:
4605 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4606 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4607 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4608 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4609 EVEX VOP{d,q} mem, %xmmM, %xmmN
4610 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4611 EVEX VOP{d,q} mem, %ymmM, %ymmN
4612 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4613 */
4614 for (j = 0; j < i.operands; j++)
4615 if (operand_type_check (i.types[j], disp)
4616 && i.op[j].disps->X_op == O_constant)
4617 {
4618 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4619 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4620 bytes, we choose EVEX Disp8 over VEX Disp32. */
4621 int evex_disp8, vex_disp8;
4622 unsigned int memshift = i.memshift;
4623 offsetT n = i.op[j].disps->X_add_number;
4624
4625 evex_disp8 = fits_in_disp8 (n);
4626 i.memshift = 0;
4627 vex_disp8 = fits_in_disp8 (n);
4628 if (evex_disp8 != vex_disp8)
4629 {
4630 i.memshift = memshift;
4631 return;
4632 }
4633
4634 i.types[j].bitfield.disp8 = vex_disp8;
4635 break;
4636 }
4637 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4638 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4639 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4640 i.tm.opcode_modifier.vex
4641 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4642 i.tm.opcode_modifier.vexw = VEXW0;
4643 /* VPAND, VPOR, and VPXOR are commutative. */
4644 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4645 i.tm.opcode_modifier.commutative = 1;
4646 i.tm.opcode_modifier.evex = 0;
4647 i.tm.opcode_modifier.masking = 0;
4648 i.tm.opcode_modifier.broadcast = 0;
4649 i.tm.opcode_modifier.disp8memshift = 0;
4650 i.memshift = 0;
4651 if (j < i.operands)
4652 i.types[j].bitfield.disp8
4653 = fits_in_disp8 (i.op[j].disps->X_add_number);
4654 }
4655 else if (optimize_for_space
4656 && i.tm.base_opcode == 0x29
4657 && i.tm.opcode_space == SPACE_0F38
4658 && i.operands == i.reg_operands
4659 && i.op[0].regs == i.op[1].regs
4660 && (!i.tm.opcode_modifier.vex
4661 || !(i.op[0].regs->reg_flags & RegRex))
4662 && !is_evex_encoding (&i.tm))
4663 {
4664 /* Optimize: -Os:
4665 pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN
4666 vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
4667 vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
4668 */
4669 i.tm.opcode_space = SPACE_0F;
4670 i.tm.base_opcode = 0x76;
4671 }
4672 else if (((i.tm.base_opcode >= 0x64
4673 && i.tm.base_opcode <= 0x66
4674 && i.tm.opcode_space == SPACE_0F)
4675 || (i.tm.base_opcode == 0x37
4676 && i.tm.opcode_space == SPACE_0F38))
4677 && i.operands == i.reg_operands
4678 && i.op[0].regs == i.op[1].regs
4679 && !is_evex_encoding (&i.tm))
4680 {
4681 /* Optimize: -O:
4682 pcmpgt[bwd] %mmN, %mmN -> pxor %mmN, %mmN
4683 pcmpgt[bwdq] %xmmN, %xmmN -> pxor %xmmN, %xmmN
4684 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
4685 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
4686 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
4687 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
4688 */
4689 i.tm.opcode_space = SPACE_0F;
4690 i.tm.base_opcode = 0xef;
4691 if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
4692 {
4693 if (i.operands == 2)
4694 {
4695 gas_assert (i.tm.opcode_modifier.sse2avx);
4696
4697 i.operands = 3;
4698 i.reg_operands = 3;
4699 i.tm.operands = 3;
4700
4701 i.op[2].regs = i.op[0].regs;
4702 i.types[2] = i.types[0];
4703 i.flags[2] = i.flags[0];
4704 i.tm.operand_types[2] = i.tm.operand_types[0];
4705
4706 i.tm.opcode_modifier.sse2avx = 0;
4707 }
4708 i.op[0].regs -= i.op[0].regs->reg_num + 8;
4709 i.op[1].regs = i.op[0].regs;
4710 }
4711 }
4712 else if (optimize_for_space
4713 && i.tm.base_opcode == 0x59
4714 && i.tm.opcode_space == SPACE_0F38
4715 && i.operands == i.reg_operands
4716 && i.tm.opcode_modifier.vex
4717 && !(i.op[0].regs->reg_flags & RegRex)
4718 && i.op[0].regs->reg_type.bitfield.xmmword
4719 && i.vec_encoding != vex_encoding_vex3)
4720 {
4721 /* Optimize: -Os:
4722 vpbroadcastq %xmmN, %xmmM -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
4723 */
4724 i.tm.opcode_space = SPACE_0F;
4725 i.tm.base_opcode = 0x6c;
4726 i.tm.opcode_modifier.vexvvvv = 1;
4727
4728 ++i.operands;
4729 ++i.reg_operands;
4730 ++i.tm.operands;
4731
4732 i.op[2].regs = i.op[0].regs;
4733 i.types[2] = i.types[0];
4734 i.flags[2] = i.flags[0];
4735 i.tm.operand_types[2] = i.tm.operand_types[0];
4736
4737 swap_2_operands (1, 2);
4738 }
4739 }
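
/* Example (illustrative): with -O, "xorq %rax, %rax" is assembled as
   the 32-bit form xorl %eax, %eax (31 c0), dropping the REX.W prefix;
   writing the 32-bit register zero-extends, so the result is
   unchanged.  */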
4740
4741 /* Return non-zero for load instruction. */
4742
4743 static int
4744 load_insn_p (void)
4745 {
4746 unsigned int dest;
4747 int any_vex_p = is_any_vex_encoding (&i.tm);
4748 unsigned int base_opcode = i.tm.base_opcode | 1;
4749
4750 if (!any_vex_p)
4751 {
4752 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4753 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
4754 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4755 return 0;
4756
4757 /* pop. */
4758 if (i.tm.mnem_off == MN_pop)
4759 return 1;
4760 }
4761
4762 if (i.tm.opcode_space == SPACE_BASE)
4763 {
4764 /* popf, popa. */
4765 if (i.tm.base_opcode == 0x9d
4766 || i.tm.base_opcode == 0x61)
4767 return 1;
4768
4769 /* movs, cmps, lods, scas. */
4770 if ((i.tm.base_opcode | 0xb) == 0xaf)
4771 return 1;
4772
4773 /* outs, xlatb. */
4774 if (base_opcode == 0x6f
4775 || i.tm.base_opcode == 0xd7)
4776 return 1;
4777 /* NB: AMD-specific insns with implicit memory operands are
4778 intentionally not covered. */
4779 }
4780
4781 /* No memory operand. */
4782 if (!i.mem_operands)
4783 return 0;
4784
4785 if (any_vex_p)
4786 {
4787 if (i.tm.mnem_off == MN_vldmxcsr)
4788 return 1;
4789 }
4790 else if (i.tm.opcode_space == SPACE_BASE)
4791 {
4792 /* test, not, neg, mul, imul, div, idiv. */
4793 if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
4794 return 1;
4795
4796 /* inc, dec. */
4797 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4798 return 1;
4799
4800 /* add, or, adc, sbb, and, sub, xor, cmp. */
4801 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4802 return 1;
4803
4804 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4805 if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
4806 && i.tm.extension_opcode != 6)
4807 return 1;
4808
4809 /* Check for x87 instructions. */
4810 if ((base_opcode | 6) == 0xdf)
4811 {
4812 /* Skip fst, fstp, fstenv, fstcw. */
4813 if (i.tm.base_opcode == 0xd9
4814 && (i.tm.extension_opcode == 2
4815 || i.tm.extension_opcode == 3
4816 || i.tm.extension_opcode == 6
4817 || i.tm.extension_opcode == 7))
4818 return 0;
4819
4820 /* Skip fisttp, fist, fistp, fstp. */
4821 if (i.tm.base_opcode == 0xdb
4822 && (i.tm.extension_opcode == 1
4823 || i.tm.extension_opcode == 2
4824 || i.tm.extension_opcode == 3
4825 || i.tm.extension_opcode == 7))
4826 return 0;
4827
4828 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4829 if (i.tm.base_opcode == 0xdd
4830 && (i.tm.extension_opcode == 1
4831 || i.tm.extension_opcode == 2
4832 || i.tm.extension_opcode == 3
4833 || i.tm.extension_opcode == 6
4834 || i.tm.extension_opcode == 7))
4835 return 0;
4836
4837 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4838 if (i.tm.base_opcode == 0xdf
4839 && (i.tm.extension_opcode == 1
4840 || i.tm.extension_opcode == 2
4841 || i.tm.extension_opcode == 3
4842 || i.tm.extension_opcode == 6
4843 || i.tm.extension_opcode == 7))
4844 return 0;
4845
4846 return 1;
4847 }
4848 }
4849 else if (i.tm.opcode_space == SPACE_0F)
4850 {
4851 /* bt, bts, btr, btc. */
4852 if (i.tm.base_opcode == 0xba
4853 && (i.tm.extension_opcode | 3) == 7)
4854 return 1;
4855
4856 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
4857 if (i.tm.base_opcode == 0xc7
4858 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4859 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4860 || i.tm.extension_opcode == 6))
4861 return 1;
4862
4863 /* fxrstor, ldmxcsr, xrstor. */
4864 if (i.tm.base_opcode == 0xae
4865 && (i.tm.extension_opcode == 1
4866 || i.tm.extension_opcode == 2
4867 || i.tm.extension_opcode == 5))
4868 return 1;
4869
4870 /* lgdt, lidt, lmsw. */
4871 if (i.tm.base_opcode == 0x01
4872 && (i.tm.extension_opcode == 2
4873 || i.tm.extension_opcode == 3
4874 || i.tm.extension_opcode == 6))
4875 return 1;
4876 }
4877
4878 dest = i.operands - 1;
4879
4880 /* Check fake imm8 operand and 3 source operands. */
4881 if ((i.tm.opcode_modifier.immext
4882 || i.reg_operands + i.mem_operands == 4)
4883 && i.types[dest].bitfield.imm8)
4884 dest--;
4885
4886 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
4887 if (i.tm.opcode_space == SPACE_BASE
4888 && ((base_opcode | 0x38) == 0x39
4889 || (base_opcode | 2) == 0x87))
4890 return 1;
4891
4892 if (i.tm.mnem_off == MN_xadd)
4893 return 1;
4894
4895 /* Check for load instruction. */
4896 return (i.types[dest].bitfield.class != ClassNone
4897 || i.types[dest].bitfield.instance == Accum);
4898 }
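
/* Examples (illustrative): "addl %ecx, (%rax)" counts as a load (the
   read-modify-write case matched via (base_opcode | 0x38) == 0x39), as
   does "addl (%rax), %ecx" (register destination, matched by the final
   check); a plain store such as "movl %ecx, (%rax)" does not.  */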
4899
4900 /* Output lfence (0x0f 0xae 0xe8) after the instruction. */
4901
4902 static void
4903 insert_lfence_after (void)
4904 {
4905 if (lfence_after_load && load_insn_p ())
4906 {
4907 /* There are also two REP string instructions that require
4908 special treatment. Specifically, the compare string (CMPS)
4909 and scan string (SCAS) instructions set EFLAGS in a manner
4910 that depends on the data being compared/scanned. When used
4911 with a REP prefix, the number of iterations may therefore
4912 vary depending on this data. If the data is a program secret
4913 chosen by the adversary using an LVI method,
4914 then this data-dependent behavior may leak some aspect
4915 of the secret. */
4916 if (((i.tm.base_opcode | 0x9) == 0xaf)
4917 && i.prefix[REP_PREFIX])
4918 {
4919 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4920 insn_name (&i.tm));
4921 }
4922 char *p = frag_more (3);
4923 *p++ = 0xf;
4924 *p++ = 0xae;
4925 *p = 0xe8;
4926 }
4927 }
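
/* Example (illustrative): with -mlfence-after-load,
   "movl (%rax), %eax" is followed by the bytes 0f ae e8 (lfence).  */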
4928
4929 /* Output lfence (0x0f 0xae 0xe8) before the instruction. */
4930
4931 static void
4932 insert_lfence_before (void)
4933 {
4934 char *p;
4935
4936 if (i.tm.opcode_space != SPACE_BASE)
4937 return;
4938
4939 if (i.tm.base_opcode == 0xff
4940 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4941 {
4942 /* Insert lfence before indirect branch if needed. */
4943
4944 if (lfence_before_indirect_branch == lfence_branch_none)
4945 return;
4946
4947 if (i.operands != 1)
4948 abort ();
4949
4950 if (i.reg_operands == 1)
4951 {
4952 /* Indirect branch via register. Don't insert lfence with
4953 -mlfence-after-load=yes. */
4954 if (lfence_after_load
4955 || lfence_before_indirect_branch == lfence_branch_memory)
4956 return;
4957 }
4958 else if (i.mem_operands == 1
4959 && lfence_before_indirect_branch != lfence_branch_register)
4960 {
4961 as_warn (_("indirect `%s` with memory operand should be avoided"),
4962 insn_name (&i.tm));
4963 return;
4964 }
4965 else
4966 return;
4967
4968 if (last_insn.kind != last_insn_other
4969 && last_insn.seg == now_seg)
4970 {
4971 as_warn_where (last_insn.file, last_insn.line,
4972 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
4973 last_insn.name, insn_name (&i.tm));
4974 return;
4975 }
4976
4977 p = frag_more (3);
4978 *p++ = 0xf;
4979 *p++ = 0xae;
4980 *p = 0xe8;
4981 return;
4982 }
4983
4984 /* Output or/not/shl and lfence before near ret. */
4985 if (lfence_before_ret != lfence_before_ret_none
4986 && (i.tm.base_opcode | 1) == 0xc3)
4987 {
4988 if (last_insn.kind != last_insn_other
4989 && last_insn.seg == now_seg)
4990 {
4991 as_warn_where (last_insn.file, last_insn.line,
4992 _("`%s` skips -mlfence-before-ret on `%s`"),
4993 last_insn.name, insn_name (&i.tm));
4994 return;
4995 }
4996
4997 /* Near ret ignores the operand size override in 64-bit code. */
4998 char prefix = flag_code == CODE_64BIT
4999 ? 0x48
5000 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
5001
5002 if (lfence_before_ret == lfence_before_ret_not)
5003 {
5004 /* not: 0xf7 0x14 0x24, possibly preceded by a prefix
5005 for operand size override or 64-bit code. */
5006 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
5007 if (prefix)
5008 *p++ = prefix;
5009 *p++ = 0xf7;
5010 *p++ = 0x14;
5011 *p++ = 0x24;
5012 if (prefix)
5013 *p++ = prefix;
5014 *p++ = 0xf7;
5015 *p++ = 0x14;
5016 *p++ = 0x24;
5017 }
5018 else
5019 {
5020 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
5021 if (prefix)
5022 *p++ = prefix;
5023 if (lfence_before_ret == lfence_before_ret_or)
5024 {
5025 /* or: 0x83 0x0c 0x24 0x00, possibly preceded by a prefix
5026 for operand size override or 64-bit code. */
5027 *p++ = 0x83;
5028 *p++ = 0x0c;
5029 }
5030 else
5031 {
5032 /* shl: 0xc1 0x24 0x24 0x00, possibly preceded by a prefix
5033 for operand size override or 64-bit code. */
5034 *p++ = 0xc1;
5035 *p++ = 0x24;
5036 }
5037
5038 *p++ = 0x24;
5039 *p++ = 0x0;
5040 }
5041
5042 *p++ = 0xf;
5043 *p++ = 0xae;
5044 *p = 0xe8;
5045 }
5046 }
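
/* Example (illustrative): with -mlfence-before-ret=or in 64-bit code,
   "ret" is preceded by "orq $0x0, (%rsp)" plus lfence, i.e. the byte
   sequence 48 83 0c 24 00 0f ae e8 ahead of the c3 emitted for ret.  */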
5047
5048 /* Shared helper for md_assemble() and s_insn(). */
5049 static void init_globals (void)
5050 {
5051 unsigned int j;
5052
5053 memset (&i, '\0', sizeof (i));
5054 i.rounding.type = rc_none;
5055 for (j = 0; j < MAX_OPERANDS; j++)
5056 i.reloc[j] = NO_RELOC;
5057 memset (disp_expressions, '\0', sizeof (disp_expressions));
5058 memset (im_expressions, '\0', sizeof (im_expressions));
5059 save_stack_p = save_stack;
5060 }
5061
5062 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
5063 parsing pass. Instead of introducing a rarely used new insn attribute, this
5064 utilizes a common pattern between affected templates. It is deemed
5065 acceptable that this will lead to unnecessary pass 2 preparations in a
5066 limited set of cases. */
5067 static INLINE bool may_need_pass2 (const insn_template *t)
5068 {
5069 return t->opcode_modifier.sse2avx
5070 /* Note that all SSE2AVX templates have at least one operand. */
5071 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
5072 : (t->opcode_space == SPACE_0F
5073 && (t->base_opcode | 1) == 0xbf)
5074 || (t->opcode_space == SPACE_BASE
5075 && t->base_opcode == 0x63);
5076 }
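
/* Example (illustrative): a suffix-less AT&T "movsx" first matches the
   0f be/bf templates and may still fail to match; md_assemble() keeps a
   copy of the input line for templates satisfying this predicate so
   that parsing can be retried once, as can be seen further down.  */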
5077
5078 /* This is the guts of the machine-dependent assembler. LINE points to a
5079 machine dependent instruction. This function is supposed to emit
5080 the frags/bytes it assembles to. */
5081
5082 void
5083 md_assemble (char *line)
5084 {
5085 unsigned int j;
5086 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
5087 const char *end, *pass1_mnem = NULL;
5088 enum i386_error pass1_err = 0;
5089 const insn_template *t;
5090
5091 /* Initialize globals. */
5092 current_templates = NULL;
5093 retry:
5094 init_globals ();
5095
5096 /* First parse an instruction mnemonic & call i386_operand for the operands.
5097 We assume that the scrubber has arranged it so that line[0] is the valid
5098 start of a (possibly prefixed) mnemonic. */
5099
5100 end = parse_insn (line, mnemonic, false);
5101 if (end == NULL)
5102 {
5103 if (pass1_mnem != NULL)
5104 goto match_error;
5105 if (i.error != no_error)
5106 {
5107 gas_assert (current_templates != NULL);
5108 if (may_need_pass2 (current_templates->start) && !i.suffix)
5109 goto no_match;
5110 /* No point in trying a 2nd pass - it'll only find the same suffix
5111 again. */
5112 mnem_suffix = i.suffix;
5113 goto match_error;
5114 }
5115 return;
5116 }
5117 t = current_templates->start;
5118 if (may_need_pass2 (t))
5119 {
5120 /* Make a copy of the full line in case we need to retry. */
5121 copy = xstrdup (line);
5122 }
5123 line += end - line;
5124 mnem_suffix = i.suffix;
5125
5126 line = parse_operands (line, mnemonic);
5127 this_operand = -1;
5128 if (line == NULL)
5129 {
5130 free (copy);
5131 return;
5132 }
5133
5134 /* Now we've parsed the mnemonic into a set of templates, and have the
5135 operands at hand. */
5136
5137 /* All Intel opcodes have reversed operands except for "bound", "enter",
5138 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
5139 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
5140 intersegment "jmp" and "call" instructions with 2 immediate operands so
5141 that the immediate segment precedes the offset consistently in Intel and
5142 AT&T modes. */
5143 if (intel_syntax
5144 && i.operands > 1
5145 && (t->mnem_off != MN_bound)
5146 && !startswith (mnemonic, "invlpg")
5147 && !startswith (mnemonic, "monitor")
5148 && !startswith (mnemonic, "mwait")
5149 && (t->mnem_off != MN_pvalidate)
5150 && !startswith (mnemonic, "rmp")
5151 && (t->mnem_off != MN_tpause)
5152 && (t->mnem_off != MN_umwait)
5153 && !(i.operands == 2
5154 && operand_type_check (i.types[0], imm)
5155 && operand_type_check (i.types[1], imm)))
5156 swap_operands ();
5157
5158 /* The order of the immediates should be reversed for the
5159 two-immediate extrq and insertq instructions. */
5160 if (i.imm_operands == 2
5161 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
5162 swap_2_operands (0, 1);
5163
5164 if (i.imm_operands)
5165 optimize_imm ();
5166
5167 if (i.disp_operands && !optimize_disp (t))
5168 return;
5169
5170 /* Next, we find a template that matches the given insn,
5171 making sure the overlap of the given operands types is consistent
5172 with the template operand types. */
5173
5174 if (!(t = match_template (mnem_suffix)))
5175 {
5176 const char *err_msg;
5177
5178 if (copy && !mnem_suffix)
5179 {
5180 line = copy;
5181 copy = NULL;
5182 no_match:
5183 pass1_err = i.error;
5184 pass1_mnem = insn_name (current_templates->start);
5185 goto retry;
5186 }
5187
5188 /* If a non-/only-64bit template (group) was found in pass 1, and if
5189 _some_ template (group) was found in pass 2, squash pass 1's
5190 error. */
5191 if (pass1_err == unsupported_64bit)
5192 pass1_mnem = NULL;
5193
5194 match_error:
5195 free (copy);
5196
5197 switch (pass1_mnem ? pass1_err : i.error)
5198 {
5199 default:
5200 abort ();
5201 case operand_size_mismatch:
5202 err_msg = _("operand size mismatch");
5203 break;
5204 case operand_type_mismatch:
5205 err_msg = _("operand type mismatch");
5206 break;
5207 case register_type_mismatch:
5208 err_msg = _("register type mismatch");
5209 break;
5210 case number_of_operands_mismatch:
5211 err_msg = _("number of operands mismatch");
5212 break;
5213 case invalid_instruction_suffix:
5214 err_msg = _("invalid instruction suffix");
5215 break;
5216 case bad_imm4:
5217 err_msg = _("constant doesn't fit in 4 bits");
5218 break;
5219 case unsupported_with_intel_mnemonic:
5220 err_msg = _("unsupported with Intel mnemonic");
5221 break;
5222 case unsupported_syntax:
5223 err_msg = _("unsupported syntax");
5224 break;
5225 case unsupported:
5226 as_bad (_("unsupported instruction `%s'"),
5227 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5228 return;
5229 case unsupported_on_arch:
5230 as_bad (_("`%s' is not supported on `%s%s'"),
5231 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5232 cpu_arch_name ? cpu_arch_name : default_arch,
5233 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5234 return;
5235 case unsupported_64bit:
5236 if (ISLOWER (mnem_suffix))
5237 {
5238 if (flag_code == CODE_64BIT)
5239 as_bad (_("`%s%c' is not supported in 64-bit mode"),
5240 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5241 mnem_suffix);
5242 else
5243 as_bad (_("`%s%c' is only supported in 64-bit mode"),
5244 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5245 mnem_suffix);
5246 }
5247 else
5248 {
5249 if (flag_code == CODE_64BIT)
5250 as_bad (_("`%s' is not supported in 64-bit mode"),
5251 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5252 else
5253 as_bad (_("`%s' is only supported in 64-bit mode"),
5254 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5255 }
5256 return;
5257 case invalid_sib_address:
5258 err_msg = _("invalid SIB address");
5259 break;
5260 case invalid_vsib_address:
5261 err_msg = _("invalid VSIB address");
5262 break;
5263 case invalid_vector_register_set:
5264 err_msg = _("mask, index, and destination registers must be distinct");
5265 break;
5266 case invalid_tmm_register_set:
5267 err_msg = _("all tmm registers must be distinct");
5268 break;
5269 case invalid_dest_and_src_register_set:
5270 err_msg = _("destination and source registers must be distinct");
5271 break;
5272 case unsupported_vector_index_register:
5273 err_msg = _("unsupported vector index register");
5274 break;
5275 case unsupported_broadcast:
5276 err_msg = _("unsupported broadcast");
5277 break;
5278 case broadcast_needed:
5279 err_msg = _("broadcast is needed for operand of such type");
5280 break;
5281 case unsupported_masking:
5282 err_msg = _("unsupported masking");
5283 break;
5284 case mask_not_on_destination:
5285 err_msg = _("mask not on destination operand");
5286 break;
5287 case no_default_mask:
5288 err_msg = _("default mask isn't allowed");
5289 break;
5290 case unsupported_rc_sae:
5291 err_msg = _("unsupported static rounding/sae");
5292 break;
5293 case invalid_register_operand:
5294 err_msg = _("invalid register operand");
5295 break;
5296 }
5297 as_bad (_("%s for `%s'"), err_msg,
5298 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5299 return;
5300 }
5301
5302 free (copy);
5303
5304 if (sse_check != check_none
5305 /* The opcode space check isn't strictly needed; it's there only to
5306 bypass the logic below when easily possible. */
5307 && t->opcode_space >= SPACE_0F
5308 && t->opcode_space <= SPACE_0F3A
5309 && !is_cpu (&i.tm, CpuSSE4a)
5310 && !is_any_vex_encoding (t))
5311 {
5312 bool simd = false;
5313
5314 for (j = 0; j < t->operands; ++j)
5315 {
5316 if (t->operand_types[j].bitfield.class == RegMMX)
5317 break;
5318 if (t->operand_types[j].bitfield.class == RegSIMD)
5319 simd = true;
5320 }
5321
5322 if (j >= t->operands && simd)
5323 (sse_check == check_warning
5324 ? as_warn
5325 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
5326 }
5327
5328 if (i.tm.opcode_modifier.fwait)
5329 if (!add_prefix (FWAIT_OPCODE))
5330 return;
5331
5332 /* Check if REP prefix is OK. */
5333 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5334 {
5335 as_bad (_("invalid instruction `%s' after `%s'"),
5336 insn_name (&i.tm), i.rep_prefix);
5337 return;
5338 }
5339
5340 /* Check for lock without a lockable instruction. Destination operand
5341 must be memory unless it is xchg (0x86). */
5342 if (i.prefix[LOCK_PREFIX])
5343 {
5344 if (i.tm.opcode_modifier.prefixok < PrefixLock
5345 || i.mem_operands == 0
5346 || (i.tm.base_opcode != 0x86
5347 && !(i.flags[i.operands - 1] & Operand_Mem)))
5348 {
5349 as_bad (_("expecting lockable instruction after `lock'"));
5350 return;
5351 }
5352
5353 /* Zap the redundant prefix from XCHG when optimizing. */
5354 if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
5355 i.prefix[LOCK_PREFIX] = 0;
5356 }
5357
5358 if (is_any_vex_encoding (&i.tm)
5359 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5360 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5361 {
5362 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
5363 if (i.prefix[DATA_PREFIX])
5364 {
5365 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
5366 return;
5367 }
5368
5369 /* Don't allow e.g. KMOV in TLS code sequences. */
5370 for (j = i.imm_operands; j < i.operands; ++j)
5371 switch (i.reloc[j])
5372 {
5373 case BFD_RELOC_386_TLS_GOTIE:
5374 case BFD_RELOC_386_TLS_LE_32:
5375 case BFD_RELOC_X86_64_GOTTPOFF:
5376 case BFD_RELOC_X86_64_TLSLD:
5377 as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
5378 return;
5379 default:
5380 break;
5381 }
5382 }
5383
5384 /* Check if HLE prefix is OK. */
5385 if (i.hle_prefix && !check_hle ())
5386 return;
5387
5388 /* Check BND prefix. */
5389 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5390 as_bad (_("expecting valid branch instruction after `bnd'"));
5391
5392 /* Check NOTRACK prefix. */
5393 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5394 as_bad (_("expecting indirect branch instruction after `notrack'"));
5395
5396 if (is_cpu (&i.tm, CpuMPX))
5397 {
5398 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5399 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5400 else if (flag_code != CODE_16BIT
5401 ? i.prefix[ADDR_PREFIX]
5402 : i.mem_operands && !i.prefix[ADDR_PREFIX])
5403 as_bad (_("16-bit address isn't allowed in MPX instructions"));
5404 }
5405
5406 /* Insert BND prefix. */
5407 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5408 {
5409 if (!i.prefix[BND_PREFIX])
5410 add_prefix (BND_PREFIX_OPCODE);
5411 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5412 {
5413 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5414 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5415 }
5416 }
5417
5418 /* Check string instruction segment overrides. */
5419 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5420 {
5421 gas_assert (i.mem_operands);
5422 if (!check_string ())
5423 return;
5424 i.disp_operands = 0;
5425 }
5426
5427 /* A (%dx) memory operand may only be used with input/output
5428 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
5429 if (i.input_output_operand
5430 && ((i.tm.base_opcode | 0x82) != 0xee
5431 || i.tm.opcode_space != SPACE_BASE))
5432 {
5433 as_bad (_("input/output port address isn't allowed with `%s'"),
5434 insn_name (&i.tm));
5435 return;
5436 }
5437
5438 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5439 optimize_encoding ();
5440
5441 if (use_unaligned_vector_move)
5442 encode_with_unaligned_vector_move ();
5443
5444 if (!process_suffix ())
5445 return;
5446
5447 /* Check if IP-relative addressing requirements can be satisfied. */
5448 if (is_cpu (&i.tm, CpuPREFETCHI)
5449 && !(i.base_reg && i.base_reg->reg_num == RegIP))
5450 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
5451
5452 /* Update operand types and check extended states. */
5453 for (j = 0; j < i.operands; j++)
5454 {
5455 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5456 switch (i.tm.operand_types[j].bitfield.class)
5457 {
5458 default:
5459 break;
5460 case RegMMX:
5461 i.xstate |= xstate_mmx;
5462 break;
5463 case RegMask:
5464 i.xstate |= xstate_mask;
5465 break;
5466 case RegSIMD:
5467 if (i.tm.operand_types[j].bitfield.tmmword)
5468 i.xstate |= xstate_tmm;
5469 else if (i.tm.operand_types[j].bitfield.zmmword
5470 && vector_size >= VSZ512)
5471 i.xstate |= xstate_zmm;
5472 else if (i.tm.operand_types[j].bitfield.ymmword
5473 && vector_size >= VSZ256)
5474 i.xstate |= xstate_ymm;
5475 else if (i.tm.operand_types[j].bitfield.xmmword)
5476 i.xstate |= xstate_xmm;
5477 break;
5478 }
5479 }
5480
5481 /* Make still unresolved immediate matches conform to size of immediate
5482 given in i.suffix. */
5483 if (!finalize_imm ())
5484 return;
5485
5486 if (i.types[0].bitfield.imm1)
5487 i.imm_operands = 0; /* kludge for shift insns. */
5488
5489 /* For insns with operands there are more diddles to do to the opcode. */
5490 if (i.operands)
5491 {
5492 if (!process_operands ())
5493 return;
5494 }
5495 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5496 {
5497 /* UnixWare fsub with no args is an alias for fsubp, fadd -> faddp, etc. */
5498 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
5499 }
5500
5501 if (is_any_vex_encoding (&i.tm))
5502 {
5503 if (!cpu_arch_flags.bitfield.cpui286)
5504 {
5505 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5506 insn_name (&i.tm));
5507 return;
5508 }
5509
5510 /* Check for explicit REX prefix. */
5511 if (i.prefix[REX_PREFIX] || i.rex_encoding)
5512 {
5513 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
5514 return;
5515 }
5516
5517 if (i.tm.opcode_modifier.vex)
5518 build_vex_prefix (t);
5519 else
5520 build_evex_prefix ();
5521
5522 /* The individual REX.RXBW bits got consumed. */
5523 i.rex &= REX_OPCODE;
5524 }
5525
5526 /* Handle conversion of 'int $3' --> special int3 insn. */
5527 if (i.tm.mnem_off == MN_int
5528 && i.op[0].imms->X_add_number == 3)
5529 {
5530 i.tm.base_opcode = INT3_OPCODE;
5531 i.imm_operands = 0;
5532 }
5533
5534 if ((i.tm.opcode_modifier.jump == JUMP
5535 || i.tm.opcode_modifier.jump == JUMP_BYTE
5536 || i.tm.opcode_modifier.jump == JUMP_DWORD)
5537 && i.op[0].disps->X_op == O_constant)
5538 {
5539 /* Convert "jmp constant" (and "call constant") to a jump (call) to
5540 the absolute address given by the constant. Since ix86 jumps and
5541 calls are pc relative, we need to generate a reloc. */
5542 i.op[0].disps->X_add_symbol = &abs_symbol;
5543 i.op[0].disps->X_op = O_symbol;
5544 }
5545
5546 /* For 8 bit registers we need an empty rex prefix. Also if the
5547 instruction already has a prefix, we need to convert old
5548 registers to new ones. */
5549
5550 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5551 && (i.op[0].regs->reg_flags & RegRex64) != 0)
5552 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5553 && (i.op[1].regs->reg_flags & RegRex64) != 0)
5554 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5555 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5556 && i.rex != 0))
5557 {
5558 int x;
5559
5560 i.rex |= REX_OPCODE;
5561 for (x = 0; x < 2; x++)
5562 {
5563 /* Look for 8 bit operand that uses old registers. */
5564 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5565 && (i.op[x].regs->reg_flags & RegRex64) == 0)
5566 {
5567 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5568 /* In case it is "hi" register, give up. */
5569 if (i.op[x].regs->reg_num > 3)
5570 as_bad (_("can't encode register '%s%s' in an "
5571 "instruction requiring REX prefix."),
5572 register_prefix, i.op[x].regs->reg_name);
5573
5574 /* Otherwise it is equivalent to the extended register.
5575 Since the encoding doesn't change this is merely
5576 cosmetic cleanup for debug output. */
5577
5578 i.op[x].regs = i.op[x].regs + 8;
5579 }
5580 }
5581 }
5582
5583 if (i.rex == 0 && i.rex_encoding)
5584 {
5585 /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
5586 that uses legacy register. If it is "hi" register, don't add
5587 the REX_OPCODE byte. */
5588 int x;
5589 for (x = 0; x < 2; x++)
5590 if (i.types[x].bitfield.class == Reg
5591 && i.types[x].bitfield.byte
5592 && (i.op[x].regs->reg_flags & RegRex64) == 0
5593 && i.op[x].regs->reg_num > 3)
5594 {
5595 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5596 i.rex_encoding = false;
5597 break;
5598 }
5599
5600 if (i.rex_encoding)
5601 i.rex = REX_OPCODE;
5602 }
5603
5604 if (i.rex != 0)
5605 add_prefix (REX_OPCODE | i.rex);
5606
5607 insert_lfence_before ();
5608
5609 /* We are ready to output the insn. */
5610 output_insn ();
5611
5612 insert_lfence_after ();
5613
5614 last_insn.seg = now_seg;
5615
5616 if (i.tm.opcode_modifier.isprefix)
5617 {
5618 last_insn.kind = last_insn_prefix;
5619 last_insn.name = insn_name (&i.tm);
5620 last_insn.file = as_where (&last_insn.line);
5621 }
5622 else
5623 last_insn.kind = last_insn_other;
5624 }
5625
5626 /* The Q suffix is generally valid only in 64-bit mode, with very few
5627 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
5628 and fisttp only one of their two templates is matched below: That's
5629 sufficient since other relevant attributes are the same between both
5630 respective templates. */
5631 static INLINE bool q_suffix_allowed(const insn_template *t)
5632 {
5633 return flag_code == CODE_64BIT
5634 || (t->opcode_space == SPACE_BASE
5635 && t->base_opcode == 0xdf
5636 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5637 || t->mnem_off == MN_cmpxchg8b;
5638 }
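
/* Example (illustrative): in 32-bit code "fildq (%esp)" is still
   accepted -- fild/fistp/fisttp on m64 are DF opcodes with an odd /reg
   field (5, 7, 1 respectively), which is what the extension_opcode
   check above matches.  */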
5639
5640 static const char *
5641 parse_insn (const char *line, char *mnemonic, bool prefix_only)
5642 {
5643 const char *l = line, *token_start = l;
5644 char *mnem_p;
5645 bool pass1 = !current_templates;
5646 int supported;
5647 const insn_template *t;
5648 char *dot_p = NULL;
5649
5650 while (1)
5651 {
5652 mnem_p = mnemonic;
5653 /* Pseudo-prefixes start with an opening figure brace. */
5654 if ((*mnem_p = *l) == '{')
5655 {
5656 ++mnem_p;
5657 ++l;
5658 }
5659 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5660 {
5661 if (*mnem_p == '.')
5662 dot_p = mnem_p;
5663 mnem_p++;
5664 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5665 {
5666 too_long:
5667 as_bad (_("no such instruction: `%s'"), token_start);
5668 return NULL;
5669 }
5670 l++;
5671 }
5672 /* Pseudo-prefixes end with a closing figure brace. */
5673 if (*mnemonic == '{' && *l == '}')
5674 {
5675 *mnem_p++ = *l++;
5676 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5677 goto too_long;
5678 *mnem_p = '\0';
5679
5680 /* Point l at the closing brace if there's no other separator. */
5681 if (*l != END_OF_INSN && !is_space_char (*l)
5682 && *l != PREFIX_SEPARATOR)
5683 --l;
5684 }
5685 else if (!is_space_char (*l)
5686 && *l != END_OF_INSN
5687 && (intel_syntax
5688 || (*l != PREFIX_SEPARATOR && *l != ',')))
5689 {
5690 if (prefix_only)
5691 break;
5692 as_bad (_("invalid character %s in mnemonic"),
5693 output_invalid (*l));
5694 return NULL;
5695 }
5696 if (token_start == l)
5697 {
5698 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5699 as_bad (_("expecting prefix; got nothing"));
5700 else
5701 as_bad (_("expecting mnemonic; got nothing"));
5702 return NULL;
5703 }
5704
5705 /* Look up instruction (or prefix) via hash table. */
5706 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5707
5708 if (*l != END_OF_INSN
5709 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5710 && current_templates
5711 && current_templates->start->opcode_modifier.isprefix)
5712 {
5713 if (!cpu_flags_check_cpu64 (current_templates->start))
5714 {
5715 as_bad ((flag_code != CODE_64BIT
5716 ? _("`%s' is only supported in 64-bit mode")
5717 : _("`%s' is not supported in 64-bit mode")),
5718 insn_name (current_templates->start));
5719 return NULL;
5720 }
5721 /* If we are in 16-bit mode, do not allow addr16 or data16.
5722 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5723 if ((current_templates->start->opcode_modifier.size == SIZE16
5724 || current_templates->start->opcode_modifier.size == SIZE32)
5725 && flag_code != CODE_64BIT
5726 && ((current_templates->start->opcode_modifier.size == SIZE32)
5727 ^ (flag_code == CODE_16BIT)))
5728 {
5729 as_bad (_("redundant %s prefix"),
5730 insn_name (current_templates->start));
5731 return NULL;
5732 }
5733
5734 if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5735 {
5736 /* Handle pseudo prefixes. */
5737 switch (current_templates->start->extension_opcode)
5738 {
5739 case Prefix_Disp8:
5740 /* {disp8} */
5741 i.disp_encoding = disp_encoding_8bit;
5742 break;
5743 case Prefix_Disp16:
5744 /* {disp16} */
5745 i.disp_encoding = disp_encoding_16bit;
5746 break;
5747 case Prefix_Disp32:
5748 /* {disp32} */
5749 i.disp_encoding = disp_encoding_32bit;
5750 break;
5751 case Prefix_Load:
5752 /* {load} */
5753 i.dir_encoding = dir_encoding_load;
5754 break;
5755 case Prefix_Store:
5756 /* {store} */
5757 i.dir_encoding = dir_encoding_store;
5758 break;
5759 case Prefix_VEX:
5760 /* {vex} */
5761 i.vec_encoding = vex_encoding_vex;
5762 break;
5763 case Prefix_VEX3:
5764 /* {vex3} */
5765 i.vec_encoding = vex_encoding_vex3;
5766 break;
5767 case Prefix_EVEX:
5768 /* {evex} */
5769 i.vec_encoding = vex_encoding_evex;
5770 break;
5771 case Prefix_REX:
5772 /* {rex} */
5773 i.rex_encoding = true;
5774 break;
5775 case Prefix_NoOptimize:
5776 /* {nooptimize} */
5777 i.no_optimize = true;
5778 break;
5779 default:
5780 abort ();
5781 }
5782 }
5783 else
5784 {
5785 /* Add prefix, checking for repeated prefixes. */
5786 switch (add_prefix (current_templates->start->base_opcode))
5787 {
5788 case PREFIX_EXIST:
5789 return NULL;
5790 case PREFIX_DS:
5791 if (is_cpu (current_templates->start, CpuIBT))
5792 i.notrack_prefix = insn_name (current_templates->start);
5793 break;
5794 case PREFIX_REP:
5795 if (is_cpu (current_templates->start, CpuHLE))
5796 i.hle_prefix = insn_name (current_templates->start);
5797 else if (is_cpu (current_templates->start, CpuMPX))
5798 i.bnd_prefix = insn_name (current_templates->start);
5799 else
5800 i.rep_prefix = insn_name (current_templates->start);
5801 break;
5802 default:
5803 break;
5804 }
5805 }
5806 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5807 token_start = ++l;
5808 }
5809 else
5810 break;
5811 }
5812
5813 if (prefix_only)
5814 return token_start;
5815
5816 if (!current_templates)
5817 {
5818 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5819 Check if we should swap operands or force a 32bit displacement in
5820 the encoding. */
5821 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5822 i.dir_encoding = dir_encoding_swap;
5823 else if (mnem_p - 3 == dot_p
5824 && dot_p[1] == 'd'
5825 && dot_p[2] == '8')
5826 i.disp_encoding = disp_encoding_8bit;
5827 else if (mnem_p - 4 == dot_p
5828 && dot_p[1] == 'd'
5829 && dot_p[2] == '3'
5830 && dot_p[3] == '2')
5831 i.disp_encoding = disp_encoding_32bit;
5832 else
5833 goto check_suffix;
5834 mnem_p = dot_p;
5835 *dot_p = '\0';
5836 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5837 }
5838
5839 if (!current_templates || !pass1)
5840 {
5841 current_templates = NULL;
5842
5843 check_suffix:
5844 if (mnem_p > mnemonic)
5845 {
5846 /* See if we can get a match by trimming off a suffix. */
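/* E.g. AT&T "movl" has no table entry of its own: trimming the 'l'
   finds the "mov" templates and records i.suffix = 'l'.  */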
5847 switch (mnem_p[-1])
5848 {
5849 case WORD_MNEM_SUFFIX:
5850 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5851 i.suffix = SHORT_MNEM_SUFFIX;
5852 else
5853 /* Fall through. */
5854 case BYTE_MNEM_SUFFIX:
5855 case QWORD_MNEM_SUFFIX:
5856 i.suffix = mnem_p[-1];
5857 mnem_p[-1] = '\0';
5858 current_templates
5859 = (const templates *) str_hash_find (op_hash, mnemonic);
5860 break;
5861 case SHORT_MNEM_SUFFIX:
5862 case LONG_MNEM_SUFFIX:
5863 if (!intel_syntax)
5864 {
5865 i.suffix = mnem_p[-1];
5866 mnem_p[-1] = '\0';
5867 current_templates
5868 = (const templates *) str_hash_find (op_hash, mnemonic);
5869 }
5870 break;
5871
5872 /* Intel Syntax. */
5873 case 'd':
5874 if (intel_syntax)
5875 {
5876 if (intel_float_operand (mnemonic) == 1)
5877 i.suffix = SHORT_MNEM_SUFFIX;
5878 else
5879 i.suffix = LONG_MNEM_SUFFIX;
5880 mnem_p[-1] = '\0';
5881 current_templates
5882 = (const templates *) str_hash_find (op_hash, mnemonic);
5883 }
5884 /* For compatibility reasons accept MOVSD and CMPSD without
5885 operands even in AT&T mode. */
5886 else if (*l == END_OF_INSN
5887 || (is_space_char (*l) && l[1] == END_OF_INSN))
5888 {
5889 mnem_p[-1] = '\0';
5890 current_templates
5891 = (const templates *) str_hash_find (op_hash, mnemonic);
5892 if (current_templates != NULL
5893 /* MOVS or CMPS */
5894 && (current_templates->start->base_opcode | 2) == 0xa6
5895 && current_templates->start->opcode_space
5896 == SPACE_BASE
5897 && mnem_p[-2] == 's')
5898 {
5899 as_warn (_("found `%sd'; assuming `%sl' was meant"),
5900 mnemonic, mnemonic);
5901 i.suffix = LONG_MNEM_SUFFIX;
5902 }
5903 else
5904 {
5905 current_templates = NULL;
5906 mnem_p[-1] = 'd';
5907 }
5908 }
5909 break;
5910 }
5911 }
5912
5913 if (!current_templates)
5914 {
5915 if (pass1)
5916 as_bad (_("no such instruction: `%s'"), token_start);
5917 return NULL;
5918 }
5919 }
5920
5921 if (current_templates->start->opcode_modifier.jump == JUMP
5922 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5923 {
5924 /* Check for a branch hint. We allow ",pt" and ",pn" for
5925 predict taken and predict not taken respectively.
5926 I'm not sure that branch hints actually do anything on loop
5927 and jcxz insns (JumpByte) for current Pentium4 chips. They
5928 may work in the future and it doesn't hurt to accept them
5929 now. */
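/* Illustrative example: "jne,pt .L1" adds a 0x3e (%ds) prefix as a
   "predict taken" hint, while "jne,pn .L1" adds 0x2e (%cs) for
   "predict not taken".  */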
5930 if (l[0] == ',' && l[1] == 'p')
5931 {
5932 if (l[2] == 't')
5933 {
5934 if (!add_prefix (DS_PREFIX_OPCODE))
5935 return NULL;
5936 l += 3;
5937 }
5938 else if (l[2] == 'n')
5939 {
5940 if (!add_prefix (CS_PREFIX_OPCODE))
5941 return NULL;
5942 l += 3;
5943 }
5944 }
5945 }
5946 /* Any other comma loses. */
5947 if (*l == ',')
5948 {
5949 as_bad (_("invalid character %s in mnemonic"),
5950 output_invalid (*l));
5951 return NULL;
5952 }
5953
5954 /* Check if instruction is supported on specified architecture. */
5955 supported = 0;
5956 for (t = current_templates->start; t < current_templates->end; ++t)
5957 {
5958 supported |= cpu_flags_match (t);
5959
5960 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5961 supported &= ~CPU_FLAGS_64BIT_MATCH;
5962
5963 if (supported == CPU_FLAGS_PERFECT_MATCH)
5964 return l;
5965 }
5966
5967 if (pass1)
5968 {
5969 if (supported & CPU_FLAGS_64BIT_MATCH)
5970 i.error = unsupported_on_arch;
5971 else
5972 i.error = unsupported_64bit;
5973 }
5974
5975 return NULL;
5976 }
5977
5978 static char *
5979 parse_operands (char *l, const char *mnemonic)
5980 {
5981 char *token_start;
5982
5983 /* 1 if operand is pending after ','. */
5984 unsigned int expecting_operand = 0;
5985
5986 while (*l != END_OF_INSN)
5987 {
5988 /* Non-zero if operand parens not balanced. */
5989 unsigned int paren_not_balanced = 0;
5990 /* True if inside double quotes. */
5991 bool in_quotes = false;
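/* Commas separate operands only at the top level: e.g. in AT&T
   syntax the single memory operand "-4(%ebx,%ecx,2)" contains
   commas, but only inside balanced parentheses.  */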
5992
5993 /* Skip optional white space before operand. */
5994 if (is_space_char (*l))
5995 ++l;
5996 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
5997 {
5998 as_bad (_("invalid character %s before operand %d"),
5999 output_invalid (*l),
6000 i.operands + 1);
6001 return NULL;
6002 }
6003 token_start = l; /* After white space. */
6004 while (in_quotes || paren_not_balanced || *l != ',')
6005 {
6006 if (*l == END_OF_INSN)
6007 {
6008 if (in_quotes)
6009 {
6010 as_bad (_("unbalanced double quotes in operand %d."),
6011 i.operands + 1);
6012 return NULL;
6013 }
6014 if (paren_not_balanced)
6015 {
6016 know (!intel_syntax);
6017 as_bad (_("unbalanced parenthesis in operand %d."),
6018 i.operands + 1);
6019 return NULL;
6020 }
6021 else
6022 break; /* we are done */
6023 }
6024 else if (*l == '\\' && l[1] == '"')
6025 ++l;
6026 else if (*l == '"')
6027 in_quotes = !in_quotes;
6028 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
6029 {
6030 as_bad (_("invalid character %s in operand %d"),
6031 output_invalid (*l),
6032 i.operands + 1);
6033 return NULL;
6034 }
6035 if (!intel_syntax && !in_quotes)
6036 {
6037 if (*l == '(')
6038 ++paren_not_balanced;
6039 if (*l == ')')
6040 --paren_not_balanced;
6041 }
6042 l++;
6043 }
6044 if (l != token_start)
6045 { /* Yes, we've read in another operand. */
6046 unsigned int operand_ok;
6047 this_operand = i.operands++;
6048 if (i.operands > MAX_OPERANDS)
6049 {
6050 as_bad (_("spurious operands; (%d operands/instruction max)"),
6051 MAX_OPERANDS);
6052 return NULL;
6053 }
6054 i.types[this_operand].bitfield.unspecified = 1;
6055 /* Now parse operand adding info to 'i' as we go along. */
6056 END_STRING_AND_SAVE (l);
6057
6058 if (i.mem_operands > 1)
6059 {
6060 as_bad (_("too many memory references for `%s'"),
6061 mnemonic);
6062 return 0;
6063 }
6064
6065 if (intel_syntax)
6066 operand_ok =
6067 i386_intel_operand (token_start,
6068 intel_float_operand (mnemonic));
6069 else
6070 operand_ok = i386_att_operand (token_start);
6071
6072 RESTORE_END_STRING (l);
6073 if (!operand_ok)
6074 return NULL;
6075 }
6076 else
6077 {
6078 if (expecting_operand)
6079 {
6080 expecting_operand_after_comma:
6081 as_bad (_("expecting operand after ','; got nothing"));
6082 return NULL;
6083 }
6084 if (*l == ',')
6085 {
6086 as_bad (_("expecting operand before ','; got nothing"));
6087 return NULL;
6088 }
6089 }
6090
6091 /* Now *l must be either ',' or END_OF_INSN. */
6092 if (*l == ',')
6093 {
6094 if (*++l == END_OF_INSN)
6095 {
6096 /* Skip the comma; if the end of the line follows it, complain. */
6097 goto expecting_operand_after_comma;
6098 }
6099 expecting_operand = 1;
6100 }
6101 }
6102 return l;
6103 }
6104
6105 static void
6106 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
6107 {
6108 union i386_op temp_op;
6109 i386_operand_type temp_type;
6110 unsigned int temp_flags;
6111 enum bfd_reloc_code_real temp_reloc;
6112
6113 temp_type = i.types[xchg2];
6114 i.types[xchg2] = i.types[xchg1];
6115 i.types[xchg1] = temp_type;
6116
6117 temp_flags = i.flags[xchg2];
6118 i.flags[xchg2] = i.flags[xchg1];
6119 i.flags[xchg1] = temp_flags;
6120
6121 temp_op = i.op[xchg2];
6122 i.op[xchg2] = i.op[xchg1];
6123 i.op[xchg1] = temp_op;
6124
6125 temp_reloc = i.reloc[xchg2];
6126 i.reloc[xchg2] = i.reloc[xchg1];
6127 i.reloc[xchg1] = temp_reloc;
6128
6129 temp_flags = i.imm_bits[xchg2];
6130 i.imm_bits[xchg2] = i.imm_bits[xchg1];
6131 i.imm_bits[xchg1] = temp_flags;
6132
6133 if (i.mask.reg)
6134 {
6135 if (i.mask.operand == xchg1)
6136 i.mask.operand = xchg2;
6137 else if (i.mask.operand == xchg2)
6138 i.mask.operand = xchg1;
6139 }
6140 if (i.broadcast.type || i.broadcast.bytes)
6141 {
6142 if (i.broadcast.operand == xchg1)
6143 i.broadcast.operand = xchg2;
6144 else if (i.broadcast.operand == xchg2)
6145 i.broadcast.operand = xchg1;
6146 }
6147 }
6148
6149 static void
6150 swap_operands (void)
6151 {
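/* Illustrative note: Intel syntax parses operands destination-first;
   assuming the usual call from md_assemble in that mode, e.g.
   "mov ecx, eax" has its two operands exchanged here into the
   internal source-first order.  */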
6152 switch (i.operands)
6153 {
6154 case 5:
6155 case 4:
6156 swap_2_operands (1, i.operands - 2);
6157 /* Fall through. */
6158 case 3:
6159 case 2:
6160 swap_2_operands (0, i.operands - 1);
6161 break;
6162 default:
6163 abort ();
6164 }
6165
6166 if (i.mem_operands == 2)
6167 {
6168 const reg_entry *temp_seg;
6169 temp_seg = i.seg[0];
6170 i.seg[0] = i.seg[1];
6171 i.seg[1] = temp_seg;
6172 }
6173 }
6174
6175 /* Try to ensure constant immediates are represented in the smallest
6176 encoding possible. */
6177 static void
6178 optimize_imm (void)
6179 {
6180 char guess_suffix = 0;
6181 int op;
6182
6183 if (i.suffix)
6184 guess_suffix = i.suffix;
6185 else if (i.reg_operands)
6186 {
6187 /* Figure out a suffix from the last register operand specified.
6188 We can't do this properly yet, i.e. excluding special register
6189 instances, but the following works for instructions with
6190 immediates. In any case, we can't set i.suffix yet. */
6191 for (op = i.operands; --op >= 0;)
6192 if (i.types[op].bitfield.class != Reg)
6193 continue;
6194 else if (i.types[op].bitfield.byte)
6195 {
6196 guess_suffix = BYTE_MNEM_SUFFIX;
6197 break;
6198 }
6199 else if (i.types[op].bitfield.word)
6200 {
6201 guess_suffix = WORD_MNEM_SUFFIX;
6202 break;
6203 }
6204 else if (i.types[op].bitfield.dword)
6205 {
6206 guess_suffix = LONG_MNEM_SUFFIX;
6207 break;
6208 }
6209 else if (i.types[op].bitfield.qword)
6210 {
6211 guess_suffix = QWORD_MNEM_SUFFIX;
6212 break;
6213 }
6214 }
6215 else if ((flag_code == CODE_16BIT)
6216 ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
6217 guess_suffix = WORD_MNEM_SUFFIX;
6218 else if (flag_code != CODE_64BIT
6219 || (!(i.prefix[REX_PREFIX] & REX_W)
6220 /* A more generic (but also more involved) way of dealing
6221 with the special case(s) would be to go look for
6222 DefaultSize attributes on any of the templates. */
6223 && current_templates->start->mnem_off != MN_push))
6224 guess_suffix = LONG_MNEM_SUFFIX;
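/* Illustrative example: for "add $0xffffffe0, %ebx" the register
   yields an 'l' guess; the constant is then normalized to -0x20
   below and can use the sign-extended-imm8 form of add (0x83).  */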
6225
6226 for (op = i.operands; --op >= 0;)
6227 if (operand_type_check (i.types[op], imm))
6228 {
6229 switch (i.op[op].imms->X_op)
6230 {
6231 case O_constant:
6232 /* If a suffix is given, this operand may be shortened. */
6233 switch (guess_suffix)
6234 {
6235 case LONG_MNEM_SUFFIX:
6236 i.types[op].bitfield.imm32 = 1;
6237 i.types[op].bitfield.imm64 = 1;
6238 break;
6239 case WORD_MNEM_SUFFIX:
6240 i.types[op].bitfield.imm16 = 1;
6241 i.types[op].bitfield.imm32 = 1;
6242 i.types[op].bitfield.imm32s = 1;
6243 i.types[op].bitfield.imm64 = 1;
6244 break;
6245 case BYTE_MNEM_SUFFIX:
6246 i.types[op].bitfield.imm8 = 1;
6247 i.types[op].bitfield.imm8s = 1;
6248 i.types[op].bitfield.imm16 = 1;
6249 i.types[op].bitfield.imm32 = 1;
6250 i.types[op].bitfield.imm32s = 1;
6251 i.types[op].bitfield.imm64 = 1;
6252 break;
6253 }
6254
6255 /* If this operand is at most 16 bits, convert it
6256 to a signed 16 bit number before trying to see
6257 whether it will fit in an even smaller size.
6258 This allows a 16-bit operand such as $0xffe0 to
6259 be recognised as within Imm8S range. */
6260 if ((i.types[op].bitfield.imm16)
6261 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6262 {
6263 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6264 ^ 0x8000) - 0x8000);
6265 }
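/* Worked example: 0xffe0 -> (0xffe0 ^ 0x8000) - 0x8000
   = 0x7fe0 - 0x8000 = -0x20, which fits in Imm8S.  */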
6266 #ifdef BFD64
6267 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
6268 if ((i.types[op].bitfield.imm32)
6269 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6270 {
6271 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6272 ^ ((offsetT) 1 << 31))
6273 - ((offsetT) 1 << 31));
6274 }
6275 #endif
6276 i.types[op]
6277 = operand_type_or (i.types[op],
6278 smallest_imm_type (i.op[op].imms->X_add_number));
6279
6280 /* We must avoid matching Imm32 templates when only a 64bit
6281 immediate is available. */
6282 if (guess_suffix == QWORD_MNEM_SUFFIX)
6283 i.types[op].bitfield.imm32 = 0;
6284 break;
6285
6286 case O_absent:
6287 case O_register:
6288 abort ();
6289
6290 /* Symbols and expressions. */
6291 default:
6292 /* Convert symbolic operand to proper sizes for matching, but don't
6293 prevent matching a set of insns that only supports sizes other
6294 than those matching the insn suffix. */
6295 {
6296 i386_operand_type mask, allowed;
6297 const insn_template *t = current_templates->start;
6298
6299 operand_type_set (&mask, 0);
6300 switch (guess_suffix)
6301 {
6302 case QWORD_MNEM_SUFFIX:
6303 mask.bitfield.imm64 = 1;
6304 mask.bitfield.imm32s = 1;
6305 break;
6306 case LONG_MNEM_SUFFIX:
6307 mask.bitfield.imm32 = 1;
6308 break;
6309 case WORD_MNEM_SUFFIX:
6310 mask.bitfield.imm16 = 1;
6311 break;
6312 case BYTE_MNEM_SUFFIX:
6313 mask.bitfield.imm8 = 1;
6314 break;
6315 default:
6316 break;
6317 }
6318
6319 allowed = operand_type_and (t->operand_types[op], mask);
6320 while (++t < current_templates->end)
6321 {
6322 allowed = operand_type_or (allowed, t->operand_types[op]);
6323 allowed = operand_type_and (allowed, mask);
6324 }
6325
6326 if (!operand_type_all_zero (&allowed))
6327 i.types[op] = operand_type_and (i.types[op], mask);
6328 }
6329 break;
6330 }
6331 }
6332 }
6333
6334 /* Try to use the smallest displacement type too. */
6335 static bool
6336 optimize_disp (const insn_template *t)
6337 {
6338 unsigned int op;
6339
6340 if (!want_disp32 (t)
6341 && (!t->opcode_modifier.jump
6342 || i.jumpabsolute || i.types[0].bitfield.baseindex))
6343 {
6344 for (op = 0; op < i.operands; ++op)
6345 {
6346 const expressionS *exp = i.op[op].disps;
6347
6348 if (!operand_type_check (i.types[op], disp))
6349 continue;
6350
6351 if (exp->X_op != O_constant)
6352 continue;
6353
6354 /* Since the displacement is sign-extended to 64bit, don't allow
6355 disp32 if it is out of range. */
6356 if (fits_in_signed_long (exp->X_add_number))
6357 continue;
6358
6359 i.types[op].bitfield.disp32 = 0;
6360 if (i.types[op].bitfield.baseindex)
6361 {
6362 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
6363 (uint64_t) exp->X_add_number);
6364 return false;
6365 }
6366 }
6367 }
6368
6369 /* Don't optimize displacement for movabs since it only takes 64bit
6370 displacement. */
6371 if (i.disp_encoding > disp_encoding_8bit
6372 || (flag_code == CODE_64BIT && t->mnem_off == MN_movabs))
6373 return true;
6374
6375 for (op = i.operands; op-- > 0;)
6376 if (operand_type_check (i.types[op], disp))
6377 {
6378 if (i.op[op].disps->X_op == O_constant)
6379 {
6380 offsetT op_disp = i.op[op].disps->X_add_number;
6381
6382 if (!op_disp && i.types[op].bitfield.baseindex)
6383 {
6384 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6385 i.op[op].disps = NULL;
6386 i.disp_operands--;
6387 continue;
6388 }
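/* E.g. "movl %eax, 0(%ebx)" is encoded as if "(%ebx)" had been
   written, with no displacement byte.  */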
6389
6390 if (i.types[op].bitfield.disp16
6391 && fits_in_unsigned_word (op_disp))
6392 {
6393 /* If this operand is at most 16 bits, convert
6394 to a signed 16 bit number and don't use 64bit
6395 displacement. */
6396 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6397 i.types[op].bitfield.disp64 = 0;
6398 }
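/* E.g. a 16-bit displacement of 0xfffc becomes
   (0xfffc ^ 0x8000) - 0x8000 = -4 here.  */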
6399
6400 #ifdef BFD64
6401 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
6402 if ((flag_code != CODE_64BIT
6403 ? i.types[op].bitfield.disp32
6404 : want_disp32 (t)
6405 && (!t->opcode_modifier.jump
6406 || i.jumpabsolute || i.types[op].bitfield.baseindex))
6407 && fits_in_unsigned_long (op_disp))
6408 {
6409 /* If this operand is at most 32 bits, convert
6410 to a signed 32 bit number and don't use 64bit
6411 displacement. */
6412 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6413 i.types[op].bitfield.disp64 = 0;
6414 i.types[op].bitfield.disp32 = 1;
6415 }
6416
6417 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6418 {
6419 i.types[op].bitfield.disp64 = 0;
6420 i.types[op].bitfield.disp32 = 1;
6421 }
6422 #endif
6423 if ((i.types[op].bitfield.disp32
6424 || i.types[op].bitfield.disp16)
6425 && fits_in_disp8 (op_disp))
6426 i.types[op].bitfield.disp8 = 1;
6427
6428 i.op[op].disps->X_add_number = op_disp;
6429 }
6430 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6431 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6432 {
6433 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6434 i.op[op].disps, 0, i.reloc[op]);
6435 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6436 }
6437 else
6438 /* We only support 64bit displacement on constants. */
6439 i.types[op].bitfield.disp64 = 0;
6440 }
6441
6442 return true;
6443 }
6444
6445 /* Return 1 if there is a match in broadcast bytes between operand
6446 GIVEN and instruction template T. */
6447
6448 static INLINE int
6449 match_broadcast_size (const insn_template *t, unsigned int given)
6450 {
6451 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6452 && i.types[given].bitfield.byte)
6453 || (t->opcode_modifier.broadcast == WORD_BROADCAST
6454 && i.types[given].bitfield.word)
6455 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6456 && i.types[given].bitfield.dword)
6457 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6458 && i.types[given].bitfield.qword));
6459 }
6460
6461 /* Check if operands are valid for the instruction. */
6462
6463 static int
6464 check_VecOperands (const insn_template *t)
6465 {
6466 unsigned int op;
6467 i386_cpu_flags cpu;
6468
6469 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6470 any one operand implicitly require AVX512VL support if the actual
6471 operand size is YMMword or XMMword. Since this function runs after
6472 template matching, there's no need to check for YMMword/XMMword in
6473 the template. */
6474 cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
6475 if (!cpu_flags_all_zero (&cpu)
6476 && !is_cpu (t, CpuAVX512VL)
6477 && !cpu_arch_flags.bitfield.cpuavx512vl)
6478 {
6479 for (op = 0; op < t->operands; ++op)
6480 {
6481 if (t->operand_types[op].bitfield.zmmword
6482 && (i.types[op].bitfield.ymmword
6483 || i.types[op].bitfield.xmmword))
6484 {
6485 i.error = unsupported;
6486 return 1;
6487 }
6488 }
6489 }
6490
6491 /* Somewhat similarly, templates specifying both AVX and AVX2 are
6492 requiring AVX2 support if the actual operand size is YMMword. */
6493 if (is_cpu (t, CpuAVX) && is_cpu (t, CpuAVX2)
6494 && !cpu_arch_flags.bitfield.cpuavx2)
6495 {
6496 for (op = 0; op < t->operands; ++op)
6497 {
6498 if (t->operand_types[op].bitfield.xmmword
6499 && i.types[op].bitfield.ymmword)
6500 {
6501 i.error = unsupported;
6502 return 1;
6503 }
6504 }
6505 }
6506
6507 /* Without a VSIB byte, we can't have a vector register as index. */
6508 if (!t->opcode_modifier.sib
6509 && i.index_reg
6510 && (i.index_reg->reg_type.bitfield.xmmword
6511 || i.index_reg->reg_type.bitfield.ymmword
6512 || i.index_reg->reg_type.bitfield.zmmword))
6513 {
6514 i.error = unsupported_vector_index_register;
6515 return 1;
6516 }
6517
6518 /* Check if default mask is allowed. */
6519 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6520 && (!i.mask.reg || i.mask.reg->reg_num == 0))
6521 {
6522 i.error = no_default_mask;
6523 return 1;
6524 }
6525
6526 /* With a VSIB byte, we need a vector register as index, and all vector
6527 registers must be distinct. */
6528 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6529 {
6530 if (!i.index_reg
6531 || !((t->opcode_modifier.sib == VECSIB128
6532 && i.index_reg->reg_type.bitfield.xmmword)
6533 || (t->opcode_modifier.sib == VECSIB256
6534 && i.index_reg->reg_type.bitfield.ymmword)
6535 || (t->opcode_modifier.sib == VECSIB512
6536 && i.index_reg->reg_type.bitfield.zmmword)))
6537 {
6538 i.error = invalid_vsib_address;
6539 return 1;
6540 }
6541
6542 gas_assert (i.reg_operands == 2 || i.mask.reg);
6543 if (i.reg_operands == 2 && !i.mask.reg)
6544 {
6545 gas_assert (i.types[0].bitfield.class == RegSIMD);
6546 gas_assert (i.types[0].bitfield.xmmword
6547 || i.types[0].bitfield.ymmword);
6548 gas_assert (i.types[2].bitfield.class == RegSIMD);
6549 gas_assert (i.types[2].bitfield.xmmword
6550 || i.types[2].bitfield.ymmword);
6551 if (operand_check == check_none)
6552 return 0;
6553 if (register_number (i.op[0].regs)
6554 != register_number (i.index_reg)
6555 && register_number (i.op[2].regs)
6556 != register_number (i.index_reg)
6557 && register_number (i.op[0].regs)
6558 != register_number (i.op[2].regs))
6559 return 0;
6560 if (operand_check == check_error)
6561 {
6562 i.error = invalid_vector_register_set;
6563 return 1;
6564 }
6565 as_warn (_("mask, index, and destination registers should be distinct"));
6566 }
6567 else if (i.reg_operands == 1 && i.mask.reg)
6568 {
6569 if (i.types[1].bitfield.class == RegSIMD
6570 && (i.types[1].bitfield.xmmword
6571 || i.types[1].bitfield.ymmword
6572 || i.types[1].bitfield.zmmword)
6573 && (register_number (i.op[1].regs)
6574 == register_number (i.index_reg)))
6575 {
6576 if (operand_check == check_error)
6577 {
6578 i.error = invalid_vector_register_set;
6579 return 1;
6580 }
6581 if (operand_check != check_none)
6582 as_warn (_("index and destination registers should be distinct"));
6583 }
6584 }
6585 }
6586
6587 /* For AMX instructions with 3 TMM register operands, all operands
6588 must be distinct. */
6589 if (i.reg_operands == 3
6590 && t->operand_types[0].bitfield.tmmword
6591 && (i.op[0].regs == i.op[1].regs
6592 || i.op[0].regs == i.op[2].regs
6593 || i.op[1].regs == i.op[2].regs))
6594 {
6595 i.error = invalid_tmm_register_set;
6596 return 1;
6597 }
6598
6599 /* Some special instructions require that the destination register be
6600 distinct from the source registers. */
6601 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6602 {
6603 unsigned int dest_reg = i.operands - 1;
6604
6605 know (i.operands >= 3);
6606
6607 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
6608 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6609 || (i.reg_operands > 2
6610 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6611 {
6612 i.error = invalid_dest_and_src_register_set;
6613 return 1;
6614 }
6615 }
6616
6617 /* Check if broadcast is supported by the instruction and is applied
6618 to the memory operand. */
6619 if (i.broadcast.type || i.broadcast.bytes)
6620 {
6621 i386_operand_type type, overlap;
6622
6623 /* Check if specified broadcast is supported in this instruction,
6624 and its broadcast bytes match the memory operand. */
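/* E.g. (illustrative) "vaddpd (%rax){1to8}, %zmm1, %zmm0" uses a
   QWORD_BROADCAST memory operand: 8 * 8 bytes = one ZMMword.  */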
6625 op = i.broadcast.operand;
6626 if (!t->opcode_modifier.broadcast
6627 || !(i.flags[op] & Operand_Mem)
6628 || (!i.types[op].bitfield.unspecified
6629 && !match_broadcast_size (t, op)))
6630 {
6631 bad_broadcast:
6632 i.error = unsupported_broadcast;
6633 return 1;
6634 }
6635
6636 operand_type_set (&type, 0);
6637 switch (get_broadcast_bytes (t, false))
6638 {
6639 case 2:
6640 type.bitfield.word = 1;
6641 break;
6642 case 4:
6643 type.bitfield.dword = 1;
6644 break;
6645 case 8:
6646 type.bitfield.qword = 1;
6647 break;
6648 case 16:
6649 type.bitfield.xmmword = 1;
6650 break;
6651 case 32:
6652 if (vector_size < VSZ256)
6653 goto bad_broadcast;
6654 type.bitfield.ymmword = 1;
6655 break;
6656 case 64:
6657 if (vector_size < VSZ512)
6658 goto bad_broadcast;
6659 type.bitfield.zmmword = 1;
6660 break;
6661 default:
6662 goto bad_broadcast;
6663 }
6664
6665 overlap = operand_type_and (type, t->operand_types[op]);
6666 if (t->operand_types[op].bitfield.class == RegSIMD
6667 && t->operand_types[op].bitfield.byte
6668 + t->operand_types[op].bitfield.word
6669 + t->operand_types[op].bitfield.dword
6670 + t->operand_types[op].bitfield.qword > 1)
6671 {
6672 overlap.bitfield.xmmword = 0;
6673 overlap.bitfield.ymmword = 0;
6674 overlap.bitfield.zmmword = 0;
6675 }
6676 if (operand_type_all_zero (&overlap))
6677 goto bad_broadcast;
6678
6679 if (t->opcode_modifier.checkoperandsize)
6680 {
6681 unsigned int j;
6682
6683 type.bitfield.baseindex = 1;
6684 for (j = 0; j < i.operands; ++j)
6685 {
6686 if (j != op
6687 && !operand_type_register_match(i.types[j],
6688 t->operand_types[j],
6689 type,
6690 t->operand_types[op]))
6691 goto bad_broadcast;
6692 }
6693 }
6694 }
6695 /* If broadcast is supported by this instruction, we need to check that
6696 an operand of one-element size isn't specified without broadcast. */
6697 else if (t->opcode_modifier.broadcast && i.mem_operands)
6698 {
6699 /* Find memory operand. */
6700 for (op = 0; op < i.operands; op++)
6701 if (i.flags[op] & Operand_Mem)
6702 break;
6703 gas_assert (op < i.operands);
6704 /* Check size of the memory operand. */
6705 if (match_broadcast_size (t, op))
6706 {
6707 i.error = broadcast_needed;
6708 return 1;
6709 }
6710 }
6711 else
6712 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
6713
6714 /* Check if requested masking is supported. */
6715 if (i.mask.reg)
6716 {
6717 if (!t->opcode_modifier.masking)
6718 {
6719 i.error = unsupported_masking;
6720 return 1;
6721 }
6722
6723 /* Common rules for masking:
6724 - mask register destinations permit only zeroing-masking, without
6725 that actually being expressed by a {z} operand suffix or EVEX.z,
6726 - memory destinations allow only merging-masking,
6727 - scatter/gather insns (i.e. ones using vSIB) only allow merging-
6728 masking. */
6729 if (i.mask.zeroing
6730 && (t->operand_types[t->operands - 1].bitfield.class == RegMask
6731 || (i.flags[t->operands - 1] & Operand_Mem)
6732 || t->opcode_modifier.sib))
6733 {
6734 i.error = unsupported_masking;
6735 return 1;
6736 }
6737 }
6738
6739 /* Check if masking is applied to dest operand. */
6740 if (i.mask.reg && (i.mask.operand != i.operands - 1))
6741 {
6742 i.error = mask_not_on_destination;
6743 return 1;
6744 }
6745
6746 /* Check RC/SAE. */
6747 if (i.rounding.type != rc_none)
6748 {
6749 if (!t->opcode_modifier.sae
6750 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6751 || i.mem_operands)
6752 {
6753 i.error = unsupported_rc_sae;
6754 return 1;
6755 }
6756
6757 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6758 operand. */
6759 if (t->opcode_modifier.evex != EVEXLIG)
6760 {
6761 for (op = 0; op < t->operands; ++op)
6762 if (i.types[op].bitfield.zmmword)
6763 break;
6764 if (op >= t->operands)
6765 {
6766 i.error = operand_size_mismatch;
6767 return 1;
6768 }
6769 }
6770 }
6771
6772 /* Check the special Imm4 cases; must be the first operand. */
6773 if (is_cpu (t, CpuXOP) && t->operands == 5)
6774 {
6775 if (i.op[0].imms->X_op != O_constant
6776 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6777 {
6778 i.error = bad_imm4;
6779 return 1;
6780 }
6781
6782 /* Turn off Imm<N> so that update_imm won't complain. */
6783 operand_type_set (&i.types[0], 0);
6784 }
6785
6786 /* Check vector Disp8 operand. */
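/* With EVEX compressed displacement the raw disp8 is scaled by the
   memory operand size (1 << memshift bytes); e.g. (illustrative)
   "vmovaps 128(%rax), %zmm0" can encode disp8 = 2, as 128 = 2 * 64.  */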
6787 if (t->opcode_modifier.disp8memshift
6788 && i.disp_encoding <= disp_encoding_8bit)
6789 {
6790 if (i.broadcast.type || i.broadcast.bytes)
6791 i.memshift = t->opcode_modifier.broadcast - 1;
6792 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6793 i.memshift = t->opcode_modifier.disp8memshift;
6794 else
6795 {
6796 const i386_operand_type *type = NULL, *fallback = NULL;
6797
6798 i.memshift = 0;
6799 for (op = 0; op < i.operands; op++)
6800 if (i.flags[op] & Operand_Mem)
6801 {
6802 if (t->opcode_modifier.evex == EVEXLIG)
6803 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6804 else if (t->operand_types[op].bitfield.xmmword
6805 + t->operand_types[op].bitfield.ymmword
6806 + t->operand_types[op].bitfield.zmmword <= 1)
6807 type = &t->operand_types[op];
6808 else if (!i.types[op].bitfield.unspecified)
6809 type = &i.types[op];
6810 else /* Ambiguities get resolved elsewhere. */
6811 fallback = &t->operand_types[op];
6812 }
6813 else if (i.types[op].bitfield.class == RegSIMD
6814 && t->opcode_modifier.evex != EVEXLIG)
6815 {
6816 if (i.types[op].bitfield.zmmword)
6817 i.memshift = 6;
6818 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6819 i.memshift = 5;
6820 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6821 i.memshift = 4;
6822 }
6823
6824 if (!type && !i.memshift)
6825 type = fallback;
6826 if (type)
6827 {
6828 if (type->bitfield.zmmword)
6829 i.memshift = 6;
6830 else if (type->bitfield.ymmword)
6831 i.memshift = 5;
6832 else if (type->bitfield.xmmword)
6833 i.memshift = 4;
6834 }
6835
6836 /* For the check in fits_in_disp8(). */
6837 if (i.memshift == 0)
6838 i.memshift = -1;
6839 }
6840
6841 for (op = 0; op < i.operands; op++)
6842 if (operand_type_check (i.types[op], disp)
6843 && i.op[op].disps->X_op == O_constant)
6844 {
6845 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6846 {
6847 i.types[op].bitfield.disp8 = 1;
6848 return 0;
6849 }
6850 i.types[op].bitfield.disp8 = 0;
6851 }
6852 }
6853
6854 i.memshift = 0;
6855
6856 return 0;
6857 }
6858
6859 /* Check if encoding requirements are met by the instruction. */
6860
6861 static int
6862 VEX_check_encoding (const insn_template *t)
6863 {
6864 if (i.vec_encoding == vex_encoding_error)
6865 {
6866 i.error = unsupported;
6867 return 1;
6868 }
6869
6870 /* Vector size restrictions. */
6871 if ((vector_size < VSZ512
6872 && (t->opcode_modifier.evex == EVEX512
6873 || t->opcode_modifier.vsz >= VSZ512))
6874 || (vector_size < VSZ256
6875 && (t->opcode_modifier.evex == EVEX256
6876 || t->opcode_modifier.vex == VEX256
6877 || t->opcode_modifier.vsz >= VSZ256)))
6878 {
6879 i.error = unsupported;
6880 return 1;
6881 }
6882
6883 if (i.vec_encoding == vex_encoding_evex)
6884 {
6885 /* This instruction must be encoded with EVEX prefix. */
6886 if (!is_evex_encoding (t))
6887 {
6888 i.error = unsupported;
6889 return 1;
6890 }
6891 return 0;
6892 }
6893
6894 if (!t->opcode_modifier.vex)
6895 {
6896 /* This instruction template doesn't have VEX prefix. */
6897 if (i.vec_encoding != vex_encoding_default)
6898 {
6899 i.error = unsupported;
6900 return 1;
6901 }
6902 return 0;
6903 }
6904
6905 return 0;
6906 }
6907
6908 /* Helper function for the progress() macro in match_template(). */
6909 static INLINE enum i386_error progress (enum i386_error new,
6910 enum i386_error last,
6911 unsigned int line, unsigned int *line_p)
6912 {
6913 if (line <= *line_p)
6914 return last;
6915 *line_p = line;
6916 return new;
6917 }
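/* The progress() macro below shadows this helper so that every use
   site records its __LINE__; the error finally reported is thus the
   one from whichever template got furthest through the checks.  */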
6918
6919 static const insn_template *
6920 match_template (char mnem_suffix)
6921 {
6922 /* Points to template once we've found it. */
6923 const insn_template *t;
6924 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6925 i386_operand_type overlap4;
6926 unsigned int found_reverse_match;
6927 i386_operand_type operand_types [MAX_OPERANDS];
6928 int addr_prefix_disp;
6929 unsigned int j, size_match, check_register, errline = __LINE__;
6930 enum i386_error specific_error = number_of_operands_mismatch;
6931 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6932
6933 #if MAX_OPERANDS != 5
6934 # error "MAX_OPERANDS must be 5."
6935 #endif
6936
6937 found_reverse_match = 0;
6938 addr_prefix_disp = -1;
6939
6940 for (t = current_templates->start; t < current_templates->end; t++)
6941 {
6942 addr_prefix_disp = -1;
6943 found_reverse_match = 0;
6944
6945 /* Must have right number of operands. */
6946 if (i.operands != t->operands)
6947 continue;
6948
6949 /* Check processor support. */
6950 specific_error = progress (unsupported);
6951 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6952 continue;
6953
6954 /* Check AT&T mnemonic. */
6955 specific_error = progress (unsupported_with_intel_mnemonic);
6956 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6957 continue;
6958
6959 /* Check AT&T/Intel syntax. */
6960 specific_error = progress (unsupported_syntax);
6961 if ((intel_syntax && t->opcode_modifier.attsyntax)
6962 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6963 continue;
6964
6965 /* Check Intel64/AMD64 ISA. */
6966 switch (isa64)
6967 {
6968 default:
6969 /* Default: Don't accept Intel64. */
6970 if (t->opcode_modifier.isa64 == INTEL64)
6971 continue;
6972 break;
6973 case amd64:
6974 /* -mamd64: Don't accept Intel64 and Intel64 only. */
6975 if (t->opcode_modifier.isa64 >= INTEL64)
6976 continue;
6977 break;
6978 case intel64:
6979 /* -mintel64: Don't accept AMD64. */
6980 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
6981 continue;
6982 break;
6983 }
6984
6985 /* Check the suffix. */
6986 specific_error = progress (invalid_instruction_suffix);
6987 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
6988 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
6989 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
6990 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
6991 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
6992 continue;
6993
6994 specific_error = progress (operand_size_mismatch);
6995 size_match = operand_size_match (t);
6996 if (!size_match)
6997 continue;
6998
6999 /* This is intentionally not
7000
7001 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
7002
7003 as the case of a missing * on the operand is accepted (perhaps with
7004 a warning, issued further down). */
7005 specific_error = progress (operand_type_mismatch);
7006 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
7007 continue;
7008
7009 /* In Intel syntax, normally we can check for memory operand size when
7010 there is no mnemonic suffix. But jmp and call have 2 different
7011 encodings with Dword memory operand size. Skip the "near" one
7012 (permitting a register operand) when "far" was requested. */
7013 if (i.far_branch
7014 && t->opcode_modifier.jump == JUMP_ABSOLUTE
7015 && t->operand_types[0].bitfield.class == Reg)
7016 continue;
7017
7018 for (j = 0; j < MAX_OPERANDS; j++)
7019 operand_types[j] = t->operand_types[j];
7020
7021 /* In general, don't allow 32-bit operands on pre-386. */
7022 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
7023 : operand_size_mismatch);
7024 j = i.imm_operands + (t->operands > i.imm_operands + 1);
7025 if (i.suffix == LONG_MNEM_SUFFIX
7026 && !cpu_arch_flags.bitfield.cpui386
7027 && (intel_syntax
7028 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
7029 && !intel_float_operand (insn_name (t)))
7030 : intel_float_operand (insn_name (t)) != 2)
7031 && (t->operands == i.imm_operands
7032 || (operand_types[i.imm_operands].bitfield.class != RegMMX
7033 && operand_types[i.imm_operands].bitfield.class != RegSIMD
7034 && operand_types[i.imm_operands].bitfield.class != RegMask)
7035 || (operand_types[j].bitfield.class != RegMMX
7036 && operand_types[j].bitfield.class != RegSIMD
7037 && operand_types[j].bitfield.class != RegMask))
7038 && !t->opcode_modifier.sib)
7039 continue;
7040
7041 /* Do not verify operands when there are none. */
7042 if (!t->operands)
7043 {
7044 if (VEX_check_encoding (t))
7045 {
7046 specific_error = progress (i.error);
7047 continue;
7048 }
7049
7050 /* We've found a match; break out of loop. */
7051 break;
7052 }
7053
7054 if (!t->opcode_modifier.jump
7055 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
7056 {
7057 /* There should be only one Disp operand. */
7058 for (j = 0; j < MAX_OPERANDS; j++)
7059 if (operand_type_check (operand_types[j], disp))
7060 break;
7061 if (j < MAX_OPERANDS)
7062 {
7063 bool override = (i.prefix[ADDR_PREFIX] != 0);
7064
7065 addr_prefix_disp = j;
7066
7067 /* An address size prefix turns a Disp64 operand into Disp32, and a
7068 Disp32/Disp16 one into Disp16/Disp32, respectively. */
7069 switch (flag_code)
7070 {
7071 case CODE_16BIT:
7072 override = !override;
7073 /* Fall through. */
7074 case CODE_32BIT:
7075 if (operand_types[j].bitfield.disp32
7076 && operand_types[j].bitfield.disp16)
7077 {
7078 operand_types[j].bitfield.disp16 = override;
7079 operand_types[j].bitfield.disp32 = !override;
7080 }
7081 gas_assert (!operand_types[j].bitfield.disp64);
7082 break;
7083
7084 case CODE_64BIT:
7085 if (operand_types[j].bitfield.disp64)
7086 {
7087 gas_assert (!operand_types[j].bitfield.disp32);
7088 operand_types[j].bitfield.disp32 = override;
7089 operand_types[j].bitfield.disp64 = !override;
7090 }
7091 operand_types[j].bitfield.disp16 = 0;
7092 break;
7093 }
7094 }
7095 }
7096
7097 /* We check register size if needed. */
7098 if (t->opcode_modifier.checkoperandsize)
7099 {
7100 check_register = (1 << t->operands) - 1;
7101 if (i.broadcast.type || i.broadcast.bytes)
7102 check_register &= ~(1 << i.broadcast.operand);
7103 }
7104 else
7105 check_register = 0;
7106
7107 overlap0 = operand_type_and (i.types[0], operand_types[0]);
7108 switch (t->operands)
7109 {
7110 case 1:
7111 if (!operand_type_match (overlap0, i.types[0]))
7112 continue;
7113
7114 /* Allow the ModR/M encoding to be requested by using the {load} or
7115 {store} pseudo prefix on an applicable insn. */
7116 if (!t->opcode_modifier.modrm
7117 && i.reg_operands == 1
7118 && ((i.dir_encoding == dir_encoding_load
7119 && t->mnem_off != MN_pop)
7120 || (i.dir_encoding == dir_encoding_store
7121 && t->mnem_off != MN_push))
7122 /* Avoid BSWAP. */
7123 && t->mnem_off != MN_bswap)
7124 continue;
7125 break;
7126
7127 case 2:
7128 /* xchg %eax, %eax is a special case. It is an alias for nop
7129 only in 32bit mode and we can use opcode 0x90. In 64bit
7130 mode, we can't use 0x90 for xchg %eax, %eax since it should
7131 zero-extend %eax to %rax. */
7132 if (t->base_opcode == 0x90
7133 && t->opcode_space == SPACE_BASE)
7134 {
7135 if (flag_code == CODE_64BIT
7136 && i.types[0].bitfield.instance == Accum
7137 && i.types[0].bitfield.dword
7138 && i.types[1].bitfield.instance == Accum)
7139 continue;
7140
7141 /* Allow the ModR/M encoding to be requested by using the
7142 {load} or {store} pseudo prefix. */
7143 if (i.dir_encoding == dir_encoding_load
7144 || i.dir_encoding == dir_encoding_store)
7145 continue;
7146 }
7147
7148 if (t->base_opcode == MOV_AX_DISP32
7149 && t->opcode_space == SPACE_BASE
7150 && t->mnem_off != MN_movabs)
7151 {
7152 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
7153 if (i.reloc[0] == BFD_RELOC_386_GOT32)
7154 continue;
7155
7156 /* xrelease mov %eax, <disp> is another special case. It must not
7157 match the accumulator-only encoding of mov. */
7158 if (i.hle_prefix)
7159 continue;
7160
7161 /* Allow the ModR/M encoding to be requested by using a suitable
7162 {load} or {store} pseudo prefix. */
7163 if (i.dir_encoding == (i.types[0].bitfield.instance == Accum
7164 ? dir_encoding_store
7165 : dir_encoding_load)
7166 && !i.types[0].bitfield.disp64
7167 && !i.types[1].bitfield.disp64)
7168 continue;
7169 }
7170
7171 /* Allow the ModR/M encoding to be requested by using the {load} or
7172 {store} pseudo prefix on an applicable insn. */
7173 if (!t->opcode_modifier.modrm
7174 && i.reg_operands == 1
7175 && i.imm_operands == 1
7176 && (i.dir_encoding == dir_encoding_load
7177 || i.dir_encoding == dir_encoding_store)
7178 && t->opcode_space == SPACE_BASE)
7179 {
7180 if (t->base_opcode == 0xb0 /* mov $imm, %reg */
7181 && i.dir_encoding == dir_encoding_store)
7182 continue;
7183
7184 if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
7185 && (t->base_opcode != 0x3c /* cmp $imm, %acc */
7186 || i.dir_encoding == dir_encoding_load))
7187 continue;
7188
7189 if (t->base_opcode == 0xa8 /* test $imm, %acc */
7190 && i.dir_encoding == dir_encoding_load)
7191 continue;
7192 }
7193 /* Fall through. */
7194
7195 case 3:
7196 if (!(size_match & MATCH_STRAIGHT))
7197 goto check_reverse;
7198 /* Reverse direction of operands if swapping is possible in the first
7199 place (operands need to be symmetric) and
7200 - the load form is requested, and the template is a store form,
7201 - the store form is requested, and the template is a load form,
7202 - the non-default (swapped) form is requested. */
7203 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
7204 if (t->opcode_modifier.d && i.reg_operands == i.operands
7205 && !operand_type_all_zero (&overlap1))
7206 switch (i.dir_encoding)
7207 {
7208 case dir_encoding_load:
7209 if (operand_type_check (operand_types[i.operands - 1], anymem)
7210 || t->opcode_modifier.regmem)
7211 goto check_reverse;
7212 break;
7213
7214 case dir_encoding_store:
7215 if (!operand_type_check (operand_types[i.operands - 1], anymem)
7216 && !t->opcode_modifier.regmem)
7217 goto check_reverse;
7218 break;
7219
7220 case dir_encoding_swap:
7221 goto check_reverse;
7222
7223 case dir_encoding_default:
7224 break;
7225 }
7226 /* If we want store form, we skip the current load. */
7227 if ((i.dir_encoding == dir_encoding_store
7228 || i.dir_encoding == dir_encoding_swap)
7229 && i.mem_operands == 0
7230 && t->opcode_modifier.load)
7231 continue;
7232 /* Fall through. */
7233 case 4:
7234 case 5:
7235 overlap1 = operand_type_and (i.types[1], operand_types[1]);
7236 if (!operand_type_match (overlap0, i.types[0])
7237 || !operand_type_match (overlap1, i.types[1])
7238 || ((check_register & 3) == 3
7239 && !operand_type_register_match (i.types[0],
7240 operand_types[0],
7241 i.types[1],
7242 operand_types[1])))
7243 {
7244 specific_error = progress (i.error);
7245
7246 /* Check if other direction is valid ... */
7247 if (!t->opcode_modifier.d)
7248 continue;
7249
7250 check_reverse:
7251 if (!(size_match & MATCH_REVERSE))
7252 continue;
7253 /* Try reversing direction of operands. */
7254 j = is_cpu (t, CpuFMA4)
7255 || is_cpu (t, CpuXOP) ? 1 : i.operands - 1;
7256 overlap0 = operand_type_and (i.types[0], operand_types[j]);
7257 overlap1 = operand_type_and (i.types[j], operand_types[0]);
7258 overlap2 = operand_type_and (i.types[1], operand_types[1]);
7259 gas_assert (t->operands != 3 || !check_register);
7260 if (!operand_type_match (overlap0, i.types[0])
7261 || !operand_type_match (overlap1, i.types[j])
7262 || (t->operands == 3
7263 && !operand_type_match (overlap2, i.types[1]))
7264 || (check_register
7265 && !operand_type_register_match (i.types[0],
7266 operand_types[j],
7267 i.types[j],
7268 operand_types[0])))
7269 {
7270 /* Does not match either direction. */
7271 specific_error = progress (i.error);
7272 continue;
7273 }
7274 /* found_reverse_match holds which variant of D
7275 we've found. */
7276 if (!t->opcode_modifier.d)
7277 found_reverse_match = 0;
7278 else if (operand_types[0].bitfield.tbyte)
7279 {
7280 if (t->opcode_modifier.operandconstraint != UGH)
7281 found_reverse_match = Opcode_FloatD;
7282 else
7283 found_reverse_match = ~0;
7284 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
7285 if ((t->extension_opcode & 4)
7286 && (intel_syntax || intel_mnemonic))
7287 found_reverse_match |= Opcode_FloatR;
7288 }
7289 else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
7290 {
7291 found_reverse_match = Opcode_VexW;
7292 goto check_operands_345;
7293 }
7294 else if (t->opcode_space != SPACE_BASE
7295 && (t->opcode_space != SPACE_0F
7296 /* MOV to/from CR/DR/TR, as an exception, follow
7297 the base opcode space encoding model. */
7298 || (t->base_opcode | 7) != 0x27))
7299 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
7300 ? Opcode_ExtD : Opcode_SIMD_IntD;
7301 else if (!t->opcode_modifier.commutative)
7302 found_reverse_match = Opcode_D;
7303 else
7304 found_reverse_match = ~0;
7305 }
7306 else
7307 {
7308 /* Found a forward 2 operand match here. */
7309 check_operands_345:
7310 switch (t->operands)
7311 {
7312 case 5:
7313 overlap4 = operand_type_and (i.types[4], operand_types[4]);
7314 if (!operand_type_match (overlap4, i.types[4])
7315 || !operand_type_register_match (i.types[3],
7316 operand_types[3],
7317 i.types[4],
7318 operand_types[4]))
7319 {
7320 specific_error = progress (i.error);
7321 continue;
7322 }
7323 /* Fall through. */
7324 case 4:
7325 overlap3 = operand_type_and (i.types[3], operand_types[3]);
7326 if (!operand_type_match (overlap3, i.types[3])
7327 || ((check_register & 0xa) == 0xa
7328 && !operand_type_register_match (i.types[1],
7329 operand_types[1],
7330 i.types[3],
7331 operand_types[3]))
7332 || ((check_register & 0xc) == 0xc
7333 && !operand_type_register_match (i.types[2],
7334 operand_types[2],
7335 i.types[3],
7336 operand_types[3])))
7337 {
7338 specific_error = progress (i.error);
7339 continue;
7340 }
7341 /* Fall through. */
7342 case 3:
7343 overlap2 = operand_type_and (i.types[2], operand_types[2]);
7344 if (!operand_type_match (overlap2, i.types[2])
7345 || ((check_register & 5) == 5
7346 && !operand_type_register_match (i.types[0],
7347 operand_types[0],
7348 i.types[2],
7349 operand_types[2]))
7350 || ((check_register & 6) == 6
7351 && !operand_type_register_match (i.types[1],
7352 operand_types[1],
7353 i.types[2],
7354 operand_types[2])))
7355 {
7356 specific_error = progress (i.error);
7357 continue;
7358 }
7359 break;
7360 }
7361 }
7362 /* Found either forward/reverse 2, 3 or 4 operand match here:
7363 slip through to break. */
7364 }
7365
7366 /* Check if VEX/EVEX encoding requirements can be satisfied. */
7367 if (VEX_check_encoding (t))
7368 {
7369 specific_error = progress (i.error);
7370 continue;
7371 }
7372
7373 /* Check if vector operands are valid. */
7374 if (check_VecOperands (t))
7375 {
7376 specific_error = progress (i.error);
7377 continue;
7378 }
7379
7380 /* We've found a match; break out of loop. */
7381 break;
7382 }
7383
7384 #undef progress
7385
7386 if (t == current_templates->end)
7387 {
7388 /* We found no match. */
7389 i.error = specific_error;
7390 return NULL;
7391 }
7392
7393 if (!quiet_warnings)
7394 {
7395 if (!intel_syntax
7396 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7397 as_warn (_("indirect %s without `*'"), insn_name (t));
7398
7399 if (t->opcode_modifier.isprefix
7400 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7401 {
7402 /* Warn them that a data or address size prefix doesn't
7403 affect assembly of the next line of code. */
7404 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
7405 }
7406 }
7407
7408 /* Copy the template we found. */
7409 install_template (t);
7410
7411 if (addr_prefix_disp != -1)
7412 i.tm.operand_types[addr_prefix_disp]
7413 = operand_types[addr_prefix_disp];
7414
7415 switch (found_reverse_match)
7416 {
7417 case 0:
7418 break;
7419
7420 case Opcode_FloatR:
7421 case Opcode_FloatR | Opcode_FloatD:
7422 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
7423 found_reverse_match &= Opcode_FloatD;
7424
7425 /* Fall through. */
7426 default:
7427 /* If we found a reverse match we must alter the opcode direction
7428 bit and clear/flip the regmem modifier one. found_reverse_match
7429 holds bits to change (different for int & float insns). */
7430
7431 i.tm.base_opcode ^= found_reverse_match;
7432
7433 /* Certain SIMD insns have their load forms specified in the opcode
7434 table, and hence we need to _set_ RegMem instead of clearing it.
7435 We need to avoid setting the bit though on insns like KMOVW. */
7436 i.tm.opcode_modifier.regmem
7437 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7438 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7439 && !i.tm.opcode_modifier.regmem;
7440
7441 /* Fall through. */
7442 case ~0:
7443 i.tm.operand_types[0] = operand_types[i.operands - 1];
7444 i.tm.operand_types[i.operands - 1] = operand_types[0];
7445 break;
7446
7447 case Opcode_VexW:
7448 /* Only the first two register operands need reversing, alongside
7449 flipping VEX.W. */
7450 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7451
7452 j = i.tm.operand_types[0].bitfield.imm8;
7453 i.tm.operand_types[j] = operand_types[j + 1];
7454 i.tm.operand_types[j + 1] = operand_types[j];
7455 break;
7456 }
7457
7458 return t;
7459 }
7460
7461 static int
7462 check_string (void)
7463 {
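/* String insns hard-code %es on one operand; e.g. (illustrative) in
   "movs %ds:(%esi), %es:(%edi)" only the source segment may be
   overridden, while the destination must use %es.  */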
7464 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7465 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7466
7467 if (i.seg[op] != NULL && i.seg[op] != reg_es)
7468 {
7469 as_bad (_("`%s' operand %u must use `%ses' segment"),
7470 insn_name (&i.tm),
7471 intel_syntax ? i.tm.operands - es_op : es_op + 1,
7472 register_prefix);
7473 return 0;
7474 }
7475
7476 /* There's only ever one segment override allowed per instruction.
7477 This instruction possibly has a legal segment override on the
7478 second operand, so copy the segment to where non-string
7479 instructions store it, allowing common code. */
7480 i.seg[op] = i.seg[1];
7481
7482 return 1;
7483 }
7484
7485 static int
7486 process_suffix (void)
7487 {
7488 bool is_movx = false;
7489
7490 /* If matched instruction specifies an explicit instruction mnemonic
7491 suffix, use it. */
7492 if (i.tm.opcode_modifier.size == SIZE16)
7493 i.suffix = WORD_MNEM_SUFFIX;
7494 else if (i.tm.opcode_modifier.size == SIZE32)
7495 i.suffix = LONG_MNEM_SUFFIX;
7496 else if (i.tm.opcode_modifier.size == SIZE64)
7497 i.suffix = QWORD_MNEM_SUFFIX;
7498 else if (i.reg_operands
7499 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7500 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7501 {
7502 unsigned int numop = i.operands;
7503
7504 /* MOVSX/MOVZX */
7505 is_movx = (i.tm.opcode_space == SPACE_0F
7506 && (i.tm.base_opcode | 8) == 0xbe)
7507 || (i.tm.opcode_space == SPACE_BASE
7508 && i.tm.base_opcode == 0x63
7509 && is_cpu (&i.tm, Cpu64));
7510
7511 /* movsx/movzx want only their source operand considered here, for the
7512 ambiguity checking below. The suffix will be replaced afterwards
7513 to represent the destination (register). */
7514 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7515 --i.operands;
7516
7517 /* crc32 needs REX.W set regardless of suffix / source operand size. */
7518 if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
7519 i.rex |= REX_W;
7520
7521 /* If there's no instruction mnemonic suffix we try to invent one
7522 based on GPR operands. */
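/* E.g. (illustrative) "mov %eax, (%rbx)" carries no suffix; the
   %eax operand below yields an effective 'l' suffix.  */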
7523 if (!i.suffix)
7524 {
7525 /* We take i.suffix from the last register operand specified.
7526 The destination register type is more significant than the source
7527 register type. crc32 in SSE4.2 prefers source register
7528 type. */
7529 unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
7530
7531 while (op--)
7532 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7533 || i.tm.operand_types[op].bitfield.instance == Accum)
7534 {
7535 if (i.types[op].bitfield.class != Reg)
7536 continue;
7537 if (i.types[op].bitfield.byte)
7538 i.suffix = BYTE_MNEM_SUFFIX;
7539 else if (i.types[op].bitfield.word)
7540 i.suffix = WORD_MNEM_SUFFIX;
7541 else if (i.types[op].bitfield.dword)
7542 i.suffix = LONG_MNEM_SUFFIX;
7543 else if (i.types[op].bitfield.qword)
7544 i.suffix = QWORD_MNEM_SUFFIX;
7545 else
7546 continue;
7547 break;
7548 }
7549
7550 /* As an exception, movsx/movzx silently default to a byte source
7551 in AT&T mode. */
7552 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7553 i.suffix = BYTE_MNEM_SUFFIX;
7554 }
7555 else if (i.suffix == BYTE_MNEM_SUFFIX)
7556 {
7557 if (!check_byte_reg ())
7558 return 0;
7559 }
7560 else if (i.suffix == LONG_MNEM_SUFFIX)
7561 {
7562 if (!check_long_reg ())
7563 return 0;
7564 }
7565 else if (i.suffix == QWORD_MNEM_SUFFIX)
7566 {
7567 if (!check_qword_reg ())
7568 return 0;
7569 }
7570 else if (i.suffix == WORD_MNEM_SUFFIX)
7571 {
7572 if (!check_word_reg ())
7573 return 0;
7574 }
7575 else if (intel_syntax
7576 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7577 /* Do nothing if the instruction is going to ignore the prefix. */
7578 ;
7579 else
7580 abort ();
7581
7582 /* Undo the movsx/movzx change done above. */
7583 i.operands = numop;
7584 }
7585 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7586 && !i.suffix)
7587 {
7588 i.suffix = stackop_size;
7589 if (stackop_size == LONG_MNEM_SUFFIX)
7590 {
7591 /* stackop_size is set to LONG_MNEM_SUFFIX for the
7592 .code16gcc directive to support 16-bit mode with
7593 32-bit address. For IRET without a suffix, generate
7594 16-bit IRET (opcode 0xcf) to return from an interrupt
7595 handler. */
7596 if (i.tm.base_opcode == 0xcf)
7597 {
7598 i.suffix = WORD_MNEM_SUFFIX;
7599 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7600 }
7601 /* Warn about changed behavior for segment register push/pop. */
7602 else if ((i.tm.base_opcode | 1) == 0x07)
7603 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7604 insn_name (&i.tm));
7605 }
7606 }
7607 else if (!i.suffix
7608 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7609 || i.tm.opcode_modifier.jump == JUMP_BYTE
7610 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7611 || (i.tm.opcode_space == SPACE_0F
7612 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7613 && i.tm.extension_opcode <= 3)))
7614 {
7615 switch (flag_code)
7616 {
7617 case CODE_64BIT:
7618 if (!i.tm.opcode_modifier.no_qsuf)
7619 {
7620 if (i.tm.opcode_modifier.jump == JUMP_BYTE
7621 || i.tm.opcode_modifier.no_lsuf)
7622 i.suffix = QWORD_MNEM_SUFFIX;
7623 break;
7624 }
7625 /* Fall through. */
7626 case CODE_32BIT:
7627 if (!i.tm.opcode_modifier.no_lsuf)
7628 i.suffix = LONG_MNEM_SUFFIX;
7629 break;
7630 case CODE_16BIT:
7631 if (!i.tm.opcode_modifier.no_wsuf)
7632 i.suffix = WORD_MNEM_SUFFIX;
7633 break;
7634 }
7635 }
7636
7637 if (!i.suffix
7638 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7639 /* Also cover lret/retf/iret in 64-bit mode. */
7640 || (flag_code == CODE_64BIT
7641 && !i.tm.opcode_modifier.no_lsuf
7642 && !i.tm.opcode_modifier.no_qsuf))
7643 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7644 /* Explicit sizing prefixes are assumed to disambiguate insns. */
7645 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7646 /* Accept FLDENV et al without suffix. */
7647 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7648 {
7649 unsigned int suffixes, evex = 0;
7650
7651 suffixes = !i.tm.opcode_modifier.no_bsuf;
7652 if (!i.tm.opcode_modifier.no_wsuf)
7653 suffixes |= 1 << 1;
7654 if (!i.tm.opcode_modifier.no_lsuf)
7655 suffixes |= 1 << 2;
7656 if (!i.tm.opcode_modifier.no_ssuf)
7657 suffixes |= 1 << 4;
7658 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7659 suffixes |= 1 << 5;
7660
7661 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
7662 also suitable for AT&T syntax mode, it was requested that this be
7663 restricted to just Intel syntax. */
7664 if (intel_syntax && is_any_vex_encoding (&i.tm)
7665 && !i.broadcast.type && !i.broadcast.bytes)
7666 {
7667 unsigned int op;
7668
7669 for (op = 0; op < i.tm.operands; ++op)
7670 {
7671 if (vector_size < VSZ512)
7672 {
7673 i.tm.operand_types[op].bitfield.zmmword = 0;
7674 if (vector_size < VSZ256)
7675 {
7676 i.tm.operand_types[op].bitfield.ymmword = 0;
7677 if (i.tm.operand_types[op].bitfield.xmmword
7678 && (i.tm.opcode_modifier.evex == EVEXDYN
7679 || (!i.tm.opcode_modifier.evex
7680 && is_evex_encoding (&i.tm))))
7681 i.tm.opcode_modifier.evex = EVEX128;
7682 }
7683 else if (i.tm.operand_types[op].bitfield.ymmword
7684 && !i.tm.operand_types[op].bitfield.xmmword
7685 && (i.tm.opcode_modifier.evex == EVEXDYN
7686 || (!i.tm.opcode_modifier.evex
7687 && is_evex_encoding (&i.tm))))
7688 i.tm.opcode_modifier.evex = EVEX256;
7689 }
7690 else if (is_evex_encoding (&i.tm)
7691 && !cpu_arch_flags.bitfield.cpuavx512vl)
7692 {
7693 if (i.tm.operand_types[op].bitfield.ymmword)
7694 i.tm.operand_types[op].bitfield.xmmword = 0;
7695 if (i.tm.operand_types[op].bitfield.zmmword)
7696 i.tm.operand_types[op].bitfield.ymmword = 0;
7697 if (!i.tm.opcode_modifier.evex
7698 || i.tm.opcode_modifier.evex == EVEXDYN)
7699 i.tm.opcode_modifier.evex = EVEX512;
7700 }
7701
7702 if (i.tm.operand_types[op].bitfield.xmmword
7703 + i.tm.operand_types[op].bitfield.ymmword
7704 + i.tm.operand_types[op].bitfield.zmmword < 2)
7705 continue;
7706
7707 /* Any properly sized operand disambiguates the insn. */
7708 if (i.types[op].bitfield.xmmword
7709 || i.types[op].bitfield.ymmword
7710 || i.types[op].bitfield.zmmword)
7711 {
7712 suffixes &= ~(7 << 6);
7713 evex = 0;
7714 break;
7715 }
7716
7717 if ((i.flags[op] & Operand_Mem)
7718 && i.tm.operand_types[op].bitfield.unspecified)
7719 {
7720 if (i.tm.operand_types[op].bitfield.xmmword)
7721 suffixes |= 1 << 6;
7722 if (i.tm.operand_types[op].bitfield.ymmword)
7723 suffixes |= 1 << 7;
7724 if (i.tm.operand_types[op].bitfield.zmmword)
7725 suffixes |= 1 << 8;
7726 if (is_evex_encoding (&i.tm))
7727 evex = EVEX512;
7728 }
7729 }
7730 }
7731
7732 /* Are multiple suffixes / operand sizes allowed? */
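/* suffixes & (suffixes - 1) is nonzero iff more than one suffix bit
   is set, i.e. iff the operand size is ambiguous; e.g. (illustrative)
   plain AT&T "inc (%rax)" could be b/w/l/q and is diagnosed here,
   with a default chosen when only a warning is requested.  */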
7733 if (suffixes & (suffixes - 1))
7734 {
7735 if (intel_syntax
7736 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7737 || operand_check == check_error))
7738 {
7739 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
7740 return 0;
7741 }
7742 if (operand_check == check_error)
7743 {
7744 as_bad (_("no instruction mnemonic suffix given and "
7745 "no register operands; can't size `%s'"), insn_name (&i.tm));
7746 return 0;
7747 }
7748 if (operand_check == check_warning)
7749 as_warn (_("%s; using default for `%s'"),
7750 intel_syntax
7751 ? _("ambiguous operand size")
7752 : _("no instruction mnemonic suffix given and "
7753 "no register operands"),
7754 insn_name (&i.tm));
7755
7756 if (i.tm.opcode_modifier.floatmf)
7757 i.suffix = SHORT_MNEM_SUFFIX;
7758 else if (is_movx)
7759 /* handled below */;
7760 else if (evex)
7761 i.tm.opcode_modifier.evex = evex;
7762 else if (flag_code == CODE_16BIT)
7763 i.suffix = WORD_MNEM_SUFFIX;
7764 else if (!i.tm.opcode_modifier.no_lsuf)
7765 i.suffix = LONG_MNEM_SUFFIX;
7766 else
7767 i.suffix = QWORD_MNEM_SUFFIX;
7768 }
7769 }
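/* An illustrative example of the defaulting above: in 64-bit AT&T mode,
   "add $1, (%rax)" carries no suffix and no sizing register operand, so
   by default it is warned about and assembled as "addl", whereas in
   Intel mode "add [rax], 1" is rejected for having an ambiguous operand
   size.  */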
7770
7771 if (is_movx)
7772 {
7773 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7774 In AT&T syntax, if there is no suffix (warned about above), the default
7775 will be byte extension. */
7776 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7777 i.tm.base_opcode |= 1;
7778
7779 /* For further processing, the suffix should represent the destination
7780 (register). This is already the case when one was used with
7781 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7782 no suffix to begin with. */
7783 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7784 {
7785 if (i.types[1].bitfield.word)
7786 i.suffix = WORD_MNEM_SUFFIX;
7787 else if (i.types[1].bitfield.qword)
7788 i.suffix = QWORD_MNEM_SUFFIX;
7789 else
7790 i.suffix = LONG_MNEM_SUFFIX;
7791
7792 i.tm.opcode_modifier.w = 0;
7793 }
7794 }
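/* For illustration: AT&T "movzx %al, %ecx" arrives here without a
   suffix, so one is derived from the destination register above,
   yielding the same encoding as "movzbl %al, %ecx".  */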
7795
7796 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7797 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7798 != (i.tm.operand_types[1].bitfield.class == Reg);
7799
7800 /* Change the opcode based on the operand size given by i.suffix. */
7801 switch (i.suffix)
7802 {
7803 /* Size floating point instruction. */
7804 case LONG_MNEM_SUFFIX:
7805 if (i.tm.opcode_modifier.floatmf)
7806 {
7807 i.tm.base_opcode ^= 4;
7808 break;
7809 }
7810 /* fall through */
7811 case WORD_MNEM_SUFFIX:
7812 case QWORD_MNEM_SUFFIX:
7813 /* It's not a byte, select word/dword operation. */
7814 if (i.tm.opcode_modifier.w)
7815 {
7816 if (i.short_form)
7817 i.tm.base_opcode |= 8;
7818 else
7819 i.tm.base_opcode |= 1;
7820 }
7821 /* fall through */
7822 case SHORT_MNEM_SUFFIX:
7823 /* Now select between word & dword operations via the operand
7824 size prefix, except for instructions that will ignore this
7825 prefix anyway. */
7826 if (i.suffix != QWORD_MNEM_SUFFIX
7827 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7828 && !i.tm.opcode_modifier.floatmf
7829 && !is_any_vex_encoding (&i.tm)
7830 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7831 || (flag_code == CODE_64BIT
7832 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7833 {
7834 unsigned int prefix = DATA_PREFIX_OPCODE;
7835
7836 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7837 prefix = ADDR_PREFIX_OPCODE;
7838
7839 if (!add_prefix (prefix))
7840 return 0;
7841 }
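/* E.g. "addw $1, (%eax)" in 32-bit mode receives a 0x66 operand size
   prefix here, while "jcxz" in 32-bit mode (a JUMP_BYTE insn) instead
   gets a 0x67 address size prefix, since %cx vs %ecx is selected by
   the address size.  */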
7842
7843 /* Set mode64 for an operand. */
7844 if (i.suffix == QWORD_MNEM_SUFFIX
7845 && flag_code == CODE_64BIT
7846 && !i.tm.opcode_modifier.norex64
7847 && !i.tm.opcode_modifier.vexw
7848 /* Special case for xchg %rax,%rax: it is a NOP and doesn't
7849 need rex64. */
7850 && ! (i.operands == 2
7851 && i.tm.base_opcode == 0x90
7852 && i.tm.opcode_space == SPACE_BASE
7853 && i.types[0].bitfield.instance == Accum
7854 && i.types[0].bitfield.qword
7855 && i.types[1].bitfield.instance == Accum))
7856 i.rex |= REX_W;
7857
7858 break;
7859
7860 case 0:
7861 /* Select word/dword/qword operation with explicit data sizing prefix
7862 when there are no suitable register operands. */
7863 if (i.tm.opcode_modifier.w
7864 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7865 && (!i.reg_operands
7866 || (i.reg_operands == 1
7867 /* ShiftCount */
7868 && (i.tm.operand_types[0].bitfield.instance == RegC
7869 /* InOutPortReg */
7870 || i.tm.operand_types[0].bitfield.instance == RegD
7871 || i.tm.operand_types[1].bitfield.instance == RegD
7872 || i.tm.mnem_off == MN_crc32))))
7873 i.tm.base_opcode |= 1;
7874 break;
7875 }
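/* E.g. a suffix-less "data16 neg (%rax)" in 64-bit mode, or one carrying
   an explicit REX.W prefix, takes the "case 0" path above: the sizing
   prefix already disambiguates the insn, so merely the W bit of the base
   opcode is set to select the non-byte form.  */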
7876
7877 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7878 {
7879 gas_assert (!i.suffix);
7880 gas_assert (i.reg_operands);
7881
7882 if (i.tm.operand_types[0].bitfield.instance == Accum
7883 || i.operands == 1)
7884 {
7885 /* The address size override prefix changes the size of the
7886 first operand. */
7887 if (flag_code == CODE_64BIT
7888 && i.op[0].regs->reg_type.bitfield.word)
7889 {
7890 as_bad (_("16-bit addressing unavailable for `%s'"),
7891 insn_name (&i.tm));
7892 return 0;
7893 }
7894
7895 if ((flag_code == CODE_32BIT
7896 ? i.op[0].regs->reg_type.bitfield.word
7897 : i.op[0].regs->reg_type.bitfield.dword)
7898 && !add_prefix (ADDR_PREFIX_OPCODE))
7899 return 0;
7900 }
7901 else
7902 {
7903 /* Check for an invalid register operand when the address size
7904 override prefix changes the size of register operands. */
7905 unsigned int op;
7906 enum { need_word, need_dword, need_qword } need;
7907
7908 /* Check the register operand for the address size prefix if
7909 the memory operand has no real registers: a bare symbol, a plain
7910 displacement, or the bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
7911 if (i.mem_operands == 1
7912 && i.reg_operands == 1
7913 && i.operands == 2
7914 && i.types[1].bitfield.class == Reg
7915 && (flag_code == CODE_32BIT
7916 ? i.op[1].regs->reg_type.bitfield.word
7917 : i.op[1].regs->reg_type.bitfield.dword)
7918 && ((i.base_reg == NULL && i.index_reg == NULL)
7919 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7920 || (x86_elf_abi == X86_64_X32_ABI
7921 && i.base_reg
7922 && i.base_reg->reg_num == RegIP
7923 && i.base_reg->reg_type.bitfield.qword))
7924 #else
7925 || 0)
7926 #endif
7927 && !add_prefix (ADDR_PREFIX_OPCODE))
7928 return 0;
7929
7930 if (flag_code == CODE_32BIT)
7931 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7932 else if (i.prefix[ADDR_PREFIX])
7933 need = need_dword;
7934 else
7935 need = flag_code == CODE_64BIT ? need_qword : need_word;
7936
7937 for (op = 0; op < i.operands; op++)
7938 {
7939 if (i.types[op].bitfield.class != Reg)
7940 continue;
7941
7942 switch (need)
7943 {
7944 case need_word:
7945 if (i.op[op].regs->reg_type.bitfield.word)
7946 continue;
7947 break;
7948 case need_dword:
7949 if (i.op[op].regs->reg_type.bitfield.dword)
7950 continue;
7951 break;
7952 case need_qword:
7953 if (i.op[op].regs->reg_type.bitfield.qword)
7954 continue;
7955 break;
7956 }
7957
7958 as_bad (_("invalid register operand size for `%s'"),
7959 insn_name (&i.tm));
7960 return 0;
7961 }
7962 }
7963 }
7964
7965 return 1;
7966 }
7967
7968 static int
7969 check_byte_reg (void)
7970 {
7971 int op;
7972
7973 for (op = i.operands; --op >= 0;)
7974 {
7975 /* Skip non-register operands. */
7976 if (i.types[op].bitfield.class != Reg)
7977 continue;
7978
7979 /* If this is an eight bit register, it's OK. If it's the 16 or
7980 32 bit version of an eight bit register, we will just use the
7981 low portion, and that's OK too. */
7982 if (i.types[op].bitfield.byte)
7983 continue;
7984
7985 /* I/O port address operands are OK too. */
7986 if (i.tm.operand_types[op].bitfield.instance == RegD
7987 && i.tm.operand_types[op].bitfield.word)
7988 continue;
7989
7990 /* crc32 only wants its source operand checked here. */
7991 if (i.tm.mnem_off == MN_crc32 && op != 0)
7992 continue;
7993
7994 /* Any other register is bad. */
7995 as_bad (_("`%s%s' not allowed with `%s%c'"),
7996 register_prefix, i.op[op].regs->reg_name,
7997 insn_name (&i.tm), i.suffix);
7998 return 0;
7999 }
8000 return 1;
8001 }
8002
8003 static int
8004 check_long_reg (void)
8005 {
8006 int op;
8007
8008 for (op = i.operands; --op >= 0;)
8009 /* Skip non-register operands. */
8010 if (i.types[op].bitfield.class != Reg)
8011 continue;
8012 /* Reject eight bit registers, except where the template requires
8013 them (e.g. movzb). */
8014 else if (i.types[op].bitfield.byte
8015 && (i.tm.operand_types[op].bitfield.class == Reg
8016 || i.tm.operand_types[op].bitfield.instance == Accum)
8017 && (i.tm.operand_types[op].bitfield.word
8018 || i.tm.operand_types[op].bitfield.dword))
8019 {
8020 as_bad (_("`%s%s' not allowed with `%s%c'"),
8021 register_prefix,
8022 i.op[op].regs->reg_name,
8023 insn_name (&i.tm),
8024 i.suffix);
8025 return 0;
8026 }
8027 /* Error if the e prefix on a general reg is missing. */
8028 else if (i.types[op].bitfield.word
8029 && (i.tm.operand_types[op].bitfield.class == Reg
8030 || i.tm.operand_types[op].bitfield.instance == Accum)
8031 && i.tm.operand_types[op].bitfield.dword)
8032 {
8033 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
8034 register_prefix, i.op[op].regs->reg_name,
8035 i.suffix);
8036 return 0;
8037 }
8038 /* Error if the r prefix on a general reg is present. */
8039 else if (i.types[op].bitfield.qword
8040 && (i.tm.operand_types[op].bitfield.class == Reg
8041 || i.tm.operand_types[op].bitfield.instance == Accum)
8042 && i.tm.operand_types[op].bitfield.dword)
8043 {
8044 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
8045 register_prefix, i.op[op].regs->reg_name, i.suffix);
8046 return 0;
8047 }
8048 return 1;
8049 }
8050
8051 static int
8052 check_qword_reg (void)
8053 {
8054 int op;
8055
8056 for (op = i.operands; --op >= 0; )
8057 /* Skip non-register operands. */
8058 if (i.types[op].bitfield.class != Reg)
8059 continue;
8060 /* Reject eight bit registers, except where the template requires
8061 them (e.g. movzb). */
8062 else if (i.types[op].bitfield.byte
8063 && (i.tm.operand_types[op].bitfield.class == Reg
8064 || i.tm.operand_types[op].bitfield.instance == Accum)
8065 && (i.tm.operand_types[op].bitfield.word
8066 || i.tm.operand_types[op].bitfield.dword))
8067 {
8068 as_bad (_("`%s%s' not allowed with `%s%c'"),
8069 register_prefix,
8070 i.op[op].regs->reg_name,
8071 insn_name (&i.tm),
8072 i.suffix);
8073 return 0;
8074 }
8075 /* Error if the r prefix on a general reg is missing. */
8076 else if ((i.types[op].bitfield.word
8077 || i.types[op].bitfield.dword)
8078 && (i.tm.operand_types[op].bitfield.class == Reg
8079 || i.tm.operand_types[op].bitfield.instance == Accum)
8080 && i.tm.operand_types[op].bitfield.qword)
8081 {
8082 /* Prohibit these changes in 64-bit mode, since the
8083 lowering would be more complicated. */
8084 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
8085 register_prefix, i.op[op].regs->reg_name, i.suffix);
8086 return 0;
8087 }
8088 return 1;
8089 }
8090
8091 static int
8092 check_word_reg (void)
8093 {
8094 int op;
8095 for (op = i.operands; --op >= 0;)
8096 /* Skip non-register operands. */
8097 if (i.types[op].bitfield.class != Reg)
8098 continue;
8099 /* Reject eight bit registers, except where the template requires
8100 them (e.g. movzb). */
8101 else if (i.types[op].bitfield.byte
8102 && (i.tm.operand_types[op].bitfield.class == Reg
8103 || i.tm.operand_types[op].bitfield.instance == Accum)
8104 && (i.tm.operand_types[op].bitfield.word
8105 || i.tm.operand_types[op].bitfield.dword))
8106 {
8107 as_bad (_("`%s%s' not allowed with `%s%c'"),
8108 register_prefix,
8109 i.op[op].regs->reg_name,
8110 insn_name (&i.tm),
8111 i.suffix);
8112 return 0;
8113 }
8114 /* Error if the e or r prefix on a general reg is present. */
8115 else if ((i.types[op].bitfield.dword
8116 || i.types[op].bitfield.qword)
8117 && (i.tm.operand_types[op].bitfield.class == Reg
8118 || i.tm.operand_types[op].bitfield.instance == Accum)
8119 && i.tm.operand_types[op].bitfield.word)
8120 {
8121 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
8122 register_prefix, i.op[op].regs->reg_name,
8123 i.suffix);
8124 return 0;
8125 }
8126 return 1;
8127 }
8128
8129 static int
8130 update_imm (unsigned int j)
8131 {
8132 i386_operand_type overlap = i.types[j];
8133
8134 if (i.tm.operand_types[j].bitfield.imm8
8135 && i.tm.operand_types[j].bitfield.imm8s
8136 && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
8137 {
8138 /* This combination is used on 8-bit immediates where e.g. $~0 is
8139 desirable to permit. We're past operand type matching, so simply
8140 put things back in the shape they were before introducing the
8141 distinction between Imm8, Imm8S, and Imm8|Imm8S. */
8142 overlap.bitfield.imm8s = 0;
8143 }
8144
8145 if (overlap.bitfield.imm8
8146 + overlap.bitfield.imm8s
8147 + overlap.bitfield.imm16
8148 + overlap.bitfield.imm32
8149 + overlap.bitfield.imm32s
8150 + overlap.bitfield.imm64 > 1)
8151 {
8152 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
8153 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
8154 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
8155 static const i386_operand_type imm16_32 = { .bitfield =
8156 { .imm16 = 1, .imm32 = 1 }
8157 };
8158 static const i386_operand_type imm16_32s = { .bitfield =
8159 { .imm16 = 1, .imm32s = 1 }
8160 };
8161 static const i386_operand_type imm16_32_32s = { .bitfield =
8162 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
8163 };
8164
8165 if (i.suffix)
8166 {
8167 i386_operand_type temp;
8168
8169 operand_type_set (&temp, 0);
8170 if (i.suffix == BYTE_MNEM_SUFFIX)
8171 {
8172 temp.bitfield.imm8 = overlap.bitfield.imm8;
8173 temp.bitfield.imm8s = overlap.bitfield.imm8s;
8174 }
8175 else if (i.suffix == WORD_MNEM_SUFFIX)
8176 temp.bitfield.imm16 = overlap.bitfield.imm16;
8177 else if (i.suffix == QWORD_MNEM_SUFFIX)
8178 {
8179 temp.bitfield.imm64 = overlap.bitfield.imm64;
8180 temp.bitfield.imm32s = overlap.bitfield.imm32s;
8181 }
8182 else
8183 temp.bitfield.imm32 = overlap.bitfield.imm32;
8184 overlap = temp;
8185 }
8186 else if (operand_type_equal (&overlap, &imm16_32_32s)
8187 || operand_type_equal (&overlap, &imm16_32)
8188 || operand_type_equal (&overlap, &imm16_32s))
8189 {
8190 if ((flag_code == CODE_16BIT)
8191 ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
8192 overlap = imm16;
8193 else
8194 overlap = imm32s;
8195 }
8196 else if (i.prefix[REX_PREFIX] & REX_W)
8197 overlap = operand_type_and (overlap, imm32s);
8198 else if (i.prefix[DATA_PREFIX])
8199 overlap = operand_type_and (overlap,
8200 flag_code != CODE_16BIT ? imm16 : imm32);
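/* E.g. for a suffix-less "data16 rex.w push $imm" in 64-bit mode, REX.W
   takes precedence over the 0x66 prefix per the above, so imm32s is
   chosen rather than imm16.  */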
8201 if (overlap.bitfield.imm8
8202 + overlap.bitfield.imm8s
8203 + overlap.bitfield.imm16
8204 + overlap.bitfield.imm32
8205 + overlap.bitfield.imm32s
8206 + overlap.bitfield.imm64 != 1)
8207 {
8208 as_bad (_("no instruction mnemonic suffix given; "
8209 "can't determine immediate size"));
8210 return 0;
8211 }
8212 }
8213 i.types[j] = overlap;
8214
8215 return 1;
8216 }
8217
8218 static int
8219 finalize_imm (void)
8220 {
8221 unsigned int j, n;
8222
8223 /* Update the first 2 immediate operands. */
8224 n = i.operands > 2 ? 2 : i.operands;
8225 if (n)
8226 {
8227 for (j = 0; j < n; j++)
8228 if (update_imm (j) == 0)
8229 return 0;
8230
8231 /* The 3rd operand can't be an immediate operand. */
8232 gas_assert (operand_type_check (i.types[2], imm) == 0);
8233 }
8234
8235 return 1;
8236 }
8237
8238 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8239 bool do_sse2avx)
8240 {
8241 if (r->reg_flags & RegRex)
8242 {
8243 if (i.rex & rex_bit)
8244 as_bad (_("same type of prefix used twice"));
8245 i.rex |= rex_bit;
8246 }
8247 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8248 {
8249 gas_assert (i.vex.register_specifier == r);
8250 i.vex.register_specifier += 8;
8251 }
8252
8253 if (r->reg_flags & RegVRex)
8254 i.vrex |= rex_bit;
8255 }
8256
8257 static int
8258 process_operands (void)
8259 {
8260 /* Default segment register this instruction will use for memory
8261 accesses. 0 means unknown. This is only for optimizing out
8262 unnecessary segment overrides. */
8263 const reg_entry *default_seg = NULL;
8264
8265 /* We only need to check those implicit registers for instructions
8266 with 3 operands or less. */
8267 if (i.operands <= 3)
8268 for (unsigned int j = 0; j < i.operands; j++)
8269 if (i.types[j].bitfield.instance != InstanceNone)
8270 i.reg_operands--;
8271
8272 if (i.tm.opcode_modifier.sse2avx)
8273 {
8274 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
8275 need converting. */
8276 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
8277 i.prefix[REX_PREFIX] = 0;
8278 i.rex_encoding = 0;
8279 }
8280 /* ImmExt should be processed after SSE2AVX. */
8281 else if (i.tm.opcode_modifier.immext)
8282 process_immext ();
8283
8284 /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
8285 not ModR/M.rm. To avoid special casing this in build_modrm_byte(), fake a
8286 new destination operand here, while converting the source one to register
8287 number 0. */
8288 if (i.tm.mnem_off == MN_tilezero)
8289 {
8290 i.op[1].regs = i.op[0].regs;
8291 i.op[0].regs -= i.op[0].regs->reg_num;
8292 i.types[1] = i.types[0];
8293 i.tm.operand_types[1] = i.tm.operand_types[0];
8294 i.flags[1] = i.flags[0];
8295 i.operands++;
8296 i.reg_operands++;
8297 i.tm.operands++;
8298 }
8299
8300 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
8301 {
8302 static const i386_operand_type regxmm = {
8303 .bitfield = { .class = RegSIMD, .xmmword = 1 }
8304 };
8305 unsigned int dupl = i.operands;
8306 unsigned int dest = dupl - 1;
8307 unsigned int j;
8308
8309 /* The destination must be an xmm register. */
8310 gas_assert (i.reg_operands
8311 && MAX_OPERANDS > dupl
8312 && operand_type_equal (&i.types[dest], &regxmm));
8313
8314 if (i.tm.operand_types[0].bitfield.instance == Accum
8315 && i.tm.operand_types[0].bitfield.xmmword)
8316 {
8317 /* Keep xmm0 for instructions with VEX prefix and 3
8318 sources. */
8319 i.tm.operand_types[0].bitfield.instance = InstanceNone;
8320 i.tm.operand_types[0].bitfield.class = RegSIMD;
8321 i.reg_operands++;
8322 goto duplicate;
8323 }
8324
8325 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
8326 {
8327 gas_assert ((MAX_OPERANDS - 1) > dupl);
8328
8329 /* Add the implicit xmm0 for instructions with VEX prefix
8330 and 3 sources. */
8331 for (j = i.operands; j > 0; j--)
8332 {
8333 i.op[j] = i.op[j - 1];
8334 i.types[j] = i.types[j - 1];
8335 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
8336 i.flags[j] = i.flags[j - 1];
8337 }
8338 i.op[0].regs
8339 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
8340 i.types[0] = regxmm;
8341 i.tm.operand_types[0] = regxmm;
8342
8343 i.operands += 2;
8344 i.reg_operands += 2;
8345 i.tm.operands += 2;
8346
8347 dupl++;
8348 dest++;
8349 i.op[dupl] = i.op[dest];
8350 i.types[dupl] = i.types[dest];
8351 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8352 i.flags[dupl] = i.flags[dest];
8353 }
8354 else
8355 {
8356 duplicate:
8357 i.operands++;
8358 i.reg_operands++;
8359 i.tm.operands++;
8360
8361 i.op[dupl] = i.op[dest];
8362 i.types[dupl] = i.types[dest];
8363 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8364 i.flags[dupl] = i.flags[dest];
8365 }
8366
8367 if (i.tm.opcode_modifier.immext)
8368 process_immext ();
8369 }
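/* Illustrative: with -msse2avx, "addpd %xmm1, %xmm0" enters the block
   above with two operands and leaves with three, encoding as
   "vaddpd %xmm1, %xmm0, %xmm0" (the destination is duplicated so it
   can also serve as the VEX.vvvv source).  */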
8370 else if (i.tm.operand_types[0].bitfield.instance == Accum
8371 && i.tm.opcode_modifier.modrm)
8372 {
8373 unsigned int j;
8374
8375 for (j = 1; j < i.operands; j++)
8376 {
8377 i.op[j - 1] = i.op[j];
8378 i.types[j - 1] = i.types[j];
8379
8380 /* We need to adjust fields in i.tm since they are used by
8381 build_modrm_byte. */
8382 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8383
8384 i.flags[j - 1] = i.flags[j];
8385 }
8386
8387 /* No adjustment to i.reg_operands: This was already done at the top
8388 of the function. */
8389 i.operands--;
8390 i.tm.operands--;
8391 }
8392 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8393 {
8394 unsigned int regnum, first_reg_in_group, last_reg_in_group;
8395
8396 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8397 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8398 regnum = register_number (i.op[1].regs);
8399 first_reg_in_group = regnum & ~3;
8400 last_reg_in_group = first_reg_in_group + 3;
8401 if (regnum != first_reg_in_group)
8402 as_warn (_("source register `%s%s' implicitly denotes"
8403 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8404 register_prefix, i.op[1].regs->reg_name,
8405 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8406 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8407 insn_name (&i.tm));
8408 }
8409 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8410 {
8411 /* The imul $imm, %reg instruction is converted into
8412 imul $imm, %reg, %reg, and the clr %reg instruction
8413 is converted into xor %reg, %reg. */
8414
8415 unsigned int first_reg_op;
8416
8417 if (operand_type_check (i.types[0], reg))
8418 first_reg_op = 0;
8419 else
8420 first_reg_op = 1;
8421 /* Pretend we saw the extra register operand. */
8422 gas_assert (i.reg_operands == 1
8423 && i.op[first_reg_op + 1].regs == 0);
8424 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8425 i.types[first_reg_op + 1] = i.types[first_reg_op];
8426 i.operands++;
8427 i.reg_operands++;
8428 }
8429
8430 if (i.tm.opcode_modifier.modrm)
8431 {
8432 /* The opcode is completed (modulo i.tm.extension_opcode which
8433 must be put into the modrm byte). Now, we make the modrm and
8434 SIB (scale-index-base) bytes based on all the info we've collected. */
8435
8436 default_seg = build_modrm_byte ();
8437
8438 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8439 {
8440 /* Warn about some common errors, but press on regardless. */
8441 if (i.operands == 2)
8442 {
8443 /* Reversed arguments on faddp or fmulp. */
8444 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
8445 register_prefix, i.op[!intel_syntax].regs->reg_name,
8446 register_prefix, i.op[intel_syntax].regs->reg_name);
8447 }
8448 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
8449 {
8450 /* Extraneous `l' suffix on fp insn. */
8451 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
8452 register_prefix, i.op[0].regs->reg_name);
8453 }
8454 }
8455 }
8456 else if (i.types[0].bitfield.class == SReg && !dot_insn ())
8457 {
8458 if (flag_code != CODE_64BIT
8459 ? i.tm.base_opcode == POP_SEG_SHORT
8460 && i.op[0].regs->reg_num == 1
8461 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8462 && i.op[0].regs->reg_num < 4)
8463 {
8464 as_bad (_("you can't `%s %s%s'"),
8465 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
8466 return 0;
8467 }
8468 if (i.op[0].regs->reg_num > 3
8469 && i.tm.opcode_space == SPACE_BASE)
8470 {
8471 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8472 i.tm.opcode_space = SPACE_0F;
8473 }
8474 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
8475 }
8476 else if (i.tm.opcode_space == SPACE_BASE
8477 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8478 {
8479 default_seg = reg_ds;
8480 }
8481 else if (i.tm.opcode_modifier.isstring)
8482 {
8483 /* For the string instructions that allow a segment override
8484 on one of their operands, the default segment is ds. */
8485 default_seg = reg_ds;
8486 }
8487 else if (i.short_form)
8488 {
8489 /* The register operand is in the 1st or 2nd non-immediate operand. */
8490 const reg_entry *r = i.op[i.imm_operands].regs;
8491
8492 if (!dot_insn ()
8493 && r->reg_type.bitfield.instance == Accum
8494 && i.op[i.imm_operands + 1].regs)
8495 r = i.op[i.imm_operands + 1].regs;
8496 /* Register goes in low 3 bits of opcode. */
8497 i.tm.base_opcode |= r->reg_num;
8498 set_rex_vrex (r, REX_B, false);
8499
8500 if (dot_insn () && i.reg_operands == 2)
8501 {
8502 gas_assert (is_any_vex_encoding (&i.tm)
8503 || i.vec_encoding != vex_encoding_default);
8504 i.vex.register_specifier = i.op[i.operands - 1].regs;
8505 }
8506 }
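/* E.g. "push %rbx" takes this short-form path: reg_num 3 is OR'ed into
   the 0x50 base opcode, yielding 0x53, while "push %r11" additionally
   sets REX.B.  */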
8507 else if (i.reg_operands == 1
8508 && !i.flags[i.operands - 1]
8509 && i.tm.operand_types[i.operands - 1].bitfield.instance
8510 == InstanceNone)
8511 {
8512 gas_assert (is_any_vex_encoding (&i.tm)
8513 || i.vec_encoding != vex_encoding_default);
8514 i.vex.register_specifier = i.op[i.operands - 1].regs;
8515 }
8516
8517 if ((i.seg[0] || i.prefix[SEG_PREFIX])
8518 && i.tm.mnem_off == MN_lea)
8519 {
8520 if (!quiet_warnings)
8521 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
8522 if (optimize && !i.no_optimize)
8523 {
8524 i.seg[0] = NULL;
8525 i.prefix[SEG_PREFIX] = 0;
8526 }
8527 }
8528
8529 /* If a segment was explicitly specified, and the specified segment
8530 is neither the default nor the one already recorded from a prefix,
8531 use an opcode prefix to select it. If we never figured out what
8532 the default segment is, then default_seg will be zero at this
8533 point, and the specified segment prefix will always be used. */
8534 if (i.seg[0]
8535 && i.seg[0] != default_seg
8536 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8537 {
8538 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8539 return 0;
8540 }
8541 return 1;
8542 }
8543
8544 static const reg_entry *
8545 build_modrm_byte (void)
8546 {
8547 const reg_entry *default_seg = NULL;
8548 unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
8549 /* Compensate for kludge in md_assemble(). */
8550 + i.tm.operand_types[0].bitfield.imm1;
8551 unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
8552 unsigned int v, op, reg_slot = ~0;
8553
8554 /* Accumulator (in particular %st), shift count (%cl), and the like
8555 need to be skipped, just as immediate operands are. */
8556 if (i.tm.operand_types[source].bitfield.instance)
8557 ++source;
8558 while (i.tm.operand_types[dest].bitfield.instance)
8559 --dest;
8560
8561 for (op = source; op < i.operands; ++op)
8562 if (i.tm.operand_types[op].bitfield.baseindex)
8563 break;
8564
8565 if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4)
8566 {
8567 expressionS *exp;
8568
8569 /* There are 3 kinds of instructions:
8570 1. 5 operands: 4 register operands or 3 register operands
8571 plus 1 memory operand plus one Imm4 operand, VexXDS, and
8572 VexW0 or VexW1. The destination must be either XMM, YMM or
8573 ZMM register.
8574 2. 4 operands: 4 register operands or 3 register operands
8575 plus 1 memory operand, with VexXDS.
8576 3. Other equivalent combinations when coming from s_insn(). */
8577 gas_assert (i.tm.opcode_modifier.vexvvvv
8578 && i.tm.opcode_modifier.vexw);
8579 gas_assert (dot_insn ()
8580 || i.tm.operand_types[dest].bitfield.class == RegSIMD);
8581
8582 /* Of the first two non-immediate operands, the one whose template
8583 does not allow a memory operand is encoded in the immediate operand. */
8584 if (source == op)
8585 reg_slot = source + 1;
8586 else
8587 reg_slot = source++;
8588
8589 if (!dot_insn ())
8590 {
8591 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8592 gas_assert (!(i.op[reg_slot].regs->reg_flags & RegVRex));
8593 }
8594 else
8595 gas_assert (i.tm.operand_types[reg_slot].bitfield.class != ClassNone);
8596
8597 if (i.imm_operands == 0)
8598 {
8599 /* When there is no immediate operand, generate an 8bit
8600 immediate operand to encode the first operand. */
8601 exp = &im_expressions[i.imm_operands++];
8602 i.op[i.operands].imms = exp;
8603 i.types[i.operands].bitfield.imm8 = 1;
8604 i.operands++;
8605
8606 exp->X_op = O_constant;
8607 }
8608 else
8609 {
8610 gas_assert (i.imm_operands == 1);
8611 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8612 gas_assert (!i.tm.opcode_modifier.immext);
8613
8614 /* Turn on Imm8 again so that output_imm will generate it. */
8615 i.types[0].bitfield.imm8 = 1;
8616
8617 exp = i.op[0].imms;
8618 }
8619 exp->X_add_number |= register_number (i.op[reg_slot].regs)
8620 << (3 + !(is_evex_encoding (&i.tm)
8621 || i.vec_encoding == vex_encoding_evex));
8622 }
8623
8624 for (v = source + 1; v < dest; ++v)
8625 if (v != reg_slot)
8626 break;
8627 if (v >= dest)
8628 v = ~0;
8629 if (i.tm.extension_opcode != None)
8630 {
8631 if (dest != source)
8632 v = dest;
8633 dest = ~0;
8634 }
8635 gas_assert (source < dest);
8636 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
8637 && source != op)
8638 {
8639 unsigned int tmp = source;
8640
8641 source = v;
8642 v = tmp;
8643 }
8644
8645 if (v < MAX_OPERANDS)
8646 {
8647 gas_assert (i.tm.opcode_modifier.vexvvvv);
8648 i.vex.register_specifier = i.op[v].regs;
8649 }
8650
8651 if (op < i.operands)
8652 {
8653 if (i.mem_operands)
8654 {
8655 unsigned int fake_zero_displacement = 0;
8656
8657 gas_assert (i.flags[op] & Operand_Mem);
8658
8659 if (i.tm.opcode_modifier.sib)
8660 {
8661 /* The index register of VSIB shouldn't be RegIZ. */
8662 if (i.tm.opcode_modifier.sib != SIBMEM
8663 && i.index_reg->reg_num == RegIZ)
8664 abort ();
8665
8666 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8667 if (!i.base_reg)
8668 {
8669 i.sib.base = NO_BASE_REGISTER;
8670 i.sib.scale = i.log2_scale_factor;
8671 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8672 i.types[op].bitfield.disp32 = 1;
8673 }
8674
8675 /* Since the mandatory SIB always has an index register, the
8676 code logic remains unchanged here. The non-mandatory SIB
8677 without an index register is allowed and will be handled
8678 later. */
8679 if (i.index_reg)
8680 {
8681 if (i.index_reg->reg_num == RegIZ)
8682 i.sib.index = NO_INDEX_REGISTER;
8683 else
8684 i.sib.index = i.index_reg->reg_num;
8685 set_rex_vrex (i.index_reg, REX_X, false);
8686 }
8687 }
8688
8689 default_seg = reg_ds;
8690
8691 if (i.base_reg == 0)
8692 {
8693 i.rm.mode = 0;
8694 if (!i.disp_operands)
8695 fake_zero_displacement = 1;
8696 if (i.index_reg == 0)
8697 {
8698 /* This checks for both VSIB and the mandatory non-vector SIB. */
8699 gas_assert (!i.tm.opcode_modifier.sib
8700 || i.tm.opcode_modifier.sib == SIBMEM);
8701 /* Operand is just <disp> */
8702 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8703 if (flag_code == CODE_64BIT)
8704 {
8705 /* In 64-bit mode RIP-relative addressing replaces
8706 32-bit absolute addressing, so absolute
8707 addressing has to be encoded via one of the
8708 redundant SIB forms. */
8709 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8710 i.sib.base = NO_BASE_REGISTER;
8711 i.sib.index = NO_INDEX_REGISTER;
8712 i.types[op].bitfield.disp32 = 1;
8713 }
8714 else if ((flag_code == CODE_16BIT)
8715 ^ (i.prefix[ADDR_PREFIX] != 0))
8716 {
8717 i.rm.regmem = NO_BASE_REGISTER_16;
8718 i.types[op].bitfield.disp16 = 1;
8719 }
8720 else
8721 {
8722 i.rm.regmem = NO_BASE_REGISTER;
8723 i.types[op].bitfield.disp32 = 1;
8724 }
8725 }
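/* E.g. "movl %eax, 0x1234" in 64-bit mode thus uses the SIB form
   (ModRM.rm = 4, base = 5 = none, index = 4 = none) with a 32-bit
   displacement, while in 32-bit mode plain ModRM.rm = 5 with disp32
   is used.  */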
8726 else if (!i.tm.opcode_modifier.sib)
8727 {
8728 /* !i.base_reg && i.index_reg */
8729 if (i.index_reg->reg_num == RegIZ)
8730 i.sib.index = NO_INDEX_REGISTER;
8731 else
8732 i.sib.index = i.index_reg->reg_num;
8733 i.sib.base = NO_BASE_REGISTER;
8734 i.sib.scale = i.log2_scale_factor;
8735 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8736 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8737 i.types[op].bitfield.disp32 = 1;
8738 if ((i.index_reg->reg_flags & RegRex) != 0)
8739 i.rex |= REX_X;
8740 }
8741 }
8742 /* RIP addressing for 64bit mode. */
8743 else if (i.base_reg->reg_num == RegIP)
8744 {
8745 gas_assert (!i.tm.opcode_modifier.sib);
8746 i.rm.regmem = NO_BASE_REGISTER;
8747 i.types[op].bitfield.disp8 = 0;
8748 i.types[op].bitfield.disp16 = 0;
8749 i.types[op].bitfield.disp32 = 1;
8750 i.types[op].bitfield.disp64 = 0;
8751 i.flags[op] |= Operand_PCrel;
8752 if (! i.disp_operands)
8753 fake_zero_displacement = 1;
8754 }
8755 else if (i.base_reg->reg_type.bitfield.word)
8756 {
8757 gas_assert (!i.tm.opcode_modifier.sib);
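/* For reference, the 16-bit ModRM r/m encodings assigned below are:
   0 (%bx,%si)  1 (%bx,%di)  2 (%bp,%si)  3 (%bp,%di)
   4 (%si)  5 (%di)  6 (%bp), or disp16 with mod == 0  7 (%bx).  */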
8758 switch (i.base_reg->reg_num)
8759 {
8760 case 3: /* (%bx) */
8761 if (i.index_reg == 0)
8762 i.rm.regmem = 7;
8763 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8764 i.rm.regmem = i.index_reg->reg_num - 6;
8765 break;
8766 case 5: /* (%bp) */
8767 default_seg = reg_ss;
8768 if (i.index_reg == 0)
8769 {
8770 i.rm.regmem = 6;
8771 if (operand_type_check (i.types[op], disp) == 0)
8772 {
8773 /* fake (%bp) into 0(%bp) */
8774 if (i.disp_encoding == disp_encoding_16bit)
8775 i.types[op].bitfield.disp16 = 1;
8776 else
8777 i.types[op].bitfield.disp8 = 1;
8778 fake_zero_displacement = 1;
8779 }
8780 }
8781 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8782 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8783 break;
8784 default: /* (%si) -> 4 or (%di) -> 5 */
8785 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8786 }
8787 if (!fake_zero_displacement
8788 && !i.disp_operands
8789 && i.disp_encoding)
8790 {
8791 fake_zero_displacement = 1;
8792 if (i.disp_encoding == disp_encoding_8bit)
8793 i.types[op].bitfield.disp8 = 1;
8794 else
8795 i.types[op].bitfield.disp16 = 1;
8796 }
8797 i.rm.mode = mode_from_disp_size (i.types[op]);
8798 }
8799 else /* i.base_reg and 32/64 bit mode */
8800 {
8801 if (operand_type_check (i.types[op], disp))
8802 {
8803 i.types[op].bitfield.disp16 = 0;
8804 i.types[op].bitfield.disp64 = 0;
8805 i.types[op].bitfield.disp32 = 1;
8806 }
8807
8808 if (!i.tm.opcode_modifier.sib)
8809 i.rm.regmem = i.base_reg->reg_num;
8810 if ((i.base_reg->reg_flags & RegRex) != 0)
8811 i.rex |= REX_B;
8812 i.sib.base = i.base_reg->reg_num;
8813 /* x86-64 ignores REX prefix bit here to avoid decoder
8814 complications. */
8815 if (!(i.base_reg->reg_flags & RegRex)
8816 && (i.base_reg->reg_num == EBP_REG_NUM
8817 || i.base_reg->reg_num == ESP_REG_NUM))
8818 default_seg = reg_ss;
8819 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8820 {
8821 fake_zero_displacement = 1;
8822 if (i.disp_encoding == disp_encoding_32bit)
8823 i.types[op].bitfield.disp32 = 1;
8824 else
8825 i.types[op].bitfield.disp8 = 1;
8826 }
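/* E.g. "(%ebp)" (and likewise "(%r13)" in 64-bit code) has no mod == 0
   encoding, so it was just turned into "0(%ebp)" with a one-byte zero
   displacement.  */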
8827 i.sib.scale = i.log2_scale_factor;
8828 if (i.index_reg == 0)
8829 {
8830 /* Only check for VSIB. */
8831 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8832 && i.tm.opcode_modifier.sib != VECSIB256
8833 && i.tm.opcode_modifier.sib != VECSIB512);
8834
8835 /* <disp>(%esp) becomes a two-byte modrm with no index
8836 register. We've already stored the code for %esp
8837 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8838 Any base register besides %esp will not use the
8839 extra modrm byte. */
8840 i.sib.index = NO_INDEX_REGISTER;
8841 }
8842 else if (!i.tm.opcode_modifier.sib)
8843 {
8844 if (i.index_reg->reg_num == RegIZ)
8845 i.sib.index = NO_INDEX_REGISTER;
8846 else
8847 i.sib.index = i.index_reg->reg_num;
8848 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8849 if ((i.index_reg->reg_flags & RegRex) != 0)
8850 i.rex |= REX_X;
8851 }
8852
8853 if (i.disp_operands
8854 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8855 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8856 i.rm.mode = 0;
8857 else
8858 {
8859 if (!fake_zero_displacement
8860 && !i.disp_operands
8861 && i.disp_encoding)
8862 {
8863 fake_zero_displacement = 1;
8864 if (i.disp_encoding == disp_encoding_8bit)
8865 i.types[op].bitfield.disp8 = 1;
8866 else
8867 i.types[op].bitfield.disp32 = 1;
8868 }
8869 i.rm.mode = mode_from_disp_size (i.types[op]);
8870 }
8871 }
8872
8873 if (fake_zero_displacement)
8874 {
8875 /* Fakes a zero displacement assuming that i.types[op]
8876 holds the correct displacement size. */
8877 expressionS *exp;
8878
8879 gas_assert (i.op[op].disps == 0);
8880 exp = &disp_expressions[i.disp_operands++];
8881 i.op[op].disps = exp;
8882 exp->X_op = O_constant;
8883 exp->X_add_number = 0;
8884 exp->X_add_symbol = (symbolS *) 0;
8885 exp->X_op_symbol = (symbolS *) 0;
8886 }
8887 }
8888 else
8889 {
8890 i.rm.mode = 3;
8891 i.rm.regmem = i.op[op].regs->reg_num;
8892 set_rex_vrex (i.op[op].regs, REX_B, false);
8893 }
8894
8895 if (op == dest)
8896 dest = ~0;
8897 if (op == source)
8898 source = ~0;
8899 }
8900 else
8901 {
8902 i.rm.mode = 3;
8903 if (!i.tm.opcode_modifier.regmem)
8904 {
8905 gas_assert (source < MAX_OPERANDS);
8906 i.rm.regmem = i.op[source].regs->reg_num;
8907 set_rex_vrex (i.op[source].regs, REX_B,
8908 dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
8909 source = ~0;
8910 }
8911 else
8912 {
8913 gas_assert (dest < MAX_OPERANDS);
8914 i.rm.regmem = i.op[dest].regs->reg_num;
8915 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8916 dest = ~0;
8917 }
8918 }
8919
8920 /* Fill in i.rm.reg field with extension opcode (if any) or the
8921 appropriate register. */
8922 if (i.tm.extension_opcode != None)
8923 i.rm.reg = i.tm.extension_opcode;
8924 else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
8925 {
8926 i.rm.reg = i.op[dest].regs->reg_num;
8927 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8928 }
8929 else
8930 {
8931 gas_assert (source < MAX_OPERANDS);
8932 i.rm.reg = i.op[source].regs->reg_num;
8933 set_rex_vrex (i.op[source].regs, REX_R, false);
8934 }
8935
8936 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8937 {
8938 gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
8939 i.rex &= ~REX_R;
8940 add_prefix (LOCK_PREFIX_OPCODE);
8941 }
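/* Illustrative: the block above covers e.g. "mov %cr8, %eax" outside of
   64-bit mode, where %cr8 can't be expressed via REX.R and is instead
   selected by a LOCK prefix (AMD's alternative encoding).  */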
8942
8943 return default_seg;
8944 }
8945
8946 static INLINE void
8947 frag_opcode_byte (unsigned char byte)
8948 {
8949 if (now_seg != absolute_section)
8950 FRAG_APPEND_1_CHAR (byte);
8951 else
8952 ++abs_section_offset;
8953 }
8954
8955 static unsigned int
8956 flip_code16 (unsigned int code16)
8957 {
8958 gas_assert (i.tm.operands == 1);
8959
8960 return !(i.prefix[REX_PREFIX] & REX_W)
8961 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8962 : i.tm.operand_types[0].bitfield.disp16)
8963 ? CODE16 : 0;
8964 }
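/* E.g. in 32-bit code a "data16"-prefixed branch flips to a 16-bit
   displacement here, while in 64-bit code an additional REX.W prefix
   suppresses the flip and the 32-bit displacement form is kept.  */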
8965
8966 static void
8967 output_branch (void)
8968 {
8969 char *p;
8970 int size;
8971 int code16;
8972 int prefix;
8973 relax_substateT subtype;
8974 symbolS *sym;
8975 offsetT off;
8976
8977 if (now_seg == absolute_section)
8978 {
8979 as_bad (_("relaxable branches not supported in absolute section"));
8980 return;
8981 }
8982
8983 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
8984 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
8985
8986 prefix = 0;
8987 if (i.prefix[DATA_PREFIX] != 0)
8988 {
8989 prefix = 1;
8990 i.prefixes -= 1;
8991 code16 ^= flip_code16(code16);
8992 }
8993 /* Pentium4 branch hints. */
8994 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
8995 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
8996 {
8997 prefix++;
8998 i.prefixes--;
8999 }
9000 if (i.prefix[REX_PREFIX] != 0)
9001 {
9002 prefix++;
9003 i.prefixes--;
9004 }
9005
9006 /* BND prefixed jump. */
9007 if (i.prefix[BND_PREFIX] != 0)
9008 {
9009 prefix++;
9010 i.prefixes--;
9011 }
9012
9013 if (i.prefixes != 0)
9014 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9015
9016 /* It's always a symbol; end the frag and set up for relaxation.
9017 Make sure there is enough room in this frag for the largest
9018 instruction we may generate in md_convert_frag. This is 2
9019 bytes for the opcode and room for the prefix and largest
9020 displacement. */
9021 frag_grow (prefix + 2 + 4);
9022 /* Prefix and 1 opcode byte go in fr_fix. */
9023 p = frag_more (prefix + 1);
9024 if (i.prefix[DATA_PREFIX] != 0)
9025 *p++ = DATA_PREFIX_OPCODE;
9026 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
9027 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
9028 *p++ = i.prefix[SEG_PREFIX];
9029 if (i.prefix[BND_PREFIX] != 0)
9030 *p++ = BND_PREFIX_OPCODE;
9031 if (i.prefix[REX_PREFIX] != 0)
9032 *p++ = i.prefix[REX_PREFIX];
9033 *p = i.tm.base_opcode;
9034
9035 if ((unsigned char) *p == JUMP_PC_RELATIVE)
9036 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
9037 else if (cpu_arch_flags.bitfield.cpui386)
9038 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
9039 else
9040 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
9041 subtype |= code16;
9042
9043 sym = i.op[0].disps->X_add_symbol;
9044 off = i.op[0].disps->X_add_number;
9045
9046 if (i.op[0].disps->X_op != O_constant
9047 && i.op[0].disps->X_op != O_symbol)
9048 {
9049 /* Handle complex expressions. */
9050 sym = make_expr_symbol (i.op[0].disps);
9051 off = 0;
9052 }
9053
9054 frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
9055
9056 /* 1 possible extra opcode + 4 byte displacement go in var part.
9057 Pass reloc in fr_var. */
9058 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
9059 }
9060
9061 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9062 /* Return TRUE iff PLT32 relocation should be used for branching to
9063 symbol S. */
9064
9065 static bool
9066 need_plt32_p (symbolS *s)
9067 {
9068 /* PLT32 relocation is ELF only. */
9069 if (!IS_ELF)
9070 return false;
9071
9072 #ifdef TE_SOLARIS
9073 /* Don't emit PLT32 relocation on Solaris: neither the native linker
9074 nor krtld supports it. */
9075 return false;
9076 #endif
9077
9078 /* Since there is no need to prepare for a PLT branch on x86-64, we
9079 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
9080 be used as a marker for 32-bit PC-relative branches. */
9081 if (!object_64bit)
9082 return false;
9083
9084 if (s == NULL)
9085 return false;
9086
9087 /* Weak or undefined symbols need PLT32 relocation. */
9088 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
9089 return true;
9090
9091 /* A non-global symbol doesn't need PLT32 relocation. */
9092 if (! S_IS_EXTERNAL (s))
9093 return false;
9094
9095 /* Other global symbols need PLT32 relocation. NB: Symbols with
9096 non-default visibility are treated as normal global symbols
9097 so that PLT32 relocation can be used as a marker for 32-bit
9098 PC-relative branches; this is useful for linker relaxation. */
9099 return true;
9100 }
9101 #endif
9102
9103 static void
9104 output_jump (void)
9105 {
9106 char *p;
9107 int size;
9108 fixS *fixP;
9109 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
9110
9111 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
9112 {
9113 /* This is a loop or jecxz type instruction. */
9114 size = 1;
9115 if (i.prefix[ADDR_PREFIX] != 0)
9116 {
9117 frag_opcode_byte (ADDR_PREFIX_OPCODE);
9118 i.prefixes -= 1;
9119 }
9120 /* Pentium4 branch hints. */
9121 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
9122 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
9123 {
9124 frag_opcode_byte (i.prefix[SEG_PREFIX]);
9125 i.prefixes--;
9126 }
9127 }
9128 else
9129 {
9130 int code16;
9131
9132 code16 = 0;
9133 if (flag_code == CODE_16BIT)
9134 code16 = CODE16;
9135
9136 if (i.prefix[DATA_PREFIX] != 0)
9137 {
9138 frag_opcode_byte (DATA_PREFIX_OPCODE);
9139 i.prefixes -= 1;
9140 code16 ^= flip_code16(code16);
9141 }
9142
9143 size = 4;
9144 if (code16)
9145 size = 2;
9146 }
9147
9148 /* BND prefixed jump. */
9149 if (i.prefix[BND_PREFIX] != 0)
9150 {
9151 frag_opcode_byte (i.prefix[BND_PREFIX]);
9152 i.prefixes -= 1;
9153 }
9154
9155 if (i.prefix[REX_PREFIX] != 0)
9156 {
9157 frag_opcode_byte (i.prefix[REX_PREFIX]);
9158 i.prefixes -= 1;
9159 }
9160
9161 if (i.prefixes != 0)
9162 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9163
9164 if (now_seg == absolute_section)
9165 {
9166 abs_section_offset += i.opcode_length + size;
9167 return;
9168 }
9169
9170 p = frag_more (i.opcode_length + size);
9171 switch (i.opcode_length)
9172 {
9173 case 2:
9174 *p++ = i.tm.base_opcode >> 8;
9175 /* Fall through. */
9176 case 1:
9177 *p++ = i.tm.base_opcode;
9178 break;
9179 default:
9180 abort ();
9181 }
9182
9183 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9184 if (flag_code == CODE_64BIT && size == 4
9185 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
9186 && need_plt32_p (i.op[0].disps->X_add_symbol))
9187 jump_reloc = BFD_RELOC_X86_64_PLT32;
9188 #endif
9189
9190 jump_reloc = reloc (size, 1, 1, jump_reloc);
9191
9192 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9193 i.op[0].disps, 1, jump_reloc);
9194
9195 /* All jumps handled here are signed, but don't unconditionally use a
9196 signed limit check for 32 and 16 bit jumps as we want to allow wrap
9197 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
9198 respectively. */
9199 switch (size)
9200 {
9201 case 1:
9202 fixP->fx_signed = 1;
9203 break;
9204
9205 case 2:
9206 if (i.tm.mnem_off == MN_xbegin)
9207 fixP->fx_signed = 1;
9208 break;
9209
9210 case 4:
9211 if (flag_code == CODE_64BIT)
9212 fixP->fx_signed = 1;
9213 break;
9214 }
9215 }
9216
9217 static void
9218 output_interseg_jump (void)
9219 {
9220 char *p;
9221 int size;
9222 int prefix;
9223 int code16;
9224
9225 code16 = 0;
9226 if (flag_code == CODE_16BIT)
9227 code16 = CODE16;
9228
9229 prefix = 0;
9230 if (i.prefix[DATA_PREFIX] != 0)
9231 {
9232 prefix = 1;
9233 i.prefixes -= 1;
9234 code16 ^= CODE16;
9235 }
9236
9237 gas_assert (!i.prefix[REX_PREFIX]);
9238
9239 size = 4;
9240 if (code16)
9241 size = 2;
9242
9243 if (i.prefixes != 0)
9244 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9245
9246 if (now_seg == absolute_section)
9247 {
9248 abs_section_offset += prefix + 1 + 2 + size;
9249 return;
9250 }
9251
9252 /* 1 opcode byte; 2 segment bytes; "size" offset bytes. */
9253 p = frag_more (prefix + 1 + 2 + size);
9254
9255 if (i.prefix[DATA_PREFIX] != 0)
9256 *p++ = DATA_PREFIX_OPCODE;
9257
9258 if (i.prefix[REX_PREFIX] != 0)
9259 *p++ = i.prefix[REX_PREFIX];
9260
9261 *p++ = i.tm.base_opcode;
9262 if (i.op[1].imms->X_op == O_constant)
9263 {
9264 offsetT n = i.op[1].imms->X_add_number;
9265
9266 if (size == 2
9267 && !fits_in_unsigned_word (n)
9268 && !fits_in_signed_word (n))
9269 {
9270 as_bad (_("16-bit jump out of range"));
9271 return;
9272 }
9273 md_number_to_chars (p, n, size);
9274 }
9275 else
9276 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9277 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9278
9279 p += size;
9280 if (i.op[0].imms->X_op == O_constant)
9281 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9282 else
9283 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9284 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9285 }
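/* E.g. "ljmp $0x10, $0x2000" in 32-bit code emits the opcode byte, then
   the 4-byte offset (0x2000), then the 2-byte segment selector (0x10),
   matching the layout produced above.  */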
9286
9287 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9288 void
9289 x86_cleanup (void)
9290 {
9291 char *p;
9292 asection *seg = now_seg;
9293 subsegT subseg = now_subseg;
9294 asection *sec;
9295 unsigned int alignment, align_size_1;
9296 unsigned int isa_1_descsz, feature_2_descsz, descsz;
9297 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9298 unsigned int padding;
9299
9300 if (!IS_ELF || !x86_used_note)
9301 return;
9302
9303 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9304
9305 /* The .note.gnu.property section layout:
9306
9307 Field Length Contents
9308 ---- ---- ----
9309 n_namsz 4 4
9310 n_descsz 4 The note descriptor size
9311 n_type 4 NT_GNU_PROPERTY_TYPE_0
9312 n_name 4 "GNU"
9313 n_desc n_descsz The program property array
9314 .... .... ....
9315 */
9316
9317 /* Create the .note.gnu.property section. */
9318 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9319 bfd_set_section_flags (sec,
9320 (SEC_ALLOC
9321 | SEC_LOAD
9322 | SEC_DATA
9323 | SEC_HAS_CONTENTS
9324 | SEC_READONLY));
9325
9326 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9327 {
9328 align_size_1 = 7;
9329 alignment = 3;
9330 }
9331 else
9332 {
9333 align_size_1 = 3;
9334 alignment = 2;
9335 }
9336
9337 bfd_set_section_alignment (sec, alignment);
9338 elf_section_type (sec) = SHT_NOTE;
9339
9340 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9341 + 4-byte data */
9342 isa_1_descsz_raw = 4 + 4 + 4;
9343 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
9344 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9345
9346 feature_2_descsz_raw = isa_1_descsz;
9347 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9348 + 4-byte data */
9349 feature_2_descsz_raw += 4 + 4 + 4;
9350 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
9351 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9352 & ~align_size_1);
9353
9354 descsz = feature_2_descsz;
9355 /* Section size: n_namsz + n_descsz + n_type + n_name + n_desc (descsz bytes). */
9356 p = frag_more (4 + 4 + 4 + 4 + descsz);
9357
9358 /* Write n_namsz. */
9359 md_number_to_chars (p, (valueT) 4, 4);
9360
9361 /* Write n_descsz. */
9362 md_number_to_chars (p + 4, (valueT) descsz, 4);
9363
9364 /* Write n_type. */
9365 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9366
9367 /* Write n_name. */
9368 memcpy (p + 4 * 3, "GNU", 4);
9369
9370 /* Write 4-byte type. */
9371 md_number_to_chars (p + 4 * 4,
9372 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9373
9374 /* Write 4-byte data size. */
9375 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9376
9377 /* Write 4-byte data. */
9378 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9379
9380 /* Zero out paddings. */
9381 padding = isa_1_descsz - isa_1_descsz_raw;
9382 if (padding)
9383 memset (p + 4 * 7, 0, padding);
9384
9385 /* Write 4-byte type. */
9386 md_number_to_chars (p + isa_1_descsz + 4 * 4,
9387 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9388
9389 /* Write 4-byte data size. */
9390 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9391
9392 /* Write 4-byte data. */
9393 md_number_to_chars (p + isa_1_descsz + 4 * 6,
9394 (valueT) x86_feature_2_used, 4);
9395
9396 /* Zero out paddings. */
9397 padding = feature_2_descsz - feature_2_descsz_raw;
9398 if (padding)
9399 memset (p + isa_1_descsz + 4 * 7, 0, padding);
9400
9401 /* We probably can't restore the current segment, for there likely
9402 isn't one yet... */
9403 if (seg && subseg)
9404 subseg_set (seg, subseg);
9405 }
9406
9407 bool
9408 x86_support_sframe_p (void)
9409 {
9410 /* At this time, SFrame stack trace is supported for AMD64 ABI only. */
9411 return (x86_elf_abi == X86_64_ABI);
9412 }
9413
9414 bool
9415 x86_sframe_ra_tracking_p (void)
9416 {
9417 /* In AMD64, the return address is always stored on the stack at a fixed
9418 offset from the CFA (provided via x86_sframe_cfa_ra_offset ()), so it
9419 is not tracked explicitly via an SFrame Frame Row Entry. */
9420 return false;
9421 }
9422
9423 offsetT
9424 x86_sframe_cfa_ra_offset (void)
9425 {
9426 gas_assert (x86_elf_abi == X86_64_ABI);
9427 return (offsetT) -8;
9428 }
9429
9430 unsigned char
9431 x86_sframe_get_abi_arch (void)
9432 {
9433 unsigned char sframe_abi_arch = 0;
9434
9435 if (x86_support_sframe_p ())
9436 {
9437 gas_assert (!target_big_endian);
9438 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9439 }
9440
9441 return sframe_abi_arch;
9442 }
9443
9444 #endif
9445
9446 static unsigned int
9447 encoding_length (const fragS *start_frag, offsetT start_off,
9448 const char *frag_now_ptr)
9449 {
9450 unsigned int len = 0;
9451
9452 if (start_frag != frag_now)
9453 {
9454 const fragS *fr = start_frag;
9455
9456 do {
9457 len += fr->fr_fix;
9458 fr = fr->fr_next;
9459 } while (fr && fr != frag_now);
9460 }
9461
9462 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9463 }
9464
9465 /* Return 1 for test, and, cmp, add, sub, inc and dec, which may
9466 be macro-fused with conditional jumps.
9467 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP-relative address,
9468 or takes one of the following forms:
9469
9470 cmp m, imm
9471 add m, imm
9472 sub m, imm
9473 test m, imm
9474 and m, imm
9475 inc m
9476 dec m
9477
9478 it is not fusible. */
9479
9480 static int
9481 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9482 {
9483 /* No RIP address. */
9484 if (i.base_reg && i.base_reg->reg_num == RegIP)
9485 return 0;
9486
9487 /* No opcodes outside of base encoding space. */
9488 if (i.tm.opcode_space != SPACE_BASE)
9489 return 0;
9490
9491 /* add, sub without add/sub m, imm. */
9492 if (i.tm.base_opcode <= 5
9493 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9494 || ((i.tm.base_opcode | 3) == 0x83
9495 && (i.tm.extension_opcode == 0x5
9496 || i.tm.extension_opcode == 0x0)))
9497 {
9498 *mf_cmp_p = mf_cmp_alu_cmp;
9499 return !(i.mem_operands && i.imm_operands);
9500 }
9501
9502 /* and without and m, imm. */
9503 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9504 || ((i.tm.base_opcode | 3) == 0x83
9505 && i.tm.extension_opcode == 0x4))
9506 {
9507 *mf_cmp_p = mf_cmp_test_and;
9508 return !(i.mem_operands && i.imm_operands);
9509 }
9510
9511 /* test without test m, imm. */
9512 if ((i.tm.base_opcode | 1) == 0x85
9513 || (i.tm.base_opcode | 1) == 0xa9
9514 || ((i.tm.base_opcode | 1) == 0xf7
9515 && i.tm.extension_opcode == 0))
9516 {
9517 *mf_cmp_p = mf_cmp_test_and;
9518 return !(i.mem_operands && i.imm_operands);
9519 }
9520
9521 /* cmp without cmp m, imm. */
9522 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9523 || ((i.tm.base_opcode | 3) == 0x83
9524 && (i.tm.extension_opcode == 0x7)))
9525 {
9526 *mf_cmp_p = mf_cmp_alu_cmp;
9527 return !(i.mem_operands && i.imm_operands);
9528 }
9529
9530 /* inc, dec without inc/dec m. */
9531 if ((is_cpu (&i.tm, CpuNo64)
9532 && (i.tm.base_opcode | 0xf) == 0x4f)
9533 || ((i.tm.base_opcode | 1) == 0xff
9534 && i.tm.extension_opcode <= 0x1))
9535 {
9536 *mf_cmp_p = mf_cmp_incdec;
9537 return !i.mem_operands;
9538 }
9539
9540 return 0;
9541 }
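/* E.g. "cmp %rsi, %rdi" followed by "jne" may macro-fuse, whereas
   "cmpl $1, (%rdi)" (memory plus immediate) or a RIP-relative
   "incl var(%rip)" is deemed not fusible by the logic above.  */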
9542
9543 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9544
9545 static int
9546 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9547 {
9548 /* NB: This doesn't work with COND_JUMP86 (i.e. without i386). */
9549 if (!align_branch_power
9550 || now_seg == absolute_section
9551 || !cpu_arch_flags.bitfield.cpui386
9552 || !(align_branch & align_branch_fused_bit))
9553 return 0;
9554
9555 if (maybe_fused_with_jcc_p (mf_cmp_p))
9556 {
9557 if (last_insn.kind == last_insn_other
9558 || last_insn.seg != now_seg)
9559 return 1;
9560 if (flag_debug)
9561 as_warn_where (last_insn.file, last_insn.line,
9562 _("`%s` skips -malign-branch-boundary on `%s`"),
9563 last_insn.name, insn_name (&i.tm));
9564 }
9565
9566 return 0;
9567 }
9568
9569 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9570
9571 static int
9572 add_branch_prefix_frag_p (void)
9573 {
9574 /* NB: This doesn't work with COND_JUMP86 (i.e. without i386). Don't add
9575 a prefix to PadLock instructions, since they include prefixes in their opcode. */
9576 if (!align_branch_power
9577 || !align_branch_prefix_size
9578 || now_seg == absolute_section
9579 || is_cpu (&i.tm, CpuPadLock)
9580 || !cpu_arch_flags.bitfield.cpui386)
9581 return 0;
9582
9583 /* Don't add a prefix if the insn is itself a prefix, or if there is no
9584 operand, in case the segment prefix is special. */
9585 if (!i.operands || i.tm.opcode_modifier.isprefix)
9586 return 0;
9587
9588 if (last_insn.kind == last_insn_other
9589 || last_insn.seg != now_seg)
9590 return 1;
9591
9592 if (flag_debug)
9593 as_warn_where (last_insn.file, last_insn.line,
9594 _("`%s` skips -malign-branch-boundary on `%s`"),
9595 last_insn.name, insn_name (&i.tm));
9596
9597 return 0;
9598 }
9599
9600 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9601
9602 static int
9603 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9604 enum mf_jcc_kind *mf_jcc_p)
9605 {
9606 int add_padding;
9607
9608 /* NB: This doesn't work with COND_JUMP86 (i.e. without i386). */
9609 if (!align_branch_power
9610 || now_seg == absolute_section
9611 || !cpu_arch_flags.bitfield.cpui386
9612 || i.tm.opcode_space != SPACE_BASE)
9613 return 0;
9614
9615 add_padding = 0;
9616
9617 /* Check for jcc and direct jmp. */
9618 if (i.tm.opcode_modifier.jump == JUMP)
9619 {
9620 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9621 {
9622 *branch_p = align_branch_jmp;
9623 add_padding = align_branch & align_branch_jmp_bit;
9624 }
9625 else
9626 {
9627 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
9628 table, ignore the lowest bit. */
9629 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9630 *branch_p = align_branch_jcc;
9631 if ((align_branch & align_branch_jcc_bit))
9632 add_padding = 1;
9633 }
9634 }
9635 else if ((i.tm.base_opcode | 1) == 0xc3)
9636 {
9637 /* Near ret. */
9638 *branch_p = align_branch_ret;
9639 if ((align_branch & align_branch_ret_bit))
9640 add_padding = 1;
9641 }
9642 else
9643 {
9644 /* Check for indirect jmp, direct and indirect calls. */
9645 if (i.tm.base_opcode == 0xe8)
9646 {
9647 /* Direct call. */
9648 *branch_p = align_branch_call;
9649 if ((align_branch & align_branch_call_bit))
9650 add_padding = 1;
9651 }
9652 else if (i.tm.base_opcode == 0xff
9653 && (i.tm.extension_opcode == 2
9654 || i.tm.extension_opcode == 4))
9655 {
9656 /* Indirect call and jmp. */
9657 *branch_p = align_branch_indirect;
9658 if ((align_branch & align_branch_indirect_bit))
9659 add_padding = 1;
9660 }
9661
9662 if (add_padding
9663 && i.disp_operands
9664 && tls_get_addr
9665 && (i.op[0].disps->X_op == O_symbol
9666 || (i.op[0].disps->X_op == O_subtract
9667 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9668 {
9669 symbolS *s = i.op[0].disps->X_add_symbol;
9670 /* No padding for calls to a global or undefined tls_get_addr. */
9671 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9672 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9673 return 0;
9674 }
9675 }
9676
9677 if (add_padding
9678 && last_insn.kind != last_insn_other
9679 && last_insn.seg == now_seg)
9680 {
9681 if (flag_debug)
9682 as_warn_where (last_insn.file, last_insn.line,
9683 _("`%s` skips -malign-branch-boundary on `%s`"),
9684 last_insn.name, insn_name (&i.tm));
9685 return 0;
9686 }
9687
9688 return add_padding;
9689 }
9690
9691 static void
9692 output_insn (void)
9693 {
9694 fragS *insn_start_frag;
9695 offsetT insn_start_off;
9696 fragS *fragP = NULL;
9697 enum align_branch_kind branch = align_branch_none;
9698 /* The initializer is arbitrary, just to avoid an uninitialized-variable
9699 error; it's actually either assigned in add_branch_padding_frag_p
9700 or never used. */
9701 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9702
9703 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9704 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9705 {
9706 if ((i.xstate & xstate_tmm) == xstate_tmm
9707 || is_cpu (&i.tm, CpuAMX_TILE))
9708 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9709
9710 if (is_cpu (&i.tm, Cpu8087)
9711 || is_cpu (&i.tm, Cpu287)
9712 || is_cpu (&i.tm, Cpu387)
9713 || is_cpu (&i.tm, Cpu687)
9714 || is_cpu (&i.tm, CpuFISTTP))
9715 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9716
9717 if ((i.xstate & xstate_mmx)
9718 || i.tm.mnem_off == MN_emms
9719 || i.tm.mnem_off == MN_femms)
9720 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9721
9722 if (i.index_reg)
9723 {
9724 if (i.index_reg->reg_type.bitfield.zmmword)
9725 i.xstate |= xstate_zmm;
9726 else if (i.index_reg->reg_type.bitfield.ymmword)
9727 i.xstate |= xstate_ymm;
9728 else if (i.index_reg->reg_type.bitfield.xmmword)
9729 i.xstate |= xstate_xmm;
9730 }
9731
9732 /* vzeroall / vzeroupper */
9733 if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
9734 i.xstate |= xstate_ymm;
9735
9736 if ((i.xstate & xstate_xmm)
9737 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9738 || (i.tm.base_opcode == 0xae
9739 && (is_cpu (&i.tm, CpuSSE)
9740 || is_cpu (&i.tm, CpuAVX)))
9741 || is_cpu (&i.tm, CpuWideKL)
9742 || is_cpu (&i.tm, CpuKL))
9743 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9744
9745 if ((i.xstate & xstate_ymm) == xstate_ymm)
9746 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9747 if ((i.xstate & xstate_zmm) == xstate_zmm)
9748 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9749 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9750 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9751 if (is_cpu (&i.tm, CpuFXSR))
9752 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9753 if (is_cpu (&i.tm, CpuXsave))
9754 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9755 if (is_cpu (&i.tm, CpuXsaveopt))
9756 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9757 if (is_cpu (&i.tm, CpuXSAVEC))
9758 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9759
9760 if (x86_feature_2_used
9761 || is_cpu (&i.tm, CpuCMOV)
9762 || is_cpu (&i.tm, CpuSYSCALL)
9763 || i.tm.mnem_off == MN_cmpxchg8b)
9764 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9765 if (is_cpu (&i.tm, CpuSSE3)
9766 || is_cpu (&i.tm, CpuSSSE3)
9767 || is_cpu (&i.tm, CpuSSE4_1)
9768 || is_cpu (&i.tm, CpuSSE4_2)
9769 || is_cpu (&i.tm, CpuCX16)
9770 || is_cpu (&i.tm, CpuPOPCNT)
9771 /* LAHF-SAHF insns in 64-bit mode. */
9772 || (flag_code == CODE_64BIT
9773 && (i.tm.base_opcode | 1) == 0x9f
9774 && i.tm.opcode_space == SPACE_BASE))
9775 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9776 if (is_cpu (&i.tm, CpuAVX)
9777 || is_cpu (&i.tm, CpuAVX2)
9778 /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
9779 XOP, FMA4, LWP, TBM, and AMX. */
9780 || (i.tm.opcode_modifier.vex
9781 && !is_cpu (&i.tm, CpuAVX512F)
9782 && !is_cpu (&i.tm, CpuAVX512BW)
9783 && !is_cpu (&i.tm, CpuAVX512DQ)
9784 && !is_cpu (&i.tm, CpuXOP)
9785 && !is_cpu (&i.tm, CpuFMA4)
9786 && !is_cpu (&i.tm, CpuLWP)
9787 && !is_cpu (&i.tm, CpuTBM)
9788 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9789 || is_cpu (&i.tm, CpuF16C)
9790 || is_cpu (&i.tm, CpuFMA)
9791 || is_cpu (&i.tm, CpuLZCNT)
9792 || is_cpu (&i.tm, CpuMovbe)
9793 || is_cpu (&i.tm, CpuXSAVES)
9794 || (x86_feature_2_used
9795 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9796 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9797 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9798 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9799 if (is_cpu (&i.tm, CpuAVX512F)
9800 || is_cpu (&i.tm, CpuAVX512BW)
9801 || is_cpu (&i.tm, CpuAVX512DQ)
9802 || is_cpu (&i.tm, CpuAVX512VL)
9803 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9804 AVX512-4FMAPS, and AVX512-4VNNIW. */
9805 || (i.tm.opcode_modifier.evex
9806 && !is_cpu (&i.tm, CpuAVX512ER)
9807 && !is_cpu (&i.tm, CpuAVX512PF)
9808 && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
9809 && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
9810 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9811 }
9812 #endif
9813
9814 /* Tie dwarf2 debug info to the address at the start of the insn.
9815 We can't do this after the insn has been output as the current
9816 frag may have been closed off, e.g. by frag_var. */
9817 dwarf2_emit_insn (0);
9818
9819 insn_start_frag = frag_now;
9820 insn_start_off = frag_now_fix ();
9821
9822 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9823 {
9824 char *p;
9825 /* Branch can be 8 bytes. Leave some room for prefixes. */
9826 unsigned int max_branch_padding_size = 14;
9827
9828 /* Align section to boundary. */
9829 record_alignment (now_seg, align_branch_power);
9830
9831 /* Make room for padding. */
9832 frag_grow (max_branch_padding_size);
9833
9834 /* Start of the padding. */
9835 p = frag_more (0);
9836
9837 fragP = frag_now;
9838
9839 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9840 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9841 NULL, 0, p);
9842
9843 fragP->tc_frag_data.mf_type = mf_jcc;
9844 fragP->tc_frag_data.branch_type = branch;
9845 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9846 }
9847
9848 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9849 && !pre_386_16bit_warned)
9850 {
9851 as_warn (_("use .code16 to ensure correct addressing mode"));
9852 pre_386_16bit_warned = true;
9853 }
9854
9855 /* Output jumps. */
9856 if (i.tm.opcode_modifier.jump == JUMP)
9857 output_branch ();
9858 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9859 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9860 output_jump ();
9861 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9862 output_interseg_jump ();
9863 else
9864 {
9865 /* Output normal instructions here. */
9866 char *p;
9867 unsigned char *q;
9868 unsigned int j;
9869 enum mf_cmp_kind mf_cmp;
9870
9871 if (avoid_fence
9872 && (i.tm.base_opcode == 0xaee8
9873 || i.tm.base_opcode == 0xaef0
9874 || i.tm.base_opcode == 0xaef8))
9875 {
9876 /* Encode lfence, mfence, and sfence as
9877 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
9878 if (flag_code == CODE_16BIT)
9879 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
9880 else if (omit_lock_prefix)
9881 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9882 insn_name (&i.tm));
9883 else if (now_seg != absolute_section)
9884 {
9885 offsetT val = 0x240483f0ULL;
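/* md_number_to_chars emits this value little-endian, producing
   exactly the f0 83 04 24 00 sequence quoted above. */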
9886
9887 p = frag_more (5);
9888 md_number_to_chars (p, val, 5);
9889 }
9890 else
9891 abs_section_offset += 5;
9892 return;
9893 }
9894
9895 /* Some processors fail on the LOCK prefix. This option makes the
9896 assembler ignore the LOCK prefix and serves as a workaround. */
9897 if (omit_lock_prefix)
9898 {
9899 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9900 && i.tm.opcode_modifier.isprefix)
9901 return;
9902 i.prefix[LOCK_PREFIX] = 0;
9903 }
9904
9905 if (branch)
9906 /* Skip if this is a branch. */
9907 ;
9908 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9909 {
9910 /* Make room for padding. */
9911 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9912 p = frag_more (0);
9913
9914 fragP = frag_now;
9915
9916 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9917 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9918 NULL, 0, p);
9919
9920 fragP->tc_frag_data.mf_type = mf_cmp;
9921 fragP->tc_frag_data.branch_type = align_branch_fused;
9922 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9923 }
9924 else if (add_branch_prefix_frag_p ())
9925 {
9926 unsigned int max_prefix_size = align_branch_prefix_size;
9927
9928 /* Make room for padding. */
9929 frag_grow (max_prefix_size);
9930 p = frag_more (0);
9931
9932 fragP = frag_now;
9933
9934 frag_var (rs_machine_dependent, max_prefix_size, 0,
9935 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9936 NULL, 0, p);
9937
9938 fragP->tc_frag_data.max_bytes = max_prefix_size;
9939 }
9940
9941 /* Since the VEX/EVEX prefix encodes the implied legacy prefix, we
9942 don't need to emit the explicit prefix. */
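/* (Concretely, the pp field stands in for a leading 0x66, 0xf3 or
   0xf2 byte: pp == 01, 10 and 11 respectively.) */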
9943 if (!is_any_vex_encoding (&i.tm))
9944 {
9945 switch (i.tm.opcode_modifier.opcodeprefix)
9946 {
9947 case PREFIX_0X66:
9948 add_prefix (0x66);
9949 break;
9950 case PREFIX_0XF2:
9951 add_prefix (0xf2);
9952 break;
9953 case PREFIX_0XF3:
9954 if (!is_cpu (&i.tm, CpuPadLock)
9955 || (i.prefix[REP_PREFIX] != 0xf3))
9956 add_prefix (0xf3);
9957 break;
9958 case PREFIX_NONE:
9959 switch (i.opcode_length)
9960 {
9961 case 2:
9962 break;
9963 case 1:
9964 /* Check for pseudo prefixes. */
9965 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9966 break;
9967 as_bad_where (insn_start_frag->fr_file,
9968 insn_start_frag->fr_line,
9969 _("pseudo prefix without instruction"));
9970 return;
9971 default:
9972 abort ();
9973 }
9974 break;
9975 default:
9976 abort ();
9977 }
9978
9979 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
9980 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
9981 R_X86_64_GOTTPOFF relocation so that linker can safely
9982 perform IE->LE optimization. A dummy REX_OPCODE prefix
9983 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
9984 relocation for GDesc -> IE/LE optimization. */
9985 if (x86_elf_abi == X86_64_X32_ABI
9986 && i.operands == 2
9987 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
9988 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
9989 && i.prefix[REX_PREFIX] == 0)
9990 add_prefix (REX_OPCODE);
9991 #endif
9992
9993 /* The prefix bytes. */
9994 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
9995 if (*q)
9996 frag_opcode_byte (*q);
9997 }
9998 else
9999 {
10000 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
10001 if (*q)
10002 switch (j)
10003 {
10004 case SEG_PREFIX:
10005 case ADDR_PREFIX:
10006 frag_opcode_byte (*q);
10007 break;
10008 default:
10009 /* There should be no other prefixes for instructions
10010 with VEX prefix. */
10011 abort ();
10012 }
10013
10014 /* For EVEX instructions, i.vrex should become 0 after
10015 build_evex_prefix. For VEX instructions, the upper 16 registers
10016 aren't available, so VREX should be 0. */
10017 if (i.vrex)
10018 abort ();
10019 /* Now the VEX prefix. */
10020 if (now_seg != absolute_section)
10021 {
10022 p = frag_more (i.vex.length);
10023 for (j = 0; j < i.vex.length; j++)
10024 p[j] = i.vex.bytes[j];
10025 }
10026 else
10027 abs_section_offset += i.vex.length;
10028 }
10029
10030 /* Now the opcode; be careful about word order here! */
10031 j = i.opcode_length;
10032 if (!i.vex.length)
10033 switch (i.tm.opcode_space)
10034 {
10035 case SPACE_BASE:
10036 break;
10037 case SPACE_0F:
10038 ++j;
10039 break;
10040 case SPACE_0F38:
10041 case SPACE_0F3A:
10042 j += 2;
10043 break;
10044 default:
10045 abort ();
10046 }
10047
10048 if (now_seg == absolute_section)
10049 abs_section_offset += j;
10050 else if (j == 1)
10051 {
10052 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
10053 }
10054 else
10055 {
10056 p = frag_more (j);
10057 if (!i.vex.length
10058 && i.tm.opcode_space != SPACE_BASE)
10059 {
10060 *p++ = 0x0f;
10061 if (i.tm.opcode_space != SPACE_0F)
10062 *p++ = i.tm.opcode_space == SPACE_0F38
10063 ? 0x38 : 0x3a;
10064 }
10065
10066 switch (i.opcode_length)
10067 {
10068 case 2:
10069 /* Put out high byte first: can't use md_number_to_chars! */
10070 *p++ = (i.tm.base_opcode >> 8) & 0xff;
10071 /* Fall through. */
10072 case 1:
10073 *p = i.tm.base_opcode & 0xff;
10074 break;
10075 default:
10076 abort ();
10077 break;
10078 }
10079
10080 }
10081
10082 /* Now the modrm byte and sib byte (if present). */
10083 if (i.tm.opcode_modifier.modrm)
10084 {
10085 frag_opcode_byte ((i.rm.regmem << 0)
10086 | (i.rm.reg << 3)
10087 | (i.rm.mode << 6));
10088 /* If i.rm.regmem == ESP (4)
10089 && i.rm.mode != (Register mode)
10090 && not 16 bit
10091 ==> need a SIB byte. */
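/* E.g. `movl (%esp), %eax' is 8b 04 24: ModRM 0x04 has rm == 4
   (the escape), and SIB 0x24 supplies base %esp with no index. */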
10092 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
10093 && i.rm.mode != 3
10094 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
10095 frag_opcode_byte ((i.sib.base << 0)
10096 | (i.sib.index << 3)
10097 | (i.sib.scale << 6));
10098 }
10099
10100 if (i.disp_operands)
10101 output_disp (insn_start_frag, insn_start_off);
10102
10103 if (i.imm_operands)
10104 output_imm (insn_start_frag, insn_start_off);
10105
10106 /*
10107 * frag_now_fix () returning plain abs_section_offset when we're in the
10108 * absolute section, and abs_section_offset not getting updated as data
10109 * gets added to the frag breaks the logic below.
10110 */
10111 if (now_seg != absolute_section)
10112 {
10113 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
10114 if (j > 15)
10115 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
10116 j);
10117 else if (fragP)
10118 {
10119 /* NB: Don't add prefix with GOTPC relocation since
10120 output_disp() above depends on the fixed encoding
10121 length. Can't add prefix with TLS relocation since
10122 it breaks TLS linker optimization. */
10123 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
10124 /* Prefix count on the current instruction. */
10125 unsigned int count = i.vex.length;
10126 unsigned int k;
10127 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
10128 /* REX byte is encoded in VEX/EVEX prefix. */
10129 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
10130 count++;
10131
10132 /* Count prefixes for extended opcode maps. */
10133 if (!i.vex.length)
10134 switch (i.tm.opcode_space)
10135 {
10136 case SPACE_BASE:
10137 break;
10138 case SPACE_0F:
10139 count++;
10140 break;
10141 case SPACE_0F38:
10142 case SPACE_0F3A:
10143 count += 2;
10144 break;
10145 default:
10146 abort ();
10147 }
10148
10149 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
10150 == BRANCH_PREFIX)
10151 {
10152 /* Set the maximum prefix size in BRANCH_PREFIX
10153 frag. */
10154 if (fragP->tc_frag_data.max_bytes > max)
10155 fragP->tc_frag_data.max_bytes = max;
10156 if (fragP->tc_frag_data.max_bytes > count)
10157 fragP->tc_frag_data.max_bytes -= count;
10158 else
10159 fragP->tc_frag_data.max_bytes = 0;
10160 }
10161 else
10162 {
10163 /* Remember the maximum prefix size in FUSED_JCC_PADDING
10164 frag. */
10165 unsigned int max_prefix_size;
10166 if (align_branch_prefix_size > max)
10167 max_prefix_size = max;
10168 else
10169 max_prefix_size = align_branch_prefix_size;
10170 if (max_prefix_size > count)
10171 fragP->tc_frag_data.max_prefix_length
10172 = max_prefix_size - count;
10173 }
10174
10175 /* Use existing segment prefix if possible. Use CS
10176 segment prefix in 64-bit mode. In 32-bit mode, use SS
10177 segment prefix with ESP/EBP base register and use DS
10178 segment prefix without ESP/EBP base register. */
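/* (The chosen bytes are no-op segment overrides: %cs is ignored in
   64-bit mode, and %ss / %ds merely restate the default segment in
   32-bit mode.) */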
10179 if (i.prefix[SEG_PREFIX])
10180 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
10181 else if (flag_code == CODE_64BIT)
10182 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
10183 else if (i.base_reg
10184 && (i.base_reg->reg_num == 4
10185 || i.base_reg->reg_num == 5))
10186 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
10187 else
10188 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
10189 }
10190 }
10191 }
10192
10193 /* NB: This doesn't work with COND_JUMP86 without i386. */
10194 if (align_branch_power
10195 && now_seg != absolute_section
10196 && cpu_arch_flags.bitfield.cpui386)
10197 {
10198 /* Terminate each frag so that we can add prefix and check for
10199 fused jcc. */
10200 frag_wane (frag_now);
10201 frag_new (0);
10202 }
10203
10204 #ifdef DEBUG386
10205 if (flag_debug)
10206 {
10207 pi ("" /*line*/, &i);
10208 }
10209 #endif /* DEBUG386 */
10210 }
10211
10212 /* Return the size of the displacement operand N. */
10213
10214 static int
10215 disp_size (unsigned int n)
10216 {
10217 int size = 4;
10218
10219 if (i.types[n].bitfield.disp64)
10220 size = 8;
10221 else if (i.types[n].bitfield.disp8)
10222 size = 1;
10223 else if (i.types[n].bitfield.disp16)
10224 size = 2;
10225 return size;
10226 }
10227
10228 /* Return the size of the immediate operand N. */
10229
10230 static int
10231 imm_size (unsigned int n)
10232 {
10233 int size = 4;
10234 if (i.types[n].bitfield.imm64)
10235 size = 8;
10236 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
10237 size = 1;
10238 else if (i.types[n].bitfield.imm16)
10239 size = 2;
10240 return size;
10241 }
10242
10243 static void
10244 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10245 {
10246 char *p;
10247 unsigned int n;
10248
10249 for (n = 0; n < i.operands; n++)
10250 {
10251 if (operand_type_check (i.types[n], disp))
10252 {
10253 int size = disp_size (n);
10254
10255 if (now_seg == absolute_section)
10256 abs_section_offset += size;
10257 else if (i.op[n].disps->X_op == O_constant)
10258 {
10259 offsetT val = i.op[n].disps->X_add_number;
10260
10261 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10262 size);
10263 p = frag_more (size);
10264 md_number_to_chars (p, val, size);
10265 }
10266 else
10267 {
10268 enum bfd_reloc_code_real reloc_type;
10269 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10270 bool sign = (flag_code == CODE_64BIT && size == 4
10271 && (!want_disp32 (&i.tm)
10272 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10273 && !i.types[n].bitfield.baseindex)))
10274 || pcrel;
10275 fixS *fixP;
10276
10277 /* We can't have 8 bit displacement here. */
10278 gas_assert (!i.types[n].bitfield.disp8);
10279
10280 /* The PC relative address is computed relative
10281 to the instruction boundary, so if immediate
10282 fields follow, we need to adjust the value. */
10283 if (pcrel && i.imm_operands)
10284 {
10285 unsigned int n1;
10286 int sz = 0;
10287
10288 for (n1 = 0; n1 < i.operands; n1++)
10289 if (operand_type_check (i.types[n1], imm))
10290 {
10291 /* Only one immediate is allowed for PC
10292 relative address, except with .insn. */
10293 gas_assert (sz == 0 || dot_insn ());
10294 sz += imm_size (n1);
10295 }
10296 /* We should find at least one immediate. */
10297 gas_assert (sz != 0);
10298 i.op[n].disps->X_add_number -= sz;
10299 }
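/* E.g. for `testl $0x12345678, sym(%rip)' the four immediate bytes
   sit between the displacement field and the next insn, so 4 is
   subtracted from the addend here. */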
10300
10301 p = frag_more (size);
10302 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10303 if (GOT_symbol
10304 && GOT_symbol == i.op[n].disps->X_add_symbol
10305 && (((reloc_type == BFD_RELOC_32
10306 || reloc_type == BFD_RELOC_X86_64_32S
10307 || (reloc_type == BFD_RELOC_64
10308 && object_64bit))
10309 && (i.op[n].disps->X_op == O_symbol
10310 || (i.op[n].disps->X_op == O_add
10311 && ((symbol_get_value_expression
10312 (i.op[n].disps->X_op_symbol)->X_op)
10313 == O_subtract))))
10314 || reloc_type == BFD_RELOC_32_PCREL))
10315 {
10316 if (!object_64bit)
10317 {
10318 reloc_type = BFD_RELOC_386_GOTPC;
10319 i.has_gotpc_tls_reloc = true;
10320 i.op[n].disps->X_add_number +=
10321 encoding_length (insn_start_frag, insn_start_off, p);
10322 }
10323 else if (reloc_type == BFD_RELOC_64)
10324 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10325 else
10326 /* Don't do the adjustment for x86-64, as there
10327 the pcrel addressing is relative to the _next_
10328 insn, and that is taken care of in other code. */
10329 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10330 }
10331 else if (align_branch_power)
10332 {
10333 switch (reloc_type)
10334 {
10335 case BFD_RELOC_386_TLS_GD:
10336 case BFD_RELOC_386_TLS_LDM:
10337 case BFD_RELOC_386_TLS_IE:
10338 case BFD_RELOC_386_TLS_IE_32:
10339 case BFD_RELOC_386_TLS_GOTIE:
10340 case BFD_RELOC_386_TLS_GOTDESC:
10341 case BFD_RELOC_386_TLS_DESC_CALL:
10342 case BFD_RELOC_X86_64_TLSGD:
10343 case BFD_RELOC_X86_64_TLSLD:
10344 case BFD_RELOC_X86_64_GOTTPOFF:
10345 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10346 case BFD_RELOC_X86_64_TLSDESC_CALL:
10347 i.has_gotpc_tls_reloc = true;
10348 default:
10349 break;
10350 }
10351 }
10352 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10353 size, i.op[n].disps, pcrel,
10354 reloc_type);
10355
10356 if (flag_code == CODE_64BIT && size == 4 && pcrel
10357 && !i.prefix[ADDR_PREFIX])
10358 fixP->fx_signed = 1;
10359
10360 /* Check for "call/jmp *mem", "mov mem, %reg",
10361 "test %reg, mem" and "binop mem, %reg" where binop
10362 is one of adc, add, and, cmp, or, sbb, sub, xor,
10363 for instructions without a data prefix. Always generate
10364 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
10365 if (i.prefix[DATA_PREFIX] == 0
10366 && (generate_relax_relocations
10367 || (!object_64bit
10368 && i.rm.mode == 0
10369 && i.rm.regmem == 5))
10370 && (i.rm.mode == 2
10371 || (i.rm.mode == 0 && i.rm.regmem == 5))
10372 && i.tm.opcode_space == SPACE_BASE
10373 && ((i.operands == 1
10374 && i.tm.base_opcode == 0xff
10375 && (i.rm.reg == 2 || i.rm.reg == 4))
10376 || (i.operands == 2
10377 && (i.tm.base_opcode == 0x8b
10378 || i.tm.base_opcode == 0x85
10379 || (i.tm.base_opcode & ~0x38) == 0x03))))
10380 {
10381 if (object_64bit)
10382 {
10383 fixP->fx_tcbit = i.rex != 0;
10384 if (i.base_reg
10385 && (i.base_reg->reg_num == RegIP))
10386 fixP->fx_tcbit2 = 1;
10387 }
10388 else
10389 fixP->fx_tcbit2 = 1;
10390 }
10391 }
10392 }
10393 }
10394 }
10395
10396 static void
10397 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10398 {
10399 char *p;
10400 unsigned int n;
10401
10402 for (n = 0; n < i.operands; n++)
10403 {
10404 if (operand_type_check (i.types[n], imm))
10405 {
10406 int size = imm_size (n);
10407
10408 if (now_seg == absolute_section)
10409 abs_section_offset += size;
10410 else if (i.op[n].imms->X_op == O_constant)
10411 {
10412 offsetT val;
10413
10414 val = offset_in_range (i.op[n].imms->X_add_number,
10415 size);
10416 p = frag_more (size);
10417 md_number_to_chars (p, val, size);
10418 }
10419 else
10420 {
10421 /* Not absolute_section.
10422 Need a 32-bit fixup (don't support 8bit
10423 non-absolute imms). Try to support other
10424 sizes ... */
10425 enum bfd_reloc_code_real reloc_type;
10426 int sign;
10427
10428 if (i.types[n].bitfield.imm32s
10429 && (i.suffix == QWORD_MNEM_SUFFIX
10430 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)
10431 || (i.prefix[REX_PREFIX] & REX_W)
10432 || dot_insn ()))
10433 sign = 1;
10434 else
10435 sign = 0;
10436
10437 p = frag_more (size);
10438 reloc_type = reloc (size, 0, sign, i.reloc[n]);
10439
10440 /* This is tough to explain. We end up with this one if we
10441 * have operands that look like
10442 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
10443 * obtain the absolute address of the GOT, and it is strongly
10444 * preferable from a performance point of view to avoid using
10445 * a runtime relocation for this. The actual sequence of
10446 * instructions often looks something like:
10447 *
10448 * call .L66
10449 * .L66:
10450 * popl %ebx
10451 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10452 *
10453 * The call and pop essentially return the absolute address
10454 * of the label .L66 and store it in %ebx. The linker itself
10455 * will ultimately change the first operand of the addl so
10456 * that %ebx points to the GOT, but to keep things simple, the
10457 * .o file must have this operand set so that it generates not
10458 * the absolute address of .L66, but the absolute address of
10459 * itself. This allows the linker to simply treat a GOTPC
10460 * relocation as asking for a pcrel offset to the GOT to be
10461 * added in, and the addend of the relocation is stored in the
10462 * operand field for the instruction itself.
10463 *
10464 * Our job here is to fix the operand so that it would add
10465 * the correct offset so that %ebx would point to itself. The
10466 * thing that is tricky is that .-.L66 will point to the
10467 * beginning of the instruction, so we need to further modify
10468 * the operand so that it will point to itself. There are
10469 * other cases where you have something like:
10470 *
10471 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10472 *
10473 * and here no correction would be required. Internally in
10474 * the assembler we treat operands of this form as not being
10475 * pcrel since the '.' is explicitly mentioned, and I wonder
10476 * whether it would simplify matters to do it this way. Who
10477 * knows. In earlier versions of the PIC patches, the
10478 * pcrel_adjust field was used to store the correction, but
10479 * since the expression is not pcrel, I felt it would be
10480 * confusing to do it this way. */
10481
10482 if ((reloc_type == BFD_RELOC_32
10483 || reloc_type == BFD_RELOC_X86_64_32S
10484 || reloc_type == BFD_RELOC_64)
10485 && GOT_symbol
10486 && GOT_symbol == i.op[n].imms->X_add_symbol
10487 && (i.op[n].imms->X_op == O_symbol
10488 || (i.op[n].imms->X_op == O_add
10489 && ((symbol_get_value_expression
10490 (i.op[n].imms->X_op_symbol)->X_op)
10491 == O_subtract))))
10492 {
10493 if (!object_64bit)
10494 reloc_type = BFD_RELOC_386_GOTPC;
10495 else if (size == 4)
10496 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10497 else if (size == 8)
10498 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10499 i.has_gotpc_tls_reloc = true;
10500 i.op[n].imms->X_add_number +=
10501 encoding_length (insn_start_frag, insn_start_off, p);
10502 }
10503 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10504 i.op[n].imms, 0, reloc_type);
10505 }
10506 }
10507 }
10508 }
10509 \f
10510 /* x86_cons_fix_new is called via the expression parsing code when a
10511 reloc is needed. We use this hook to get the correct .got reloc. */
10512 static int cons_sign = -1;
10513
10514 void
10515 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10516 expressionS *exp, bfd_reloc_code_real_type r)
10517 {
10518 r = reloc (len, 0, cons_sign, r);
10519
10520 #ifdef TE_PE
10521 if (exp->X_op == O_secrel)
10522 {
10523 exp->X_op = O_symbol;
10524 r = BFD_RELOC_32_SECREL;
10525 }
10526 else if (exp->X_op == O_secidx)
10527 r = BFD_RELOC_16_SECIDX;
10528 #endif
10529
10530 fix_new_exp (frag, off, len, exp, 0, r);
10531 }
10532
10533 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10534 purpose of the `.dc.a' internal pseudo-op. */
10535
10536 int
10537 x86_address_bytes (void)
10538 {
10539 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10540 return 4;
10541 return stdoutput->arch_info->bits_per_address / 8;
10542 }
10543
10544 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10545 || defined (LEX_AT)) && !defined (TE_PE)
10546 # define lex_got(reloc, adjust, types) NULL
10547 #else
10548 /* Parse operands of the form
10549 <symbol>@GOTOFF+<nnn>
10550 and similar .plt or .got references.
10551
10552 If we find one, set up the correct relocation in RELOC and copy the
10553 input string, minus the `@GOTOFF', into a malloc'd buffer for
10554 parsing by the calling routine. Return this buffer and, if ADJUST
10555 is non-null, set it to the length of the string we removed from the
10556 input line. Otherwise return NULL. */
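/* For example, given `foo@GOTOFF+4' in 32-bit mode, *REL is set to
   BFD_RELOC_386_GOTOFF and the returned buffer holds `foo +4' - the
   reloc token is replaced by a blank so that malformed input like
   `foo@GOTOFF1' still fails to parse. */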
10557 static char *
10558 lex_got (enum bfd_reloc_code_real *rel,
10559 int *adjust,
10560 i386_operand_type *types)
10561 {
10562 /* Some of the relocations depend on the size of what field is to
10563 be relocated. But in our callers i386_immediate and i386_displacement
10564 we don't yet know the operand size (this will be set by insn
10565 matching). Hence we record the word32 relocation here,
10566 and adjust the reloc according to the real size in reloc(). */
10567 static const struct
10568 {
10569 const char *str;
10570 int len;
10571 const enum bfd_reloc_code_real rel[2];
10572 const i386_operand_type types64;
10573 bool need_GOT_symbol;
10574 }
10575 gotrel[] =
10576 {
10577
10578 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
10579 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
10580 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
10581 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
10582 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
10583 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
10584 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
10585 { .imm64 = 1, .disp64 = 1 } }
10586
10587 #ifndef TE_PE
10588 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10589 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
10590 BFD_RELOC_SIZE32 },
10591 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
10592 #endif
10593 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
10594 BFD_RELOC_X86_64_PLTOFF64 },
10595 { .bitfield = { .imm64 = 1 } }, true },
10596 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
10597 BFD_RELOC_X86_64_PLT32 },
10598 OPERAND_TYPE_IMM32_32S_DISP32, false },
10599 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
10600 BFD_RELOC_X86_64_GOTPLT64 },
10601 OPERAND_TYPE_IMM64_DISP64, true },
10602 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10603 BFD_RELOC_X86_64_GOTOFF64 },
10604 OPERAND_TYPE_IMM64_DISP64, true },
10605 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10606 BFD_RELOC_X86_64_GOTPCREL },
10607 OPERAND_TYPE_IMM32_32S_DISP32, true },
10608 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10609 BFD_RELOC_X86_64_TLSGD },
10610 OPERAND_TYPE_IMM32_32S_DISP32, true },
10611 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10612 _dummy_first_bfd_reloc_code_real },
10613 OPERAND_TYPE_NONE, true },
10614 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10615 BFD_RELOC_X86_64_TLSLD },
10616 OPERAND_TYPE_IMM32_32S_DISP32, true },
10617 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10618 BFD_RELOC_X86_64_GOTTPOFF },
10619 OPERAND_TYPE_IMM32_32S_DISP32, true },
10620 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10621 BFD_RELOC_X86_64_TPOFF32 },
10622 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10623 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10624 _dummy_first_bfd_reloc_code_real },
10625 OPERAND_TYPE_NONE, true },
10626 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10627 BFD_RELOC_X86_64_DTPOFF32 },
10628 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10629 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10630 _dummy_first_bfd_reloc_code_real },
10631 OPERAND_TYPE_NONE, true },
10632 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10633 _dummy_first_bfd_reloc_code_real },
10634 OPERAND_TYPE_NONE, true },
10635 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10636 BFD_RELOC_X86_64_GOT32 },
10637 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10638 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10639 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10640 OPERAND_TYPE_IMM32_32S_DISP32, true },
10641 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10642 BFD_RELOC_X86_64_TLSDESC_CALL },
10643 OPERAND_TYPE_IMM32_32S_DISP32, true },
10644 #else /* TE_PE */
10645 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10646 BFD_RELOC_32_SECREL },
10647 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10648 #endif
10649
10650 #undef OPERAND_TYPE_IMM32_32S_DISP32
10651 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
10652 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
10653 #undef OPERAND_TYPE_IMM64_DISP64
10654
10655 };
10656 char *cp;
10657 unsigned int j;
10658
10659 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10660 if (!IS_ELF)
10661 return NULL;
10662 #endif
10663
10664 for (cp = input_line_pointer; *cp != '@'; cp++)
10665 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10666 return NULL;
10667
10668 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10669 {
10670 int len = gotrel[j].len;
10671 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10672 {
10673 if (gotrel[j].rel[object_64bit] != 0)
10674 {
10675 int first, second;
10676 char *tmpbuf, *past_reloc;
10677
10678 *rel = gotrel[j].rel[object_64bit];
10679
10680 if (types)
10681 {
10682 if (flag_code != CODE_64BIT)
10683 {
10684 types->bitfield.imm32 = 1;
10685 types->bitfield.disp32 = 1;
10686 }
10687 else
10688 *types = gotrel[j].types64;
10689 }
10690
10691 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10692 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10693
10694 /* The length of the first part of our input line. */
10695 first = cp - input_line_pointer;
10696
10697 /* The second part goes from after the reloc token until
10698 (and including) an end_of_line char or comma. */
10699 past_reloc = cp + 1 + len;
10700 cp = past_reloc;
10701 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10702 ++cp;
10703 second = cp + 1 - past_reloc;
10704
10705 /* Allocate and copy string. The trailing NUL shouldn't
10706 be necessary, but be safe. */
10707 tmpbuf = XNEWVEC (char, first + second + 2);
10708 memcpy (tmpbuf, input_line_pointer, first);
10709 if (second != 0 && *past_reloc != ' ')
10710 /* Replace the relocation token with ' ', so that
10711 errors like foo@GOTOFF1 will be detected. */
10712 tmpbuf[first++] = ' ';
10713 else
10714 /* Increment length by 1 if the relocation token is
10715 removed. */
10716 len++;
10717 if (adjust)
10718 *adjust = len;
10719 memcpy (tmpbuf + first, past_reloc, second);
10720 tmpbuf[first + second] = '\0';
10721 return tmpbuf;
10722 }
10723
10724 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10725 gotrel[j].str, 1 << (5 + object_64bit));
10726 return NULL;
10727 }
10728 }
10729
10730 /* Might be a symbol version string. Don't as_bad here. */
10731 return NULL;
10732 }
10733 #endif
10734
10735 bfd_reloc_code_real_type
10736 x86_cons (expressionS *exp, int size)
10737 {
10738 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10739
10740 intel_syntax = -intel_syntax;
10741 exp->X_md = 0;
10742 expr_mode = expr_operator_none;
10743
10744 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10745 && !defined (LEX_AT)) \
10746 || defined (TE_PE)
10747 if (size == 4 || (object_64bit && size == 8))
10748 {
10749 /* Handle @GOTOFF and the like in an expression. */
10750 char *save;
10751 char *gotfree_input_line;
10752 int adjust = 0;
10753
10754 save = input_line_pointer;
10755 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10756 if (gotfree_input_line)
10757 input_line_pointer = gotfree_input_line;
10758
10759 expression (exp);
10760
10761 if (gotfree_input_line)
10762 {
10763 /* expression () has merrily parsed up to the end of line,
10764 or a comma - in the wrong buffer. Transfer how far
10765 input_line_pointer has moved to the right buffer. */
10766 input_line_pointer = (save
10767 + (input_line_pointer - gotfree_input_line)
10768 + adjust);
10769 free (gotfree_input_line);
10770 if (exp->X_op == O_constant
10771 || exp->X_op == O_absent
10772 || exp->X_op == O_illegal
10773 || exp->X_op == O_register
10774 || exp->X_op == O_big)
10775 {
10776 char c = *input_line_pointer;
10777 *input_line_pointer = 0;
10778 as_bad (_("missing or invalid expression `%s'"), save);
10779 *input_line_pointer = c;
10780 }
10781 else if ((got_reloc == BFD_RELOC_386_PLT32
10782 || got_reloc == BFD_RELOC_X86_64_PLT32)
10783 && exp->X_op != O_symbol)
10784 {
10785 char c = *input_line_pointer;
10786 *input_line_pointer = 0;
10787 as_bad (_("invalid PLT expression `%s'"), save);
10788 *input_line_pointer = c;
10789 }
10790 }
10791 }
10792 else
10793 #endif
10794 expression (exp);
10795
10796 intel_syntax = -intel_syntax;
10797
10798 if (intel_syntax)
10799 i386_intel_simplify (exp);
10800
10801 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
10802 if (size <= 4 && expr_mode == expr_operator_present
10803 && exp->X_op == O_constant && !object_64bit)
10804 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10805
10806 return got_reloc;
10807 }
10808
10809 static void
10810 signed_cons (int size)
10811 {
10812 if (object_64bit)
10813 cons_sign = 1;
10814 cons (size);
10815 cons_sign = -1;
10816 }
10817
10818 static void
10819 s_insn (int dummy ATTRIBUTE_UNUSED)
10820 {
10821 char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer, *ptr;
10822 char *saved_ilp = find_end_of_line (line, false), saved_char;
10823 const char *end;
10824 unsigned int j;
10825 valueT val;
10826 bool vex = false, xop = false, evex = false;
10827 static const templates tt = { &i.tm, &i.tm + 1 };
10828
10829 init_globals ();
10830
10831 saved_char = *saved_ilp;
10832 *saved_ilp = 0;
10833
10834 end = parse_insn (line, mnemonic, true);
10835 if (end == NULL)
10836 {
10837 bad:
10838 *saved_ilp = saved_char;
10839 ignore_rest_of_line ();
10840 i.tm.mnem_off = 0;
10841 return;
10842 }
10843 line += end - line;
10844
10845 current_templates = &tt;
10846 i.tm.mnem_off = MN__insn;
10847 i.tm.extension_opcode = None;
10848
10849 if (startswith (line, "VEX")
10850 && (line[3] == '.' || is_space_char (line[3])))
10851 {
10852 vex = true;
10853 line += 3;
10854 }
10855 else if (startswith (line, "XOP") && ISDIGIT (line[3]))
10856 {
10857 char *e;
10858 unsigned long n = strtoul (line + 3, &e, 16);
10859
10860 if (e == line + 5 && n >= 0x08 && n <= 0x1f
10861 && (*e == '.' || is_space_char (*e)))
10862 {
10863 xop = true;
10864 /* Arrange for build_vex_prefix() to emit 0x8f. */
10865 i.tm.opcode_space = SPACE_XOP08;
10866 i.insn_opcode_space = n;
10867 line = e;
10868 }
10869 }
10870 else if (startswith (line, "EVEX")
10871 && (line[4] == '.' || is_space_char (line[4])))
10872 {
10873 evex = true;
10874 line += 4;
10875 }
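/* E.g. something like `.insn VEX.66.0F38.W0 0xf7, %ecx, %eax, %ebx'
   (a shlx-like encoding); the specifier parsing below takes the
   VEX./XOP./EVEX. template apart piece by piece. */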
10876
10877 if (vex || xop
10878 ? i.vec_encoding == vex_encoding_evex
10879 : evex
10880 ? i.vec_encoding == vex_encoding_vex
10881 || i.vec_encoding == vex_encoding_vex3
10882 : i.vec_encoding != vex_encoding_default)
10883 {
10884 as_bad (_("pseudo-prefix conflicts with encoding specifier"));
10885 goto bad;
10886 }
10887
10888 if (line > end && i.vec_encoding == vex_encoding_default)
10889 i.vec_encoding = evex ? vex_encoding_evex : vex_encoding_vex;
10890
10891 if (i.vec_encoding != vex_encoding_default)
10892 {
10893 /* Only address size and segment override prefixes are permitted with
10894 VEX/XOP/EVEX encodings. */
10895 const unsigned char *p = i.prefix;
10896
10897 for (j = 0; j < ARRAY_SIZE (i.prefix); ++j, ++p)
10898 {
10899 if (!*p)
10900 continue;
10901
10902 switch (j)
10903 {
10904 case SEG_PREFIX:
10905 case ADDR_PREFIX:
10906 break;
10907 default:
10908 as_bad (_("illegal prefix used with VEX/XOP/EVEX"));
10909 goto bad;
10910 }
10911 }
10912 }
10913
10914 if (line > end && *line == '.')
10915 {
10916 /* Length specifier (VEX.L, XOP.L, EVEX.L'L). */
10917 switch (line[1])
10918 {
10919 case 'L':
10920 switch (line[2])
10921 {
10922 case '0':
10923 if (evex)
10924 i.tm.opcode_modifier.evex = EVEX128;
10925 else
10926 i.tm.opcode_modifier.vex = VEX128;
10927 break;
10928
10929 case '1':
10930 if (evex)
10931 i.tm.opcode_modifier.evex = EVEX256;
10932 else
10933 i.tm.opcode_modifier.vex = VEX256;
10934 break;
10935
10936 case '2':
10937 if (evex)
10938 i.tm.opcode_modifier.evex = EVEX512;
10939 break;
10940
10941 case '3':
10942 if (evex)
10943 i.tm.opcode_modifier.evex = EVEX_L3;
10944 break;
10945
10946 case 'I':
10947 if (line[3] == 'G')
10948 {
10949 if (evex)
10950 i.tm.opcode_modifier.evex = EVEXLIG;
10951 else
10952 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
10953 ++line;
10954 }
10955 break;
10956 }
10957
10958 if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
10959 line += 3;
10960 break;
10961
10962 case '1':
10963 if (line[2] == '2' && line[3] == '8')
10964 {
10965 if (evex)
10966 i.tm.opcode_modifier.evex = EVEX128;
10967 else
10968 i.tm.opcode_modifier.vex = VEX128;
10969 line += 4;
10970 }
10971 break;
10972
10973 case '2':
10974 if (line[2] == '5' && line[3] == '6')
10975 {
10976 if (evex)
10977 i.tm.opcode_modifier.evex = EVEX256;
10978 else
10979 i.tm.opcode_modifier.vex = VEX256;
10980 line += 4;
10981 }
10982 break;
10983
10984 case '5':
10985 if (evex && line[2] == '1' && line[3] == '2')
10986 {
10987 i.tm.opcode_modifier.evex = EVEX512;
10988 line += 4;
10989 }
10990 break;
10991 }
10992 }
10993
10994 if (line > end && *line == '.')
10995 {
10996 /* embedded prefix (VEX.pp, XOP.pp, EVEX.pp). */
10997 switch (line[1])
10998 {
10999 case 'N':
11000 if (line[2] == 'P')
11001 line += 3;
11002 break;
11003
11004 case '6':
11005 if (line[2] == '6')
11006 {
11007 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
11008 line += 3;
11009 }
11010 break;
11011
11012 case 'F': case 'f':
11013 if (line[2] == '3')
11014 {
11015 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
11016 line += 3;
11017 }
11018 else if (line[2] == '2')
11019 {
11020 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
11021 line += 3;
11022 }
11023 break;
11024 }
11025 }
11026
11027 if (line > end && !xop && *line == '.')
11028 {
11029 /* Encoding space (VEX.mmmmm, EVEX.mmmm). */
11030 switch (line[1])
11031 {
11032 case '0':
11033 if (TOUPPER (line[2]) != 'F')
11034 break;
11035 if (line[3] == '.' || is_space_char (line[3]))
11036 {
11037 i.insn_opcode_space = SPACE_0F;
11038 line += 3;
11039 }
11040 else if (line[3] == '3'
11041 && (line[4] == '8' || TOUPPER (line[4]) == 'A')
11042 && (line[5] == '.' || is_space_char (line[5])))
11043 {
11044 i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
11045 line += 5;
11046 }
11047 break;
11048
11049 case 'M':
11050 if (ISDIGIT (line[2]) && line[2] != '0')
11051 {
11052 char *e;
11053 unsigned long n = strtoul (line + 2, &e, 10);
11054
11055 if (n <= (evex ? 15 : 31)
11056 && (*e == '.' || is_space_char (*e)))
11057 {
11058 i.insn_opcode_space = n;
11059 line = e;
11060 }
11061 }
11062 break;
11063 }
11064 }
11065
11066 if (line > end && *line == '.' && line[1] == 'W')
11067 {
11068 /* VEX.W, XOP.W, EVEX.W */
11069 switch (line[2])
11070 {
11071 case '0':
11072 i.tm.opcode_modifier.vexw = VEXW0;
11073 break;
11074
11075 case '1':
11076 i.tm.opcode_modifier.vexw = VEXW1;
11077 break;
11078
11079 case 'I':
11080 if (line[3] == 'G')
11081 {
11082 i.tm.opcode_modifier.vexw = VEXWIG;
11083 ++line;
11084 }
11085 break;
11086 }
11087
11088 if (i.tm.opcode_modifier.vexw)
11089 line += 3;
11090 }
11091
11092 if (line > end && *line && !is_space_char (*line))
11093 {
11094 /* Improve diagnostic a little. */
11095 if (*line == '.' && line[1] && !is_space_char (line[1]))
11096 ++line;
11097 goto done;
11098 }
11099
11100 /* Before processing the opcode expression, find trailing "+r" or
11101 "/<digit>" specifiers. */
11102 for (ptr = line; ; ++ptr)
11103 {
11104 unsigned long n;
11105 char *e;
11106
11107 ptr = strpbrk (ptr, "+/,");
11108 if (ptr == NULL || *ptr == ',')
11109 break;
11110
11111 if (*ptr == '+' && ptr[1] == 'r'
11112 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
11113 {
11114 *ptr = ' ';
11115 ptr[1] = ' ';
11116 i.short_form = true;
11117 break;
11118 }
11119
11120 if (*ptr == '/' && ISDIGIT (ptr[1])
11121 && (n = strtoul (ptr + 1, &e, 8)) < 8
11122 && e == ptr + 2
11123 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
11124 {
11125 *ptr = ' ';
11126 ptr[1] = ' ';
11127 i.tm.extension_opcode = n;
11128 i.tm.opcode_modifier.modrm = 1;
11129 break;
11130 }
11131 }
11132
11133 input_line_pointer = line;
11134 val = get_absolute_expression ();
11135 line = input_line_pointer;
11136
11137 if (i.short_form && (val & 7))
11138 as_warn ("`+r' assumes low three opcode bits to be clear");
11139
11140 for (j = 1; j < sizeof(val); ++j)
11141 if (!(val >> (j * 8)))
11142 break;
11143
11144 /* Trim off a prefix if present. */
11145 if (j > 1 && !vex && !xop && !evex)
11146 {
11147 uint8_t byte = val >> ((j - 1) * 8);
11148
11149 switch (byte)
11150 {
11151 case DATA_PREFIX_OPCODE:
11152 case REPE_PREFIX_OPCODE:
11153 case REPNE_PREFIX_OPCODE:
11154 if (!add_prefix (byte))
11155 goto bad;
11156 val &= ((uint64_t)1 << (--j * 8)) - 1;
11157 break;
11158 }
11159 }
11160
11161 /* Trim off encoding space. */
11162 if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
11163 {
11164 uint8_t byte = val >> ((--j - 1) * 8);
11165
11166 i.insn_opcode_space = SPACE_0F;
11167 switch (byte & -(j > 1))
11168 {
11169 case 0x38:
11170 i.insn_opcode_space = SPACE_0F38;
11171 --j;
11172 break;
11173 case 0x3a:
11174 i.insn_opcode_space = SPACE_0F3A;
11175 --j;
11176 break;
11177 }
11178 i.tm.opcode_space = i.insn_opcode_space;
11179 val &= ((uint64_t)1 << (j * 8)) - 1;
11180 }
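/* E.g. for a legacy-encoded `.insn 0x660f58, ...' (addpd-like): the
   leading 0x66 is split off as a data prefix, the 0x0f escape selects
   SPACE_0F, and the remaining base opcode is 0x58 with an
   opcode_length of 1. */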
11181 if (!i.tm.opcode_space && (vex || evex))
11182 /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
11183 Also avoid hitting abort() there or in build_evex_prefix(). */
11184 i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
11185 : SPACE_0F38;
11186
11187 if (j > 2)
11188 {
11189 as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
11190 goto bad;
11191 }
11192 i.opcode_length = j;
11193
11194 /* Handle operands, if any. */
11195 if (*line == ',')
11196 {
11197 i386_operand_type combined;
11198 expressionS *disp_exp = NULL;
11199 bool changed;
11200
11201 i.memshift = -1;
11202
11203 ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
11204 this_operand = -1;
11205 if (!ptr)
11206 goto bad;
11207 line = ptr;
11208
11209 if (!i.operands)
11210 {
11211 as_bad (_("expecting operand after ','; got nothing"));
11212 goto done;
11213 }
11214
11215 if (i.mem_operands > 1)
11216 {
11217 as_bad (_("too many memory references for `%s'"),
11218 &i386_mnemonics[MN__insn]);
11219 goto done;
11220 }
11221
11222 /* Are we to emit ModR/M encoding? */
11223 if (!i.short_form
11224 && (i.mem_operands
11225 || i.reg_operands > (i.vec_encoding != vex_encoding_default)
11226 || i.tm.extension_opcode != None))
11227 i.tm.opcode_modifier.modrm = 1;
11228
11229 if (!i.tm.opcode_modifier.modrm
11230 && (i.reg_operands
11231 > i.short_form + 0U + (i.vec_encoding != vex_encoding_default)
11232 || i.mem_operands))
11233 {
11234 as_bad (_("too many register/memory operands"));
11235 goto done;
11236 }
11237
11238 /* Enforce certain constraints on operands. */
11239 switch (i.reg_operands + i.mem_operands
11240 + (i.tm.extension_opcode != None))
11241 {
11242 case 0:
11243 if (i.short_form)
11244 {
11245 as_bad (_("too few register/memory operands"));
11246 goto done;
11247 }
11248 /* Fall through. */
11249 case 1:
11250 if (i.tm.opcode_modifier.modrm)
11251 {
11252 as_bad (_("too few register/memory operands"));
11253 goto done;
11254 }
11255 break;
11256
11257 case 2:
11258 break;
11259
11260 case 4:
11261 if (i.imm_operands
11262 && (i.op[0].imms->X_op != O_constant
11263 || !fits_in_imm4 (i.op[0].imms->X_add_number)))
11264 {
11265 as_bad (_("constant doesn't fit in %d bits"), evex ? 3 : 4);
11266 goto done;
11267 }
11268 /* Fall through. */
11269 case 3:
11270 if (i.vec_encoding != vex_encoding_default)
11271 {
11272 i.tm.opcode_modifier.vexvvvv = 1;
11273 break;
11274 }
11275 /* Fall through. */
11276 default:
11277 as_bad (_("too many register/memory operands"));
11278 goto done;
11279 }
11280
11281 /* Bring operands into canonical order (imm, mem, reg). */
11282 do
11283 {
11284 changed = false;
11285
11286 for (j = 1; j < i.operands; ++j)
11287 {
11288 if ((!operand_type_check (i.types[j - 1], imm)
11289 && operand_type_check (i.types[j], imm))
11290 || (i.types[j - 1].bitfield.class != ClassNone
11291 && i.types[j].bitfield.class == ClassNone))
11292 {
11293 swap_2_operands (j - 1, j);
11294 changed = true;
11295 }
11296 }
11297 }
11298 while (changed);
11299
11300 /* For Intel syntax swap the order of register operands. */
11301 if (intel_syntax)
11302 switch (i.reg_operands)
11303 {
11304 case 0:
11305 case 1:
11306 break;
11307
11308 case 4:
11309 swap_2_operands (i.imm_operands + i.mem_operands + 1, i.operands - 2);
11310 /* Fall through. */
11311 case 3:
11312 case 2:
11313 swap_2_operands (i.imm_operands + i.mem_operands, i.operands - 1);
11314 break;
11315
11316 default:
11317 abort ();
11318 }
11319
11320 /* Enforce constraints when using VSIB. */
11321 if (i.index_reg
11322 && (i.index_reg->reg_type.bitfield.xmmword
11323 || i.index_reg->reg_type.bitfield.ymmword
11324 || i.index_reg->reg_type.bitfield.zmmword))
11325 {
11326 if (i.vec_encoding == vex_encoding_default)
11327 {
11328 as_bad (_("VSIB unavailable with legacy encoding"));
11329 goto done;
11330 }
11331
11332 if (i.vec_encoding == vex_encoding_evex
11333 && i.reg_operands > 1)
11334 {
11335 /* We could allow two register operands, encoding the 2nd one in
11336 an 8-bit immediate like for 4-register-operand insns, but that
11337 would require ugly fiddling with process_operands() and/or
11338 build_modrm_byte(). */
11339 as_bad (_("too many register operands with VSIB"));
11340 goto done;
11341 }
11342
11343 i.tm.opcode_modifier.sib = 1;
11344 }
11345
11346 /* Establish operand size encoding. */
11347 operand_type_set (&combined, 0);
11348
11349 for (j = i.imm_operands; j < i.operands; ++j)
11350 {
11351 i.types[j].bitfield.instance = InstanceNone;
11352
11353 if (operand_type_check (i.types[j], disp))
11354 {
11355 i.types[j].bitfield.baseindex = 1;
11356 disp_exp = i.op[j].disps;
11357 }
11358
11359 if (evex && i.types[j].bitfield.baseindex)
11360 {
11361 unsigned int n = i.memshift;
11362
11363 if (i.types[j].bitfield.byte)
11364 n = 0;
11365 else if (i.types[j].bitfield.word)
11366 n = 1;
11367 else if (i.types[j].bitfield.dword)
11368 n = 2;
11369 else if (i.types[j].bitfield.qword)
11370 n = 3;
11371 else if (i.types[j].bitfield.xmmword)
11372 n = 4;
11373 else if (i.types[j].bitfield.ymmword)
11374 n = 5;
11375 else if (i.types[j].bitfield.zmmword)
11376 n = 6;
11377
11378 if (i.memshift < 32 && n != i.memshift)
11379 as_warn ("conflicting memory operand size specifiers");
11380 i.memshift = n;
11381 }
11382
11383 if ((i.broadcast.type || i.broadcast.bytes)
11384 && j == i.broadcast.operand)
11385 continue;
11386
11387 combined = operand_type_or (combined, i.types[j]);
11388 combined.bitfield.class = ClassNone;
11389 }
11390
11391 switch ((i.broadcast.type ? i.broadcast.type : 1)
11392 << (i.memshift < 32 ? i.memshift : 0))
11393 {
11394 case 64: combined.bitfield.zmmword = 1; break;
11395 case 32: combined.bitfield.ymmword = 1; break;
11396 case 16: combined.bitfield.xmmword = 1; break;
11397 case 8: combined.bitfield.qword = 1; break;
11398 case 4: combined.bitfield.dword = 1; break;
11399 }
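/* E.g. a dword operand broadcast via {1to16}: broadcast.type (16)
   << memshift (2) == 64, i.e. zmmword. */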
11400
11401 if (i.vec_encoding == vex_encoding_default)
11402 {
11403 if (flag_code == CODE_64BIT && combined.bitfield.qword)
11404 i.rex |= REX_W;
11405 else if ((flag_code == CODE_16BIT ? combined.bitfield.dword
11406 : combined.bitfield.word)
11407 && !add_prefix (DATA_PREFIX_OPCODE))
11408 goto done;
11409 }
11410 else if (!i.tm.opcode_modifier.vexw)
11411 {
11412 if (flag_code == CODE_64BIT)
11413 {
11414 if (combined.bitfield.qword)
11415 i.tm.opcode_modifier.vexw = VEXW1;
11416 else if (combined.bitfield.dword)
11417 i.tm.opcode_modifier.vexw = VEXW0;
11418 }
11419
11420 if (!i.tm.opcode_modifier.vexw)
11421 i.tm.opcode_modifier.vexw = VEXWIG;
11422 }
11423
11424 if (vex || xop)
11425 {
11426 if (!i.tm.opcode_modifier.vex)
11427 {
11428 if (combined.bitfield.ymmword)
11429 i.tm.opcode_modifier.vex = VEX256;
11430 else if (combined.bitfield.xmmword)
11431 i.tm.opcode_modifier.vex = VEX128;
11432 }
11433 }
11434 else if (evex)
11435 {
11436 if (!i.tm.opcode_modifier.evex)
11437 {
11438 /* Do _not_ consider AVX512VL here. */
11439 if (i.rounding.type != rc_none || combined.bitfield.zmmword)
11440 i.tm.opcode_modifier.evex = EVEX512;
11441 else if (combined.bitfield.ymmword)
11442 i.tm.opcode_modifier.evex = EVEX256;
11443 else if (combined.bitfield.xmmword)
11444 i.tm.opcode_modifier.evex = EVEX128;
11445 }
11446
11447 if (i.memshift >= 32)
11448 {
11449 unsigned int n = 0;
11450
11451 switch (i.tm.opcode_modifier.evex)
11452 {
11453 case EVEX512: n = 64; break;
11454 case EVEX256: n = 32; break;
11455 case EVEX128: n = 16; break;
11456 }
11457
11458 if (i.broadcast.type)
11459 n /= i.broadcast.type;
11460
11461 if (n > 0)
11462 for (i.memshift = 0; !(n & 1); n >>= 1)
11463 ++i.memshift;
11464 else if (disp_exp != NULL && disp_exp->X_op == O_constant
11465 && disp_exp->X_add_number != 0
11466 && i.disp_encoding != disp_encoding_32bit)
11467 {
11468 if (!quiet_warnings)
11469 as_warn ("cannot determine memory operand size");
11470 i.disp_encoding = disp_encoding_32bit;
11471 }
11472 }
11473 }
11474
11475 if (i.memshift >= 32)
11476 i.memshift = 0;
11477 else if (!evex)
11478 i.vec_encoding = vex_encoding_error;
11479
11480 if (i.disp_operands && !optimize_disp (&i.tm))
11481 goto done;
11482
11483 /* Establish size for immediate operands. */
11484 for (j = 0; j < i.imm_operands; ++j)
11485 {
11486 expressionS *expP = i.op[j].imms;
11487
11488 gas_assert (operand_type_check (i.types[j], imm));
11489 operand_type_set (&i.types[j], 0);
11490
11491 if (i.imm_bits[j] > 32)
11492 i.types[j].bitfield.imm64 = 1;
11493 else if (i.imm_bits[j] > 16)
11494 {
11495 if (flag_code == CODE_64BIT && (i.flags[j] & Operand_Signed))
11496 i.types[j].bitfield.imm32s = 1;
11497 else
11498 i.types[j].bitfield.imm32 = 1;
11499 }
11500 else if (i.imm_bits[j] > 8)
11501 i.types[j].bitfield.imm16 = 1;
11502 else if (i.imm_bits[j] > 0)
11503 {
11504 if (i.flags[j] & Operand_Signed)
11505 i.types[j].bitfield.imm8s = 1;
11506 else
11507 i.types[j].bitfield.imm8 = 1;
11508 }
11509 else if (expP->X_op == O_constant)
11510 {
11511 i.types[j] = smallest_imm_type (expP->X_add_number);
11512 i.types[j].bitfield.imm1 = 0;
11513 /* Oddly enough imm_size() checks imm64 first, so the bit needs
11514 zapping since smallest_imm_type() sets it unconditionally. */
11515 if (flag_code != CODE_64BIT)
11516 {
11517 i.types[j].bitfield.imm64 = 0;
11518 i.types[j].bitfield.imm32s = 0;
11519 i.types[j].bitfield.imm32 = 1;
11520 }
11521 else if (i.types[j].bitfield.imm32 || i.types[j].bitfield.imm32s)
11522 i.types[j].bitfield.imm64 = 0;
11523 }
11524 else
11525 /* Non-constant expressions are sized heuristically. */
11526 switch (flag_code)
11527 {
11528 case CODE_64BIT: i.types[j].bitfield.imm32s = 1; break;
11529 case CODE_32BIT: i.types[j].bitfield.imm32 = 1; break;
11530 case CODE_16BIT: i.types[j].bitfield.imm16 = 1; break;
11531 }
11532 }
11533
11534 for (j = 0; j < i.operands; ++j)
11535 i.tm.operand_types[j] = i.types[j];
11536
11537 process_operands ();
11538 }
11539
11540 /* Don't set opcode until after processing operands, to avoid any
11541 potential special casing there. */
11542 i.tm.base_opcode |= val;
11543
11544 if (i.vec_encoding == vex_encoding_error
11545 || (i.vec_encoding != vex_encoding_evex
11546 ? i.broadcast.type || i.broadcast.bytes
11547 || i.rounding.type != rc_none
11548 || i.mask.reg
11549 : (i.broadcast.type || i.broadcast.bytes)
11550 && i.rounding.type != rc_none))
11551 {
11552 as_bad (_("conflicting .insn operands"));
11553 goto done;
11554 }
11555
11556 if (vex || xop)
11557 {
11558 if (!i.tm.opcode_modifier.vex)
11559 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
11560
11561 build_vex_prefix (NULL);
11562 i.rex &= REX_OPCODE;
11563 }
11564 else if (evex)
11565 {
11566 if (!i.tm.opcode_modifier.evex)
11567 i.tm.opcode_modifier.evex = EVEXLIG;
11568
11569 build_evex_prefix ();
11570 i.rex &= REX_OPCODE;
11571 }
11572 else if (i.rex != 0)
11573 add_prefix (REX_OPCODE | i.rex);
11574
11575 output_insn ();
11576
11577 done:
11578 *saved_ilp = saved_char;
11579 input_line_pointer = line;
11580
11581 demand_empty_rest_of_line ();
11582
11583 /* Make sure dot_insn() won't yield "true" anymore. */
11584 i.tm.mnem_off = 0;
11585 }
11586
11587 #ifdef TE_PE
11588 static void
11589 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
11590 {
11591 expressionS exp;
11592
11593 do
11594 {
11595 expression (&exp);
11596 if (exp.X_op == O_symbol)
11597 exp.X_op = O_secrel;
11598
11599 emit_expr (&exp, 4);
11600 }
11601 while (*input_line_pointer++ == ',');
11602
11603 input_line_pointer--;
11604 demand_empty_rest_of_line ();
11605 }
11606
11607 static void
11608 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
11609 {
11610 expressionS exp;
11611
11612 do
11613 {
11614 expression (&exp);
11615 if (exp.X_op == O_symbol)
11616 exp.X_op = O_secidx;
11617
11618 emit_expr (&exp, 2);
11619 }
11620 while (*input_line_pointer++ == ',');
11621
11622 input_line_pointer--;
11623 demand_empty_rest_of_line ();
11624 }
11625 #endif
11626
11627 /* Handle Rounding Control / SAE specifiers. */
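/* I.e. the `{rn-sae}', `{rd-sae}', `{ru-sae}', `{rz-sae}' and `{sae}'
   tokens, as in e.g. `vaddpd {rn-sae}, %zmm1, %zmm2, %zmm3'. */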
11628
11629 static char *
11630 RC_SAE_specifier (const char *pstr)
11631 {
11632 unsigned int j;
11633
11634 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11635 {
11636 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11637 {
11638 if (i.rounding.type != rc_none)
11639 {
11640 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
11641 return NULL;
11642 }
11643
11644 i.rounding.type = RC_NamesTable[j].type;
11645
11646 return (char *)(pstr + RC_NamesTable[j].len);
11647 }
11648 }
11649
11650 return NULL;
11651 }
11652
11653 /* Handle Vector operations. */
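/* These are the trailing '{...}' groups of an operand in AVX512-style
syntax, e.g. a masked and zeroed destination "%zmm0{%k1}{z}" or a
broadcast memory source "(%rax){1to8}"; each group is parsed in turn
below. */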
11654
11655 static char *
11656 check_VecOperations (char *op_string)
11657 {
11658 const reg_entry *mask;
11659 const char *saved;
11660 char *end_op;
11661
11662 while (*op_string)
11663 {
11664 saved = op_string;
11665 if (*op_string == '{')
11666 {
11667 op_string++;
11668
11669 /* Check broadcasts. */
11670 if (startswith (op_string, "1to"))
11671 {
11672 unsigned int bcst_type;
11673
11674 if (i.broadcast.type)
11675 goto duplicated_vec_op;
11676
11677 op_string += 3;
11678 if (*op_string == '8')
11679 bcst_type = 8;
11680 else if (*op_string == '4')
11681 bcst_type = 4;
11682 else if (*op_string == '2')
11683 bcst_type = 2;
11684 else if (*op_string == '1'
11685 && *(op_string+1) == '6')
11686 {
11687 bcst_type = 16;
11688 op_string++;
11689 }
11690 else if (*op_string == '3'
11691 && *(op_string+1) == '2')
11692 {
11693 bcst_type = 32;
11694 op_string++;
11695 }
11696 else
11697 {
11698 as_bad (_("Unsupported broadcast: `%s'"), saved);
11699 return NULL;
11700 }
11701 op_string++;
11702
11703 i.broadcast.type = bcst_type;
11704 i.broadcast.operand = this_operand;
11705
11706 /* For .insn a data size specifier may be appended. */
11707 if (dot_insn () && *op_string == ':')
11708 goto dot_insn_modifier;
11709 }
11710 /* Check .insn special cases. */
11711 else if (dot_insn () && *op_string == ':')
11712 {
11713 dot_insn_modifier:
11714 switch (op_string[1])
11715 {
11716 unsigned long n;
11717
11718 case 'd':
11719 if (i.memshift < 32)
11720 goto duplicated_vec_op;
11721
11722 n = strtoul (op_string + 2, &end_op, 0);
11723 if (n)
11724 for (i.memshift = 0; !(n & 1); n >>= 1)
11725 ++i.memshift;
11726 if (i.memshift < 32 && n == 1)
11727 op_string = end_op;
11728 break;
11729
11730 case 's': case 'u':
11731 /* This isn't really a "vector" operation, but a sign/size
11732 specifier for immediate operands of .insn. Note that AT&T
11733 syntax handles the same thing in i386_immediate(). */
11734 if (!intel_syntax)
11735 break;
11736
11737 if (i.imm_bits[this_operand])
11738 goto duplicated_vec_op;
11739
11740 n = strtoul (op_string + 2, &end_op, 0);
11741 if (n && n <= (flag_code == CODE_64BIT ? 64 : 32))
11742 {
11743 i.imm_bits[this_operand] = n;
11744 if (op_string[1] == 's')
11745 i.flags[this_operand] |= Operand_Signed;
11746 op_string = end_op;
11747 }
11748 break;
11749 }
11750 }
11751 /* Check masking operation. */
11752 else if ((mask = parse_register (op_string, &end_op)) != NULL)
11753 {
11754 if (mask == &bad_reg)
11755 return NULL;
11756
11757 /* k0 can't be used for write mask. */
11758 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
11759 {
11760 as_bad (_("`%s%s' can't be used for write mask"),
11761 register_prefix, mask->reg_name);
11762 return NULL;
11763 }
11764
11765 if (!i.mask.reg)
11766 {
11767 i.mask.reg = mask;
11768 i.mask.operand = this_operand;
11769 }
11770 else if (i.mask.reg->reg_num)
11771 goto duplicated_vec_op;
11772 else
11773 {
11774 i.mask.reg = mask;
11775
11776 /* Only "{z}" is allowed here. No need to check
11777 zeroing mask explicitly. */
11778 if (i.mask.operand != (unsigned int) this_operand)
11779 {
11780 as_bad (_("invalid write mask `%s'"), saved);
11781 return NULL;
11782 }
11783 }
11784
11785 op_string = end_op;
11786 }
11787 /* Check zeroing-flag for masking operation. */
11788 else if (*op_string == 'z')
11789 {
11790 if (!i.mask.reg)
11791 {
11792 i.mask.reg = reg_k0;
11793 i.mask.zeroing = 1;
11794 i.mask.operand = this_operand;
11795 }
11796 else
11797 {
11798 if (i.mask.zeroing)
11799 {
11800 duplicated_vec_op:
11801 as_bad (_("duplicated `%s'"), saved);
11802 return NULL;
11803 }
11804
11805 i.mask.zeroing = 1;
11806
11807 /* Only "{%k}" is allowed here. No need to check mask
11808 register explicitly. */
11809 if (i.mask.operand != (unsigned int) this_operand)
11810 {
11811 as_bad (_("invalid zeroing-masking `%s'"),
11812 saved);
11813 return NULL;
11814 }
11815 }
11816
11817 op_string++;
11818 }
11819 else if (intel_syntax
11820 && (op_string = RC_SAE_specifier (op_string)) != NULL)
11821 i.rounding.modifier = true;
11822 else
11823 goto unknown_vec_op;
11824
11825 if (*op_string != '}')
11826 {
11827 as_bad (_("missing `}' in `%s'"), saved);
11828 return NULL;
11829 }
11830 op_string++;
11831
11832 /* Strip whitespace since the addition of pseudo prefixes
11833 changed how the scrubber treats '{'. */
11834 if (is_space_char (*op_string))
11835 ++op_string;
11836
11837 continue;
11838 }
11839 unknown_vec_op:
11840 /* We don't know this one. */
11841 as_bad (_("unknown vector operation: `%s'"), saved);
11842 return NULL;
11843 }
11844
11845 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
11846 {
11847 as_bad (_("zeroing-masking only allowed with write mask"));
11848 return NULL;
11849 }
11850
11851 return op_string;
11852 }
11853
11854 static int
11855 i386_immediate (char *imm_start)
11856 {
11857 char *save_input_line_pointer;
11858 char *gotfree_input_line;
11859 segT exp_seg = 0;
11860 expressionS *exp;
11861 i386_operand_type types;
11862
11863 operand_type_set (&types, ~0);
11864
11865 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
11866 {
11867 as_bad (_("at most %d immediate operands are allowed"),
11868 MAX_IMMEDIATE_OPERANDS);
11869 return 0;
11870 }
11871
11872 exp = &im_expressions[i.imm_operands++];
11873 i.op[this_operand].imms = exp;
11874
11875 if (is_space_char (*imm_start))
11876 ++imm_start;
11877
11878 save_input_line_pointer = input_line_pointer;
11879 input_line_pointer = imm_start;
11880
11881 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11882 if (gotfree_input_line)
11883 input_line_pointer = gotfree_input_line;
11884
11885 expr_mode = expr_operator_none;
11886 exp_seg = expression (exp);
11887
11888 /* For .insn immediates there may be a size specifier. */
11889 if (dot_insn () && *input_line_pointer == '{' && input_line_pointer[1] == ':'
11890 && (input_line_pointer[2] == 's' || input_line_pointer[2] == 'u'))
11891 {
11892 char *e;
11893 unsigned long n = strtoul (input_line_pointer + 3, &e, 0);
11894
11895 if (*e == '}' && n && n <= (flag_code == CODE_64BIT ? 64 : 32))
11896 {
11897 i.imm_bits[this_operand] = n;
11898 if (input_line_pointer[2] == 's')
11899 i.flags[this_operand] |= Operand_Signed;
11900 input_line_pointer = e + 1;
11901 }
11902 }
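/* E.g. (illustrative) a "$1234{:u16}" operand records a 16-bit unsigned
immediate here, while "{:s<N>}" marks it signed instead. The specifier
is consumed so that the junk check below doesn't trip over it. */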
11903
11904 SKIP_WHITESPACE ();
11905 if (*input_line_pointer)
11906 as_bad (_("junk `%s' after expression"), input_line_pointer);
11907
11908 input_line_pointer = save_input_line_pointer;
11909 if (gotfree_input_line)
11910 {
11911 free (gotfree_input_line);
11912
11913 if (exp->X_op == O_constant)
11914 exp->X_op = O_illegal;
11915 }
11916
11917 if (exp_seg == reg_section)
11918 {
11919 as_bad (_("illegal immediate register operand %s"), imm_start);
11920 return 0;
11921 }
11922
11923 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
11924 }
11925
11926 static int
11927 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11928 i386_operand_type types, const char *imm_start)
11929 {
11930 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
11931 {
11932 if (imm_start)
11933 as_bad (_("missing or invalid immediate expression `%s'"),
11934 imm_start);
11935 return 0;
11936 }
11937 else if (exp->X_op == O_constant)
11938 {
11939 /* Size it properly later. */
11940 i.types[this_operand].bitfield.imm64 = 1;
11941
11942 /* If not 64bit, sign/zero extend val, to account for wraparound
11943 when !BFD64. */
11944 if (expr_mode == expr_operator_present
11945 && flag_code != CODE_64BIT && !object_64bit)
11946 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11947 }
11948 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11949 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11950 && exp_seg != absolute_section
11951 && exp_seg != text_section
11952 && exp_seg != data_section
11953 && exp_seg != bss_section
11954 && exp_seg != undefined_section
11955 && !bfd_is_com_section (exp_seg))
11956 {
11957 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11958 return 0;
11959 }
11960 #endif
11961 else
11962 {
11963 /* This is an address. The size of the address will be
11964 determined later, depending on destination register,
11965 suffix, or the default for the section. */
11966 i.types[this_operand].bitfield.imm8 = 1;
11967 i.types[this_operand].bitfield.imm16 = 1;
11968 i.types[this_operand].bitfield.imm32 = 1;
11969 i.types[this_operand].bitfield.imm32s = 1;
11970 i.types[this_operand].bitfield.imm64 = 1;
11971 i.types[this_operand] = operand_type_and (i.types[this_operand],
11972 types);
11973 }
11974
11975 return 1;
11976 }
11977
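/* Parse the scale factor of an AT&T memory operand, e.g. the "4" in
"-4(%ebp,%esi,4)"; only 1, 2, 4, and 8 can be encoded, as a log2 value
in the SIB byte. */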
11978 static char *
11979 i386_scale (char *scale)
11980 {
11981 offsetT val;
11982 char *save = input_line_pointer;
11983
11984 input_line_pointer = scale;
11985 val = get_absolute_expression ();
11986
11987 switch (val)
11988 {
11989 case 1:
11990 i.log2_scale_factor = 0;
11991 break;
11992 case 2:
11993 i.log2_scale_factor = 1;
11994 break;
11995 case 4:
11996 i.log2_scale_factor = 2;
11997 break;
11998 case 8:
11999 i.log2_scale_factor = 3;
12000 break;
12001 default:
12002 {
12003 char sep = *input_line_pointer;
12004
12005 *input_line_pointer = '\0';
12006 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
12007 scale);
12008 *input_line_pointer = sep;
12009 input_line_pointer = save;
12010 return NULL;
12011 }
12012 }
12013 if (i.log2_scale_factor != 0 && i.index_reg == 0)
12014 {
12015 as_warn (_("scale factor of %d without an index register"),
12016 1 << i.log2_scale_factor);
12017 i.log2_scale_factor = 0;
12018 }
12019 scale = input_line_pointer;
12020 input_line_pointer = save;
12021 return scale;
12022 }
12023
12024 static int
12025 i386_displacement (char *disp_start, char *disp_end)
12026 {
12027 expressionS *exp;
12028 segT exp_seg = 0;
12029 char *save_input_line_pointer;
12030 char *gotfree_input_line;
12031 int override;
12032 i386_operand_type bigdisp, types = anydisp;
12033 int ret;
12034
12035 if (i.disp_operands == MAX_MEMORY_OPERANDS)
12036 {
12037 as_bad (_("at most %d displacement operands are allowed"),
12038 MAX_MEMORY_OPERANDS);
12039 return 0;
12040 }
12041
12042 operand_type_set (&bigdisp, 0);
12043 if (i.jumpabsolute
12044 || i.types[this_operand].bitfield.baseindex
12045 || (current_templates->start->opcode_modifier.jump != JUMP
12046 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
12047 {
12048 i386_addressing_mode ();
12049 override = (i.prefix[ADDR_PREFIX] != 0);
12050 if (flag_code == CODE_64BIT)
12051 {
12052 bigdisp.bitfield.disp32 = 1;
12053 if (!override)
12054 bigdisp.bitfield.disp64 = 1;
12055 }
12056 else if ((flag_code == CODE_16BIT) ^ override)
12057 bigdisp.bitfield.disp16 = 1;
12058 else
12059 bigdisp.bitfield.disp32 = 1;
12060 }
12061 else
12062 {
12063 /* For PC-relative branches, the width of the displacement may be
12064 dependent upon data size, but is never dependent upon address size.
12065 Also make sure to not unintentionally match against a non-PC-relative
12066 branch template. */
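/* For instance, in 16-bit code an explicit data size override (or an
"l" suffix) widens a branch displacement to 32 bits, whereas an
address size override has no such effect. */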
12067 static templates aux_templates;
12068 const insn_template *t = current_templates->start;
12069 bool has_intel64 = false;
12070
12071 aux_templates.start = t;
12072 while (++t < current_templates->end)
12073 {
12074 if (t->opcode_modifier.jump
12075 != current_templates->start->opcode_modifier.jump)
12076 break;
12077 if ((t->opcode_modifier.isa64 >= INTEL64))
12078 has_intel64 = true;
12079 }
12080 if (t < current_templates->end)
12081 {
12082 aux_templates.end = t;
12083 current_templates = &aux_templates;
12084 }
12085
12086 override = (i.prefix[DATA_PREFIX] != 0);
12087 if (flag_code == CODE_64BIT)
12088 {
12089 if ((override || i.suffix == WORD_MNEM_SUFFIX)
12090 && (!intel64 || !has_intel64))
12091 bigdisp.bitfield.disp16 = 1;
12092 else
12093 bigdisp.bitfield.disp32 = 1;
12094 }
12095 else
12096 {
12097 if (!override)
12098 override = (i.suffix == (flag_code != CODE_16BIT
12099 ? WORD_MNEM_SUFFIX
12100 : LONG_MNEM_SUFFIX));
12101 bigdisp.bitfield.disp32 = 1;
12102 if ((flag_code == CODE_16BIT) ^ override)
12103 {
12104 bigdisp.bitfield.disp32 = 0;
12105 bigdisp.bitfield.disp16 = 1;
12106 }
12107 }
12108 }
12109 i.types[this_operand] = operand_type_or (i.types[this_operand],
12110 bigdisp);
12111
12112 exp = &disp_expressions[i.disp_operands];
12113 i.op[this_operand].disps = exp;
12114 i.disp_operands++;
12115 save_input_line_pointer = input_line_pointer;
12116 input_line_pointer = disp_start;
12117 END_STRING_AND_SAVE (disp_end);
12118
12119 #ifndef GCC_ASM_O_HACK
12120 #define GCC_ASM_O_HACK 0
12121 #endif
12122 #if GCC_ASM_O_HACK
12123 END_STRING_AND_SAVE (disp_end + 1);
12124 if (i.types[this_operand].bitfield.baseindex
12125 && displacement_string_end[-1] == '+')
12126 {
12127 /* This hack is to avoid a warning when using the "o"
12128 constraint within gcc asm statements.
12129 For instance:
12130
12131 #define _set_tssldt_desc(n,addr,limit,type) \
12132 __asm__ __volatile__ ( \
12133 "movw %w2,%0\n\t" \
12134 "movw %w1,2+%0\n\t" \
12135 "rorl $16,%1\n\t" \
12136 "movb %b1,4+%0\n\t" \
12137 "movb %4,5+%0\n\t" \
12138 "movb $0,6+%0\n\t" \
12139 "movb %h1,7+%0\n\t" \
12140 "rorl $16,%1" \
12141 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
12142
12143 This works great except that the output assembler ends
12144 up looking a bit weird if it turns out that there is
12145 no offset. You end up producing code that looks like:
12146
12147 #APP
12148 movw $235,(%eax)
12149 movw %dx,2+(%eax)
12150 rorl $16,%edx
12151 movb %dl,4+(%eax)
12152 movb $137,5+(%eax)
12153 movb $0,6+(%eax)
12154 movb %dh,7+(%eax)
12155 rorl $16,%edx
12156 #NO_APP
12157
12158 So here we provide the missing zero. */
12159
12160 *displacement_string_end = '0';
12161 }
12162 #endif
12163 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
12164 if (gotfree_input_line)
12165 input_line_pointer = gotfree_input_line;
12166
12167 expr_mode = expr_operator_none;
12168 exp_seg = expression (exp);
12169
12170 SKIP_WHITESPACE ();
12171 if (*input_line_pointer)
12172 as_bad (_("junk `%s' after expression"), input_line_pointer);
12173 #if GCC_ASM_O_HACK
12174 RESTORE_END_STRING (disp_end + 1);
12175 #endif
12176 input_line_pointer = save_input_line_pointer;
12177 if (gotfree_input_line)
12178 {
12179 free (gotfree_input_line);
12180
12181 if (exp->X_op == O_constant || exp->X_op == O_register)
12182 exp->X_op = O_illegal;
12183 }
12184
12185 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
12186
12187 RESTORE_END_STRING (disp_end);
12188
12189 return ret;
12190 }
12191
12192 static int
12193 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
12194 i386_operand_type types, const char *disp_start)
12195 {
12196 int ret = 1;
12197
12198 /* We do this to make sure that the section symbol is in
12199 the symbol table. We will ultimately change the relocation
12200 to be relative to the beginning of the section. */
12201 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
12202 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
12203 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
12204 {
12205 if (exp->X_op != O_symbol)
12206 goto inv_disp;
12207
12208 if (S_IS_LOCAL (exp->X_add_symbol)
12209 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
12210 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
12211 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
12212 exp->X_op = O_subtract;
12213 exp->X_op_symbol = GOT_symbol;
12214 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
12215 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
12216 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
12217 i.reloc[this_operand] = BFD_RELOC_64;
12218 else
12219 i.reloc[this_operand] = BFD_RELOC_32;
12220 }
12221
12222 else if (exp->X_op == O_absent
12223 || exp->X_op == O_illegal
12224 || exp->X_op == O_big)
12225 {
12226 inv_disp:
12227 as_bad (_("missing or invalid displacement expression `%s'"),
12228 disp_start);
12229 ret = 0;
12230 }
12231
12232 else if (exp->X_op == O_constant)
12233 {
12234 /* Sizing gets taken care of by optimize_disp().
12235
12236 If not 64bit, sign/zero extend val, to account for wraparound
12237 when !BFD64. */
12238 if (expr_mode == expr_operator_present
12239 && flag_code != CODE_64BIT && !object_64bit)
12240 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
12241 }
12242
12243 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
12244 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
12245 && exp_seg != absolute_section
12246 && exp_seg != text_section
12247 && exp_seg != data_section
12248 && exp_seg != bss_section
12249 && exp_seg != undefined_section
12250 && !bfd_is_com_section (exp_seg))
12251 {
12252 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
12253 ret = 0;
12254 }
12255 #endif
12256
12257 else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
12258 i.types[this_operand].bitfield.disp8 = 1;
12259
12260 /* Check if this is a displacement only operand. */
12261 if (!i.types[this_operand].bitfield.baseindex)
12262 i.types[this_operand] =
12263 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
12264 operand_type_and (i.types[this_operand], types));
12265
12266 return ret;
12267 }
12268
12269 /* Return the active addressing mode, taking address override and
12270 registers forming the address into consideration. Update the
12271 address override prefix if necessary. */
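/* For example, "mov (%ebx), %ax" assembled in 16-bit mode needs 32-bit
addressing: the 0x67 address-size prefix is inferred below from the
32-bit base register. */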
12272
12273 static enum flag_code
12274 i386_addressing_mode (void)
12275 {
12276 enum flag_code addr_mode;
12277
12278 if (i.prefix[ADDR_PREFIX])
12279 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
12280 else if (flag_code == CODE_16BIT
12281 && is_cpu (current_templates->start, CpuMPX)
12282 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
12283 from md_assemble() by "is not a valid base/index expression"
12284 when there is a base and/or index. */
12285 && !i.types[this_operand].bitfield.baseindex)
12286 {
12287 /* MPX insn memory operands with neither base nor index must be forced
12288 to use 32-bit addressing in 16-bit mode. */
12289 addr_mode = CODE_32BIT;
12290 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
12291 ++i.prefixes;
12292 gas_assert (!i.types[this_operand].bitfield.disp16);
12293 gas_assert (!i.types[this_operand].bitfield.disp32);
12294 }
12295 else
12296 {
12297 addr_mode = flag_code;
12298
12299 #if INFER_ADDR_PREFIX
12300 if (i.mem_operands == 0)
12301 {
12302 /* Infer address prefix from the first memory operand. */
12303 const reg_entry *addr_reg = i.base_reg;
12304
12305 if (addr_reg == NULL)
12306 addr_reg = i.index_reg;
12307
12308 if (addr_reg)
12309 {
12310 if (addr_reg->reg_type.bitfield.dword)
12311 addr_mode = CODE_32BIT;
12312 else if (flag_code != CODE_64BIT
12313 && addr_reg->reg_type.bitfield.word)
12314 addr_mode = CODE_16BIT;
12315
12316 if (addr_mode != flag_code)
12317 {
12318 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
12319 i.prefixes += 1;
12320 /* Change the size of any displacement too. At most one
12321 of Disp16 or Disp32 is set.
12322 FIXME. There doesn't seem to be any real need for
12323 separate Disp16 and Disp32 flags. The same goes for
12324 Imm16 and Imm32. Removing them would probably clean
12325 up the code quite a lot. */
12326 if (flag_code != CODE_64BIT
12327 && (i.types[this_operand].bitfield.disp16
12328 || i.types[this_operand].bitfield.disp32))
12329 {
12330 static const i386_operand_type disp16_32 = {
12331 .bitfield = { .disp16 = 1, .disp32 = 1 }
12332 };
12333
12334 i.types[this_operand]
12335 = operand_type_xor (i.types[this_operand], disp16_32);
12336 }
12337 }
12338 }
12339 }
12340 #endif
12341 }
12342
12343 return addr_mode;
12344 }
12345
12346 /* Make sure the memory operand we've been dealt is valid.
12347 Return 1 on success, 0 on a failure. */
12348
12349 static int
12350 i386_index_check (const char *operand_string)
12351 {
12352 const char *kind = "base/index";
12353 enum flag_code addr_mode = i386_addressing_mode ();
12354 const insn_template *t = current_templates->end - 1;
12355
12356 if (t->opcode_modifier.isstring)
12357 {
12358 /* Memory operands of string insns are special in that they only allow
12359 a single register (rDI, rSI, or rBX) as their memory address. */
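/* E.g. "movs" implicitly reads from (%rsi)/(%esi)/(%si) and writes to
%es:(%rdi) etc., while "xlat" uses (%rbx); any other base, index, or
displacement is diagnosed below. */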
12360 const reg_entry *expected_reg;
12361 static const char di_si[][2][4] =
12362 {
12363 { "esi", "edi" },
12364 { "si", "di" },
12365 { "rsi", "rdi" }
12366 };
12367 static const char bx[][4] = { "ebx", "bx", "rbx" };
12368
12369 kind = "string address";
12370
12371 if (t->opcode_modifier.prefixok == PrefixRep)
12372 {
12373 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
12374 int op = 0;
12375
12376 if (!t->operand_types[0].bitfield.baseindex
12377 || ((!i.mem_operands != !intel_syntax)
12378 && t->operand_types[1].bitfield.baseindex))
12379 op = 1;
12380 expected_reg
12381 = (const reg_entry *) str_hash_find (reg_hash,
12382 di_si[addr_mode][op == es_op]);
12383 }
12384 else
12385 expected_reg
12386 = (const reg_entry *) str_hash_find (reg_hash, bx[addr_mode]);
12387
12388 if (i.base_reg != expected_reg
12389 || i.index_reg
12390 || operand_type_check (i.types[this_operand], disp))
12391 {
12392 /* The second memory operand must have the same size as
12393 the first one. */
12394 if (i.mem_operands
12395 && i.base_reg
12396 && !((addr_mode == CODE_64BIT
12397 && i.base_reg->reg_type.bitfield.qword)
12398 || (addr_mode == CODE_32BIT
12399 ? i.base_reg->reg_type.bitfield.dword
12400 : i.base_reg->reg_type.bitfield.word)))
12401 goto bad_address;
12402
12403 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
12404 operand_string,
12405 intel_syntax ? '[' : '(',
12406 register_prefix,
12407 expected_reg->reg_name,
12408 intel_syntax ? ']' : ')');
12409 return 1;
12410 }
12411 else
12412 return 1;
12413
12414 bad_address:
12415 as_bad (_("`%s' is not a valid %s expression"),
12416 operand_string, kind);
12417 return 0;
12418 }
12419 else
12420 {
12421 t = current_templates->start;
12422
12423 if (addr_mode != CODE_16BIT)
12424 {
12425 /* 32-bit/64-bit checks. */
12426 if (i.disp_encoding == disp_encoding_16bit)
12427 {
12428 bad_disp:
12429 as_bad (_("invalid `%s' prefix"),
12430 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
12431 return 0;
12432 }
12433
12434 if ((i.base_reg
12435 && ((addr_mode == CODE_64BIT
12436 ? !i.base_reg->reg_type.bitfield.qword
12437 : !i.base_reg->reg_type.bitfield.dword)
12438 || (i.index_reg && i.base_reg->reg_num == RegIP)
12439 || i.base_reg->reg_num == RegIZ))
12440 || (i.index_reg
12441 && !i.index_reg->reg_type.bitfield.xmmword
12442 && !i.index_reg->reg_type.bitfield.ymmword
12443 && !i.index_reg->reg_type.bitfield.zmmword
12444 && ((addr_mode == CODE_64BIT
12445 ? !i.index_reg->reg_type.bitfield.qword
12446 : !i.index_reg->reg_type.bitfield.dword)
12447 || !i.index_reg->reg_type.bitfield.baseindex)))
12448 goto bad_address;
12449
12450 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
12451 if (t->mnem_off == MN_bndmk
12452 || t->mnem_off == MN_bndldx
12453 || t->mnem_off == MN_bndstx
12454 || t->opcode_modifier.sib == SIBMEM)
12455 {
12456 /* They cannot use RIP-relative addressing. */
12457 if (i.base_reg && i.base_reg->reg_num == RegIP)
12458 {
12459 as_bad (_("`%s' cannot be used here"), operand_string);
12460 return 0;
12461 }
12462
12463 /* bndldx and bndstx ignore their scale factor. */
12464 if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
12465 && i.log2_scale_factor)
12466 as_warn (_("register scaling is being ignored here"));
12467 }
12468 }
12469 else
12470 {
12471 /* 16-bit checks. */
12472 if (i.disp_encoding == disp_encoding_32bit)
12473 goto bad_disp;
12474
12475 if ((i.base_reg
12476 && (!i.base_reg->reg_type.bitfield.word
12477 || !i.base_reg->reg_type.bitfield.baseindex))
12478 || (i.index_reg
12479 && (!i.index_reg->reg_type.bitfield.word
12480 || !i.index_reg->reg_type.bitfield.baseindex
12481 || !(i.base_reg
12482 && i.base_reg->reg_num < 6
12483 && i.index_reg->reg_num >= 6
12484 && i.log2_scale_factor == 0))))
12485 goto bad_address;
12486 }
12487 }
12488 return 1;
12489 }
12490
12491 /* Handle vector immediates. */
12492
12493 static int
12494 RC_SAE_immediate (const char *imm_start)
12495 {
12496 const char *pstr = imm_start;
12497
12498 if (*pstr != '{')
12499 return 0;
12500
12501 pstr = RC_SAE_specifier (pstr + 1);
12502 if (pstr == NULL)
12503 return 0;
12504
12505 if (*pstr++ != '}')
12506 {
12507 as_bad (_("Missing '}': '%s'"), imm_start);
12508 return 0;
12509 }
12510 /* RC/SAE immediate string should contain nothing more. */
12511 if (*pstr != 0)
12512 {
12513 as_bad (_("Junk after '}': '%s'"), imm_start);
12514 return 0;
12515 }
12516
12517 /* Internally this doesn't count as an operand. */
12518 --i.operands;
12519
12520 return 1;
12521 }
12522
12523 static INLINE bool starts_memory_operand (char c)
12524 {
12525 return ISDIGIT (c)
12526 || is_name_beginner (c)
12527 || strchr ("([\"+-!~", c);
12528 }
12529
12530 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
12531 on error. */
12532
12533 static int
12534 i386_att_operand (char *operand_string)
12535 {
12536 const reg_entry *r;
12537 char *end_op;
12538 char *op_string = operand_string;
12539
12540 if (is_space_char (*op_string))
12541 ++op_string;
12542
12543 /* We check for an absolute prefix (differentiating,
12544 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
12545 if (*op_string == ABSOLUTE_PREFIX
12546 && current_templates->start->opcode_modifier.jump)
12547 {
12548 ++op_string;
12549 if (is_space_char (*op_string))
12550 ++op_string;
12551 i.jumpabsolute = true;
12552 }
12553
12554 /* Check if operand is a register. */
12555 if ((r = parse_register (op_string, &end_op)) != NULL)
12556 {
12557 i386_operand_type temp;
12558
12559 if (r == &bad_reg)
12560 return 0;
12561
12562 /* Check for a segment override by searching for ':' after a
12563 segment register. */
12564 op_string = end_op;
12565 if (is_space_char (*op_string))
12566 ++op_string;
12567 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
12568 {
12569 i.seg[i.mem_operands] = r;
12570
12571 /* Skip the ':' and whitespace. */
12572 ++op_string;
12573 if (is_space_char (*op_string))
12574 ++op_string;
12575
12576 /* Handle case of %es:*foo. */
12577 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
12578 && current_templates->start->opcode_modifier.jump)
12579 {
12580 ++op_string;
12581 if (is_space_char (*op_string))
12582 ++op_string;
12583 i.jumpabsolute = true;
12584 }
12585
12586 if (!starts_memory_operand (*op_string))
12587 {
12588 as_bad (_("bad memory operand `%s'"), op_string);
12589 return 0;
12590 }
12591 goto do_memory_reference;
12592 }
12593
12594 /* Handle vector operations. */
12595 if (*op_string == '{')
12596 {
12597 op_string = check_VecOperations (op_string);
12598 if (op_string == NULL)
12599 return 0;
12600 }
12601
12602 if (*op_string)
12603 {
12604 as_bad (_("junk `%s' after register"), op_string);
12605 return 0;
12606 }
12607
12608 /* Reject pseudo registers for .insn. */
12609 if (dot_insn () && r->reg_type.bitfield.class == ClassNone)
12610 {
12611 as_bad (_("`%s%s' cannot be used here"),
12612 register_prefix, r->reg_name);
12613 return 0;
12614 }
12615
12616 temp = r->reg_type;
12617 temp.bitfield.baseindex = 0;
12618 i.types[this_operand] = operand_type_or (i.types[this_operand],
12619 temp);
12620 i.types[this_operand].bitfield.unspecified = 0;
12621 i.op[this_operand].regs = r;
12622 i.reg_operands++;
12623
12624 /* A GPR may follow an RC or SAE immediate only if a (vector) register
12625 operand was also present earlier on. */
12626 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
12627 && i.reg_operands == 1)
12628 {
12629 unsigned int j;
12630
12631 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
12632 if (i.rounding.type == RC_NamesTable[j].type)
12633 break;
12634 as_bad (_("`%s': misplaced `{%s}'"),
12635 insn_name (current_templates->start), RC_NamesTable[j].name);
12636 return 0;
12637 }
12638 }
12639 else if (*op_string == REGISTER_PREFIX)
12640 {
12641 as_bad (_("bad register name `%s'"), op_string);
12642 return 0;
12643 }
12644 else if (*op_string == IMMEDIATE_PREFIX)
12645 {
12646 ++op_string;
12647 if (i.jumpabsolute)
12648 {
12649 as_bad (_("immediate operand illegal with absolute jump"));
12650 return 0;
12651 }
12652 if (!i386_immediate (op_string))
12653 return 0;
12654 if (i.rounding.type != rc_none)
12655 {
12656 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
12657 insn_name (current_templates->start));
12658 return 0;
12659 }
12660 }
12661 else if (RC_SAE_immediate (operand_string))
12662 {
12663 /* If it is a RC or SAE immediate, do the necessary placement check:
12664 Only another immediate or a GPR may precede it. */
12665 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
12666 || (i.reg_operands == 1
12667 && i.op[0].regs->reg_type.bitfield.class != Reg))
12668 {
12669 as_bad (_("`%s': misplaced `%s'"),
12670 insn_name (current_templates->start), operand_string);
12671 return 0;
12672 }
12673 }
12674 else if (starts_memory_operand (*op_string))
12675 {
12676 /* This is a memory reference of some sort. */
12677 char *base_string;
12678
12679 /* Start and end of displacement string expression (if found). */
12680 char *displacement_string_start;
12681 char *displacement_string_end;
12682
12683 do_memory_reference:
12684 /* Check for base index form. We detect the base index form by
12685 looking for an ')' at the end of the operand, searching
12686 for the '(' matching it, and finding a REGISTER_PREFIX or ','
12687 after the '('. */
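/* E.g. in "-4(%ebp,%esi,2)" the displacement is "-4", the base
register %ebp, the index %esi, and the scale factor 2. */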
12688 base_string = op_string + strlen (op_string);
12689
12690 /* Handle vector operations. */
12691 --base_string;
12692 if (is_space_char (*base_string))
12693 --base_string;
12694
12695 if (*base_string == '}')
12696 {
12697 char *vop_start = NULL;
12698
12699 while (base_string-- > op_string)
12700 {
12701 if (*base_string == '"')
12702 break;
12703 if (*base_string != '{')
12704 continue;
12705
12706 vop_start = base_string;
12707
12708 --base_string;
12709 if (is_space_char (*base_string))
12710 --base_string;
12711
12712 if (*base_string != '}')
12713 break;
12714
12715 vop_start = NULL;
12716 }
12717
12718 if (!vop_start)
12719 {
12720 as_bad (_("unbalanced figure braces"));
12721 return 0;
12722 }
12723
12724 if (check_VecOperations (vop_start) == NULL)
12725 return 0;
12726 }
12727
12728 /* If we only have a displacement, set-up for it to be parsed later. */
12729 displacement_string_start = op_string;
12730 displacement_string_end = base_string + 1;
12731
12732 if (*base_string == ')')
12733 {
12734 char *temp_string;
12735 unsigned int parens_not_balanced = 0;
12736 bool in_quotes = false;
12737
12738 /* We've already checked that the number of left & right ()'s are
12739 equal, and that there's a matching set of double quotes. */
12740 end_op = base_string;
12741 for (temp_string = op_string; temp_string < end_op; temp_string++)
12742 {
12743 if (*temp_string == '\\' && temp_string[1] == '"')
12744 ++temp_string;
12745 else if (*temp_string == '"')
12746 in_quotes = !in_quotes;
12747 else if (!in_quotes)
12748 {
12749 if (*temp_string == '(' && !parens_not_balanced++)
12750 base_string = temp_string;
12751 if (*temp_string == ')')
12752 --parens_not_balanced;
12753 }
12754 }
12755
12756 temp_string = base_string;
12757
12758 /* Skip past '(' and whitespace. */
12759 gas_assert (*base_string == '(');
12760 ++base_string;
12761 if (is_space_char (*base_string))
12762 ++base_string;
12763
12764 if (*base_string == ','
12765 || ((i.base_reg = parse_register (base_string, &end_op))
12766 != NULL))
12767 {
12768 displacement_string_end = temp_string;
12769
12770 i.types[this_operand].bitfield.baseindex = 1;
12771
12772 if (i.base_reg)
12773 {
12774 if (i.base_reg == &bad_reg)
12775 return 0;
12776 base_string = end_op;
12777 if (is_space_char (*base_string))
12778 ++base_string;
12779 }
12780
12781 /* There may be an index reg or scale factor here. */
12782 if (*base_string == ',')
12783 {
12784 ++base_string;
12785 if (is_space_char (*base_string))
12786 ++base_string;
12787
12788 if ((i.index_reg = parse_register (base_string, &end_op))
12789 != NULL)
12790 {
12791 if (i.index_reg == &bad_reg)
12792 return 0;
12793 base_string = end_op;
12794 if (is_space_char (*base_string))
12795 ++base_string;
12796 if (*base_string == ',')
12797 {
12798 ++base_string;
12799 if (is_space_char (*base_string))
12800 ++base_string;
12801 }
12802 else if (*base_string != ')')
12803 {
12804 as_bad (_("expecting `,' or `)' "
12805 "after index register in `%s'"),
12806 operand_string);
12807 return 0;
12808 }
12809 }
12810 else if (*base_string == REGISTER_PREFIX)
12811 {
12812 end_op = strchr (base_string, ',');
12813 if (end_op)
12814 *end_op = '\0';
12815 as_bad (_("bad register name `%s'"), base_string);
12816 return 0;
12817 }
12818
12819 /* Check for scale factor. */
12820 if (*base_string != ')')
12821 {
12822 char *end_scale = i386_scale (base_string);
12823
12824 if (!end_scale)
12825 return 0;
12826
12827 base_string = end_scale;
12828 if (is_space_char (*base_string))
12829 ++base_string;
12830 if (*base_string != ')')
12831 {
12832 as_bad (_("expecting `)' "
12833 "after scale factor in `%s'"),
12834 operand_string);
12835 return 0;
12836 }
12837 }
12838 else if (!i.index_reg)
12839 {
12840 as_bad (_("expecting index register or scale factor "
12841 "after `,'; got '%c'"),
12842 *base_string);
12843 return 0;
12844 }
12845 }
12846 else if (*base_string != ')')
12847 {
12848 as_bad (_("expecting `,' or `)' "
12849 "after base register in `%s'"),
12850 operand_string);
12851 return 0;
12852 }
12853 }
12854 else if (*base_string == REGISTER_PREFIX)
12855 {
12856 end_op = strchr (base_string, ',');
12857 if (end_op)
12858 *end_op = '\0';
12859 as_bad (_("bad register name `%s'"), base_string);
12860 return 0;
12861 }
12862 }
12863
12864 /* If there's an expression beginning the operand, parse it,
12865 assuming displacement_string_start and
12866 displacement_string_end are meaningful. */
12867 if (displacement_string_start != displacement_string_end)
12868 {
12869 if (!i386_displacement (displacement_string_start,
12870 displacement_string_end))
12871 return 0;
12872 }
12873
12874 /* Special case for (%dx) while doing input/output op. */
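/* E.g. "inb (%dx), %al": (%dx) names the I/O port in %dx, not a
memory location, so the operand is treated as the plain register. */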
12875 if (i.base_reg
12876 && i.base_reg->reg_type.bitfield.instance == RegD
12877 && i.base_reg->reg_type.bitfield.word
12878 && i.index_reg == 0
12879 && i.log2_scale_factor == 0
12880 && i.seg[i.mem_operands] == 0
12881 && !operand_type_check (i.types[this_operand], disp))
12882 {
12883 i.types[this_operand] = i.base_reg->reg_type;
12884 i.input_output_operand = true;
12885 return 1;
12886 }
12887
12888 if (i386_index_check (operand_string) == 0)
12889 return 0;
12890 i.flags[this_operand] |= Operand_Mem;
12891 i.mem_operands++;
12892 }
12893 else
12894 {
12895 /* It's not a memory operand; argh! */
12896 as_bad (_("invalid char %s beginning operand %d `%s'"),
12897 output_invalid (*op_string),
12898 this_operand + 1,
12899 op_string);
12900 return 0;
12901 }
12902 return 1; /* Normal return. */
12903 }
12904 \f
12905 /* Calculate the maximum variable size (i.e., excluding fr_fix)
12906 that an rs_machine_dependent frag may reach. */
12907
12908 unsigned int
12909 i386_frag_max_var (fragS *frag)
12910 {
12911 /* The only relaxable frags are for jumps.
12912 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
12913 gas_assert (frag->fr_type == rs_machine_dependent);
12914 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
12915 }
12916
12917 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12918 static int
12919 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
12920 {
12921 /* STT_GNU_IFUNC symbol must go through PLT. */
12922 if ((symbol_get_bfdsym (fr_symbol)->flags
12923 & BSF_GNU_INDIRECT_FUNCTION) != 0)
12924 return 0;
12925
12926 if (!S_IS_EXTERNAL (fr_symbol))
12927 /* Symbol may be weak or local. */
12928 return !S_IS_WEAK (fr_symbol);
12929
12930 /* Global symbols with non-default visibility can't be preempted. */
12931 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
12932 return 1;
12933
12934 if (fr_var != NO_RELOC)
12935 switch ((enum bfd_reloc_code_real) fr_var)
12936 {
12937 case BFD_RELOC_386_PLT32:
12938 case BFD_RELOC_X86_64_PLT32:
12939 /* Symbol with PLT relocation may be preempted. */
12940 return 0;
12941 default:
12942 abort ();
12943 }
12944
12945 /* Global symbols with default visibility in a shared library may be
12946 preempted by another definition. */
12947 return !shared;
12948 }
12949 #endif
12950
12951 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
12952 Note: this also works for Skylake and Cascade Lake.
12953 ---------------------------------------------------------------------
12954 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
12955 | ------ | ----------- | ------- | -------- |
12956 | Jo | N | N | Y |
12957 | Jno | N | N | Y |
12958 | Jc/Jb | Y | N | Y |
12959 | Jae/Jnb | Y | N | Y |
12960 | Je/Jz | Y | Y | Y |
12961 | Jne/Jnz | Y | Y | Y |
12962 | Jna/Jbe | Y | N | Y |
12963 | Ja/Jnbe | Y | N | Y |
12964 | Js | N | N | Y |
12965 | Jns | N | N | Y |
12966 | Jp/Jpe | N | N | Y |
12967 | Jnp/Jpo | N | N | Y |
12968 | Jl/Jnge | Y | Y | Y |
12969 | Jge/Jnl | Y | Y | Y |
12970 | Jle/Jng | Y | Y | Y |
12971 | Jg/Jnle | Y | Y | Y |
12972 --------------------------------------------------------------------- */
12973 static int
12974 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12975 {
12976 if (mf_cmp == mf_cmp_alu_cmp)
12977 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
12978 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
12979 if (mf_cmp == mf_cmp_incdec)
12980 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
12981 || mf_jcc == mf_jcc_jle);
12982 if (mf_cmp == mf_cmp_test_and)
12983 return 1;
12984 return 0;
12985 }
12986
12987 /* Return the next non-empty frag. */
12988
12989 static fragS *
12990 i386_next_non_empty_frag (fragS *fragP)
12991 {
12992 /* There may be a frag with a ".fill 0" when there is no room in
12993 the current frag for frag_grow in output_insn. */
12994 for (fragP = fragP->fr_next;
12995 (fragP != NULL
12996 && fragP->fr_type == rs_fill
12997 && fragP->fr_fix == 0);
12998 fragP = fragP->fr_next)
12999 ;
13000 return fragP;
13001 }
13002
13003 /* Return the next jcc frag after BRANCH_PADDING. */
13004
13005 static fragS *
13006 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
13007 {
13008 fragS *branch_fragP;
13009 if (!pad_fragP)
13010 return NULL;
13011
13012 if (pad_fragP->fr_type == rs_machine_dependent
13013 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
13014 == BRANCH_PADDING))
13015 {
13016 branch_fragP = i386_next_non_empty_frag (pad_fragP);
13017 if (branch_fragP->fr_type != rs_machine_dependent)
13018 return NULL;
13019 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
13020 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
13021 pad_fragP->tc_frag_data.mf_type))
13022 return branch_fragP;
13023 }
13024
13025 return NULL;
13026 }
13027
13028 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
13029
13030 static void
13031 i386_classify_machine_dependent_frag (fragS *fragP)
13032 {
13033 fragS *cmp_fragP;
13034 fragS *pad_fragP;
13035 fragS *branch_fragP;
13036 fragS *next_fragP;
13037 unsigned int max_prefix_length;
13038
13039 if (fragP->tc_frag_data.classified)
13040 return;
13041
13042 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
13043 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
13044 for (next_fragP = fragP;
13045 next_fragP != NULL;
13046 next_fragP = next_fragP->fr_next)
13047 {
13048 next_fragP->tc_frag_data.classified = 1;
13049 if (next_fragP->fr_type == rs_machine_dependent)
13050 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
13051 {
13052 case BRANCH_PADDING:
13053 /* The BRANCH_PADDING frag must be followed by a branch
13054 frag. */
13055 branch_fragP = i386_next_non_empty_frag (next_fragP);
13056 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
13057 break;
13058 case FUSED_JCC_PADDING:
13059 /* Check if this is a fused jcc:
13060 FUSED_JCC_PADDING
13061 CMP like instruction
13062 BRANCH_PADDING
13063 COND_JUMP
13064 */
13065 cmp_fragP = i386_next_non_empty_frag (next_fragP);
13066 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
13067 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
13068 if (branch_fragP)
13069 {
13070 /* The BRANCH_PADDING frag is merged with the
13071 FUSED_JCC_PADDING frag. */
13072 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
13073 /* CMP like instruction size. */
13074 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
13075 frag_wane (pad_fragP);
13076 /* Skip to branch_fragP. */
13077 next_fragP = branch_fragP;
13078 }
13079 else if (next_fragP->tc_frag_data.max_prefix_length)
13080 {
13081 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
13082 a fused jcc. */
13083 next_fragP->fr_subtype
13084 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
13085 next_fragP->tc_frag_data.max_bytes
13086 = next_fragP->tc_frag_data.max_prefix_length;
13087 /* This will be updated in the BRANCH_PREFIX scan. */
13088 next_fragP->tc_frag_data.max_prefix_length = 0;
13089 }
13090 else
13091 frag_wane (next_fragP);
13092 break;
13093 }
13094 }
13095
13096 /* Stop if there is no BRANCH_PREFIX. */
13097 if (!align_branch_prefix_size)
13098 return;
13099
13100 /* Scan for BRANCH_PREFIX. */
13101 for (; fragP != NULL; fragP = fragP->fr_next)
13102 {
13103 if (fragP->fr_type != rs_machine_dependent
13104 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
13105 != BRANCH_PREFIX))
13106 continue;
13107
13108 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
13109 FUSED_JCC_PADDING. */
13110 max_prefix_length = 0;
13111 for (next_fragP = fragP;
13112 next_fragP != NULL;
13113 next_fragP = next_fragP->fr_next)
13114 {
13115 if (next_fragP->fr_type == rs_fill)
13116 /* Skip rs_fill frags. */
13117 continue;
13118 else if (next_fragP->fr_type != rs_machine_dependent)
13119 /* Stop for all other frags. */
13120 break;
13121
13122 /* rs_machine_dependent frags. */
13123 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13124 == BRANCH_PREFIX)
13125 {
13126 /* Count BRANCH_PREFIX frags. */
13127 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
13128 {
13129 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
13130 frag_wane (next_fragP);
13131 }
13132 else
13133 max_prefix_length
13134 += next_fragP->tc_frag_data.max_bytes;
13135 }
13136 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13137 == BRANCH_PADDING)
13138 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13139 == FUSED_JCC_PADDING))
13140 {
13141 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
13142 fragP->tc_frag_data.u.padding_fragP = next_fragP;
13143 break;
13144 }
13145 else
13146 /* Stop for other rs_machine_dependent frags. */
13147 break;
13148 }
13149
13150 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
13151
13152 /* Skip to the next frag. */
13153 fragP = next_fragP;
13154 }
13155 }
13156
13157 /* Compute padding size for
13158
13159 FUSED_JCC_PADDING
13160 CMP like instruction
13161 BRANCH_PADDING
13162 COND_JUMP/UNCOND_JUMP
13163
13164 or
13165
13166 BRANCH_PADDING
13167 COND_JUMP/UNCOND_JUMP
13168 */
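/* E.g. with a 32-byte alignment boundary (align_branch_power == 5), a
5-byte branch starting at offset 30 within the window would cross the
boundary, so 2 padding bytes are emitted ahead of it. */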
13169
13170 static int
13171 i386_branch_padding_size (fragS *fragP, offsetT address)
13172 {
13173 unsigned int offset, size, padding_size;
13174 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
13175
13176 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
13177 if (!address)
13178 address = fragP->fr_address;
13179 address += fragP->fr_fix;
13180
13181 /* CMP-like instruction size. */
13182 size = fragP->tc_frag_data.cmp_size;
13183
13184 /* The base size of the branch frag. */
13185 size += branch_fragP->fr_fix;
13186
13187 /* Add opcode and displacement bytes for the rs_machine_dependent
13188 branch frag. */
13189 if (branch_fragP->fr_type == rs_machine_dependent)
13190 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
13191
13192 /* Check if branch is within boundary and doesn't end at the last
13193 byte. */
13194 offset = address & ((1U << align_branch_power) - 1);
13195 if ((offset + size) >= (1U << align_branch_power))
13196 /* Padding needed to avoid crossing boundary. */
13197 padding_size = (1U << align_branch_power) - offset;
13198 else
13199 /* No padding needed. */
13200 padding_size = 0;
13201
13202 /* The return value may be saved in tc_frag_data.length which is
13203 an unsigned byte. */
13204 if (!fits_in_unsigned_byte (padding_size))
13205 abort ();
13206
13207 return padding_size;
13208 }
13209
13210 /* i386_generic_table_relax_frag()
13211
13212 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
13213 grow/shrink padding to align branch frags. Hand others to
13214 relax_frag(). */
13215
13216 long
13217 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
13218 {
13219 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
13220 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
13221 {
13222 long padding_size = i386_branch_padding_size (fragP, 0);
13223 long grow = padding_size - fragP->tc_frag_data.length;
13224
13225 /* When the BRANCH_PREFIX frag is used, the computed address
13226 must match the actual address and there should be no padding. */
13227 if (fragP->tc_frag_data.padding_address
13228 && (fragP->tc_frag_data.padding_address != fragP->fr_address
13229 || padding_size))
13230 abort ();
13231
13232 /* Update the padding size. */
13233 if (grow)
13234 fragP->tc_frag_data.length = padding_size;
13235
13236 return grow;
13237 }
13238 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
13239 {
13240 fragS *padding_fragP, *next_fragP;
13241 long padding_size, left_size, last_size;
13242
13243 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
13244 if (!padding_fragP)
13245 /* Use the padding set by the leading BRANCH_PREFIX frag. */
13246 return (fragP->tc_frag_data.length
13247 - fragP->tc_frag_data.last_length);
13248
13249 /* Compute the relative address of the padding frag the very first
13250 time through, when the BRANCH_PREFIX frag sizes are all zero. */
13251 if (!fragP->tc_frag_data.padding_address)
13252 fragP->tc_frag_data.padding_address
13253 = padding_fragP->fr_address - (fragP->fr_address - stretch);
13254
13255 /* First update the last length from the previous iteration. */
13256 left_size = fragP->tc_frag_data.prefix_length;
13257 for (next_fragP = fragP;
13258 next_fragP != padding_fragP;
13259 next_fragP = next_fragP->fr_next)
13260 if (next_fragP->fr_type == rs_machine_dependent
13261 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13262 == BRANCH_PREFIX))
13263 {
13264 if (left_size)
13265 {
13266 int max = next_fragP->tc_frag_data.max_bytes;
13267 if (max)
13268 {
13269 int size;
13270 if (max > left_size)
13271 size = left_size;
13272 else
13273 size = max;
13274 left_size -= size;
13275 next_fragP->tc_frag_data.last_length = size;
13276 }
13277 }
13278 else
13279 next_fragP->tc_frag_data.last_length = 0;
13280 }
13281
13282 /* Check the padding size for the padding frag. */
13283 padding_size = i386_branch_padding_size
13284 (padding_fragP, (fragP->fr_address
13285 + fragP->tc_frag_data.padding_address));
13286
13287 last_size = fragP->tc_frag_data.prefix_length;
13288 /* Check if there is a change from the last iteration. */
13289 if (padding_size == last_size)
13290 {
13291 /* Update the expected address of the padding frag. */
13292 padding_fragP->tc_frag_data.padding_address
13293 = (fragP->fr_address + padding_size
13294 + fragP->tc_frag_data.padding_address);
13295 return 0;
13296 }
13297
13298 if (padding_size > fragP->tc_frag_data.max_prefix_length)
13299 {
13300 /* No padding if there is not sufficient room. Clear the
13301 expected address of the padding frag. */
13302 padding_fragP->tc_frag_data.padding_address = 0;
13303 padding_size = 0;
13304 }
13305 else
13306 /* Store the expected address of the padding frag. */
13307 padding_fragP->tc_frag_data.padding_address
13308 = (fragP->fr_address + padding_size
13309 + fragP->tc_frag_data.padding_address);
13310
13311 fragP->tc_frag_data.prefix_length = padding_size;
13312
13313 /* Update the length for the current iteration. */
13314 left_size = padding_size;
13315 for (next_fragP = fragP;
13316 next_fragP != padding_fragP;
13317 next_fragP = next_fragP->fr_next)
13318 if (next_fragP->fr_type == rs_machine_dependent
13319 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13320 == BRANCH_PREFIX))
13321 {
13322 if (left_size)
13323 {
13324 int max = next_fragP->tc_frag_data.max_bytes;
13325 if (max)
13326 {
13327 int size;
13328 if (max > left_size)
13329 size = left_size;
13330 else
13331 size = max;
13332 left_size -= size;
13333 next_fragP->tc_frag_data.length = size;
13334 }
13335 }
13336 else
13337 next_fragP->tc_frag_data.length = 0;
13338 }
13339
13340 return (fragP->tc_frag_data.length
13341 - fragP->tc_frag_data.last_length);
13342 }
13343 return relax_frag (segment, fragP, stretch);
13344 }
13345
13346 /* md_estimate_size_before_relax()
13347
13348 Called just before relax() for rs_machine_dependent frags. The x86
13349 assembler uses these frags to handle variable size jump
13350 instructions.
13351
13352 Any symbol that is now undefined will not become defined.
13353 Return the correct fr_subtype in the frag.
13354 Return the initial "guess for variable size of frag" to caller.
13355 The guess is actually the growth beyond the fixed part. Whatever
13356 we do to grow the fixed or variable part contributes to our
13357 returned value. */
13358
13359 int
13360 md_estimate_size_before_relax (fragS *fragP, segT segment)
13361 {
13362 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
13363 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
13364 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
13365 {
13366 i386_classify_machine_dependent_frag (fragP);
13367 return fragP->tc_frag_data.length;
13368 }
13369
13370 /* We've already got fragP->fr_subtype right; all we have to do is
13371 check for un-relaxable symbols. On an ELF system, we can't relax
13372 an externally visible symbol, because it may be overridden by a
13373 shared library. */
13374 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
13375 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13376 || (IS_ELF
13377 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
13378 fragP->fr_var))
13379 #endif
13380 #if defined (OBJ_COFF) && defined (TE_PE)
13381 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
13382 && S_IS_WEAK (fragP->fr_symbol))
13383 #endif
13384 )
13385 {
13386 /* Symbol is undefined in this segment, or we need to keep a
13387 reloc so that weak symbols can be overridden. */
13388 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
13389 enum bfd_reloc_code_real reloc_type;
13390 unsigned char *opcode;
13391 int old_fr_fix;
13392 fixS *fixP = NULL;
13393
13394 if (fragP->fr_var != NO_RELOC)
13395 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
13396 else if (size == 2)
13397 reloc_type = BFD_RELOC_16_PCREL;
13398 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13399 else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
13400 && need_plt32_p (fragP->fr_symbol))
13401 reloc_type = BFD_RELOC_X86_64_PLT32;
13402 #endif
13403 else
13404 reloc_type = BFD_RELOC_32_PCREL;
13405
13406 old_fr_fix = fragP->fr_fix;
13407 opcode = (unsigned char *) fragP->fr_opcode;
13408
13409 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
13410 {
13411 case UNCOND_JUMP:
13412 /* Make jmp (0xeb) a (d)word displacement jump. */
13413 opcode[0] = 0xe9;
13414 fragP->fr_fix += size;
13415 fixP = fix_new (fragP, old_fr_fix, size,
13416 fragP->fr_symbol,
13417 fragP->fr_offset, 1,
13418 reloc_type);
13419 break;
13420
13421 case COND_JUMP86:
13422 if (size == 2
13423 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
13424 {
13425 /* Negate the condition, and branch past an
13426 unconditional jump. */
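/* E.g. 16-bit "je label" (74 cb) becomes "jne .+5" (75 03)
followed by "jmp label" (e9 cw). */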
13427 opcode[0] ^= 1;
13428 opcode[1] = 3;
13429 /* Insert an unconditional jump. */
13430 opcode[2] = 0xe9;
13431 /* We added two extra opcode bytes, and have a two byte
13432 offset. */
13433 fragP->fr_fix += 2 + 2;
13434 fix_new (fragP, old_fr_fix + 2, 2,
13435 fragP->fr_symbol,
13436 fragP->fr_offset, 1,
13437 reloc_type);
13438 break;
13439 }
13440 /* Fall through. */
13441
13442 case COND_JUMP:
13443 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
13444 {
13445 fragP->fr_fix += 1;
13446 fixP = fix_new (fragP, old_fr_fix, 1,
13447 fragP->fr_symbol,
13448 fragP->fr_offset, 1,
13449 BFD_RELOC_8_PCREL);
13450 fixP->fx_signed = 1;
13451 break;
13452 }
13453
13454 /* This changes the byte-displacement jump 0x7N
13455 to the (d)word-displacement jump 0x0f,0x8N. */
13456 opcode[1] = opcode[0] + 0x10;
13457 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13458 /* We've added an opcode byte. */
13459 fragP->fr_fix += 1 + size;
13460 fixP = fix_new (fragP, old_fr_fix + 1, size,
13461 fragP->fr_symbol,
13462 fragP->fr_offset, 1,
13463 reloc_type);
13464 break;
13465
13466 default:
13467 BAD_CASE (fragP->fr_subtype);
13468 break;
13469 }
13470
13471 /* All jumps handled here are signed, but don't unconditionally use a
13472 signed limit check for 32 and 16 bit jumps as we want to allow wrap
13473 around at 4G (outside of 64-bit mode) and 64k. */
13474 if (size == 4 && flag_code == CODE_64BIT)
13475 fixP->fx_signed = 1;
13476
13477 frag_wane (fragP);
13478 return fragP->fr_fix - old_fr_fix;
13479 }
13480
13481 /* Guess size depending on current relax state. Initially the relax
13482 state will correspond to a short jump and we return 1, because
13483 the variable part of the frag (the branch offset) is one byte
13484 long. However, we can relax a section more than once and in that
13485 case we must either set fr_subtype back to the unrelaxed state,
13486 or return the value for the appropriate branch. */
13487 return md_relax_table[fragP->fr_subtype].rlx_length;
13488 }
13489
13490 /* Called after relax() is finished.
13491
13492 In: Address of frag.
13493 fr_type == rs_machine_dependent.
13494 fr_subtype is what the address relaxed to.
13495
13496 Out: Any fixSs and constants are set up.
13497 Caller will turn frag into a ".space 0". */
13498
13499 void
13500 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
13501 fragS *fragP)
13502 {
13503 unsigned char *opcode;
13504 unsigned char *where_to_put_displacement = NULL;
13505 offsetT target_address;
13506 offsetT opcode_address;
13507 unsigned int extension = 0;
13508 offsetT displacement_from_opcode_start;
13509
13510 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
13511 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
13512 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
13513 {
13514 /* Generate nop padding. */
13515 unsigned int size = fragP->tc_frag_data.length;
13516 if (size)
13517 {
13518 if (size > fragP->tc_frag_data.max_bytes)
13519 abort ();
13520
13521 if (flag_debug)
13522 {
13523 const char *msg;
13524 const char *branch = "branch";
13525 const char *prefix = "";
13526 fragS *padding_fragP;
13527 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
13528 == BRANCH_PREFIX)
13529 {
13530 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
13531 switch (fragP->tc_frag_data.default_prefix)
13532 {
13533 default:
13534 abort ();
13535 break;
13536 case CS_PREFIX_OPCODE:
13537 prefix = " cs";
13538 break;
13539 case DS_PREFIX_OPCODE:
13540 prefix = " ds";
13541 break;
13542 case ES_PREFIX_OPCODE:
13543 prefix = " es";
13544 break;
13545 case FS_PREFIX_OPCODE:
13546 prefix = " fs";
13547 break;
13548 case GS_PREFIX_OPCODE:
13549 prefix = " gs";
13550 break;
13551 case SS_PREFIX_OPCODE:
13552 prefix = " ss";
13553 break;
13554 }
13555 if (padding_fragP)
13556 msg = _("%s:%u: add %d%s at 0x%llx to align "
13557 "%s within %d-byte boundary\n");
13558 else
13559 msg = _("%s:%u: add additional %d%s at 0x%llx to "
13560 "align %s within %d-byte boundary\n");
13561 }
13562 else
13563 {
13564 padding_fragP = fragP;
13565 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
13566 "%s within %d-byte boundary\n");
13567 }
13568
13569 if (padding_fragP)
13570 switch (padding_fragP->tc_frag_data.branch_type)
13571 {
13572 case align_branch_jcc:
13573 branch = "jcc";
13574 break;
13575 case align_branch_fused:
13576 branch = "fused jcc";
13577 break;
13578 case align_branch_jmp:
13579 branch = "jmp";
13580 break;
13581 case align_branch_call:
13582 branch = "call";
13583 break;
13584 case align_branch_indirect:
13585 branch = "indirect branch";
13586 break;
13587 case align_branch_ret:
13588 branch = "ret";
13589 break;
13590 default:
13591 break;
13592 }
13593
13594 fprintf (stdout, msg,
13595 fragP->fr_file, fragP->fr_line, size, prefix,
13596 (long long) fragP->fr_address, branch,
13597 1 << align_branch_power);
13598 }
13599 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
13600 memset (fragP->fr_opcode,
13601 fragP->tc_frag_data.default_prefix, size);
13602 else
13603 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
13604 size, 0);
13605 fragP->fr_fix += size;
13606 }
13607 return;
13608 }
13609
13610 opcode = (unsigned char *) fragP->fr_opcode;
13611
13612 /* Address we want to reach in file space. */
13613 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
13614
13615 /* Address opcode resides at in file space. */
13616 opcode_address = fragP->fr_address + fragP->fr_fix;
13617
13618 /* Displacement from opcode start to fill into instruction. */
13619 displacement_from_opcode_start = target_address - opcode_address;
13620
13621 if ((fragP->fr_subtype & BIG) == 0)
13622 {
13623 /* Don't have to change opcode. */
13624 extension = 1; /* 1 opcode + 1 displacement */
13625 where_to_put_displacement = &opcode[1];
13626 }
13627 else
13628 {
13629 if (no_cond_jump_promotion
13630 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
13631 as_warn_where (fragP->fr_file, fragP->fr_line,
13632 _("long jump required"));
13633
13634 switch (fragP->fr_subtype)
13635 {
13636 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
13637 extension = 4; /* 1 opcode + 4 displacement */
13638 opcode[0] = 0xe9;
13639 where_to_put_displacement = &opcode[1];
13640 break;
13641
13642 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
13643 extension = 2; /* 1 opcode + 2 displacement */
13644 opcode[0] = 0xe9;
13645 where_to_put_displacement = &opcode[1];
13646 break;
13647
13648 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
13649 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
13650 extension = 5; /* 2 opcode + 4 displacement */
13651 opcode[1] = opcode[0] + 0x10;
13652 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13653 where_to_put_displacement = &opcode[2];
13654 break;
13655
13656 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
13657 extension = 3; /* 2 opcode + 2 displacement */
13658 opcode[1] = opcode[0] + 0x10;
13659 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13660 where_to_put_displacement = &opcode[2];
13661 break;
13662
13663 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
/* There is no Jcc with a 16-bit displacement on the 8086: emit the
   inverted Jcc hopping over a synthesized jmp rel16 instead. */
13664 extension = 4; /* 1 jcc + 1 disp8 + 1 jmp + 2 displacement */
13665 opcode[0] ^= 1;
13666 opcode[1] = 3;
13667 opcode[2] = 0xe9;
13668 where_to_put_displacement = &opcode[3];
13669 break;
13670
13671 default:
13672 BAD_CASE (fragP->fr_subtype);
13673 break;
13674 }
13675 }
13676
13677 /* If size is less than four we are sure that the operand fits,
13678 but if it's 4, then it could be that the displacement is larger
13679 than -/+ 2GB. */
13680 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
13681 && object_64bit
13682 && ((addressT) (displacement_from_opcode_start - extension
13683 + ((addressT) 1 << 31))
13684 > (((addressT) 2 << 31) - 1)))
13685 {
13686 as_bad_where (fragP->fr_file, fragP->fr_line,
13687 _("jump target out of range"));
13688 /* Make us emit 0. */
13689 displacement_from_opcode_start = extension;
13690 }
13691 /* Now put displacement after opcode. */
13692 md_number_to_chars ((char *) where_to_put_displacement,
13693 (valueT) (displacement_from_opcode_start - extension),
13694 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
13695 fragP->fr_fix += extension;
13696 }
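/* Concrete example of the promotions above (illustrative only): a
   short `jne' is 75 cb.  The COND_JUMP/BIG case turns opcode[0] into
   the 0x0f escape and derives the second byte as 0x75 + 0x10 = 0x85,
   giving 0f 85 plus a 4-byte displacement.  The COND_JUMP86/BIG16
   case instead emits the inverted Jcc (74 03) hopping over a 3-byte
   e9 jmp with a 16-bit displacement.  */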
13697 \f
13698 /* Apply a fixup (fixP) to segment data, once it has been determined
13699 by our caller that we have all the info we need to fix it up.
13700
13701 Parameter valP is the pointer to the value of the bits.
13702
13703 On the 386, immediates, displacements, and data pointers are all in
13704 the same (little-endian) format, so we don't need to care about which
13705 we are handling. */
13706
13707 void
13708 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
13709 {
13710 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
13711 valueT value = *valP;
13712
13713 #if !defined (TE_Mach)
13714 if (fixP->fx_pcrel)
13715 {
13716 switch (fixP->fx_r_type)
13717 {
13718 default:
13719 break;
13720
13721 case BFD_RELOC_64:
13722 fixP->fx_r_type = BFD_RELOC_64_PCREL;
13723 break;
13724 case BFD_RELOC_32:
13725 case BFD_RELOC_X86_64_32S:
13726 fixP->fx_r_type = BFD_RELOC_32_PCREL;
13727 break;
13728 case BFD_RELOC_16:
13729 fixP->fx_r_type = BFD_RELOC_16_PCREL;
13730 break;
13731 case BFD_RELOC_8:
13732 fixP->fx_r_type = BFD_RELOC_8_PCREL;
13733 break;
13734 }
13735 }
13736
13737 if (fixP->fx_addsy != NULL
13738 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
13739 || fixP->fx_r_type == BFD_RELOC_64_PCREL
13740 || fixP->fx_r_type == BFD_RELOC_16_PCREL
13741 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
13742 && !use_rela_relocations)
13743 {
13744 /* This is a hack. There should be a better way to handle this.
13745 This compensates for the fact that bfd_install_relocation will
13746 subtract the current location (for partial_inplace, PC relative
13747 relocations); see more below. */
13748 #ifndef OBJ_AOUT
13749 if (IS_ELF
13750 #ifdef TE_PE
13751 || OUTPUT_FLAVOR == bfd_target_coff_flavour
13752 #endif
13753 )
13754 value += fixP->fx_where + fixP->fx_frag->fr_address;
13755 #endif
13756 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13757 if (IS_ELF)
13758 {
13759 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
13760
13761 if ((sym_seg == seg
13762 || (symbol_section_p (fixP->fx_addsy)
13763 && sym_seg != absolute_section))
13764 && !generic_force_reloc (fixP))
13765 {
13766 /* Yes, we add the values in twice. This is because
13767 bfd_install_relocation subtracts them out again. I think
13768 bfd_install_relocation is broken, but I don't dare change
13769 it. FIXME. */
13770 value += fixP->fx_where + fixP->fx_frag->fr_address;
13771 }
13772 }
13773 #endif
13774 #if defined (OBJ_COFF) && defined (TE_PE)
13775 /* For some reason, the PE format does not store a
13776 section address offset for a PC relative symbol. */
13777 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
13778 || S_IS_WEAK (fixP->fx_addsy))
13779 value += md_pcrel_from (fixP);
13780 #endif
13781 }
13782 #if defined (OBJ_COFF) && defined (TE_PE)
13783 if (fixP->fx_addsy != NULL
13784 && S_IS_WEAK (fixP->fx_addsy)
13785 /* PR 16858: Do not modify weak function references. */
13786 && ! fixP->fx_pcrel)
13787 {
13788 #if !defined (TE_PEP)
13789 /* For x86 PE weak function symbols are neither PC-relative
13790 nor do they set S_IS_FUNCTION. So the only reliable way
13791 to detect them is to check the flags of their containing
13792 section. */
13793 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
13794 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
13795 ;
13796 else
13797 #endif
13798 value -= S_GET_VALUE (fixP->fx_addsy);
13799 }
13800 #endif
13801
13802 /* Fix a few things - the dynamic linker expects certain values here,
13803 and we must not disappoint it. */
13804 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13805 if (IS_ELF && fixP->fx_addsy)
13806 switch (fixP->fx_r_type)
13807 {
13808 case BFD_RELOC_386_PLT32:
13809 case BFD_RELOC_X86_64_PLT32:
13810 /* Make the jump instruction point to the address of the operand.
13811 At runtime we merely add the offset to the actual PLT entry.
13812 NB: Subtract the offset size only for jump instructions. */
13813 if (fixP->fx_pcrel)
13814 value = -4;
13815 break;
13816
13817 case BFD_RELOC_386_TLS_GD:
13818 case BFD_RELOC_386_TLS_LDM:
13819 case BFD_RELOC_386_TLS_IE_32:
13820 case BFD_RELOC_386_TLS_IE:
13821 case BFD_RELOC_386_TLS_GOTIE:
13822 case BFD_RELOC_386_TLS_GOTDESC:
13823 case BFD_RELOC_X86_64_TLSGD:
13824 case BFD_RELOC_X86_64_TLSLD:
13825 case BFD_RELOC_X86_64_GOTTPOFF:
13826 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
13827 value = 0; /* Fully resolved at runtime. No addend. */
13828 /* Fallthrough */
13829 case BFD_RELOC_386_TLS_LE:
13830 case BFD_RELOC_386_TLS_LDO_32:
13831 case BFD_RELOC_386_TLS_LE_32:
13832 case BFD_RELOC_X86_64_DTPOFF32:
13833 case BFD_RELOC_X86_64_DTPOFF64:
13834 case BFD_RELOC_X86_64_TPOFF32:
13835 case BFD_RELOC_X86_64_TPOFF64:
13836 S_SET_THREAD_LOCAL (fixP->fx_addsy);
13837 break;
13838
13839 case BFD_RELOC_386_TLS_DESC_CALL:
13840 case BFD_RELOC_X86_64_TLSDESC_CALL:
13841 value = 0; /* Fully resolved at runtime. No addend. */
13842 S_SET_THREAD_LOCAL (fixP->fx_addsy);
13843 fixP->fx_done = 0;
13844 return;
13845
13846 case BFD_RELOC_VTABLE_INHERIT:
13847 case BFD_RELOC_VTABLE_ENTRY:
13848 fixP->fx_done = 0;
13849 return;
13850
13851 default:
13852 break;
13853 }
13854 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
13855
13856 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
13857 if (!object_64bit)
13858 value = extend_to_32bit_address (value);
13859
13860 *valP = value;
13861 #endif /* !defined (TE_Mach) */
13862
13863 /* Are we finished with this relocation now? */
13864 if (fixP->fx_addsy == NULL)
13865 {
13866 fixP->fx_done = 1;
13867 switch (fixP->fx_r_type)
13868 {
13869 case BFD_RELOC_X86_64_32S:
13870 fixP->fx_signed = 1;
13871 break;
13872
13873 default:
13874 break;
13875 }
13876 }
13877 #if defined (OBJ_COFF) && defined (TE_PE)
13878 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
13879 {
13880 fixP->fx_done = 0;
13881 /* Remember value for tc_gen_reloc. */
13882 fixP->fx_addnumber = value;
13883 /* Clear out the frag for now. */
13884 value = 0;
13885 }
13886 #endif
13887 else if (use_rela_relocations)
13888 {
13889 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
13890 fixP->fx_no_overflow = 1;
13891 /* Remember value for tc_gen_reloc. */
13892 fixP->fx_addnumber = value;
13893 value = 0;
13894 }
13895
13896 md_number_to_chars (p, value, fixP->fx_size);
13897 }
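/* Note on the use_rela_relocations path above: x86-64 ELF is a RELA
   target, so the computed value is only remembered in fx_addnumber
   for tc_gen_reloc while zero is written into the frag itself - the
   addend travels in the .rela section entry.  REL targets (ix86 ELF)
   keep the addend in the section contents instead.  */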
13898 \f
13899 const char *
13900 md_atof (int type, char *litP, int *sizeP)
13901 {
13902 /* This outputs the LITTLENUMs in REVERSE order;
13903 in accord with the little-endian 386. */
13904 return ieee_md_atof (type, litP, sizeP, false);
13905 }
13906 \f
13907 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
13908
13909 static char *
13910 output_invalid (int c)
13911 {
13912 if (ISPRINT (c))
13913 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
13914 "'%c'", c);
13915 else
13916 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
13917 "(0x%x)", (unsigned char) c);
13918 return output_invalid_buf;
13919 }
13920
13921 /* Verify that @r can be used in the current context. */
13922
13923 static bool check_register (const reg_entry *r)
13924 {
13925 if (allow_pseudo_reg)
13926 return true;
13927
13928 if (operand_type_all_zero (&r->reg_type))
13929 return false;
13930
13931 if ((r->reg_type.bitfield.dword
13932 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
13933 || r->reg_type.bitfield.class == RegCR
13934 || r->reg_type.bitfield.class == RegDR)
13935 && !cpu_arch_flags.bitfield.cpui386)
13936 return false;
13937
13938 if (r->reg_type.bitfield.class == RegTR
13939 && (flag_code == CODE_64BIT
13940 || !cpu_arch_flags.bitfield.cpui386
13941 || cpu_arch_isa_flags.bitfield.cpui586
13942 || cpu_arch_isa_flags.bitfield.cpui686))
13943 return false;
13944
13945 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
13946 return false;
13947
13948 if (!cpu_arch_flags.bitfield.cpuavx512f)
13949 {
13950 if (r->reg_type.bitfield.zmmword
13951 || r->reg_type.bitfield.class == RegMask)
13952 return false;
13953
13954 if (!cpu_arch_flags.bitfield.cpuavx)
13955 {
13956 if (r->reg_type.bitfield.ymmword)
13957 return false;
13958
13959 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
13960 return false;
13961 }
13962 }
13963
13964 if (vector_size < VSZ512 && r->reg_type.bitfield.zmmword)
13965 return false;
13966
13967 if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
13968 return false;
13969
13970 if (r->reg_type.bitfield.tmmword
13971 && (!cpu_arch_flags.bitfield.cpuamx_tile
13972 || flag_code != CODE_64BIT))
13973 return false;
13974
13975 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
13976 return false;
13977
13978 /* Only allow the fake index registers when allow_index_reg is non-zero. */
13979 if (!allow_index_reg && r->reg_num == RegIZ)
13980 return false;
13981
13982 /* Upper 16 vector registers are only available with VREX in 64bit
13983 mode, and require EVEX encoding. */
13984 if (r->reg_flags & RegVRex)
13985 {
13986 if (!cpu_arch_flags.bitfield.cpuavx512f
13987 || flag_code != CODE_64BIT)
13988 return false;
13989
13990 if (i.vec_encoding == vex_encoding_default)
13991 i.vec_encoding = vex_encoding_evex;
13992 else if (i.vec_encoding != vex_encoding_evex)
13993 i.vec_encoding = vex_encoding_error;
13994 }
13995
13996 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
13997 && (!cpu_arch_flags.bitfield.cpu64
13998 || r->reg_type.bitfield.class != RegCR
13999 || dot_insn ())
14000 && flag_code != CODE_64BIT)
14001 return false;
14002
14003 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
14004 && !intel_syntax)
14005 return false;
14006
14007 return true;
14008 }
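/* A few concrete rejections implemented above (illustrative):
   - %zmm3 needs AVX512F and a vector_size covering VSZ512;
   - %xmm16..%xmm31 (RegVRex) need 64-bit mode plus AVX512F, and
     using them forces EVEX encoding;
   - %riz / %eiz, the fake index registers, need -mindex-reg;
   - %bnd0 needs MPX.  */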
14009
14010 /* REG_STRING starts *before* REGISTER_PREFIX. */
14011
14012 static const reg_entry *
14013 parse_real_register (const char *reg_string, char **end_op)
14014 {
14015 const char *s = reg_string;
14016 char *p;
14017 char reg_name_given[MAX_REG_NAME_SIZE + 1];
14018 const reg_entry *r;
14019
14020 /* Skip possible REGISTER_PREFIX and possible whitespace. */
14021 if (*s == REGISTER_PREFIX)
14022 ++s;
14023
14024 if (is_space_char (*s))
14025 ++s;
14026
14027 p = reg_name_given;
14028 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
14029 {
14030 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
14031 return (const reg_entry *) NULL;
14032 s++;
14033 }
14034
14035 if (is_part_of_name (*s))
14036 return (const reg_entry *) NULL;
14037
14038 *end_op = (char *) s;
14039
14040 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
14041
14042 /* Handle floating point regs, allowing spaces in the (i) part. */
14043 if (r == reg_st0)
14044 {
14045 if (!cpu_arch_flags.bitfield.cpu8087
14046 && !cpu_arch_flags.bitfield.cpu287
14047 && !cpu_arch_flags.bitfield.cpu387
14048 && !allow_pseudo_reg)
14049 return (const reg_entry *) NULL;
14050
14051 if (is_space_char (*s))
14052 ++s;
14053 if (*s == '(')
14054 {
14055 ++s;
14056 if (is_space_char (*s))
14057 ++s;
14058 if (*s >= '0' && *s <= '7')
14059 {
14060 int fpr = *s - '0';
14061 ++s;
14062 if (is_space_char (*s))
14063 ++s;
14064 if (*s == ')')
14065 {
14066 *end_op = (char *) s + 1;
14067 know (r[fpr].reg_num == fpr);
14068 return r + fpr;
14069 }
14070 }
14071 /* We have "%st(" then garbage. */
14072 return (const reg_entry *) NULL;
14073 }
14074 }
14075
14076 return r && check_register (r) ? r : NULL;
14077 }
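/* The stack-register parsing above is deliberately lenient about
   blanks; e.g. (illustrative) "%st", "%st(3)" and "% st ( 3 )" all
   resolve, while "%st(8)" or "%st(" followed by anything other than
   a digit and ')' yields NULL.  */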
14078
14079 /* REG_STRING starts *before* REGISTER_PREFIX. */
14080
14081 static const reg_entry *
14082 parse_register (const char *reg_string, char **end_op)
14083 {
14084 const reg_entry *r;
14085
14086 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
14087 r = parse_real_register (reg_string, end_op);
14088 else
14089 r = NULL;
14090 if (!r)
14091 {
14092 char *save = input_line_pointer;
14093 char *buf = xstrdup (reg_string), *name;
14094 symbolS *symbolP;
14095
14096 input_line_pointer = buf;
14097 get_symbol_name (&name);
14098 symbolP = symbol_find (name);
14099 while (symbolP && symbol_equated_p (symbolP))
14100 {
14101 const expressionS *e = symbol_get_value_expression(symbolP);
14102
14103 if (e->X_add_number)
14104 break;
14105 symbolP = e->X_add_symbol;
14106 }
14107 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
14108 {
14109 const expressionS *e = symbol_get_value_expression (symbolP);
14110
14111 if (e->X_op == O_register)
14112 {
14113 know (e->X_add_number >= 0
14114 && (valueT) e->X_add_number < i386_regtab_size);
14115 r = i386_regtab + e->X_add_number;
14116 *end_op = (char *) reg_string + (input_line_pointer - buf);
14117 }
14118 if (r && !check_register (r))
14119 {
14120 as_bad (_("register '%s%s' cannot be used here"),
14121 register_prefix, r->reg_name);
14122 r = &bad_reg;
14123 }
14124 }
14125 input_line_pointer = save;
14126 free (buf);
14127 }
14128 return r;
14129 }
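/* The symbol fallback above lets an equated symbol stand in for a
   register; a minimal sketch (assuming plain .set semantics):

       .set ptr, %rsp
       mov (ptr), %eax          # resolves via reg_section to %rsp

   Chains of equates are followed as long as no offset is added.  */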
14130
14131 int
14132 i386_parse_name (char *name, expressionS *e, char *nextcharP)
14133 {
14134 const reg_entry *r = NULL;
14135 char *end = input_line_pointer;
14136
14137 /* We only know the terminating character here. If it is a double quote, it
14138 could be the closing one of a quoted symbol name, or an opening one from a
14139 following string (or another quoted symbol name). Since the latter can't
14140 be valid syntax for anything, bailing in either case is good enough. */
14141 if (*nextcharP == '"')
14142 return 0;
14143
14144 *end = *nextcharP;
14145 if (*name == REGISTER_PREFIX || allow_naked_reg)
14146 r = parse_real_register (name, &input_line_pointer);
14147 if (r && end <= input_line_pointer)
14148 {
14149 *nextcharP = *input_line_pointer;
14150 *input_line_pointer = 0;
14151 e->X_op = O_register;
14152 e->X_add_number = r - i386_regtab;
14153 return 1;
14154 }
14155 input_line_pointer = end;
14156 *end = 0;
14157 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
14158 }
14159
14160 void
14161 md_operand (expressionS *e)
14162 {
14163 char *end;
14164 const reg_entry *r;
14165
14166 switch (*input_line_pointer)
14167 {
14168 case REGISTER_PREFIX:
14169 r = parse_real_register (input_line_pointer, &end);
14170 if (r)
14171 {
14172 e->X_op = O_register;
14173 e->X_add_number = r - i386_regtab;
14174 input_line_pointer = end;
14175 }
14176 break;
14177
14178 case '[':
14179 gas_assert (intel_syntax);
14180 end = input_line_pointer++;
14181 expression (e);
14182 if (*input_line_pointer == ']')
14183 {
14184 ++input_line_pointer;
14185 e->X_op_symbol = make_expr_symbol (e);
14186 e->X_add_symbol = NULL;
14187 e->X_add_number = 0;
14188 e->X_op = O_index;
14189 }
14190 else
14191 {
14192 e->X_op = O_absent;
14193 input_line_pointer = end;
14194 }
14195 break;
14196 }
14197 }
14198
14199 #ifdef BFD64
14200 /* To maintain consistency with !BFD64 builds of gas, record whether any
14201 (binary) operator was involved in an expression. As expressions are
14202 evaluated in only 32 bits when !BFD64, we use this to decide whether to
14203 truncate results. */
14204 bool i386_record_operator (operatorT op,
14205 const expressionS *left,
14206 const expressionS *right)
14207 {
14208 if (op == O_absent)
14209 return false;
14210
14211 if (!left)
14212 {
14213 /* Since the expression parser applies unary operators fine to bignum
14214 operands, we don't need to be concerned about the respective operands not
14215 fitting in 32 bits. */
14216 if (right->X_op == O_constant && right->X_unsigned
14217 && !fits_in_unsigned_long (right->X_add_number))
14218 return false;
14219 }
14220 /* This isn't entirely right: The pattern can also result when constant
14221 expressions are folded (e.g. 0xffffffff + 1). */
14222 else if ((left->X_op == O_constant && left->X_unsigned
14223 && !fits_in_unsigned_long (left->X_add_number))
14224 || (right->X_op == O_constant && right->X_unsigned
14225 && !fits_in_unsigned_long (right->X_add_number)))
14226 expr_mode = expr_large_value;
14227
14228 if (expr_mode != expr_large_value)
14229 expr_mode = expr_operator_present;
14230
14231 return false;
14232 }
14233 #endif
14234 \f
14235 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14236 const char *md_shortopts = "kVQ:sqnO::";
14237 #else
14238 const char *md_shortopts = "qnO::";
14239 #endif
14240
14241 #define OPTION_32 (OPTION_MD_BASE + 0)
14242 #define OPTION_64 (OPTION_MD_BASE + 1)
14243 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
14244 #define OPTION_MARCH (OPTION_MD_BASE + 3)
14245 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
14246 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
14247 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
14248 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
14249 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
14250 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
14251 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
14252 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
14253 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
14254 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
14255 #define OPTION_X32 (OPTION_MD_BASE + 14)
14256 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
14257 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
14258 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
14259 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
14260 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
14261 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
14262 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
14263 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
14264 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
14265 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
14266 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
14267 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
14268 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
14269 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
14270 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
14271 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
14272 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
14273 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
14274 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
14275 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
14276
14277 struct option md_longopts[] =
14278 {
14279 {"32", no_argument, NULL, OPTION_32},
14280 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14281 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14282 {"64", no_argument, NULL, OPTION_64},
14283 #endif
14284 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14285 {"x32", no_argument, NULL, OPTION_X32},
14286 {"mshared", no_argument, NULL, OPTION_MSHARED},
14287 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
14288 #endif
14289 {"divide", no_argument, NULL, OPTION_DIVIDE},
14290 {"march", required_argument, NULL, OPTION_MARCH},
14291 {"mtune", required_argument, NULL, OPTION_MTUNE},
14292 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
14293 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
14294 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
14295 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
14296 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
14297 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
14298 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
14299 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
14300 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
14301 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
14302 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
14303 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
14304 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
14305 # if defined (TE_PE) || defined (TE_PEP)
14306 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
14307 #endif
14308 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
14309 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
14310 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
14311 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
14312 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
14313 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
14314 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
14315 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
14316 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
14317 {"mlfence-before-indirect-branch", required_argument, NULL,
14318 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
14319 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
14320 {"mamd64", no_argument, NULL, OPTION_MAMD64},
14321 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
14322 {NULL, no_argument, NULL, 0}
14323 };
14324 size_t md_longopts_size = sizeof (md_longopts);
14325
14326 int
14327 md_parse_option (int c, const char *arg)
14328 {
14329 unsigned int j;
14330 char *arch, *next, *saved, *type;
14331
14332 switch (c)
14333 {
14334 case 'n':
14335 optimize_align_code = 0;
14336 break;
14337
14338 case 'q':
14339 quiet_warnings = 1;
14340 break;
14341
14342 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14343 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
14344 should be emitted or not. FIXME: Not implemented. */
14345 case 'Q':
14346 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
14347 return 0;
14348 break;
14349
14350 /* -V: SVR4 argument to print version ID. */
14351 case 'V':
14352 print_version_id ();
14353 break;
14354
14355 /* -k: Ignore for FreeBSD compatibility. */
14356 case 'k':
14357 break;
14358
14359 case 's':
14360 /* -s: On i386 Solaris, this tells the native assembler to use
14361 .stab instead of .stab.excl. We always use .stab anyhow. */
14362 break;
14363
14364 case OPTION_MSHARED:
14365 shared = 1;
14366 break;
14367
14368 case OPTION_X86_USED_NOTE:
14369 if (strcasecmp (arg, "yes") == 0)
14370 x86_used_note = 1;
14371 else if (strcasecmp (arg, "no") == 0)
14372 x86_used_note = 0;
14373 else
14374 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
14375 break;
14376
14378 #endif
14379 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14380 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14381 case OPTION_64:
14382 {
14383 const char **list, **l;
14384
14385 list = bfd_target_list ();
14386 for (l = list; *l != NULL; l++)
14387 if (startswith (*l, "elf64-x86-64")
14388 || strcmp (*l, "coff-x86-64") == 0
14389 || strcmp (*l, "pe-x86-64") == 0
14390 || strcmp (*l, "pei-x86-64") == 0
14391 || strcmp (*l, "mach-o-x86-64") == 0)
14392 {
14393 default_arch = "x86_64";
14394 break;
14395 }
14396 if (*l == NULL)
14397 as_fatal (_("no compiled in support for x86_64"));
14398 free (list);
14399 }
14400 break;
14401 #endif
14402
14403 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14404 case OPTION_X32:
14405 if (IS_ELF)
14406 {
14407 const char **list, **l;
14408
14409 list = bfd_target_list ();
14410 for (l = list; *l != NULL; l++)
14411 if (startswith (*l, "elf32-x86-64"))
14412 {
14413 default_arch = "x86_64:32";
14414 break;
14415 }
14416 if (*l == NULL)
14417 as_fatal (_("no compiled in support for 32bit x86_64"));
14418 free (list);
14419 }
14420 else
14421 as_fatal (_("32bit x86_64 is only supported for ELF"));
14422 break;
14423 #endif
14424
14425 case OPTION_32:
14426 {
14427 const char **list, **l;
14428
14429 list = bfd_target_list ();
14430 for (l = list; *l != NULL; l++)
14431 if (strstr (*l, "-i386")
14432 || strstr (*l, "-go32"))
14433 {
14434 default_arch = "i386";
14435 break;
14436 }
14437 if (*l == NULL)
14438 as_fatal (_("no compiled in support for ix86"));
14439 free (list);
14440 }
14441 break;
14442
14443 case OPTION_DIVIDE:
14444 #ifdef SVR4_COMMENT_CHARS
14445 {
14446 char *n, *t;
14447 const char *s;
14448
14449 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
14450 t = n;
14451 for (s = i386_comment_chars; *s != '\0'; s++)
14452 if (*s != '/')
14453 *t++ = *s;
14454 *t = '\0';
14455 i386_comment_chars = n;
14456 }
14457 #endif
14458 break;
14459
14460 case OPTION_MARCH:
14461 saved = xstrdup (arg);
14462 arch = saved;
14463 /* Allow -march=+nosse. */
14464 if (*arch == '+')
14465 arch++;
14466 do
14467 {
14468 char *vsz;
14469
14470 if (*arch == '.')
14471 as_fatal (_("invalid -march= option: `%s'"), arg);
14472 next = strchr (arch, '+');
14473 if (next)
14474 *next++ = '\0';
14475 vsz = strchr (arch, '/');
14476 if (vsz)
14477 *vsz++ = '\0';
14478 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14479 {
14480 if (vsz && cpu_arch[j].vsz != vsz_set)
14481 continue;
14482
14483 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
14484 && strcmp (arch, cpu_arch[j].name) == 0)
14485 {
14486 /* Processor. */
14487 if (! cpu_arch[j].enable.bitfield.cpui386)
14488 continue;
14489
14490 cpu_arch_name = cpu_arch[j].name;
14491 free (cpu_sub_arch_name);
14492 cpu_sub_arch_name = NULL;
14493 cpu_arch_flags = cpu_arch[j].enable;
14494 cpu_arch_isa = cpu_arch[j].type;
14495 cpu_arch_isa_flags = cpu_arch[j].enable;
14496 if (!cpu_arch_tune_set)
14497 {
14498 cpu_arch_tune = cpu_arch_isa;
14499 cpu_arch_tune_flags = cpu_arch_isa_flags;
14500 }
14501 vector_size = VSZ_DEFAULT;
14502 break;
14503 }
14504 else if (cpu_arch[j].type == PROCESSOR_NONE
14505 && strcmp (arch, cpu_arch[j].name) == 0
14506 && !cpu_flags_all_zero (&cpu_arch[j].enable))
14507 {
14508 /* ISA extension. */
14509 i386_cpu_flags flags;
14510
14511 flags = cpu_flags_or (cpu_arch_flags,
14512 cpu_arch[j].enable);
14513
14514 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
14515 {
14516 extend_cpu_sub_arch_name (arch);
14517 cpu_arch_flags = flags;
14518 cpu_arch_isa_flags = flags;
14519 }
14520 else
14521 cpu_arch_isa_flags
14522 = cpu_flags_or (cpu_arch_isa_flags,
14523 cpu_arch[j].enable);
14524
14525 switch (cpu_arch[j].vsz)
14526 {
14527 default:
14528 break;
14529
14530 case vsz_set:
14531 if (vsz)
14532 {
14533 char *end;
14534 unsigned long val = strtoul (vsz, &end, 0);
14535
14536 if (*end)
14537 val = 0;
14538 switch (val)
14539 {
14540 case 512: vector_size = VSZ512; break;
14541 case 256: vector_size = VSZ256; break;
14542 case 128: vector_size = VSZ128; break;
14543 default:
14544 as_warn (_("Unrecognized vector size specifier ignored"));
14545 break;
14546 }
14547 break;
14548 }
14549 /* Fall through. */
14550 case vsz_reset:
14551 vector_size = VSZ_DEFAULT;
14552 break;
14553 }
14554
14555 break;
14556 }
14557 }
14558
14559 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
14560 {
14561 /* Disable an ISA extension. */
14562 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14563 if (cpu_arch[j].type == PROCESSOR_NONE
14564 && strcmp (arch + 2, cpu_arch[j].name) == 0)
14565 {
14566 i386_cpu_flags flags;
14567
14568 flags = cpu_flags_and_not (cpu_arch_flags,
14569 cpu_arch[j].disable);
14570 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
14571 {
14572 extend_cpu_sub_arch_name (arch);
14573 cpu_arch_flags = flags;
14574 cpu_arch_isa_flags = flags;
14575 }
14576 if (cpu_arch[j].vsz == vsz_set)
14577 vector_size = VSZ_DEFAULT;
14578 break;
14579 }
14580 }
14581
14582 if (j >= ARRAY_SIZE (cpu_arch))
14583 as_fatal (_("invalid -march= option: `%s'"), arg);
14584
14585 arch = next;
14586 }
14587 while (next != NULL);
14588 free (saved);
14589 break;
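/* Illustrative -march= spellings accepted by the loop above (assuming
   the named entries are compiled into cpu_arch):
     -march=core2+avx             processor plus ISA extension
     -march=+nosse                bare extension list; "no" disables
     -march=generic64+avx10.1/256 '/' picks a vector size for vsz_set
                                  extensions (128, 256 or 512; other
                                  values are warned about and ignored). */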
14590
14591 case OPTION_MTUNE:
14592 if (*arg == '.')
14593 as_fatal (_("invalid -mtune= option: `%s'"), arg);
14594 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14595 {
14596 if (cpu_arch[j].type != PROCESSOR_NONE
14597 && strcmp (arg, cpu_arch[j].name) == 0)
14598 {
14599 cpu_arch_tune_set = 1;
14600 cpu_arch_tune = cpu_arch [j].type;
14601 cpu_arch_tune_flags = cpu_arch[j].enable;
14602 break;
14603 }
14604 }
14605 if (j >= ARRAY_SIZE (cpu_arch))
14606 as_fatal (_("invalid -mtune= option: `%s'"), arg);
14607 break;
14608
14609 case OPTION_MMNEMONIC:
14610 if (strcasecmp (arg, "att") == 0)
14611 intel_mnemonic = 0;
14612 else if (strcasecmp (arg, "intel") == 0)
14613 intel_mnemonic = 1;
14614 else
14615 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
14616 break;
14617
14618 case OPTION_MSYNTAX:
14619 if (strcasecmp (arg, "att") == 0)
14620 intel_syntax = 0;
14621 else if (strcasecmp (arg, "intel") == 0)
14622 intel_syntax = 1;
14623 else
14624 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
14625 break;
14626
14627 case OPTION_MINDEX_REG:
14628 allow_index_reg = 1;
14629 break;
14630
14631 case OPTION_MNAKED_REG:
14632 allow_naked_reg = 1;
14633 break;
14634
14635 case OPTION_MSSE2AVX:
14636 sse2avx = 1;
14637 break;
14638
14639 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
14640 use_unaligned_vector_move = 1;
14641 break;
14642
14643 case OPTION_MSSE_CHECK:
14644 if (strcasecmp (arg, "error") == 0)
14645 sse_check = check_error;
14646 else if (strcasecmp (arg, "warning") == 0)
14647 sse_check = check_warning;
14648 else if (strcasecmp (arg, "none") == 0)
14649 sse_check = check_none;
14650 else
14651 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
14652 break;
14653
14654 case OPTION_MOPERAND_CHECK:
14655 if (strcasecmp (arg, "error") == 0)
14656 operand_check = check_error;
14657 else if (strcasecmp (arg, "warning") == 0)
14658 operand_check = check_warning;
14659 else if (strcasecmp (arg, "none") == 0)
14660 operand_check = check_none;
14661 else
14662 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
14663 break;
14664
14665 case OPTION_MAVXSCALAR:
14666 if (strcasecmp (arg, "128") == 0)
14667 avxscalar = vex128;
14668 else if (strcasecmp (arg, "256") == 0)
14669 avxscalar = vex256;
14670 else
14671 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
14672 break;
14673
14674 case OPTION_MVEXWIG:
14675 if (strcmp (arg, "0") == 0)
14676 vexwig = vexw0;
14677 else if (strcmp (arg, "1") == 0)
14678 vexwig = vexw1;
14679 else
14680 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
14681 break;
14682
14683 case OPTION_MADD_BND_PREFIX:
14684 add_bnd_prefix = 1;
14685 break;
14686
14687 case OPTION_MEVEXLIG:
14688 if (strcmp (arg, "128") == 0)
14689 evexlig = evexl128;
14690 else if (strcmp (arg, "256") == 0)
14691 evexlig = evexl256;
14692 else if (strcmp (arg, "512") == 0)
14693 evexlig = evexl512;
14694 else
14695 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
14696 break;
14697
14698 case OPTION_MEVEXRCIG:
14699 if (strcmp (arg, "rne") == 0)
14700 evexrcig = rne;
14701 else if (strcmp (arg, "rd") == 0)
14702 evexrcig = rd;
14703 else if (strcmp (arg, "ru") == 0)
14704 evexrcig = ru;
14705 else if (strcmp (arg, "rz") == 0)
14706 evexrcig = rz;
14707 else
14708 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
14709 break;
14710
14711 case OPTION_MEVEXWIG:
14712 if (strcmp (arg, "0") == 0)
14713 evexwig = evexw0;
14714 else if (strcmp (arg, "1") == 0)
14715 evexwig = evexw1;
14716 else
14717 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
14718 break;
14719
14720 # if defined (TE_PE) || defined (TE_PEP)
14721 case OPTION_MBIG_OBJ:
14722 use_big_obj = 1;
14723 break;
14724 #endif
14725
14726 case OPTION_MOMIT_LOCK_PREFIX:
14727 if (strcasecmp (arg, "yes") == 0)
14728 omit_lock_prefix = 1;
14729 else if (strcasecmp (arg, "no") == 0)
14730 omit_lock_prefix = 0;
14731 else
14732 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
14733 break;
14734
14735 case OPTION_MFENCE_AS_LOCK_ADD:
14736 if (strcasecmp (arg, "yes") == 0)
14737 avoid_fence = 1;
14738 else if (strcasecmp (arg, "no") == 0)
14739 avoid_fence = 0;
14740 else
14741 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
14742 break;
14743
14744 case OPTION_MLFENCE_AFTER_LOAD:
14745 if (strcasecmp (arg, "yes") == 0)
14746 lfence_after_load = 1;
14747 else if (strcasecmp (arg, "no") == 0)
14748 lfence_after_load = 0;
14749 else
14750 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
14751 break;
14752
14753 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
14754 if (strcasecmp (arg, "all") == 0)
14755 {
14756 lfence_before_indirect_branch = lfence_branch_all;
14757 if (lfence_before_ret == lfence_before_ret_none)
14758 lfence_before_ret = lfence_before_ret_shl;
14759 }
14760 else if (strcasecmp (arg, "memory") == 0)
14761 lfence_before_indirect_branch = lfence_branch_memory;
14762 else if (strcasecmp (arg, "register") == 0)
14763 lfence_before_indirect_branch = lfence_branch_register;
14764 else if (strcasecmp (arg, "none") == 0)
14765 lfence_before_indirect_branch = lfence_branch_none;
14766 else
14767 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
14768 arg);
14769 break;
14770
14771 case OPTION_MLFENCE_BEFORE_RET:
14772 if (strcasecmp (arg, "or") == 0)
14773 lfence_before_ret = lfence_before_ret_or;
14774 else if (strcasecmp (arg, "not") == 0)
14775 lfence_before_ret = lfence_before_ret_not;
14776 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
14777 lfence_before_ret = lfence_before_ret_shl;
14778 else if (strcasecmp (arg, "none") == 0)
14779 lfence_before_ret = lfence_before_ret_none;
14780 else
14781 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
14782 arg);
14783 break;
14784
14785 case OPTION_MRELAX_RELOCATIONS:
14786 if (strcasecmp (arg, "yes") == 0)
14787 generate_relax_relocations = 1;
14788 else if (strcasecmp (arg, "no") == 0)
14789 generate_relax_relocations = 0;
14790 else
14791 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
14792 break;
14793
14794 case OPTION_MALIGN_BRANCH_BOUNDARY:
14795 {
14796 char *end;
14797 long int align = strtoul (arg, &end, 0);
14798 if (*end == '\0')
14799 {
14800 if (align == 0)
14801 {
14802 align_branch_power = 0;
14803 break;
14804 }
14805 else if (align >= 16)
14806 {
14807 int align_power;
14808 for (align_power = 0;
14809 (align & 1) == 0;
14810 align >>= 1, align_power++)
14811 continue;
14812 /* Limit alignment power to 31. */
14813 if (align == 1 && align_power < 32)
14814 {
14815 align_branch_power = align_power;
14816 break;
14817 }
14818 }
14819 }
14820 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
14821 }
14822 break;
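/* Boundary parsing sketch: -malign-branch-boundary=32 yields
   align_branch_power == 5; 0 disables branch alignment; values below
   16, values that are not a power of two, and powers beyond 1 << 31
   are all rejected with as_fatal.  */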
14823
14824 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
14825 {
14826 char *end;
14827 int align = strtoul (arg, &end, 0);
14828 /* Some processors only support 5 prefixes. */
14829 if (*end == '\0' && align >= 0 && align < 6)
14830 {
14831 align_branch_prefix_size = align;
14832 break;
14833 }
14834 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
14835 arg);
14836 }
14837 break;
14838
14839 case OPTION_MALIGN_BRANCH:
14840 align_branch = 0;
14841 saved = xstrdup (arg);
14842 type = saved;
14843 do
14844 {
14845 next = strchr (type, '+');
14846 if (next)
14847 *next++ = '\0';
14848 if (strcasecmp (type, "jcc") == 0)
14849 align_branch |= align_branch_jcc_bit;
14850 else if (strcasecmp (type, "fused") == 0)
14851 align_branch |= align_branch_fused_bit;
14852 else if (strcasecmp (type, "jmp") == 0)
14853 align_branch |= align_branch_jmp_bit;
14854 else if (strcasecmp (type, "call") == 0)
14855 align_branch |= align_branch_call_bit;
14856 else if (strcasecmp (type, "ret") == 0)
14857 align_branch |= align_branch_ret_bit;
14858 else if (strcasecmp (type, "indirect") == 0)
14859 align_branch |= align_branch_indirect_bit;
14860 else
14861 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
14862 type = next;
14863 }
14864 while (next != NULL);
14865 free (saved);
14866 break;
14867
14868 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
14869 align_branch_power = 5;
14870 align_branch_prefix_size = 5;
14871 align_branch = (align_branch_jcc_bit
14872 | align_branch_fused_bit
14873 | align_branch_jmp_bit);
14874 break;
14875
14876 case OPTION_MAMD64:
14877 isa64 = amd64;
14878 break;
14879
14880 case OPTION_MINTEL64:
14881 isa64 = intel64;
14882 break;
14883
14884 case 'O':
14885 if (arg == NULL)
14886 {
14887 optimize = 1;
14888 /* Turn off -Os. */
14889 optimize_for_space = 0;
14890 }
14891 else if (*arg == 's')
14892 {
14893 optimize_for_space = 1;
14894 /* Turn on all encoding optimizations. */
14895 optimize = INT_MAX;
14896 }
14897 else
14898 {
14899 optimize = atoi (arg);
14900 /* Turn off -Os. */
14901 optimize_for_space = 0;
14902 }
14903 break;
14904
14905 default:
14906 return 0;
14907 }
14908 return 1;
14909 }
14910
14911 #define MESSAGE_TEMPLATE \
14912 " "
14913
14914 static char *
14915 output_message (FILE *stream, char *p, char *message, char *start,
14916 int *left_p, const char *name, int len)
14917 {
14918 int size = sizeof (MESSAGE_TEMPLATE);
14919 int left = *left_p;
14920
14921 /* Reserve 2 spaces for ", " or ",\0" */
14922 left -= len + 2;
14923
14924 /* Check if there is any room. */
14925 if (left >= 0)
14926 {
14927 if (p != start)
14928 {
14929 *p++ = ',';
14930 *p++ = ' ';
14931 }
14932 p = mempcpy (p, name, len);
14933 }
14934 else
14935 {
14936 /* Output the current message now and start a new one. */
14937 *p++ = ',';
14938 *p = '\0';
14939 fprintf (stream, "%s\n", message);
14940 p = start;
14941 left = size - (start - message) - len - 2;
14942
14943 gas_assert (left >= 0);
14944
14945 p = mempcpy (p, name, len);
14946 }
14947
14948 *left_p = left;
14949 return p;
14950 }
14951
14952 static void
14953 show_arch (FILE *stream, int ext, int check)
14954 {
14955 static char message[] = MESSAGE_TEMPLATE;
14956 char *start = message + 27;
14957 char *p;
14958 int size = sizeof (MESSAGE_TEMPLATE);
14959 int left;
14960 const char *name;
14961 int len;
14962 unsigned int j;
14963
14964 p = start;
14965 left = size - (start - message);
14966
14967 if (!ext && check)
14968 {
14969 p = output_message (stream, p, message, start, &left,
14970 STRING_COMMA_LEN ("default"));
14971 p = output_message (stream, p, message, start, &left,
14972 STRING_COMMA_LEN ("push"));
14973 p = output_message (stream, p, message, start, &left,
14974 STRING_COMMA_LEN ("pop"));
14975 }
14976
14977 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14978 {
14979 /* Should it be skipped? */
14980 if (cpu_arch [j].skip)
14981 continue;
14982
14983 name = cpu_arch [j].name;
14984 len = cpu_arch [j].len;
14985 if (cpu_arch[j].type == PROCESSOR_NONE)
14986 {
14987 /* It is an extension. Skip if we aren't asked to show it. */
14988 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
14989 continue;
14990 }
14991 else if (ext)
14992 {
14993 /* It is a processor. Skip if we only show extensions. */
14994 continue;
14995 }
14996 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
14997 {
14998 /* It is an impossible processor - skip. */
14999 continue;
15000 }
15001
15002 p = output_message (stream, p, message, start, &left, name, len);
15003 }
15004
15005 /* Display disabled extensions. */
15006 if (ext)
15007 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
15008 {
15009 char *str;
15010
15011 if (cpu_arch[j].type != PROCESSOR_NONE
15012 || !cpu_flags_all_zero (&cpu_arch[j].enable))
15013 continue;
15014 str = xasprintf ("no%s", cpu_arch[j].name);
15015 p = output_message (stream, p, message, start, &left, str,
15016 strlen (str));
15017 free (str);
15018 }
15019
15020 *p = '\0';
15021 fprintf (stream, "%s\n", message);
15022 }
15023
15024 void
15025 md_show_usage (FILE *stream)
15026 {
15027 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15028 fprintf (stream, _("\
15029 -Qy, -Qn ignored\n\
15030 -V print assembler version number\n\
15031 -k ignored\n"));
15032 #endif
15033 fprintf (stream, _("\
15034 -n do not optimize code alignment\n\
15035 -O{012s} attempt some code optimizations\n\
15036 -q quieten some warnings\n"));
15037 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15038 fprintf (stream, _("\
15039 -s ignored\n"));
15040 #endif
15041 #ifdef BFD64
15042 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15043 fprintf (stream, _("\
15044 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
15045 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
15046 fprintf (stream, _("\
15047 --32/--64 generate 32bit/64bit object\n"));
15048 # endif
15049 #endif
15050 #ifdef SVR4_COMMENT_CHARS
15051 fprintf (stream, _("\
15052 --divide do not treat `/' as a comment character\n"));
15053 #else
15054 fprintf (stream, _("\
15055 --divide ignored\n"));
15056 #endif
15057 fprintf (stream, _("\
15058 -march=CPU[,+EXTENSION...]\n\
15059 generate code for CPU and EXTENSION, CPU is one of:\n"));
15060 show_arch (stream, 0, 1);
15061 fprintf (stream, _("\
15062 EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
15063 show_arch (stream, 1, 0);
15064 fprintf (stream, _("\
15065 -mtune=CPU optimize for CPU, CPU is one of:\n"));
15066 show_arch (stream, 0, 0);
15067 fprintf (stream, _("\
15068 -msse2avx encode SSE instructions with VEX prefix\n"));
15069 fprintf (stream, _("\
15070 -muse-unaligned-vector-move\n\
15071 encode aligned vector move as unaligned vector move\n"));
15072 fprintf (stream, _("\
15073 -msse-check=[none|error|warning] (default: warning)\n\
15074 check SSE instructions\n"));
15075 fprintf (stream, _("\
15076 -moperand-check=[none|error|warning] (default: warning)\n\
15077 check operand combinations for validity\n"));
15078 fprintf (stream, _("\
15079 -mavxscalar=[128|256] (default: 128)\n\
15080 encode scalar AVX instructions with specific vector\n\
15081 length\n"));
15082 fprintf (stream, _("\
15083 -mvexwig=[0|1] (default: 0)\n\
15084 encode VEX instructions with specific VEX.W value\n\
15085 for VEX.W bit ignored instructions\n"));
15086 fprintf (stream, _("\
15087 -mevexlig=[128|256|512] (default: 128)\n\
15088 encode scalar EVEX instructions with specific vector\n\
15089 length\n"));
15090 fprintf (stream, _("\
15091 -mevexwig=[0|1] (default: 0)\n\
15092 encode EVEX instructions with specific EVEX.W value\n\
15093 for EVEX.W bit ignored instructions\n"));
15094 fprintf (stream, _("\
15095 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
15096 encode EVEX instructions with specific EVEX.RC value\n\
15097 for SAE-only ignored instructions\n"));
15098 fprintf (stream, _("\
15099 -mmnemonic=[att|intel] "));
15100 if (SYSV386_COMPAT)
15101 fprintf (stream, _("(default: att)\n"));
15102 else
15103 fprintf (stream, _("(default: intel)\n"));
15104 fprintf (stream, _("\
15105 use AT&T/Intel mnemonic\n"));
15106 fprintf (stream, _("\
15107 -msyntax=[att|intel] (default: att)\n\
15108 use AT&T/Intel syntax\n"));
15109 fprintf (stream, _("\
15110 -mindex-reg support pseudo index registers\n"));
15111 fprintf (stream, _("\
15112 -mnaked-reg don't require `%%' prefix for registers\n"));
15113 fprintf (stream, _("\
15114 -madd-bnd-prefix add BND prefix for all valid branches\n"));
15115 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15116 fprintf (stream, _("\
15117 -mshared disable branch optimization for shared code\n"));
15118 fprintf (stream, _("\
15119 -mx86-used-note=[no|yes] "));
15120 if (DEFAULT_X86_USED_NOTE)
15121 fprintf (stream, _("(default: yes)\n"));
15122 else
15123 fprintf (stream, _("(default: no)\n"));
15124 fprintf (stream, _("\
15125 generate x86 used ISA and feature properties\n"));
15126 #endif
15127 #if defined (TE_PE) || defined (TE_PEP)
15128 fprintf (stream, _("\
15129 -mbig-obj generate big object files\n"));
15130 #endif
15131 fprintf (stream, _("\
15132 -momit-lock-prefix=[no|yes] (default: no)\n\
15133 strip all lock prefixes\n"));
15134 fprintf (stream, _("\
15135 -mfence-as-lock-add=[no|yes] (default: no)\n\
15136 encode lfence, mfence and sfence as\n\
15137 lock addl $0x0, (%%{re}sp)\n"));
15138 fprintf (stream, _("\
15139 -mrelax-relocations=[no|yes] "));
15140 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
15141 fprintf (stream, _("(default: yes)\n"));
15142 else
15143 fprintf (stream, _("(default: no)\n"));
15144 fprintf (stream, _("\
15145 generate relax relocations\n"));
15146 fprintf (stream, _("\
15147 -malign-branch-boundary=NUM (default: 0)\n\
15148 align branches within NUM byte boundary\n"));
15149 fprintf (stream, _("\
15150 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
15151 TYPE is combination of jcc, fused, jmp, call, ret,\n\
15152 indirect\n\
15153 specify types of branches to align\n"));
15154 fprintf (stream, _("\
15155 -malign-branch-prefix-size=NUM (default: 5)\n\
15156 align branches with NUM prefixes per instruction\n"));
15157 fprintf (stream, _("\
15158 -mbranches-within-32B-boundaries\n\
15159 align branches within 32 byte boundary\n"));
15160 fprintf (stream, _("\
15161 -mlfence-after-load=[no|yes] (default: no)\n\
15162 generate lfence after load\n"));
15163 fprintf (stream, _("\
15164 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
15165 generate lfence before indirect near branch\n"));
15166 fprintf (stream, _("\
15167 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
15168 generate lfence before ret\n"));
15169 fprintf (stream, _("\
15170 -mamd64 accept only AMD64 ISA [default]\n"));
15171 fprintf (stream, _("\
15172 -mintel64 accept only Intel64 ISA\n"));
15173 }
15174
15175 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
15176 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
15177 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
15178
15179 /* Pick the target format to use. */
15180
15181 const char *
15182 i386_target_format (void)
15183 {
15184 if (startswith (default_arch, "x86_64"))
15185 {
15186 update_code_flag (CODE_64BIT, 1);
15187 if (default_arch[6] == '\0')
15188 x86_elf_abi = X86_64_ABI;
15189 else
15190 x86_elf_abi = X86_64_X32_ABI;
15191 }
15192 else if (!strcmp (default_arch, "i386"))
15193 update_code_flag (CODE_32BIT, 1);
15194 else if (!strcmp (default_arch, "iamcu"))
15195 {
15196 update_code_flag (CODE_32BIT, 1);
15197 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
15198 {
15199 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
15200 cpu_arch_name = "iamcu";
15201 free (cpu_sub_arch_name);
15202 cpu_sub_arch_name = NULL;
15203 cpu_arch_flags = iamcu_flags;
15204 cpu_arch_isa = PROCESSOR_IAMCU;
15205 cpu_arch_isa_flags = iamcu_flags;
15206 if (!cpu_arch_tune_set)
15207 {
15208 cpu_arch_tune = cpu_arch_isa;
15209 cpu_arch_tune_flags = cpu_arch_isa_flags;
15210 }
15211 }
15212 else if (cpu_arch_isa != PROCESSOR_IAMCU)
15213 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
15214 cpu_arch_name);
15215 }
15216 else
15217 as_fatal (_("unknown architecture"));
15218
15219 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
15220 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
15221 if (cpu_flags_all_zero (&cpu_arch_tune_flags))
15222 cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
15223
15224 switch (OUTPUT_FLAVOR)
15225 {
15226 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
15227 case bfd_target_aout_flavour:
15228 return AOUT_TARGET_FORMAT;
15229 #endif
15230 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
15231 # if defined (TE_PE) || defined (TE_PEP)
15232 case bfd_target_coff_flavour:
15233 if (flag_code == CODE_64BIT)
15234 {
15235 object_64bit = 1;
15236 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
15237 }
15238 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
15239 # elif defined (TE_GO32)
15240 case bfd_target_coff_flavour:
15241 return "coff-go32";
15242 # else
15243 case bfd_target_coff_flavour:
15244 return "coff-i386";
15245 # endif
15246 #endif
15247 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
15248 case bfd_target_elf_flavour:
15249 {
15250 const char *format;
15251
15252 switch (x86_elf_abi)
15253 {
15254 default:
15255 format = ELF_TARGET_FORMAT;
15256 #ifndef TE_SOLARIS
15257 tls_get_addr = "___tls_get_addr";
15258 #endif
15259 break;
15260 case X86_64_ABI:
15261 use_rela_relocations = 1;
15262 object_64bit = 1;
15263 #ifndef TE_SOLARIS
15264 tls_get_addr = "__tls_get_addr";
15265 #endif
15266 format = ELF_TARGET_FORMAT64;
15267 break;
15268 case X86_64_X32_ABI:
15269 use_rela_relocations = 1;
15270 object_64bit = 1;
15271 #ifndef TE_SOLARIS
15272 tls_get_addr = "__tls_get_addr";
15273 #endif
15274 disallow_64bit_reloc = 1;
15275 format = ELF_TARGET_FORMAT32;
15276 break;
15277 }
15278 if (cpu_arch_isa == PROCESSOR_IAMCU)
15279 {
15280 if (x86_elf_abi != I386_ABI)
15281 as_fatal (_("Intel MCU is 32bit only"));
15282 return ELF_TARGET_IAMCU_FORMAT;
15283 }
15284 else
15285 return format;
15286 }
15287 #endif
15288 #if defined (OBJ_MACH_O)
15289 case bfd_target_mach_o_flavour:
15290 if (flag_code == CODE_64BIT)
15291 {
15292 use_rela_relocations = 1;
15293 object_64bit = 1;
15294 return "mach-o-x86-64";
15295 }
15296 else
15297 return "mach-o-i386";
15298 #endif
15299 default:
15300 abort ();
15301 return NULL;
15302 }
15303 }
15304
15305 #endif /* OBJ_MAYBE_ more than one */
15306 \f
15307 symbolS *
15308 md_undefined_symbol (char *name)
15309 {
15310 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
15311 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
15312 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
15313 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
15314 {
15315 if (!GOT_symbol)
15316 {
15317 if (symbol_find (name))
15318 as_bad (_("GOT already in symbol table"));
15319 GOT_symbol = symbol_new (name, undefined_section,
15320 &zero_address_frag, 0);
15321 }
15322 return GOT_symbol;
15323 }
15324 return 0;
15325 }
15326
15327 /* Round up a section size to the appropriate boundary. */
15328
15329 valueT
15330 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
15331 {
15332 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
15333 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
15334 {
15335 /* For a.out, force the section size to be aligned. If we don't do
15336 this, BFD will align it for us, but it will not write out the
15337 final bytes of the section. This may be a bug in BFD, but it is
15338 easier to fix it here since that is how the other a.out targets
15339 work. */
15340 int align;
15341
15342 align = bfd_section_alignment (segment);
15343 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
15344 }
15345 #endif
15346
15347 return size;
15348 }
15349
15350 /* On the i386, PC-relative offsets are relative to the start of the
15351 next instruction. That is, the address of the offset, plus its
15352 size, since the offset is always the last part of the insn. */
15353
15354 long
15355 md_pcrel_from (fixS *fixP)
15356 {
15357 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
15358 }
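/* Worked example (illustrative): a 4-byte pc-relative fixup at offset
   1 of a frag at address 0x100 - say the displacement of e8 xx xx xx
   xx, call rel32 - yields 0x100 + 1 + 4 = 0x105, exactly the address
   of the next instruction.  */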
15359
15360 #ifndef I386COFF
15361
15362 static void
15363 s_bss (int ignore ATTRIBUTE_UNUSED)
15364 {
15365 int temp;
15366
15367 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15368 if (IS_ELF)
15369 obj_elf_section_change_hook ();
15370 #endif
15371 temp = get_absolute_expression ();
15372 subseg_set (bss_section, (subsegT) temp);
15373 demand_empty_rest_of_line ();
15374 }
15375
15376 #endif
15377
15378 /* Remember constant directive. */
15379
15380 void
15381 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
15382 {
15383 if (last_insn.kind != last_insn_directive
15384 && (bfd_section_flags (now_seg) & SEC_CODE))
15385 {
15386 last_insn.seg = now_seg;
15387 last_insn.kind = last_insn_directive;
15388 last_insn.name = "constant directive";
15389 last_insn.file = as_where (&last_insn.line);
15390 if (lfence_before_ret != lfence_before_ret_none)
15391 {
15392 if (lfence_before_indirect_branch != lfence_branch_none)
15393 as_warn (_("constant directive skips -mlfence-before-ret "
15394 "and -mlfence-before-indirect-branch"));
15395 else
15396 as_warn (_("constant directive skips -mlfence-before-ret"));
15397 }
15398 else if (lfence_before_indirect_branch != lfence_branch_none)
15399 as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
15400 }
15401 }
15402
15403 int
15404 i386_validate_fix (fixS *fixp)
15405 {
15406 if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
15407 {
15408 reloc_howto_type *howto;
15409
15410 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
15411 as_bad_where (fixp->fx_file, fixp->fx_line,
15412 _("invalid %s relocation against register"),
15413 howto ? howto->name : "<unknown>");
15414 return 0;
15415 }
15416
15417 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15418 if (fixp->fx_r_type == BFD_RELOC_SIZE32
15419 || fixp->fx_r_type == BFD_RELOC_SIZE64)
15420 return IS_ELF && fixp->fx_addsy
15421 && (!S_IS_DEFINED (fixp->fx_addsy)
15422 || S_IS_EXTERNAL (fixp->fx_addsy));
15423 #endif
15424
15425 if (fixp->fx_subsy)
15426 {
15427 if (fixp->fx_subsy == GOT_symbol)
15428 {
15429 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
15430 {
15431 if (!object_64bit)
15432 abort ();
15433 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15434 if (fixp->fx_tcbit2)
15435 fixp->fx_r_type = (fixp->fx_tcbit
15436 ? BFD_RELOC_X86_64_REX_GOTPCRELX
15437 : BFD_RELOC_X86_64_GOTPCRELX);
15438 else
15439 #endif
15440 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
15441 }
15442 else
15443 {
15444 if (!object_64bit)
15445 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
15446 else
15447 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
15448 }
15449 fixp->fx_subsy = 0;
15450 }
15451 }
15452 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15453 else
15454 {
15455 /* NB: Commit 292676c1 resolved PLT32 relocs against local symbols
15456 to the section. Since a PLT32 relocation must be against a symbol,
15457 turn such a PLT32 relocation into a PC32 relocation. */
15458 if (fixp->fx_addsy
15459 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
15460 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
15461 && symbol_section_p (fixp->fx_addsy))
15462 fixp->fx_r_type = BFD_RELOC_32_PCREL;
15463 if (!object_64bit)
15464 {
15465 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
15466 && fixp->fx_tcbit2)
15467 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
15468 }
15469 }
15470 #endif
15471
15472 return 1;
15473 }
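/* Worked example (illustrative): in 32-bit mode an expression that
   subtracts _GLOBAL_OFFSET_TABLE_, e.g.

       .long sym - _GLOBAL_OFFSET_TABLE_

   arrives here with fx_subsy == GOT_symbol and is rewritten above to
   a plain BFD_RELOC_386_GOTOFF against "sym", with fx_subsy cleared.
   In 64-bit mode the same shape becomes BFD_RELOC_X86_64_GOTOFF64,
   or one of the GOTPCREL flavours when the fix is 32-bit
   PC-relative.  */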

arelent *
tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
{
  arelent *rel;
  bfd_reloc_code_real_type code;

  switch (fixp->fx_r_type)
    {
#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
      symbolS *sym;

    case BFD_RELOC_SIZE32:
    case BFD_RELOC_SIZE64:
      if (fixp->fx_addsy
	  && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
	  && (!fixp->fx_subsy
	      || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
	sym = fixp->fx_addsy;
      else if (fixp->fx_subsy
	       && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
	       && (!fixp->fx_addsy
		   || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
	sym = fixp->fx_subsy;
      else
	sym = NULL;
      if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
	{
	  /* Resolve size relocation against local symbol to size of
	     the symbol plus addend.  */
	  valueT value = S_GET_SIZE (sym);

	  if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
	    value = bfd_section_size (S_GET_SEGMENT (sym));
	  if (sym == fixp->fx_subsy)
	    {
	      value = -value;
	      if (fixp->fx_addsy)
		value += S_GET_VALUE (fixp->fx_addsy);
	    }
	  else if (fixp->fx_subsy)
	    value -= S_GET_VALUE (fixp->fx_subsy);
	  value += fixp->fx_offset;
	  if (fixp->fx_r_type == BFD_RELOC_SIZE32
	      && object_64bit
	      && !fits_in_unsigned_long (value))
	    as_bad_where (fixp->fx_file, fixp->fx_line,
			  _("symbol size computation overflow"));
	  fixp->fx_addsy = NULL;
	  fixp->fx_subsy = NULL;
	  md_apply_fix (fixp, (valueT *) &value, NULL);
	  return NULL;
	}
      if (!fixp->fx_addsy || fixp->fx_subsy)
	{
	  as_bad_where (fixp->fx_file, fixp->fx_line,
			_("unsupported expression involving @size"));
	  return NULL;
	}
#endif
      /* Fall through.  */

    case BFD_RELOC_X86_64_PLT32:
    case BFD_RELOC_X86_64_GOT32:
    case BFD_RELOC_X86_64_GOTPCREL:
    case BFD_RELOC_X86_64_GOTPCRELX:
    case BFD_RELOC_X86_64_REX_GOTPCRELX:
    case BFD_RELOC_386_PLT32:
    case BFD_RELOC_386_GOT32:
    case BFD_RELOC_386_GOT32X:
    case BFD_RELOC_386_GOTOFF:
    case BFD_RELOC_386_GOTPC:
    case BFD_RELOC_386_TLS_GD:
    case BFD_RELOC_386_TLS_LDM:
    case BFD_RELOC_386_TLS_LDO_32:
    case BFD_RELOC_386_TLS_IE_32:
    case BFD_RELOC_386_TLS_IE:
    case BFD_RELOC_386_TLS_GOTIE:
    case BFD_RELOC_386_TLS_LE_32:
    case BFD_RELOC_386_TLS_LE:
    case BFD_RELOC_386_TLS_GOTDESC:
    case BFD_RELOC_386_TLS_DESC_CALL:
    case BFD_RELOC_X86_64_TLSGD:
    case BFD_RELOC_X86_64_TLSLD:
    case BFD_RELOC_X86_64_DTPOFF32:
    case BFD_RELOC_X86_64_DTPOFF64:
    case BFD_RELOC_X86_64_GOTTPOFF:
    case BFD_RELOC_X86_64_TPOFF32:
    case BFD_RELOC_X86_64_TPOFF64:
    case BFD_RELOC_X86_64_GOTOFF64:
    case BFD_RELOC_X86_64_GOTPC32:
    case BFD_RELOC_X86_64_GOT64:
    case BFD_RELOC_X86_64_GOTPCREL64:
    case BFD_RELOC_X86_64_GOTPC64:
    case BFD_RELOC_X86_64_GOTPLT64:
    case BFD_RELOC_X86_64_PLTOFF64:
    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
    case BFD_RELOC_X86_64_TLSDESC_CALL:
    case BFD_RELOC_RVA:
    case BFD_RELOC_VTABLE_ENTRY:
    case BFD_RELOC_VTABLE_INHERIT:
#ifdef TE_PE
    case BFD_RELOC_32_SECREL:
    case BFD_RELOC_16_SECIDX:
#endif
      code = fixp->fx_r_type;
      break;
    case BFD_RELOC_X86_64_32S:
      if (!fixp->fx_pcrel)
	{
	  /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
	  code = fixp->fx_r_type;
	  break;
	}
      /* Fall through.  */
    default:
      if (fixp->fx_pcrel)
	{
	  switch (fixp->fx_size)
	    {
	    default:
	      as_bad_where (fixp->fx_file, fixp->fx_line,
			    _("cannot do %d byte pc-relative relocation"),
			    fixp->fx_size);
	      code = BFD_RELOC_32_PCREL;
	      break;
	    case 1: code = BFD_RELOC_8_PCREL; break;
	    case 2: code = BFD_RELOC_16_PCREL; break;
	    case 4: code = BFD_RELOC_32_PCREL; break;
#ifdef BFD64
	    case 8: code = BFD_RELOC_64_PCREL; break;
#endif
	    }
	}
      else
	{
	  switch (fixp->fx_size)
	    {
	    default:
	      as_bad_where (fixp->fx_file, fixp->fx_line,
			    _("cannot do %d byte relocation"),
			    fixp->fx_size);
	      code = BFD_RELOC_32;
	      break;
	    case 1: code = BFD_RELOC_8; break;
	    case 2: code = BFD_RELOC_16; break;
	    case 4: code = BFD_RELOC_32; break;
#ifdef BFD64
	    case 8: code = BFD_RELOC_64; break;
#endif
	    }
	}
      break;
    }

  if ((code == BFD_RELOC_32
       || code == BFD_RELOC_32_PCREL
       || code == BFD_RELOC_X86_64_32S)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      if (!object_64bit)
	code = BFD_RELOC_386_GOTPC;
      else
	code = BFD_RELOC_X86_64_GOTPC32;
    }
  if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
      && GOT_symbol
      && fixp->fx_addsy == GOT_symbol)
    {
      code = BFD_RELOC_X86_64_GOTPC64;
    }

  rel = XNEW (arelent);
  rel->sym_ptr_ptr = XNEW (asymbol *);
  *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);

  rel->address = fixp->fx_frag->fr_address + fixp->fx_where;

  if (!use_rela_relocations)
    {
      /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
	 vtable entry to be used in the relocation's section offset.  */
      if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
	rel->address = fixp->fx_offset;
#if defined (OBJ_COFF) && defined (TE_PE)
      else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
	rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
      else
#endif
	rel->addend = 0;
    }
  /* Use RELA-style relocations in 64-bit mode.  */
  else
    {
      if (disallow_64bit_reloc)
	switch (code)
	  {
	  case BFD_RELOC_X86_64_DTPOFF64:
	  case BFD_RELOC_X86_64_TPOFF64:
	  case BFD_RELOC_64_PCREL:
	  case BFD_RELOC_X86_64_GOTOFF64:
	  case BFD_RELOC_X86_64_GOT64:
	  case BFD_RELOC_X86_64_GOTPCREL64:
	  case BFD_RELOC_X86_64_GOTPC64:
	  case BFD_RELOC_X86_64_GOTPLT64:
	  case BFD_RELOC_X86_64_PLTOFF64:
	    as_bad_where (fixp->fx_file, fixp->fx_line,
			  _("cannot represent relocation type %s in x32 mode"),
			  bfd_get_reloc_code_name (code));
	    break;
	  default:
	    break;
	  }

      if (!fixp->fx_pcrel)
	rel->addend = fixp->fx_offset;
      else
	switch (code)
	  {
	  case BFD_RELOC_X86_64_PLT32:
	  case BFD_RELOC_X86_64_GOT32:
	  case BFD_RELOC_X86_64_GOTPCREL:
	  case BFD_RELOC_X86_64_GOTPCRELX:
	  case BFD_RELOC_X86_64_REX_GOTPCRELX:
	  case BFD_RELOC_X86_64_TLSGD:
	  case BFD_RELOC_X86_64_TLSLD:
	  case BFD_RELOC_X86_64_GOTTPOFF:
	  case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
	  case BFD_RELOC_X86_64_TLSDESC_CALL:
	    rel->addend = fixp->fx_offset - fixp->fx_size;
	    break;
	  default:
	    rel->addend = (section->vma
			   - fixp->fx_size
			   + fixp->fx_addnumber
			   + md_pcrel_from (fixp));
	    break;
	  }
    }

  rel->howto = bfd_reloc_type_lookup (stdoutput, code);
  if (rel->howto == NULL)
    {
      as_bad_where (fixp->fx_file, fixp->fx_line,
		    _("cannot represent relocation type %s"),
		    bfd_get_reloc_code_name (code));
      /* Set howto to a garbage value so that we can keep going.  */
      rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
      gas_assert (rel->howto != NULL);
    }

  return rel;
}
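/* Worked example (schematic; the real prologue usually adds
   [.-.L1] to the constant): the classic 32-bit PIC sequence

       call  .L1
   .L1: popl  %ebx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   produces a 4-byte fix whose fx_addsy is GOT_symbol; the code above
   converts its BFD_RELOC_32 into BFD_RELOC_386_GOTPC (or
   BFD_RELOC_X86_64_GOTPC32 in 64-bit mode) before the arelent is
   built.  */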

#include "tc-i386-intel.c"

void
tc_x86_parse_to_dw2regnum (expressionS *exp)
{
  int saved_naked_reg;
  char saved_register_dot;

  saved_naked_reg = allow_naked_reg;
  allow_naked_reg = 1;
  saved_register_dot = register_chars['.'];
  register_chars['.'] = '.';
  allow_pseudo_reg = 1;
  expression_and_evaluate (exp);
  allow_pseudo_reg = 0;
  register_chars['.'] = saved_register_dot;
  allow_naked_reg = saved_naked_reg;

  if (exp->X_op == O_register && exp->X_add_number >= 0)
    {
      if ((addressT) exp->X_add_number < i386_regtab_size)
	{
	  exp->X_op = O_constant;
	  exp->X_add_number = i386_regtab[exp->X_add_number]
			      .dw2_regnum[flag_code >> 1];
	}
      else
	exp->X_op = O_illegal;
    }
}
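/* Illustrative result: because allow_naked_reg is temporarily set,
   parsing "rsp" through this hook yields DWARF register number 7 in
   64-bit mode, and "esp" yields 4 in 32-bit mode, so directives such
   as ".cfi_def_cfa rsp, 8" resolve without a leading '%'.  */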

void
tc_x86_frame_initial_instructions (void)
{
  static unsigned int sp_regno[2];

  if (!sp_regno[flag_code >> 1])
    {
      char *saved_input = input_line_pointer;
      char sp[][4] = {"esp", "rsp"};
      expressionS exp;

      input_line_pointer = sp[flag_code >> 1];
      tc_x86_parse_to_dw2regnum (&exp);
      gas_assert (exp.X_op == O_constant);
      sp_regno[flag_code >> 1] = exp.X_add_number;
      input_line_pointer = saved_input;
    }

  cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
  cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
}
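/* Numeric sketch of the 64-bit case (values per the psABI, for
   illustration): x86_cie_data_alignment is -8 and
   x86_dwarf2_return_column is 16, so the two calls above emit the
   equivalent of

       .cfi_def_cfa rsp, 8        CFA = %rsp + 8 at function entry
       .cfi_offset 16, -8         return address saved at CFA - 8
 */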

int
x86_dwarf2_addr_size (void)
{
#if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
  if (x86_elf_abi == X86_64_X32_ABI)
    return 4;
#endif
  return bfd_arch_bits_per_address (stdoutput) / 8;
}

int
i386_elf_section_type (const char *str, size_t len)
{
  if (flag_code == CODE_64BIT
      && len == sizeof ("unwind") - 1
      && startswith (str, "unwind"))
    return SHT_X86_64_UNWIND;

  return -1;
}
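/* Illustrative usage (the section name here is hypothetical): in
   64-bit mode

       .section .unwind_tbl,"a",@unwind

   passes the type string "unwind" to this hook, which maps it to
   SHT_X86_64_UNWIND; any other string (or 32-bit mode) returns -1 and
   falls back to the generic section-type handling.  */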

#ifdef TE_SOLARIS
void
i386_solaris_fix_up_eh_frame (segT sec)
{
  if (flag_code == CODE_64BIT)
    elf_section_type (sec) = SHT_X86_64_UNWIND;
}
#endif

#ifdef TE_PE
void
tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
{
  expressionS exp;

  exp.X_op = O_secrel;
  exp.X_add_symbol = symbol;
  exp.X_add_number = 0;
  emit_expr (&exp, size);
}
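/* Illustrative effect: for DWARF debug offsets on PE targets this
   emits a section-relative value -- for the common 4-byte case,
   roughly the equivalent of

       .secrel32 symbol

   so debug info refers to section-relative offsets rather than
   virtual addresses.  */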
#endif

#if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
/* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */

bfd_vma
x86_64_section_letter (int letter, const char **ptr_msg)
{
  if (flag_code == CODE_64BIT)
    {
      if (letter == 'l')
	return SHF_X86_64_LARGE;

      *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
    }
  else
    *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
  return -1;
}
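/* Illustrative usage (hypothetical section name): in 64-bit mode the
   flag letter 'l' in

       .section .ldata,"awl",@progbits

   is translated by this hook into SHF_X86_64_LARGE, marking the
   section as eligible for placement outside the small code model's
   2 GiB reach.  */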

static void
handle_large_common (int small ATTRIBUTE_UNUSED)
{
  if (flag_code != CODE_64BIT)
    {
      s_comm_internal (0, elf_common_parse);
      as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
    }
  else
    {
      static segT lbss_section;
      asection *saved_com_section_ptr = elf_com_section_ptr;
      asection *saved_bss_section = bss_section;

      if (lbss_section == NULL)
	{
	  flagword applicable;
	  segT seg = now_seg;
	  subsegT subseg = now_subseg;

	  /* The .lbss section is for local .largecomm symbols.  */
	  lbss_section = subseg_new (".lbss", 0);
	  applicable = bfd_applicable_section_flags (stdoutput);
	  bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
	  seg_info (lbss_section)->bss = 1;

	  subseg_set (seg, subseg);
	}

      elf_com_section_ptr = &_bfd_elf_large_com_section;
      bss_section = lbss_section;

      s_comm_internal (0, elf_common_parse);

      elf_com_section_ptr = saved_com_section_ptr;
      bss_section = saved_bss_section;
    }
}
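/* Illustrative usage (hypothetical symbol name): in 64-bit mode

       .largecomm big_buf, 0x100000, 32

   parses like .comm but allocates the symbol against the large
   common section (or .lbss when the symbol was declared local), so
   it can live outside the 2 GiB small-model range; in 32-bit mode
   the directive degrades to a plain .comm with a warning.  */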
#endif /* OBJ_ELF || OBJ_MAYBE_ELF */