x86: make code size vs CPU arch checking consistent
1 /* tc-i386.c -- Assemble code for the Intel 80386
2 Copyright (C) 1989-2023 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Intel 80386 machine specific gas.
22 Written by Eliot Dresselhaus (eliot@mgm.mit.edu).
23 x86_64 support by Jan Hubicka (jh@suse.cz)
24 VIA PadLock support by Michal Ludvig (mludvig@suse.cz)
25 Bugs & suggestions are completely welcome. This is free software.
26 Please help us make it better. */
27
28 #include "as.h"
29 #include "safe-ctype.h"
30 #include "subsegs.h"
31 #include "dwarf2dbg.h"
32 #include "dw2gencfi.h"
33 #include "gen-sframe.h"
34 #include "sframe.h"
35 #include "elf/x86-64.h"
36 #include "opcodes/i386-init.h"
37 #include "opcodes/i386-mnem.h"
38 #include <limits.h>
39
40 #ifndef INFER_ADDR_PREFIX
41 #define INFER_ADDR_PREFIX 1
42 #endif
43
44 #ifndef DEFAULT_ARCH
45 #define DEFAULT_ARCH "i386"
46 #endif
47
48 #ifndef INLINE
49 #if __GNUC__ >= 2
50 #define INLINE __inline__
51 #else
52 #define INLINE
53 #endif
54 #endif
55
56 /* Prefixes will be emitted in the order defined below.
 57    WAIT_PREFIX must be the first prefix since FWAIT is really an
58 instruction, and so must come before any prefixes.
59 The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
60 REP_PREFIX/HLE_PREFIX, LOCK_PREFIX. */
61 #define WAIT_PREFIX 0
62 #define SEG_PREFIX 1
63 #define ADDR_PREFIX 2
64 #define DATA_PREFIX 3
65 #define REP_PREFIX 4
66 #define HLE_PREFIX REP_PREFIX
67 #define BND_PREFIX REP_PREFIX
68 #define LOCK_PREFIX 5
69 #define REX_PREFIX 6 /* must come last. */
70 #define MAX_PREFIXES 7 /* max prefixes per opcode */
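/* For example (illustrative, not from the original source): assembling
   "lock addw %ax, %fs:(%ebx)" in 32-bit mode stores 0x64 in
   i.prefix[SEG_PREFIX], 0x66 in i.prefix[DATA_PREFIX] and 0xf0 in
   i.prefix[LOCK_PREFIX]; emitting the slots in index order yields the
   byte sequence 64 66 f0 01 03.  */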
71
72 /* we define the syntax here (modulo base,index,scale syntax) */
73 #define REGISTER_PREFIX '%'
74 #define IMMEDIATE_PREFIX '$'
75 #define ABSOLUTE_PREFIX '*'
76
77 /* these are the instruction mnemonic suffixes in AT&T syntax or
78 memory operand size in Intel syntax. */
79 #define WORD_MNEM_SUFFIX 'w'
80 #define BYTE_MNEM_SUFFIX 'b'
81 #define SHORT_MNEM_SUFFIX 's'
82 #define LONG_MNEM_SUFFIX 'l'
83 #define QWORD_MNEM_SUFFIX 'q'
84
85 #define END_OF_INSN '\0'
86
87 #define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }
88
89 /* This matches the C -> StaticRounding alias in the opcode table. */
90 #define commutative staticrounding
91
92 /*
93 'templates' is for grouping together 'template' structures for opcodes
94 of the same name. This is only used for storing the insns in the grand
95 ole hash table of insns.
96 The templates themselves start at START and range up to (but not including)
97 END.
98 */
99 typedef struct
100 {
101 const insn_template *start;
102 const insn_template *end;
103 }
104 templates;
105
106 /* 386 operand encoding bytes: see 386 book for details of this. */
107 typedef struct
108 {
109 unsigned int regmem; /* codes register or memory operand */
110 unsigned int reg; /* codes register operand (or extended opcode) */
111 unsigned int mode; /* how to interpret regmem & reg */
112 }
113 modrm_byte;
114
115 /* x86-64 extension prefix. */
116 typedef int rex_byte;
117
118 /* 386 opcode byte to code indirect addressing. */
119 typedef struct
120 {
121 unsigned base;
122 unsigned index;
123 unsigned scale;
124 }
125 sib_byte;
126
127 /* x86 arch names, types and features */
128 typedef struct
129 {
130 const char *name; /* arch name */
131 unsigned int len:8; /* arch string length */
132 bool skip:1; /* show_arch should skip this. */
133 enum processor_type type; /* arch type */
134 enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
135 i386_cpu_flags enable; /* cpu feature enable flags */
136 i386_cpu_flags disable; /* cpu feature disable flags */
137 }
138 arch_entry;
139
140 static void update_code_flag (int, int);
141 static void s_insn (int);
142 static void set_code_flag (int);
143 static void set_16bit_gcc_code_flag (int);
144 static void set_intel_syntax (int);
145 static void set_intel_mnemonic (int);
146 static void set_allow_index_reg (int);
147 static void set_check (int);
148 static void set_cpu_arch (int);
149 #ifdef TE_PE
150 static void pe_directive_secrel (int);
151 static void pe_directive_secidx (int);
152 #endif
153 static void signed_cons (int);
154 static char *output_invalid (int c);
155 static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
156 const char *);
157 static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
158 const char *);
159 static int i386_att_operand (char *);
160 static int i386_intel_operand (char *, int);
161 static int i386_intel_simplify (expressionS *);
162 static int i386_intel_parse_name (const char *, expressionS *);
163 static const reg_entry *parse_register (const char *, char **);
164 static const char *parse_insn (const char *, char *, bool);
165 static char *parse_operands (char *, const char *);
166 static void swap_operands (void);
167 static void swap_2_operands (unsigned int, unsigned int);
168 static enum flag_code i386_addressing_mode (void);
169 static void optimize_imm (void);
170 static bool optimize_disp (const insn_template *t);
171 static const insn_template *match_template (char);
172 static int check_string (void);
173 static int process_suffix (void);
174 static int check_byte_reg (void);
175 static int check_long_reg (void);
176 static int check_qword_reg (void);
177 static int check_word_reg (void);
178 static int finalize_imm (void);
179 static int process_operands (void);
180 static const reg_entry *build_modrm_byte (void);
181 static void output_insn (void);
182 static void output_imm (fragS *, offsetT);
183 static void output_disp (fragS *, offsetT);
184 #ifndef I386COFF
185 static void s_bss (int);
186 #endif
187 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
188 static void handle_large_common (int small ATTRIBUTE_UNUSED);
189
190 /* GNU_PROPERTY_X86_ISA_1_USED. */
191 static unsigned int x86_isa_1_used;
192 /* GNU_PROPERTY_X86_FEATURE_2_USED. */
193 static unsigned int x86_feature_2_used;
194 /* Generate x86 used ISA and feature properties. */
195 static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
196 #endif
197
198 static const char *default_arch = DEFAULT_ARCH;
199
200 /* parse_register() returns this when a register alias cannot be used. */
201 static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
202 { Dw2Inval, Dw2Inval } };
203
204 static const reg_entry *reg_eax;
205 static const reg_entry *reg_ds;
206 static const reg_entry *reg_es;
207 static const reg_entry *reg_ss;
208 static const reg_entry *reg_st0;
209 static const reg_entry *reg_k0;
210
211 /* VEX prefix. */
212 typedef struct
213 {
 214   /* The VEX prefix is either 2 or 3 bytes long; EVEX is 4 bytes.  */
215 unsigned char bytes[4];
216 unsigned int length;
217 /* Destination or source register specifier. */
218 const reg_entry *register_specifier;
219 } vex_prefix;
220
 221 /* 'md_assemble ()' gathers together information and puts it into an
 222    i386_insn.  */
223
224 union i386_op
225 {
226 expressionS *disps;
227 expressionS *imms;
228 const reg_entry *regs;
229 };
230
231 enum i386_error
232 {
233 no_error, /* Must be first. */
234 operand_size_mismatch,
235 operand_type_mismatch,
236 register_type_mismatch,
237 number_of_operands_mismatch,
238 invalid_instruction_suffix,
239 bad_imm4,
240 unsupported_with_intel_mnemonic,
241 unsupported_syntax,
242 unsupported,
243 unsupported_on_arch,
244 unsupported_64bit,
245 invalid_sib_address,
246 invalid_vsib_address,
247 invalid_vector_register_set,
248 invalid_tmm_register_set,
249 invalid_dest_and_src_register_set,
250 unsupported_vector_index_register,
251 unsupported_broadcast,
252 broadcast_needed,
253 unsupported_masking,
254 mask_not_on_destination,
255 no_default_mask,
256 unsupported_rc_sae,
257 invalid_register_operand,
258 };
259
260 struct _i386_insn
261 {
 262   /* TM holds the template for the insn we're currently assembling.  */
263 insn_template tm;
264
265 /* SUFFIX holds the instruction size suffix for byte, word, dword
266 or qword, if given. */
267 char suffix;
268
269 /* OPCODE_LENGTH holds the number of base opcode bytes. */
270 unsigned char opcode_length;
271
272 /* OPERANDS gives the number of given operands. */
273 unsigned int operands;
274
275 /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
276 of given register, displacement, memory operands and immediate
277 operands. */
278 unsigned int reg_operands, disp_operands, mem_operands, imm_operands;
279
280 /* TYPES [i] is the type (see above #defines) which tells us how to
281 use OP[i] for the corresponding operand. */
282 i386_operand_type types[MAX_OPERANDS];
283
284 /* Displacement expression, immediate expression, or register for each
285 operand. */
286 union i386_op op[MAX_OPERANDS];
287
288 /* Flags for operands. */
289 unsigned int flags[MAX_OPERANDS];
290 #define Operand_PCrel 1
291 #define Operand_Mem 2
292 #define Operand_Signed 4 /* .insn only */
293
294 /* Relocation type for operand */
295 enum bfd_reloc_code_real reloc[MAX_OPERANDS];
296
297 /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
298 the base index byte below. */
299 const reg_entry *base_reg;
300 const reg_entry *index_reg;
301 unsigned int log2_scale_factor;
302
303 /* SEG gives the seg_entries of this insn. They are zero unless
304 explicit segment overrides are given. */
305 const reg_entry *seg[2];
306
307 /* PREFIX holds all the given prefix opcodes (usually null).
308 PREFIXES is the number of prefix opcodes. */
309 unsigned int prefixes;
310 unsigned char prefix[MAX_PREFIXES];
311
312 /* .insn allows for reserved opcode spaces. */
313 unsigned char insn_opcode_space;
314
315 /* .insn also allows (requires) specifying immediate size. */
316 unsigned char imm_bits[MAX_OPERANDS];
317
318 /* Register is in low 3 bits of opcode. */
319 bool short_form;
320
321 /* The operand to a branch insn indicates an absolute branch. */
322 bool jumpabsolute;
323
324 /* The operand to a branch insn indicates a far branch. */
325 bool far_branch;
326
 327   /* There is a memory operand of (%dx) which should only be used
 328      with input/output instructions.  */
329 bool input_output_operand;
330
331 /* Extended states. */
332 enum
333 {
334 /* Use MMX state. */
335 xstate_mmx = 1 << 0,
336 /* Use XMM state. */
337 xstate_xmm = 1 << 1,
338 /* Use YMM state. */
339 xstate_ymm = 1 << 2 | xstate_xmm,
340 /* Use ZMM state. */
341 xstate_zmm = 1 << 3 | xstate_ymm,
342 /* Use TMM state. */
343 xstate_tmm = 1 << 4,
344 /* Use MASK state. */
345 xstate_mask = 1 << 5
346 } xstate;
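  /* The encodings above are inclusive: e.g. xstate_zmm
     = (1 << 3) | xstate_ymm = 0b1110, so a test for xstate_xmm also
     catches insns using YMM or ZMM state.  */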
347
348 /* Has GOTPC or TLS relocation. */
349 bool has_gotpc_tls_reloc;
350
351 /* RM and SIB are the modrm byte and the sib byte where the
352 addressing modes of this insn are encoded. */
353 modrm_byte rm;
354 rex_byte rex;
355 rex_byte vrex;
356 sib_byte sib;
357 vex_prefix vex;
358
359 /* Masking attributes.
360
361 The struct describes masking, applied to OPERAND in the instruction.
362 REG is a pointer to the corresponding mask register. ZEROING tells
363 whether merging or zeroing mask is used. */
364 struct Mask_Operation
365 {
366 const reg_entry *reg;
367 unsigned int zeroing;
368 /* The operand where this operation is associated. */
369 unsigned int operand;
370 } mask;
371
372 /* Rounding control and SAE attributes. */
373 struct RC_Operation
374 {
375 enum rc_type
376 {
377 rc_none = -1,
378 rne,
379 rd,
380 ru,
381 rz,
382 saeonly
383 } type;
384 /* In Intel syntax the operand modifier form is supposed to be used, but
385 we continue to accept the immediate forms as well. */
386 bool modifier;
387 } rounding;
388
389 /* Broadcasting attributes.
390
 391      The struct describes broadcasting, applied to OPERAND.  TYPE
 392      expresses the broadcast factor.  */
393 struct Broadcast_Operation
394 {
395 /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}. */
396 unsigned int type;
397
398 /* Index of broadcasted operand. */
399 unsigned int operand;
400
401 /* Number of bytes to broadcast. */
402 unsigned int bytes;
403 } broadcast;
404
405 /* Compressed disp8*N attribute. */
406 unsigned int memshift;
407
408 /* Prefer load or store in encoding. */
409 enum
410 {
411 dir_encoding_default = 0,
412 dir_encoding_load,
413 dir_encoding_store,
414 dir_encoding_swap
415 } dir_encoding;
416
417 /* Prefer 8bit, 16bit, 32bit displacement in encoding. */
418 enum
419 {
420 disp_encoding_default = 0,
421 disp_encoding_8bit,
422 disp_encoding_16bit,
423 disp_encoding_32bit
424 } disp_encoding;
425
426 /* Prefer the REX byte in encoding. */
427 bool rex_encoding;
428
429 /* Disable instruction size optimization. */
430 bool no_optimize;
431
432 /* How to encode vector instructions. */
433 enum
434 {
435 vex_encoding_default = 0,
436 vex_encoding_vex,
437 vex_encoding_vex3,
438 vex_encoding_evex,
439 vex_encoding_error
440 } vec_encoding;
441
442 /* REP prefix. */
443 const char *rep_prefix;
444
445 /* HLE prefix. */
446 const char *hle_prefix;
447
448 /* Have BND prefix. */
449 const char *bnd_prefix;
450
451 /* Have NOTRACK prefix. */
452 const char *notrack_prefix;
453
454 /* Error message. */
455 enum i386_error error;
456 };
457
458 typedef struct _i386_insn i386_insn;
459
 460 /* Link each RC type with the corresponding string that'll be looked
 461    for in the assembly source.  */
462 struct RC_name
463 {
464 enum rc_type type;
465 const char *name;
466 unsigned int len;
467 };
468
469 static const struct RC_name RC_NamesTable[] =
470 {
471 { rne, STRING_COMMA_LEN ("rn-sae") },
472 { rd, STRING_COMMA_LEN ("rd-sae") },
473 { ru, STRING_COMMA_LEN ("ru-sae") },
474 { rz, STRING_COMMA_LEN ("rz-sae") },
475 { saeonly, STRING_COMMA_LEN ("sae") },
476 };
477
478 /* To be indexed by segment register number. */
479 static const unsigned char i386_seg_prefixes[] = {
480 ES_PREFIX_OPCODE,
481 CS_PREFIX_OPCODE,
482 SS_PREFIX_OPCODE,
483 DS_PREFIX_OPCODE,
484 FS_PREFIX_OPCODE,
485 GS_PREFIX_OPCODE
486 };
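/* E.g. %es encodes as segment register number 0, so i386_seg_prefixes[0]
   is ES_PREFIX_OPCODE (0x26).  */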
487
488 /* List of chars besides those in app.c:symbol_chars that can start an
489 operand. Used to prevent the scrubber eating vital white-space. */
490 const char extra_symbol_chars[] = "*%-([{}"
491 #ifdef LEX_AT
492 "@"
493 #endif
494 #ifdef LEX_QM
495 "?"
496 #endif
497 ;
498
499 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
500 && !defined (TE_GNU) \
501 && !defined (TE_LINUX) \
502 && !defined (TE_Haiku) \
503 && !defined (TE_FreeBSD) \
504 && !defined (TE_DragonFly) \
505 && !defined (TE_NetBSD))
506 /* This array holds the chars that always start a comment. If the
507 pre-processor is disabled, these aren't very useful. The option
508 --divide will remove '/' from this list. */
509 const char *i386_comment_chars = "#/";
510 #define SVR4_COMMENT_CHARS 1
511 #define PREFIX_SEPARATOR '\\'
512
513 #else
514 const char *i386_comment_chars = "#";
515 #define PREFIX_SEPARATOR '/'
516 #endif
517
518 /* This array holds the chars that only start a comment at the beginning of
519 a line. If the line seems to have the form '# 123 filename'
520 .line and .file directives will appear in the pre-processed output.
521 Note that input_file.c hand checks for '#' at the beginning of the
522 first line of the input file. This is because the compiler outputs
523 #NO_APP at the beginning of its output.
524 Also note that comments started like this one will always work if
525 '/' isn't otherwise defined. */
526 const char line_comment_chars[] = "#/";
527
528 const char line_separator_chars[] = ";";
529
530 /* Chars that can be used to separate mant from exp in floating point
531 nums. */
532 const char EXP_CHARS[] = "eE";
533
534 /* Chars that mean this number is a floating point constant
535 As in 0f12.456
536 or 0d1.2345e12. */
537 const char FLT_CHARS[] = "fFdDxXhHbB";
538
539 /* Tables for lexical analysis. */
540 static char mnemonic_chars[256];
541 static char register_chars[256];
542 static char operand_chars[256];
543
544 /* Lexical macros. */
545 #define is_operand_char(x) (operand_chars[(unsigned char) x])
546 #define is_register_char(x) (register_chars[(unsigned char) x])
547 #define is_space_char(x) ((x) == ' ')
548
549 /* All non-digit non-letter characters that may occur in an operand and
550 which aren't already in extra_symbol_chars[]. */
551 static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]";
552
553 /* md_assemble() always leaves the strings it's passed unaltered. To
554 effect this we maintain a stack of saved characters that we've smashed
555 with '\0's (indicating end of strings for various sub-fields of the
556 assembler instruction). */
557 static char save_stack[32];
558 static char *save_stack_p;
559 #define END_STRING_AND_SAVE(s) \
560 do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
561 #define RESTORE_END_STRING(s) \
562 do { *(s) = *--save_stack_p; } while (0)
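/* A minimal usage sketch (illustrative only):

     char *comma = strchr (operand, ',');
     END_STRING_AND_SAVE (comma);	   // *comma saved, replaced by '\0'
     ...parse OPERAND as a NUL-terminated string...
     RESTORE_END_STRING (comma);	   // original character put back
*/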
563
564 /* The instruction we're assembling. */
565 static i386_insn i;
566
567 /* Possible templates for current insn. */
568 static const templates *current_templates;
569
570 /* Per instruction expressionS buffers: max displacements & immediates. */
571 static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
572 static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];
573
574 /* Current operand we are working on. */
575 static int this_operand = -1;
576
577 /* Are we processing a .insn directive? */
578 #define dot_insn() (i.tm.mnem_off == MN__insn)
579
580 /* We support four different modes. FLAG_CODE variable is used to distinguish
581 these. */
582
583 enum flag_code {
584 CODE_32BIT,
585 CODE_16BIT,
586 CODE_64BIT };
587
588 static enum flag_code flag_code;
589 static unsigned int object_64bit;
590 static unsigned int disallow_64bit_reloc;
591 static int use_rela_relocations = 0;
592 /* __tls_get_addr/___tls_get_addr symbol for TLS. */
593 static const char *tls_get_addr;
594
595 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
596 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
597 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
598
599 /* The ELF ABI to use. */
600 enum x86_elf_abi
601 {
602 I386_ABI,
603 X86_64_ABI,
604 X86_64_X32_ABI
605 };
606
607 static enum x86_elf_abi x86_elf_abi = I386_ABI;
608 #endif
609
610 #if defined (TE_PE) || defined (TE_PEP)
611 /* Use big object file format. */
612 static int use_big_obj = 0;
613 #endif
614
615 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
616 /* 1 if generating code for a shared library. */
617 static int shared = 0;
618
619 unsigned int x86_sframe_cfa_sp_reg;
620 /* The other CFA base register for SFrame stack trace info. */
621 unsigned int x86_sframe_cfa_fp_reg;
622 unsigned int x86_sframe_cfa_ra_reg;
623
624 #endif
625
626 /* 1 for intel syntax,
627 0 if att syntax. */
628 static int intel_syntax = 0;
629
630 static enum x86_64_isa
631 {
632 amd64 = 1, /* AMD64 ISA. */
633 intel64 /* Intel64 ISA. */
634 } isa64;
635
636 /* 1 for intel mnemonic,
637 0 if att mnemonic. */
638 static int intel_mnemonic = !SYSV386_COMPAT;
639
640 /* 1 if pseudo registers are permitted. */
641 static int allow_pseudo_reg = 0;
642
643 /* 1 if register prefix % not required. */
644 static int allow_naked_reg = 0;
645
646 /* 1 if the assembler should add BND prefix for all control-transferring
647 instructions supporting it, even if this prefix wasn't specified
648 explicitly. */
649 static int add_bnd_prefix = 0;
650
 651 /* 1 if pseudo index register, eiz/riz, is allowed.  */
652 static int allow_index_reg = 0;
653
654 /* 1 if the assembler should ignore LOCK prefix, even if it was
655 specified explicitly. */
656 static int omit_lock_prefix = 0;
657
658 /* 1 if the assembler should encode lfence, mfence, and sfence as
659 "lock addl $0, (%{re}sp)". */
660 static int avoid_fence = 0;
661
662 /* 1 if lfence should be inserted after every load. */
663 static int lfence_after_load = 0;
664
665 /* Non-zero if lfence should be inserted before indirect branch. */
666 static enum lfence_before_indirect_branch_kind
667 {
668 lfence_branch_none = 0,
669 lfence_branch_register,
670 lfence_branch_memory,
671 lfence_branch_all
672 }
673 lfence_before_indirect_branch;
674
675 /* Non-zero if lfence should be inserted before ret. */
676 static enum lfence_before_ret_kind
677 {
678 lfence_before_ret_none = 0,
679 lfence_before_ret_not,
680 lfence_before_ret_or,
681 lfence_before_ret_shl
682 }
683 lfence_before_ret;
684
 685 /* Kind of the previous instruction: a directive such as .byte, a prefix, or other.  */
686 static struct
687 {
688 segT seg;
689 const char *file;
690 const char *name;
691 unsigned int line;
692 enum last_insn_kind
693 {
694 last_insn_other = 0,
695 last_insn_directive,
696 last_insn_prefix
697 } kind;
698 } last_insn;
699
700 /* 1 if the assembler should generate relax relocations. */
701
702 static int generate_relax_relocations
703 = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;
704
705 static enum check_kind
706 {
707 check_none = 0,
708 check_warning,
709 check_error
710 }
711 sse_check, operand_check = check_warning;
712
713 /* Non-zero if branches should be aligned within power of 2 boundary. */
714 static int align_branch_power = 0;
715
716 /* Types of branches to align. */
717 enum align_branch_kind
718 {
719 align_branch_none = 0,
720 align_branch_jcc = 1,
721 align_branch_fused = 2,
722 align_branch_jmp = 3,
723 align_branch_call = 4,
724 align_branch_indirect = 5,
725 align_branch_ret = 6
726 };
727
728 /* Type bits of branches to align. */
729 enum align_branch_bit
730 {
731 align_branch_jcc_bit = 1 << align_branch_jcc,
732 align_branch_fused_bit = 1 << align_branch_fused,
733 align_branch_jmp_bit = 1 << align_branch_jmp,
734 align_branch_call_bit = 1 << align_branch_call,
735 align_branch_indirect_bit = 1 << align_branch_indirect,
736 align_branch_ret_bit = 1 << align_branch_ret
737 };
738
739 static unsigned int align_branch = (align_branch_jcc_bit
740 | align_branch_fused_bit
741 | align_branch_jmp_bit);
742
743 /* Types of condition jump used by macro-fusion. */
744 enum mf_jcc_kind
745 {
746 mf_jcc_jo = 0, /* base opcode 0x70 */
747 mf_jcc_jc, /* base opcode 0x72 */
748 mf_jcc_je, /* base opcode 0x74 */
749 mf_jcc_jna, /* base opcode 0x76 */
750 mf_jcc_js, /* base opcode 0x78 */
751 mf_jcc_jp, /* base opcode 0x7a */
752 mf_jcc_jl, /* base opcode 0x7c */
753 mf_jcc_jle, /* base opcode 0x7e */
754 };
755
 756 /* Types of compare flag-modifying instructions used by macro-fusion.  */
757 enum mf_cmp_kind
758 {
759 mf_cmp_test_and, /* test/cmp */
760 mf_cmp_alu_cmp, /* add/sub/cmp */
761 mf_cmp_incdec /* inc/dec */
762 };
763
 764 /* The maximum padding size for a fused jcc.  A CMP-like instruction
 765    can be 9 bytes and a jcc can be 6 bytes.  Leave room for prefixes,
 766    just in case.  */
767 #define MAX_FUSED_JCC_PADDING_SIZE 20
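/* (9 bytes of CMP plus 6 bytes of Jcc leave 5 bytes of slack, which
   lines up with align_branch_prefix_size below.)  */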
768
769 /* The maximum number of prefixes added for an instruction. */
770 static unsigned int align_branch_prefix_size = 5;
771
772 /* Optimization:
773 1. Clear the REX_W bit with register operand if possible.
774 2. Above plus use 128bit vector instruction to clear the full vector
775 register.
776 */
777 static int optimize = 0;
778
779 /* Optimization:
780 1. Clear the REX_W bit with register operand if possible.
781 2. Above plus use 128bit vector instruction to clear the full vector
782 register.
783 3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
784 "testb $imm7,%r8".
785 */
786 static int optimize_for_space = 0;
787
788 /* Register prefix used for error message. */
789 static const char *register_prefix = "%";
790
791 /* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
792 leave, push, and pop instructions so that gcc has the same stack
793 frame as in 32 bit mode. */
794 static char stackop_size = '\0';
795
796 /* Non-zero to optimize code alignment. */
797 int optimize_align_code = 1;
798
799 /* Non-zero to quieten some warnings. */
800 static int quiet_warnings = 0;
801
802 /* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs. */
803 static bool pre_386_16bit_warned;
804
805 /* CPU name. */
806 static const char *cpu_arch_name = NULL;
807 static char *cpu_sub_arch_name = NULL;
808
809 /* CPU feature flags. */
810 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
811
812 /* If we have selected a cpu we are generating instructions for. */
813 static int cpu_arch_tune_set = 0;
814
815 /* Cpu we are generating instructions for. */
816 enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
817
818 /* CPU feature flags of cpu we are generating instructions for. */
819 static i386_cpu_flags cpu_arch_tune_flags;
820
821 /* CPU instruction set architecture used. */
822 enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
823
824 /* CPU feature flags of instruction set architecture used. */
825 i386_cpu_flags cpu_arch_isa_flags;
826
 827 /* If set, conditional jumps are not automatically promoted to handle
 828    offsets larger than a byte.  */
829 static bool no_cond_jump_promotion = false;
830
831 /* This will be set from an expression parser hook if there's any
832 applicable operator involved in an expression. */
833 static enum {
834 expr_operator_none,
835 expr_operator_present,
836 expr_large_value,
837 } expr_mode;
838
839 /* Encode SSE instructions with VEX prefix. */
840 static unsigned int sse2avx;
841
842 /* Encode aligned vector move as unaligned vector move. */
843 static unsigned int use_unaligned_vector_move;
844
845 /* Maximum permitted vector size. */
846 #define VSZ_DEFAULT VSZ512
847 static unsigned int vector_size = VSZ_DEFAULT;
848
849 /* Encode scalar AVX instructions with specific vector length. */
850 static enum
851 {
852 vex128 = 0,
853 vex256
854 } avxscalar;
855
856 /* Encode VEX WIG instructions with specific vex.w. */
857 static enum
858 {
859 vexw0 = 0,
860 vexw1
861 } vexwig;
862
863 /* Encode scalar EVEX LIG instructions with specific vector length. */
864 static enum
865 {
866 evexl128 = 0,
867 evexl256,
868 evexl512
869 } evexlig;
870
871 /* Encode EVEX WIG instructions with specific evex.w. */
872 static enum
873 {
874 evexw0 = 0,
875 evexw1
876 } evexwig;
877
878 /* Value to encode in EVEX RC bits, for SAE-only instructions. */
879 static enum rc_type evexrcig = rne;
880
881 /* Pre-defined "_GLOBAL_OFFSET_TABLE_". */
882 static symbolS *GOT_symbol;
883
884 /* The dwarf2 return column, adjusted for 32 or 64 bit. */
885 unsigned int x86_dwarf2_return_column;
886
887 /* The dwarf2 data alignment, adjusted for 32 or 64 bit. */
888 int x86_cie_data_alignment;
889
890 /* Interface to relax_segment.
891 There are 3 major relax states for 386 jump insns because the
892 different types of jumps add different sizes to frags when we're
893 figuring out what sort of jump to choose to reach a given label.
894
895 BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
896 branches which are handled by md_estimate_size_before_relax() and
897 i386_generic_table_relax_frag(). */
898
899 /* Types. */
900 #define UNCOND_JUMP 0
901 #define COND_JUMP 1
902 #define COND_JUMP86 2
903 #define BRANCH_PADDING 3
904 #define BRANCH_PREFIX 4
905 #define FUSED_JCC_PADDING 5
906
907 /* Sizes. */
908 #define CODE16 1
909 #define SMALL 0
910 #define SMALL16 (SMALL | CODE16)
911 #define BIG 2
912 #define BIG16 (BIG | CODE16)
913
914 #ifndef INLINE
915 #ifdef __GNUC__
916 #define INLINE __inline__
917 #else
918 #define INLINE
919 #endif
920 #endif
921
922 #define ENCODE_RELAX_STATE(type, size) \
923 ((relax_substateT) (((type) << 2) | (size)))
924 #define TYPE_FROM_RELAX_STATE(s) \
925 ((s) >> 2)
926 #define DISP_SIZE_FROM_RELAX_STATE(s) \
927 ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
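/* Worked example (illustrative): ENCODE_RELAX_STATE (COND_JUMP, BIG) is
   (1 << 2) | 2 = 6; TYPE_FROM_RELAX_STATE (6) gives back COND_JUMP (1),
   and DISP_SIZE_FROM_RELAX_STATE (6) yields 4, i.e. a 32-bit
   displacement.  */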
928
929 /* This table is used by relax_frag to promote short jumps to long
930 ones where necessary. SMALL (short) jumps may be promoted to BIG
931 (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long). We
932 don't allow a short jump in a 32 bit code segment to be promoted to
933 a 16 bit offset jump because it's slower (requires data size
934 prefix), and doesn't work, unless the destination is in the bottom
935 64k of the code segment (The top 16 bits of eip are zeroed). */
936
937 const relax_typeS md_relax_table[] =
938 {
939 /* The fields are:
940 1) most positive reach of this state,
941 2) most negative reach of this state,
942 3) how many bytes this mode will have in the variable part of the frag
943 4) which index into the table to try if we can't fit into this one. */
944
945 /* UNCOND_JUMP states. */
946 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
947 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
948 /* dword jmp adds 4 bytes to frag:
949 0 extra opcode bytes, 4 displacement bytes. */
950 {0, 0, 4, 0},
 951   /* word jmp adds 2 bytes to frag:
952 0 extra opcode bytes, 2 displacement bytes. */
953 {0, 0, 2, 0},
954
955 /* COND_JUMP states. */
956 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
957 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
 958   /* dword conditionals add 5 bytes to frag:
959 1 extra opcode byte, 4 displacement bytes. */
960 {0, 0, 5, 0},
961 /* word conditionals add 3 bytes to frag:
962 1 extra opcode byte, 2 displacement bytes. */
963 {0, 0, 3, 0},
964
965 /* COND_JUMP86 states. */
966 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
967 {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
 968   /* dword conditionals add 5 bytes to frag:
969 1 extra opcode byte, 4 displacement bytes. */
970 {0, 0, 5, 0},
971 /* word conditionals add 4 bytes to frag:
972 1 displacement byte and a 3 byte long branch insn. */
973 {0, 0, 4, 0}
974 };
975
976 #define ARCH(n, t, f, s) \
977 { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
978 CPU_NONE_FLAGS }
979 #define SUBARCH(n, e, d, s) \
980 { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
981 CPU_ ## d ## _FLAGS }
982 #define VECARCH(n, e, d, v) \
983 { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
984 CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }
985
986 static const arch_entry cpu_arch[] =
987 {
988 /* Do not replace the first two entries - i386_target_format() and
989 set_cpu_arch() rely on them being there in this order. */
990 ARCH (generic32, GENERIC32, GENERIC32, false),
991 ARCH (generic64, GENERIC64, GENERIC64, false),
992 ARCH (i8086, UNKNOWN, NONE, false),
993 ARCH (i186, UNKNOWN, 186, false),
994 ARCH (i286, UNKNOWN, 286, false),
995 ARCH (i386, I386, 386, false),
996 ARCH (i486, I486, 486, false),
997 ARCH (i586, PENTIUM, 586, false),
998 ARCH (i686, PENTIUMPRO, 686, false),
999 ARCH (pentium, PENTIUM, 586, false),
1000 ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
1001 ARCH (pentiumii, PENTIUMPRO, P2, false),
1002 ARCH (pentiumiii, PENTIUMPRO, P3, false),
1003 ARCH (pentium4, PENTIUM4, P4, false),
1004 ARCH (prescott, NOCONA, CORE, false),
1005 ARCH (nocona, NOCONA, NOCONA, false),
1006 ARCH (yonah, CORE, CORE, true),
1007 ARCH (core, CORE, CORE, false),
1008 ARCH (merom, CORE2, CORE2, true),
1009 ARCH (core2, CORE2, CORE2, false),
1010 ARCH (corei7, COREI7, COREI7, false),
1011 ARCH (iamcu, IAMCU, IAMCU, false),
1012 ARCH (k6, K6, K6, false),
1013 ARCH (k6_2, K6, K6_2, false),
1014 ARCH (athlon, ATHLON, ATHLON, false),
1015 ARCH (sledgehammer, K8, K8, true),
1016 ARCH (opteron, K8, K8, false),
1017 ARCH (k8, K8, K8, false),
1018 ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
1019 ARCH (bdver1, BD, BDVER1, false),
1020 ARCH (bdver2, BD, BDVER2, false),
1021 ARCH (bdver3, BD, BDVER3, false),
1022 ARCH (bdver4, BD, BDVER4, false),
1023 ARCH (znver1, ZNVER, ZNVER1, false),
1024 ARCH (znver2, ZNVER, ZNVER2, false),
1025 ARCH (znver3, ZNVER, ZNVER3, false),
1026 ARCH (znver4, ZNVER, ZNVER4, false),
1027 ARCH (btver1, BT, BTVER1, false),
1028 ARCH (btver2, BT, BTVER2, false),
1029
1030 SUBARCH (8087, 8087, ANY_8087, false),
1031 SUBARCH (87, NONE, ANY_8087, false), /* Disable only! */
1032 SUBARCH (287, 287, ANY_287, false),
1033 SUBARCH (387, 387, ANY_387, false),
1034 SUBARCH (687, 687, ANY_687, false),
1035 SUBARCH (cmov, CMOV, CMOV, false),
1036 SUBARCH (fxsr, FXSR, ANY_FXSR, false),
1037 SUBARCH (mmx, MMX, ANY_MMX, false),
1038 SUBARCH (sse, SSE, ANY_SSE, false),
1039 SUBARCH (sse2, SSE2, ANY_SSE2, false),
1040 SUBARCH (sse3, SSE3, ANY_SSE3, false),
1041 SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
1042 SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
1043 SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
1044 SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
1045 SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
1046 VECARCH (avx, AVX, ANY_AVX, reset),
1047 VECARCH (avx2, AVX2, ANY_AVX2, reset),
1048 VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
1049 VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
1050 VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
1051 VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
1052 VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
1053 VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
1054 VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
1055 SUBARCH (monitor, MONITOR, MONITOR, false),
1056 SUBARCH (vmx, VMX, ANY_VMX, false),
1057 SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
1058 SUBARCH (smx, SMX, SMX, false),
1059 SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
1060 SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
1061 SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
1062 SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
1063 SUBARCH (aes, AES, ANY_AES, false),
1064 SUBARCH (pclmul, PCLMULQDQ, ANY_PCLMULQDQ, false),
1065 SUBARCH (clmul, PCLMULQDQ, ANY_PCLMULQDQ, true),
1066 SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
1067 SUBARCH (rdrnd, RDRND, RDRND, false),
1068 SUBARCH (f16c, F16C, ANY_F16C, false),
1069 SUBARCH (bmi2, BMI2, BMI2, false),
1070 SUBARCH (fma, FMA, ANY_FMA, false),
1071 SUBARCH (fma4, FMA4, ANY_FMA4, false),
1072 SUBARCH (xop, XOP, ANY_XOP, false),
1073 SUBARCH (lwp, LWP, ANY_LWP, false),
1074 SUBARCH (movbe, MOVBE, MOVBE, false),
1075 SUBARCH (cx16, CX16, CX16, false),
1076 SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
1077 SUBARCH (ept, EPT, ANY_EPT, false),
1078 SUBARCH (lzcnt, LZCNT, LZCNT, false),
1079 SUBARCH (popcnt, POPCNT, POPCNT, false),
1080 SUBARCH (hle, HLE, HLE, false),
1081 SUBARCH (rtm, RTM, ANY_RTM, false),
1082 SUBARCH (tsx, TSX, TSX, false),
1083 SUBARCH (invpcid, INVPCID, INVPCID, false),
1084 SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
1085 SUBARCH (nop, NOP, NOP, false),
1086 SUBARCH (syscall, SYSCALL, SYSCALL, false),
1087 SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
1088 SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
1089 SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
1090 SUBARCH (padlock, PADLOCK, PADLOCK, false),
1091 SUBARCH (pacifica, SVME, ANY_SVME, true),
1092 SUBARCH (svme, SVME, ANY_SVME, false),
1093 SUBARCH (abm, ABM, ABM, false),
1094 SUBARCH (bmi, BMI, BMI, false),
1095 SUBARCH (tbm, TBM, TBM, false),
1096 SUBARCH (adx, ADX, ADX, false),
1097 SUBARCH (rdseed, RDSEED, RDSEED, false),
1098 SUBARCH (prfchw, PRFCHW, PRFCHW, false),
1099 SUBARCH (smap, SMAP, SMAP, false),
1100 SUBARCH (mpx, MPX, ANY_MPX, false),
1101 SUBARCH (sha, SHA, ANY_SHA, false),
1102 SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
1103 SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
1104 SUBARCH (se1, SE1, SE1, false),
1105 SUBARCH (clwb, CLWB, CLWB, false),
1106 VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
1107 VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
1108 VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
1109 VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
1110 VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
1111 VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
1112 VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
1113 VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
1114 VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
1115 SUBARCH (clzero, CLZERO, CLZERO, false),
1116 SUBARCH (mwaitx, MWAITX, MWAITX, false),
1117 SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
1118 SUBARCH (rdpid, RDPID, RDPID, false),
1119 SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
1120 SUBARCH (ibt, IBT, IBT, false),
1121 SUBARCH (shstk, SHSTK, SHSTK, false),
1122 SUBARCH (gfni, GFNI, ANY_GFNI, false),
1123 VECARCH (vaes, VAES, ANY_VAES, reset),
1124 VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
1125 SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
1126 SUBARCH (pconfig, PCONFIG, PCONFIG, false),
1127 SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
1128 SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
1129 SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
1130 SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
1131 SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
1132 SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
1133 SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
1134 SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
1135 SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
1136 VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
1137 VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
1138 ANY_AVX512_VP2INTERSECT, reset),
1139 SUBARCH (tdx, TDX, TDX, false),
1140 SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
1141 SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
1142 SUBARCH (rdpru, RDPRU, RDPRU, false),
1143 SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
1144 SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
1145 SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
1146 SUBARCH (kl, KL, ANY_KL, false),
1147 SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
1148 SUBARCH (uintr, UINTR, UINTR, false),
1149 SUBARCH (hreset, HRESET, HRESET, false),
1150 VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
1151 SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
1152 VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
1153 VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
1154 SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
1155 SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
1156 SUBARCH (msrlist, MSRLIST, MSRLIST, false),
1157 VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
1158 SUBARCH (rao_int, RAO_INT, RAO_INT, false),
1159 SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
1160 SUBARCH (fred, FRED, ANY_FRED, false),
1161 SUBARCH (lkgs, LKGS, ANY_LKGS, false),
1162 VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
1163 VECARCH (sha512, SHA512, ANY_SHA512, reset),
1164 VECARCH (sm3, SM3, ANY_SM3, reset),
1165 VECARCH (sm4, SM4, ANY_SM4, reset),
1166 SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
1167 VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
1168 };
1169
1170 #undef SUBARCH
1171 #undef ARCH
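/* These names are what the ".arch" directive (and -march=) accepts;
   e.g. ".arch generic32" followed by ".arch .sse4.1" enables SSE4.1 on
   top of the plain generic32 baseline.  */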
1172
1173 #ifdef I386COFF
1174 /* Like s_lcomm_internal in gas/read.c but the alignment string
1175 is allowed to be optional. */
1176
1177 static symbolS *
1178 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
1179 {
1180 addressT align = 0;
1181
1182 SKIP_WHITESPACE ();
1183
1184 if (needs_align
1185 && *input_line_pointer == ',')
1186 {
1187 align = parse_align (needs_align - 1);
1188
1189 if (align == (addressT) -1)
1190 return NULL;
1191 }
1192 else
1193 {
1194 if (size >= 8)
1195 align = 3;
1196 else if (size >= 4)
1197 align = 2;
1198 else if (size >= 2)
1199 align = 1;
1200 else
1201 align = 0;
1202 }
1203
1204 bss_alloc (symbolP, size, align);
1205 return symbolP;
1206 }
1207
1208 static void
1209 pe_lcomm (int needs_align)
1210 {
1211 s_comm_internal (needs_align * 2, pe_lcomm_internal);
1212 }
1213 #endif
1214
1215 const pseudo_typeS md_pseudo_table[] =
1216 {
1217 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
1218 {"align", s_align_bytes, 0},
1219 #else
1220 {"align", s_align_ptwo, 0},
1221 #endif
1222 {"arch", set_cpu_arch, 0},
1223 #ifndef I386COFF
1224 {"bss", s_bss, 0},
1225 #else
1226 {"lcomm", pe_lcomm, 1},
1227 #endif
1228 {"ffloat", float_cons, 'f'},
1229 {"dfloat", float_cons, 'd'},
1230 {"tfloat", float_cons, 'x'},
1231 {"hfloat", float_cons, 'h'},
1232 {"bfloat16", float_cons, 'b'},
1233 {"value", cons, 2},
1234 {"slong", signed_cons, 4},
1235 {"insn", s_insn, 0},
1236 {"noopt", s_ignore, 0},
1237 {"optim", s_ignore, 0},
1238 {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
1239 {"code16", set_code_flag, CODE_16BIT},
1240 {"code32", set_code_flag, CODE_32BIT},
1241 #ifdef BFD64
1242 {"code64", set_code_flag, CODE_64BIT},
1243 #endif
1244 {"intel_syntax", set_intel_syntax, 1},
1245 {"att_syntax", set_intel_syntax, 0},
1246 {"intel_mnemonic", set_intel_mnemonic, 1},
1247 {"att_mnemonic", set_intel_mnemonic, 0},
1248 {"allow_index_reg", set_allow_index_reg, 1},
1249 {"disallow_index_reg", set_allow_index_reg, 0},
1250 {"sse_check", set_check, 0},
1251 {"operand_check", set_check, 1},
1252 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
1253 {"largecomm", handle_large_common, 0},
1254 #else
1255 {"file", dwarf2_directive_file, 0},
1256 {"loc", dwarf2_directive_loc, 0},
1257 {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
1258 #endif
1259 #ifdef TE_PE
1260 {"secrel32", pe_directive_secrel, 0},
1261 {"secidx", pe_directive_secidx, 0},
1262 #endif
1263 {0, 0, 0}
1264 };
1265
1266 /* For interface with expression (). */
1267 extern char *input_line_pointer;
1268
1269 /* Hash table for instruction mnemonic lookup. */
1270 static htab_t op_hash;
1271
1272 /* Hash table for register lookup. */
1273 static htab_t reg_hash;
1274 \f
1275 /* Various efficient no-op patterns for aligning code labels.
1276 Note: Don't try to assemble the instructions in the comments.
1277 0L and 0w are not legal. */
1278 static const unsigned char f32_1[] =
1279 {0x90}; /* nop */
1280 static const unsigned char f32_2[] =
1281 {0x66,0x90}; /* xchg %ax,%ax */
1282 static const unsigned char f32_3[] =
1283 {0x8d,0x76,0x00}; /* leal 0(%esi),%esi */
1284 static const unsigned char f32_4[] =
1285 {0x8d,0x74,0x26,0x00}; /* leal 0(%esi,1),%esi */
1286 static const unsigned char f32_6[] =
1287 {0x8d,0xb6,0x00,0x00,0x00,0x00}; /* leal 0L(%esi),%esi */
1288 static const unsigned char f32_7[] =
1289 {0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal 0L(%esi,1),%esi */
1290 static const unsigned char f16_3[] =
1291 {0x8d,0x74,0x00}; /* lea 0(%si),%si */
1292 static const unsigned char f16_4[] =
1293 {0x8d,0xb4,0x00,0x00}; /* lea 0W(%si),%si */
1294 static const unsigned char jump_disp8[] =
1295 {0xeb}; /* jmp disp8 */
1296 static const unsigned char jump32_disp32[] =
1297 {0xe9}; /* jmp disp32 */
1298 static const unsigned char jump16_disp32[] =
1299 {0x66,0xe9}; /* jmp disp32 */
1300 /* 32-bit NOPs patterns. */
1301 static const unsigned char *const f32_patt[] = {
1302 f32_1, f32_2, f32_3, f32_4, NULL, f32_6, f32_7
1303 };
1304 /* 16-bit NOPs patterns. */
1305 static const unsigned char *const f16_patt[] = {
1306 f32_1, f32_2, f16_3, f16_4
1307 };
1308 /* nopl (%[re]ax) */
1309 static const unsigned char alt_3[] =
1310 {0x0f,0x1f,0x00};
1311 /* nopl 0(%[re]ax) */
1312 static const unsigned char alt_4[] =
1313 {0x0f,0x1f,0x40,0x00};
1314 /* nopl 0(%[re]ax,%[re]ax,1) */
1315 static const unsigned char alt_5[] =
1316 {0x0f,0x1f,0x44,0x00,0x00};
1317 /* nopw 0(%[re]ax,%[re]ax,1) */
1318 static const unsigned char alt_6[] =
1319 {0x66,0x0f,0x1f,0x44,0x00,0x00};
1320 /* nopl 0L(%[re]ax) */
1321 static const unsigned char alt_7[] =
1322 {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
1323 /* nopl 0L(%[re]ax,%[re]ax,1) */
1324 static const unsigned char alt_8[] =
1325 {0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1326 /* nopw 0L(%[re]ax,%[re]ax,1) */
1327 static const unsigned char alt_9[] =
1328 {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1329 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
1330 static const unsigned char alt_10[] =
1331 {0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1332 /* data16 nopw %cs:0L(%eax,%eax,1) */
1333 static const unsigned char alt_11[] =
1334 {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
1335 /* 32-bit and 64-bit NOPs patterns. */
1336 static const unsigned char *const alt_patt[] = {
1337 f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
1338 alt_9, alt_10, alt_11
1339 };
1340
 1341 /* Generate COUNT bytes of NOPs at WHERE from PATT, with the maximum
 1342    size of a single NOP instruction being MAX_SINGLE_NOP_SIZE.  */
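/* E.g. (illustrative) COUNT == 10 with 4-byte f32_patt NOPs emits two
   4-byte "leal 0(%esi,1),%esi" NOPs followed by one 2-byte
   "xchg %ax,%ax".  */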
1343
1344 static void
1345 i386_output_nops (char *where, const unsigned char *const *patt,
1346 int count, int max_single_nop_size)
1347
1348 {
1349 /* Place the longer NOP first. */
1350 int last;
1351 int offset;
1352 const unsigned char *nops;
1353
1354 if (max_single_nop_size < 1)
1355 {
1356 as_fatal (_("i386_output_nops called to generate nops of at most %d bytes!"),
1357 max_single_nop_size);
1358 return;
1359 }
1360
1361 nops = patt[max_single_nop_size - 1];
1362
 1363   /* Use the smaller one if the requested one isn't available.  */
1364 if (nops == NULL)
1365 {
1366 max_single_nop_size--;
1367 nops = patt[max_single_nop_size - 1];
1368 }
1369
1370 last = count % max_single_nop_size;
1371
1372 count -= last;
1373 for (offset = 0; offset < count; offset += max_single_nop_size)
1374 memcpy (where + offset, nops, max_single_nop_size);
1375
1376 if (last)
1377 {
1378 nops = patt[last - 1];
1379 if (nops == NULL)
1380 {
1381 /* Use the smaller one plus one-byte NOP if the needed one
1382 isn't available. */
1383 last--;
1384 nops = patt[last - 1];
1385 memcpy (where + offset, nops, last);
1386 where[offset + last] = *patt[0];
1387 }
1388 else
1389 memcpy (where + offset, nops, last);
1390 }
1391 }
1392
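/* The masking form below returns false for negative arguments, i.e. these
   check for values representable as non-negative 7- resp. 31-bit numbers;
   the "jump over NOPs" code relies on that.  */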
1393 static INLINE int
1394 fits_in_imm7 (offsetT num)
1395 {
1396 return (num & 0x7f) == num;
1397 }
1398
1399 static INLINE int
1400 fits_in_imm31 (offsetT num)
1401 {
1402 return (num & 0x7fffffff) == num;
1403 }
1404
 1405 /* Generate COUNT bytes of NOPs at WHERE, with the maximum size of a
 1406    single NOP instruction being LIMIT.  */
1407
1408 void
1409 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
1410 {
1411 const unsigned char *const *patt = NULL;
1412 int max_single_nop_size;
1413 /* Maximum number of NOPs before switching to jump over NOPs. */
1414 int max_number_of_nops;
1415
1416 switch (fragP->fr_type)
1417 {
1418 case rs_fill_nop:
1419 case rs_align_code:
1420 break;
1421 case rs_machine_dependent:
1422 /* Allow NOP padding for jumps and calls. */
1423 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
1424 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
1425 break;
1426 /* Fall through. */
1427 default:
1428 return;
1429 }
1430
1431 /* We need to decide which NOP sequence to use for 32bit and
1432 64bit. When -mtune= is used:
1433
1434 1. For PROCESSOR_I386, PROCESSOR_I486, PROCESSOR_PENTIUM and
1435 PROCESSOR_GENERIC32, f32_patt will be used.
1436 2. For the rest, alt_patt will be used.
1437
1438 When -mtune= isn't used, alt_patt will be used if
1439 cpu_arch_isa_flags has CpuNop. Otherwise, f32_patt will
1440 be used.
1441
1442 When -march= or .arch is used, we can't use anything beyond
1443 cpu_arch_isa_flags. */
1444
1445 if (flag_code == CODE_16BIT)
1446 {
1447 patt = f16_patt;
1448 max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
1449 /* Limit number of NOPs to 2 in 16-bit mode. */
1450 max_number_of_nops = 2;
1451 }
1452 else
1453 {
1454 if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
1455 {
1456 /* PROCESSOR_UNKNOWN means that all ISAs may be used. */
1457 switch (cpu_arch_tune)
1458 {
1459 case PROCESSOR_UNKNOWN:
1460 /* We use cpu_arch_isa_flags to check if we SHOULD
1461 optimize with nops. */
1462 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1463 patt = alt_patt;
1464 else
1465 patt = f32_patt;
1466 break;
1467 case PROCESSOR_PENTIUM4:
1468 case PROCESSOR_NOCONA:
1469 case PROCESSOR_CORE:
1470 case PROCESSOR_CORE2:
1471 case PROCESSOR_COREI7:
1472 case PROCESSOR_GENERIC64:
1473 case PROCESSOR_K6:
1474 case PROCESSOR_ATHLON:
1475 case PROCESSOR_K8:
1476 case PROCESSOR_AMDFAM10:
1477 case PROCESSOR_BD:
1478 case PROCESSOR_ZNVER:
1479 case PROCESSOR_BT:
1480 patt = alt_patt;
1481 break;
1482 case PROCESSOR_I386:
1483 case PROCESSOR_I486:
1484 case PROCESSOR_PENTIUM:
1485 case PROCESSOR_PENTIUMPRO:
1486 case PROCESSOR_IAMCU:
1487 case PROCESSOR_GENERIC32:
1488 patt = f32_patt;
1489 break;
1490 case PROCESSOR_NONE:
1491 abort ();
1492 }
1493 }
1494 else
1495 {
1496 switch (fragP->tc_frag_data.tune)
1497 {
1498 case PROCESSOR_UNKNOWN:
1499 /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
1500 PROCESSOR_UNKNOWN. */
1501 abort ();
1502 break;
1503
1504 case PROCESSOR_I386:
1505 case PROCESSOR_I486:
1506 case PROCESSOR_PENTIUM:
1507 case PROCESSOR_IAMCU:
1508 case PROCESSOR_K6:
1509 case PROCESSOR_ATHLON:
1510 case PROCESSOR_K8:
1511 case PROCESSOR_AMDFAM10:
1512 case PROCESSOR_BD:
1513 case PROCESSOR_ZNVER:
1514 case PROCESSOR_BT:
1515 case PROCESSOR_GENERIC32:
1516 /* We use cpu_arch_isa_flags to check if we CAN optimize
1517 with nops. */
1518 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1519 patt = alt_patt;
1520 else
1521 patt = f32_patt;
1522 break;
1523 case PROCESSOR_PENTIUMPRO:
1524 case PROCESSOR_PENTIUM4:
1525 case PROCESSOR_NOCONA:
1526 case PROCESSOR_CORE:
1527 case PROCESSOR_CORE2:
1528 case PROCESSOR_COREI7:
1529 if (fragP->tc_frag_data.isa_flags.bitfield.cpunop)
1530 patt = alt_patt;
1531 else
1532 patt = f32_patt;
1533 break;
1534 case PROCESSOR_GENERIC64:
1535 patt = alt_patt;
1536 break;
1537 case PROCESSOR_NONE:
1538 abort ();
1539 }
1540 }
1541
1542 if (patt == f32_patt)
1543 {
1544 max_single_nop_size = sizeof (f32_patt) / sizeof (f32_patt[0]);
1545 /* Limit number of NOPs to 2 for older processors. */
1546 max_number_of_nops = 2;
1547 }
1548 else
1549 {
1550 max_single_nop_size = sizeof (alt_patt) / sizeof (alt_patt[0]);
1551 /* Limit number of NOPs to 7 for newer processors. */
1552 max_number_of_nops = 7;
1553 }
1554 }
1555
1556 if (limit == 0)
1557 limit = max_single_nop_size;
1558
1559 if (fragP->fr_type == rs_fill_nop)
1560 {
1561 /* Output NOPs for .nop directive. */
1562 if (limit > max_single_nop_size)
1563 {
1564 as_bad_where (fragP->fr_file, fragP->fr_line,
1565 _("invalid single nop size: %d "
1566 "(expect within [0, %d])"),
1567 limit, max_single_nop_size);
1568 return;
1569 }
1570 }
1571 else if (fragP->fr_type != rs_machine_dependent)
1572 fragP->fr_var = count;
1573
1574 if ((count / max_single_nop_size) > max_number_of_nops)
1575 {
1576 /* Generate jump over NOPs. */
1577 offsetT disp = count - 2;
1578 if (fits_in_imm7 (disp))
1579 {
1580 /* Use "jmp disp8" if possible. */
1581 count = disp;
1582 where[0] = jump_disp8[0];
1583 where[1] = count;
1584 where += 2;
1585 }
1586 else
1587 {
1588 unsigned int size_of_jump;
1589
1590 if (flag_code == CODE_16BIT)
1591 {
1592 where[0] = jump16_disp32[0];
1593 where[1] = jump16_disp32[1];
1594 size_of_jump = 2;
1595 }
1596 else
1597 {
1598 where[0] = jump32_disp32[0];
1599 size_of_jump = 1;
1600 }
1601
1602 count -= size_of_jump + 4;
1603 if (!fits_in_imm31 (count))
1604 {
1605 as_bad_where (fragP->fr_file, fragP->fr_line,
1606 _("jump over nop padding out of range"));
1607 return;
1608 }
1609
1610 md_number_to_chars (where + size_of_jump, count, 4);
1611 where += size_of_jump + 4;
1612 }
1613 }
1614
1615 /* Generate multiple NOPs. */
1616 i386_output_nops (where, patt, count, limit);
1617 }
1618
1619 static INLINE int
1620 operand_type_all_zero (const union i386_operand_type *x)
1621 {
1622 switch (ARRAY_SIZE(x->array))
1623 {
1624 case 3:
1625 if (x->array[2])
1626 return 0;
1627 /* Fall through. */
1628 case 2:
1629 if (x->array[1])
1630 return 0;
1631 /* Fall through. */
1632 case 1:
1633 return !x->array[0];
1634 default:
1635 abort ();
1636 }
1637 }
1638
1639 static INLINE void
1640 operand_type_set (union i386_operand_type *x, unsigned int v)
1641 {
1642 switch (ARRAY_SIZE(x->array))
1643 {
1644 case 3:
1645 x->array[2] = v;
1646 /* Fall through. */
1647 case 2:
1648 x->array[1] = v;
1649 /* Fall through. */
1650 case 1:
1651 x->array[0] = v;
1652 /* Fall through. */
1653 break;
1654 default:
1655 abort ();
1656 }
1657
1658 x->bitfield.class = ClassNone;
1659 x->bitfield.instance = InstanceNone;
1660 }
1661
1662 static INLINE int
1663 operand_type_equal (const union i386_operand_type *x,
1664 const union i386_operand_type *y)
1665 {
1666 switch (ARRAY_SIZE(x->array))
1667 {
1668 case 3:
1669 if (x->array[2] != y->array[2])
1670 return 0;
1671 /* Fall through. */
1672 case 2:
1673 if (x->array[1] != y->array[1])
1674 return 0;
1675 /* Fall through. */
1676 case 1:
1677 return x->array[0] == y->array[0];
1678 break;
1679 default:
1680 abort ();
1681 }
1682 }
1683
1684 static INLINE bool
1685 is_cpu (const insn_template *t, enum i386_cpu cpu)
1686 {
1687 switch (cpu)
1688 {
1689 case Cpu287: return t->cpu.bitfield.cpu287;
1690 case Cpu387: return t->cpu.bitfield.cpu387;
1691 case Cpu3dnow: return t->cpu.bitfield.cpu3dnow;
1692 case Cpu3dnowA: return t->cpu.bitfield.cpu3dnowa;
1693 case CpuAVX: return t->cpu.bitfield.cpuavx;
1694 case CpuHLE: return t->cpu.bitfield.cpuhle;
1695 case CpuAVX512F: return t->cpu.bitfield.cpuavx512f;
1696 case CpuAVX512VL: return t->cpu.bitfield.cpuavx512vl;
1697 case Cpu64: return t->cpu.bitfield.cpu64;
1698 case CpuNo64: return t->cpu.bitfield.cpuno64;
1699 default:
1700 gas_assert (cpu < CpuAttrEnums);
1701 }
1702 return t->cpu.bitfield.isa == cpu + 1u;
1703 }
1704
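/* Widen the packed per-template attribute A into the full i386_cpu_flags
   bitmap: flag bits stored above the ISA field are shifted to their
   positions past the ISA enum space, and a non-zero ISA number is turned
   into its single corresponding feature bit.  */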
1705 static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
1706 {
1707 const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
1708 i386_cpu_flags f = { .array[0] = 0 };
1709
1710 switch (ARRAY_SIZE(a.array))
1711 {
1712 case 1:
1713 f.array[CpuAttrEnums / bps]
1714 |= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
1715 if (CpuAttrEnums % bps > CpuIsaBits)
1716 f.array[CpuAttrEnums / bps + 1]
1717 = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
1718 break;
1719 default:
1720 abort ();
1721 }
1722
1723 if (a.bitfield.isa)
1724 f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);
1725
1726 return f;
1727 }
1728
1729 static INLINE int
1730 cpu_flags_all_zero (const union i386_cpu_flags *x)
1731 {
1732 switch (ARRAY_SIZE(x->array))
1733 {
1734 case 5:
1735 if (x->array[4])
1736 return 0;
1737 /* Fall through. */
1738 case 4:
1739 if (x->array[3])
1740 return 0;
1741 /* Fall through. */
1742 case 3:
1743 if (x->array[2])
1744 return 0;
1745 /* Fall through. */
1746 case 2:
1747 if (x->array[1])
1748 return 0;
1749 /* Fall through. */
1750 case 1:
1751 return !x->array[0];
1752 default:
1753 abort ();
1754 }
1755 }
1756
1757 static INLINE int
1758 cpu_flags_equal (const union i386_cpu_flags *x,
1759 const union i386_cpu_flags *y)
1760 {
1761 switch (ARRAY_SIZE(x->array))
1762 {
1763 case 5:
1764 if (x->array[4] != y->array[4])
1765 return 0;
1766 /* Fall through. */
1767 case 4:
1768 if (x->array[3] != y->array[3])
1769 return 0;
1770 /* Fall through. */
1771 case 3:
1772 if (x->array[2] != y->array[2])
1773 return 0;
1774 /* Fall through. */
1775 case 2:
1776 if (x->array[1] != y->array[1])
1777 return 0;
1778 /* Fall through. */
1779 case 1:
1780 return x->array[0] == y->array[0];
1781 break;
1782 default:
1783 abort ();
1784 }
1785 }
1786
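/* Return non-zero if attribute A is compatible with the current code
   size: CpuNo64 templates are rejected in 64-bit mode, and Cpu64
   templates outside of it.  */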
1787 static INLINE int
1788 cpu_flags_check_cpu64 (i386_cpu_attr a)
1789 {
1790 return !((flag_code == CODE_64BIT && a.bitfield.cpuno64)
1791 || (flag_code != CODE_64BIT && a.bitfield.cpu64));
1792 }
1793
1794 static INLINE i386_cpu_flags
1795 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
1796 {
1797 switch (ARRAY_SIZE (x.array))
1798 {
1799 case 5:
1800 x.array [4] &= y.array [4];
1801 /* Fall through. */
1802 case 4:
1803 x.array [3] &= y.array [3];
1804 /* Fall through. */
1805 case 3:
1806 x.array [2] &= y.array [2];
1807 /* Fall through. */
1808 case 2:
1809 x.array [1] &= y.array [1];
1810 /* Fall through. */
1811 case 1:
1812 x.array [0] &= y.array [0];
1813 break;
1814 default:
1815 abort ();
1816 }
1817 return x;
1818 }
1819
1820 static INLINE i386_cpu_flags
1821 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
1822 {
1823 switch (ARRAY_SIZE (x.array))
1824 {
1825 case 5:
1826 x.array [4] |= y.array [4];
1827 /* Fall through. */
1828 case 4:
1829 x.array [3] |= y.array [3];
1830 /* Fall through. */
1831 case 3:
1832 x.array [2] |= y.array [2];
1833 /* Fall through. */
1834 case 2:
1835 x.array [1] |= y.array [1];
1836 /* Fall through. */
1837 case 1:
1838 x.array [0] |= y.array [0];
1839 break;
1840 default:
1841 abort ();
1842 }
1843 return x;
1844 }
1845
1846 static INLINE i386_cpu_flags
1847 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
1848 {
1849 switch (ARRAY_SIZE (x.array))
1850 {
1851 case 5:
1852 x.array [4] &= ~y.array [4];
1853 /* Fall through. */
1854 case 4:
1855 x.array [3] &= ~y.array [3];
1856 /* Fall through. */
1857 case 3:
1858 x.array [2] &= ~y.array [2];
1859 /* Fall through. */
1860 case 2:
1861 x.array [1] &= ~y.array [1];
1862 /* Fall through. */
1863 case 1:
1864 x.array [0] &= ~y.array [0];
1865 break;
1866 default:
1867 abort ();
1868 }
1869 return x;
1870 }
1871
1872 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
1873
1874 #define CPU_FLAGS_ARCH_MATCH 0x1
1875 #define CPU_FLAGS_64BIT_MATCH 0x2
1876
1877 #define CPU_FLAGS_PERFECT_MATCH \
1878 (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
1879
1880 /* Return CPU flags match bits. */
1881
1882 static int
1883 cpu_flags_match (const insn_template *t)
1884 {
1885 i386_cpu_flags x = cpu_flags_from_attr (t->cpu);
1886 int match = cpu_flags_check_cpu64 (t->cpu) ? CPU_FLAGS_64BIT_MATCH : 0;
1887
1888 x.bitfield.cpu64 = 0;
1889 x.bitfield.cpuno64 = 0;
1890
1891 if (cpu_flags_all_zero (&x))
1892 {
1893 /* This instruction is available on all archs. */
1894 match |= CPU_FLAGS_ARCH_MATCH;
1895 }
1896 else
1897 {
1898 /* This instruction is available only on some archs. */
1899 i386_cpu_flags cpu = cpu_arch_flags;
1900
1901 /* AVX512VL is not a standalone feature - match it and then strip it. */
1902 if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
1903 return match;
1904 x.bitfield.cpuavx512vl = 0;
1905
1906 /* AVX and AVX2 present at the same time express an operand size
1907 dependency - strip AVX2 for the purposes here. The operand size
1908 dependent check occurs in check_vecOperands(). */
1909 if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
1910 x.bitfield.cpuavx2 = 0;
1911
1912 cpu = cpu_flags_and (x, cpu);
1913 if (!cpu_flags_all_zero (&cpu))
1914 {
1915 if (x.bitfield.cpuavx)
1916 {
1917 /* We need to check a few extra flags with AVX. */
1918 if (cpu.bitfield.cpuavx
1919 && (!t->opcode_modifier.sse2avx
1920 || (sse2avx && !i.prefix[DATA_PREFIX]))
1921 && (!x.bitfield.cpuaes || cpu.bitfield.cpuaes)
1922 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1923 && (!x.bitfield.cpupclmulqdq || cpu.bitfield.cpupclmulqdq))
1924 match |= CPU_FLAGS_ARCH_MATCH;
1925 }
1926 else if (x.bitfield.cpuavx512f)
1927 {
1928 /* We need to check a few extra flags with AVX512F. */
1929 if (cpu.bitfield.cpuavx512f
1930 && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
1931 && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
1932 && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
1933 match |= CPU_FLAGS_ARCH_MATCH;
1934 }
1935 else
1936 match |= CPU_FLAGS_ARCH_MATCH;
1937 }
1938 }
1939 return match;
1940 }
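
/* Worked example (hypothetical directive state): with ".arch
   .noavx2" in effect the CpuAVX2 bit is clear in cpu_arch_flags, so
   for an AVX2-only template the cpu_flags_and () intersection above
   comes out all-zero and at most CPU_FLAGS_64BIT_MATCH is returned;
   once AVX2 is (re)enabled the intersection is non-empty and the
   result becomes CPU_FLAGS_PERFECT_MATCH.  */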
1941
1942 static INLINE i386_operand_type
1943 operand_type_and (i386_operand_type x, i386_operand_type y)
1944 {
1945 if (x.bitfield.class != y.bitfield.class)
1946 x.bitfield.class = ClassNone;
1947 if (x.bitfield.instance != y.bitfield.instance)
1948 x.bitfield.instance = InstanceNone;
1949
1950 switch (ARRAY_SIZE (x.array))
1951 {
1952 case 3:
1953 x.array [2] &= y.array [2];
1954 /* Fall through. */
1955 case 2:
1956 x.array [1] &= y.array [1];
1957 /* Fall through. */
1958 case 1:
1959 x.array [0] &= y.array [0];
1960 break;
1961 default:
1962 abort ();
1963 }
1964 return x;
1965 }
1966
1967 static INLINE i386_operand_type
1968 operand_type_and_not (i386_operand_type x, i386_operand_type y)
1969 {
1970 gas_assert (y.bitfield.class == ClassNone);
1971 gas_assert (y.bitfield.instance == InstanceNone);
1972
1973 switch (ARRAY_SIZE (x.array))
1974 {
1975 case 3:
1976 x.array [2] &= ~y.array [2];
1977 /* Fall through. */
1978 case 2:
1979 x.array [1] &= ~y.array [1];
1980 /* Fall through. */
1981 case 1:
1982 x.array [0] &= ~y.array [0];
1983 break;
1984 default:
1985 abort ();
1986 }
1987 return x;
1988 }
1989
1990 static INLINE i386_operand_type
1991 operand_type_or (i386_operand_type x, i386_operand_type y)
1992 {
1993 gas_assert (x.bitfield.class == ClassNone ||
1994 y.bitfield.class == ClassNone ||
1995 x.bitfield.class == y.bitfield.class);
1996 gas_assert (x.bitfield.instance == InstanceNone ||
1997 y.bitfield.instance == InstanceNone ||
1998 x.bitfield.instance == y.bitfield.instance);
1999
2000 switch (ARRAY_SIZE (x.array))
2001 {
2002 case 3:
2003 x.array [2] |= y.array [2];
2004 /* Fall through. */
2005 case 2:
2006 x.array [1] |= y.array [1];
2007 /* Fall through. */
2008 case 1:
2009 x.array [0] |= y.array [0];
2010 break;
2011 default:
2012 abort ();
2013 }
2014 return x;
2015 }
2016
2017 static INLINE i386_operand_type
2018 operand_type_xor (i386_operand_type x, i386_operand_type y)
2019 {
2020 gas_assert (y.bitfield.class == ClassNone);
2021 gas_assert (y.bitfield.instance == InstanceNone);
2022
2023 switch (ARRAY_SIZE (x.array))
2024 {
2025 case 3:
2026 x.array [2] ^= y.array [2];
2027 /* Fall through. */
2028 case 2:
2029 x.array [1] ^= y.array [1];
2030 /* Fall through. */
2031 case 1:
2032 x.array [0] ^= y.array [0];
2033 break;
2034 default:
2035 abort ();
2036 }
2037 return x;
2038 }
2039
2040 static const i386_operand_type anydisp = {
2041 .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
2042 };
2043
2044 enum operand_type
2045 {
2046 reg,
2047 imm,
2048 disp,
2049 anymem
2050 };
2051
2052 static INLINE int
2053 operand_type_check (i386_operand_type t, enum operand_type c)
2054 {
2055 switch (c)
2056 {
2057 case reg:
2058 return t.bitfield.class == Reg;
2059
2060 case imm:
2061 return (t.bitfield.imm8
2062 || t.bitfield.imm8s
2063 || t.bitfield.imm16
2064 || t.bitfield.imm32
2065 || t.bitfield.imm32s
2066 || t.bitfield.imm64);
2067
2068 case disp:
2069 return (t.bitfield.disp8
2070 || t.bitfield.disp16
2071 || t.bitfield.disp32
2072 || t.bitfield.disp64);
2073
2074 case anymem:
2075 return (t.bitfield.disp8
2076 || t.bitfield.disp16
2077 || t.bitfield.disp32
2078 || t.bitfield.disp64
2079 || t.bitfield.baseindex);
2080
2081 default:
2082 abort ();
2083 }
2084
2085 return 0;
2086 }
2087
2088 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit/80bit size
2089 between operand GIVEN and operand WANTED for instruction template T. */
2090
2091 static INLINE int
2092 match_operand_size (const insn_template *t, unsigned int wanted,
2093 unsigned int given)
2094 {
2095 return !((i.types[given].bitfield.byte
2096 && !t->operand_types[wanted].bitfield.byte)
2097 || (i.types[given].bitfield.word
2098 && !t->operand_types[wanted].bitfield.word)
2099 || (i.types[given].bitfield.dword
2100 && !t->operand_types[wanted].bitfield.dword)
2101 || (i.types[given].bitfield.qword
2102 && (!t->operand_types[wanted].bitfield.qword
2103 /* Don't allow 64-bit (memory) operands outside of 64-bit
2104 mode, when they're used where a 64-bit GPR could also
2105 be used. Checking is needed for Intel Syntax only. */
2106 || (intel_syntax
2107 && flag_code != CODE_64BIT
2108 && (t->operand_types[wanted].bitfield.class == Reg
2109 || t->operand_types[wanted].bitfield.class == Accum
2110 || t->opcode_modifier.isstring))))
2111 || (i.types[given].bitfield.tbyte
2112 && !t->operand_types[wanted].bitfield.tbyte));
2113 }
2114
2115 /* Return 1 if there is no conflict in SIMD register between operand
2116 GIVEN and operand WANTED for instruction template T. */
2117
2118 static INLINE int
2119 match_simd_size (const insn_template *t, unsigned int wanted,
2120 unsigned int given)
2121 {
2122 return !((i.types[given].bitfield.xmmword
2123 && !t->operand_types[wanted].bitfield.xmmword)
2124 || (i.types[given].bitfield.ymmword
2125 && !t->operand_types[wanted].bitfield.ymmword)
2126 || (i.types[given].bitfield.zmmword
2127 && !t->operand_types[wanted].bitfield.zmmword)
2128 || (i.types[given].bitfield.tmmword
2129 && !t->operand_types[wanted].bitfield.tmmword));
2130 }
2131
2132 /* Return 1 if there is no conflict in any size between operand GIVEN
2133 and operand WANTED for instruction template T. */
2134
2135 static INLINE int
2136 match_mem_size (const insn_template *t, unsigned int wanted,
2137 unsigned int given)
2138 {
2139 return (match_operand_size (t, wanted, given)
2140 && !((i.types[given].bitfield.unspecified
2141 && !i.broadcast.type
2142 && !i.broadcast.bytes
2143 && !t->operand_types[wanted].bitfield.unspecified)
2144 || (i.types[given].bitfield.fword
2145 && !t->operand_types[wanted].bitfield.fword)
2146 /* For scalar opcode templates to allow register and memory
2147 operands at the same time, some special casing is needed
2148 here. Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
2149 down-conversion vpmov*. */
2150 || ((t->operand_types[wanted].bitfield.class == RegSIMD
2151 && t->operand_types[wanted].bitfield.byte
2152 + t->operand_types[wanted].bitfield.word
2153 + t->operand_types[wanted].bitfield.dword
2154 + t->operand_types[wanted].bitfield.qword
2155 > !!t->opcode_modifier.broadcast)
2156 ? (i.types[given].bitfield.xmmword
2157 || i.types[given].bitfield.ymmword
2158 || i.types[given].bitfield.zmmword)
2159 : !match_simd_size(t, wanted, given))));
2160 }
2161
2162 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
2163 operands for instruction template T, and it has MATCH_REVERSE set if there
2164 is no size conflict on any operands for the template with operands reversed
2165 (and the template allows for reversing in the first place). */
2166
2167 #define MATCH_STRAIGHT 1
2168 #define MATCH_REVERSE 2
2169
2170 static INLINE unsigned int
2171 operand_size_match (const insn_template *t)
2172 {
2173 unsigned int j, match = MATCH_STRAIGHT;
2174
2175 /* Don't check non-absolute jump instructions. */
2176 if (t->opcode_modifier.jump
2177 && t->opcode_modifier.jump != JUMP_ABSOLUTE)
2178 return match;
2179
2180 /* Check memory and accumulator operand size. */
2181 for (j = 0; j < i.operands; j++)
2182 {
2183 if (i.types[j].bitfield.class != Reg
2184 && i.types[j].bitfield.class != RegSIMD
2185 && t->opcode_modifier.operandconstraint == ANY_SIZE)
2186 continue;
2187
2188 if (t->operand_types[j].bitfield.class == Reg
2189 && !match_operand_size (t, j, j))
2190 {
2191 match = 0;
2192 break;
2193 }
2194
2195 if (t->operand_types[j].bitfield.class == RegSIMD
2196 && !match_simd_size (t, j, j))
2197 {
2198 match = 0;
2199 break;
2200 }
2201
2202 if (t->operand_types[j].bitfield.instance == Accum
2203 && (!match_operand_size (t, j, j) || !match_simd_size (t, j, j)))
2204 {
2205 match = 0;
2206 break;
2207 }
2208
2209 if ((i.flags[j] & Operand_Mem) && !match_mem_size (t, j, j))
2210 {
2211 match = 0;
2212 break;
2213 }
2214 }
2215
2216 if (!t->opcode_modifier.d)
2217 return match;
2218
2219 /* Check reverse. */
2220 gas_assert (i.operands >= 2);
2221
2222 for (j = 0; j < i.operands; j++)
2223 {
2224 unsigned int given = i.operands - j - 1;
2225
2226 /* For FMA4 and XOP insns VEX.W controls just the first two
2227 register operands. */
2228 if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
2229 given = j < 2 ? 1 - j : j;
2230
2231 if (t->operand_types[j].bitfield.class == Reg
2232 && !match_operand_size (t, j, given))
2233 return match;
2234
2235 if (t->operand_types[j].bitfield.class == RegSIMD
2236 && !match_simd_size (t, j, given))
2237 return match;
2238
2239 if (t->operand_types[j].bitfield.instance == Accum
2240 && (!match_operand_size (t, j, given)
2241 || !match_simd_size (t, j, given)))
2242 return match;
2243
2244 if ((i.flags[given] & Operand_Mem) && !match_mem_size (t, j, given))
2245 return match;
2246 }
2247
2248 return match | MATCH_REVERSE;
2249 }
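
/* E.g. (illustrative): for the D-marked register form of
   "mov %eax, %ebx" both operand orders size-match, giving
   MATCH_STRAIGHT | MATCH_REVERSE, whereas a size-mixing operand
   pair such as %al with %ebx matches in neither direction and
   yields 0.  */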
2250
2251 static INLINE int
2252 operand_type_match (i386_operand_type overlap,
2253 i386_operand_type given)
2254 {
2255 i386_operand_type temp = overlap;
2256
2257 temp.bitfield.unspecified = 0;
2258 temp.bitfield.byte = 0;
2259 temp.bitfield.word = 0;
2260 temp.bitfield.dword = 0;
2261 temp.bitfield.fword = 0;
2262 temp.bitfield.qword = 0;
2263 temp.bitfield.tbyte = 0;
2264 temp.bitfield.xmmword = 0;
2265 temp.bitfield.ymmword = 0;
2266 temp.bitfield.zmmword = 0;
2267 temp.bitfield.tmmword = 0;
2268 if (operand_type_all_zero (&temp))
2269 goto mismatch;
2270
2271 if (given.bitfield.baseindex == overlap.bitfield.baseindex)
2272 return 1;
2273
2274 mismatch:
2275 i.error = operand_type_mismatch;
2276 return 0;
2277 }
2278
2279 /* If given types g0 and g1 are registers they must be of the same type
2280 unless the expected operand type register overlap is null.
2281 Intel syntax sized memory operands are also checked here. */
2282
2283 static INLINE int
2284 operand_type_register_match (i386_operand_type g0,
2285 i386_operand_type t0,
2286 i386_operand_type g1,
2287 i386_operand_type t1)
2288 {
2289 if (g0.bitfield.class != Reg
2290 && g0.bitfield.class != RegSIMD
2291 && (g0.bitfield.unspecified
2292 || !operand_type_check (g0, anymem)))
2293 return 1;
2294
2295 if (g1.bitfield.class != Reg
2296 && g1.bitfield.class != RegSIMD
2297 && (g1.bitfield.unspecified
2298 || !operand_type_check (g1, anymem)))
2299 return 1;
2300
2301 if (g0.bitfield.byte == g1.bitfield.byte
2302 && g0.bitfield.word == g1.bitfield.word
2303 && g0.bitfield.dword == g1.bitfield.dword
2304 && g0.bitfield.qword == g1.bitfield.qword
2305 && g0.bitfield.xmmword == g1.bitfield.xmmword
2306 && g0.bitfield.ymmword == g1.bitfield.ymmword
2307 && g0.bitfield.zmmword == g1.bitfield.zmmword)
2308 return 1;
2309
2310 /* If expectations overlap in no more than a single size, all is fine. */
2311 g0 = operand_type_and (t0, t1);
2312 if (g0.bitfield.byte
2313 + g0.bitfield.word
2314 + g0.bitfield.dword
2315 + g0.bitfield.qword
2316 + g0.bitfield.xmmword
2317 + g0.bitfield.ymmword
2318 + g0.bitfield.zmmword <= 1)
2319 return 1;
2320
2321 i.error = register_type_mismatch;
2322
2323 return 0;
2324 }
2325
2326 static INLINE unsigned int
2327 register_number (const reg_entry *r)
2328 {
2329 unsigned int nr = r->reg_num;
2330
2331 if (r->reg_flags & RegRex)
2332 nr += 8;
2333
2334 if (r->reg_flags & RegVRex)
2335 nr += 16;
2336
2337 return nr;
2338 }
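
/* Resulting encoding numbers, for illustration:
     %eax   -> 0             (no extension bits)
     %r12d  -> 4 + 8  == 12  (RegRex)
     %xmm17 -> 1 + 16 == 17  (RegVRex, i.e. EVEX-only registers)  */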
2339
2340 static INLINE unsigned int
2341 mode_from_disp_size (i386_operand_type t)
2342 {
2343 if (t.bitfield.disp8)
2344 return 1;
2345 else if (t.bitfield.disp16
2346 || t.bitfield.disp32)
2347 return 2;
2348 else
2349 return 0;
2350 }
2351
2352 static INLINE int
2353 fits_in_signed_byte (addressT num)
2354 {
2355 return num + 0x80 <= 0xff;
2356 }
2357
2358 static INLINE int
2359 fits_in_unsigned_byte (addressT num)
2360 {
2361 return num <= 0xff;
2362 }
2363
2364 static INLINE int
2365 fits_in_unsigned_word (addressT num)
2366 {
2367 return num <= 0xffff;
2368 }
2369
2370 static INLINE int
2371 fits_in_signed_word (addressT num)
2372 {
2373 return num + 0x8000 <= 0xffff;
2374 }
2375
2376 static INLINE int
2377 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
2378 {
2379 #ifndef BFD64
2380 return 1;
2381 #else
2382 return num + 0x80000000 <= 0xffffffff;
2383 #endif
2384 } /* fits_in_signed_long() */
2385
2386 static INLINE int
2387 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
2388 {
2389 #ifndef BFD64
2390 return 1;
2391 #else
2392 return num <= 0xffffffff;
2393 #endif
2394 } /* fits_in_unsigned_long() */
2395
2396 static INLINE valueT extend_to_32bit_address (addressT num)
2397 {
2398 #ifdef BFD64
2399 if (fits_in_unsigned_long(num))
2400 return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
2401
2402 if (!fits_in_signed_long (num))
2403 return num & 0xffffffff;
2404 #endif
2405
2406 return num;
2407 }
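
/* E.g. on a BFD64 host (illustrative values): 0xfffffff0 is
   sign-extended via the bit-31 XOR/subtract trick to
   0xfffffffffffffff0, while 0x100000001 - neither a zero- nor a
   sign-extended 32-bit value - is truncated to 0x00000001.  */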
2408
2409 static INLINE int
2410 fits_in_disp8 (offsetT num)
2411 {
2412 int shift = i.memshift;
2413 unsigned int mask;
2414
2415 if (shift == -1)
2416 abort ();
2417
2418 mask = (1 << shift) - 1;
2419
2420 /* Return 0 if NUM isn't properly aligned. */
2421 if ((num & mask))
2422 return 0;
2423
2424 /* Check whether NUM will fit in 8 bits after the shift. */
2425 return fits_in_signed_byte (num >> shift);
2426 }
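
/* disp8*N example (EVEX compressed displacement; illustrative):
   with i.memshift == 6, as for a full 64-byte ZMM memory operand,
   a displacement of 0x80 fits (0x80 >> 6 == 2 is a signed byte and
   the low six bits are clear), while 0x44 fails the alignment test
   and 0x4000 overflows eight bits after the shift.  */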
2427
2428 static INLINE int
2429 fits_in_imm4 (offsetT num)
2430 {
2431 /* Despite the name, check for imm3 if we're dealing with EVEX. */
2432 return (num & (i.vec_encoding != vex_encoding_evex ? 0xf : 7)) == num;
2433 }
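
/* I.e. (illustrative) a register-in-immediate nibble may hold 0..15
   for VEX/XOP encodings, but only 0..7 for EVEX ones, where bit 3
   of the nibble is not available - hence the imm3 check despite
   the function's name.  */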
2434
2435 static i386_operand_type
2436 smallest_imm_type (offsetT num)
2437 {
2438 i386_operand_type t;
2439
2440 operand_type_set (&t, 0);
2441 t.bitfield.imm64 = 1;
2442
2443 if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
2444 {
2445 /* This code is disabled on the 486 because all the Imm1 forms
2446 in the opcode table are slower on the i486. They're the
2447 versions with the implicitly specified single-position
2448 displacement, which has another syntax if you really want to
2449 use that form. */
2450 t.bitfield.imm1 = 1;
2451 t.bitfield.imm8 = 1;
2452 t.bitfield.imm8s = 1;
2453 t.bitfield.imm16 = 1;
2454 t.bitfield.imm32 = 1;
2455 t.bitfield.imm32s = 1;
2456 }
2457 else if (fits_in_signed_byte (num))
2458 {
2459 if (fits_in_unsigned_byte (num))
2460 t.bitfield.imm8 = 1;
2461 t.bitfield.imm8s = 1;
2462 t.bitfield.imm16 = 1;
2463 t.bitfield.imm32 = 1;
2464 t.bitfield.imm32s = 1;
2465 }
2466 else if (fits_in_unsigned_byte (num))
2467 {
2468 t.bitfield.imm8 = 1;
2469 t.bitfield.imm16 = 1;
2470 t.bitfield.imm32 = 1;
2471 t.bitfield.imm32s = 1;
2472 }
2473 else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
2474 {
2475 t.bitfield.imm16 = 1;
2476 t.bitfield.imm32 = 1;
2477 t.bitfield.imm32s = 1;
2478 }
2479 else if (fits_in_signed_long (num))
2480 {
2481 t.bitfield.imm32 = 1;
2482 t.bitfield.imm32s = 1;
2483 }
2484 else if (fits_in_unsigned_long (num))
2485 t.bitfield.imm32 = 1;
2486
2487 return t;
2488 }
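
/* Sample classifications (illustrative; Imm64 is set up front in
   all cases):
     num == 1        -> adds Imm1|Imm8|Imm8S|Imm16|Imm32|Imm32S
                        (no Imm1 when tuning for the i486)
     num == -2       -> adds Imm8S|Imm16|Imm32|Imm32S
     num == 0x1234   -> adds Imm16|Imm32|Imm32S
     num == 1u << 31 -> adds Imm32 only (unsigned, not signed)  */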
2489
2490 static offsetT
2491 offset_in_range (offsetT val, int size)
2492 {
2493 addressT mask;
2494
2495 switch (size)
2496 {
2497 case 1: mask = ((addressT) 1 << 8) - 1; break;
2498 case 2: mask = ((addressT) 1 << 16) - 1; break;
2499 #ifdef BFD64
2500 case 4: mask = ((addressT) 1 << 32) - 1; break;
2501 #endif
2502 case sizeof (val): return val;
2503 default: abort ();
2504 }
2505
2506 if ((val & ~mask) != 0 && (-val & ~mask) != 0)
2507 as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
2508 (uint64_t) val, (uint64_t) (val & mask));
2509
2510 return val & mask;
2511 }
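
/* For instance, offset_in_range (0x123456789, 4) warns that the
   value is shortened and returns 0x23456789, while
   offset_in_range (-1, 2) silently returns 0xffff, since all
   discarded bits agree with the sign.  */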
2512
2513 static INLINE const char *insn_name (const insn_template *t)
2514 {
2515 return &i386_mnemonics[t->mnem_off];
2516 }
2517
2518 enum PREFIX_GROUP
2519 {
2520 PREFIX_EXIST = 0,
2521 PREFIX_LOCK,
2522 PREFIX_REP,
2523 PREFIX_DS,
2524 PREFIX_OTHER
2525 };
2526
2527 /* Returns
2528 a. PREFIX_EXIST if attempting to add a prefix where one from the
2529 same class already exists.
2530 b. PREFIX_LOCK if lock prefix is added.
2531 c. PREFIX_REP if rep/repne prefix is added.
2532 d. PREFIX_DS if ds prefix is added.
2533 e. PREFIX_OTHER if other prefix is added.
2534 */
2535
2536 static enum PREFIX_GROUP
2537 add_prefix (unsigned int prefix)
2538 {
2539 enum PREFIX_GROUP ret = PREFIX_OTHER;
2540 unsigned int q;
2541
2542 if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
2543 && flag_code == CODE_64BIT)
2544 {
2545 if ((i.prefix[REX_PREFIX] & prefix & REX_W)
2546 || (i.prefix[REX_PREFIX] & prefix & REX_R)
2547 || (i.prefix[REX_PREFIX] & prefix & REX_X)
2548 || (i.prefix[REX_PREFIX] & prefix & REX_B))
2549 ret = PREFIX_EXIST;
2550 q = REX_PREFIX;
2551 }
2552 else
2553 {
2554 switch (prefix)
2555 {
2556 default:
2557 abort ();
2558
2559 case DS_PREFIX_OPCODE:
2560 ret = PREFIX_DS;
2561 /* Fall through. */
2562 case CS_PREFIX_OPCODE:
2563 case ES_PREFIX_OPCODE:
2564 case FS_PREFIX_OPCODE:
2565 case GS_PREFIX_OPCODE:
2566 case SS_PREFIX_OPCODE:
2567 q = SEG_PREFIX;
2568 break;
2569
2570 case REPNE_PREFIX_OPCODE:
2571 case REPE_PREFIX_OPCODE:
2572 q = REP_PREFIX;
2573 ret = PREFIX_REP;
2574 break;
2575
2576 case LOCK_PREFIX_OPCODE:
2577 q = LOCK_PREFIX;
2578 ret = PREFIX_LOCK;
2579 break;
2580
2581 case FWAIT_OPCODE:
2582 q = WAIT_PREFIX;
2583 break;
2584
2585 case ADDR_PREFIX_OPCODE:
2586 q = ADDR_PREFIX;
2587 break;
2588
2589 case DATA_PREFIX_OPCODE:
2590 q = DATA_PREFIX;
2591 break;
2592 }
2593 if (i.prefix[q] != 0)
2594 ret = PREFIX_EXIST;
2595 }
2596
2597 if (ret)
2598 {
2599 if (!i.prefix[q])
2600 ++i.prefixes;
2601 i.prefix[q] |= prefix;
2602 }
2603 else
2604 as_bad (_("same type of prefix used twice"));
2605
2606 return ret;
2607 }
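
/* Usage sketch: add_prefix (LOCK_PREFIX_OPCODE) yields PREFIX_LOCK
   on first use and PREFIX_EXIST (plus the "used twice" diagnostic)
   on a repeat.  REX bits in 64-bit code instead accumulate: adding
   REX.W and then REX.B both succeed, as they occupy distinct bits
   of i.prefix[REX_PREFIX].  */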
2608
2609 static void
2610 update_code_flag (int value, int check)
2611 {
2612 PRINTF_LIKE ((*as_error)) = check ? as_fatal : as_bad;
2613
2614 if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpulm)
2615 {
2616 as_error (_("64bit mode not supported on `%s'."),
2617 cpu_arch_name ? cpu_arch_name : default_arch);
2618 return;
2619 }
2620
2621 if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
2622 {
2623 as_error (_("32bit mode not supported on `%s'."),
2624 cpu_arch_name ? cpu_arch_name : default_arch);
2625 return;
2626 }
2627
2628 flag_code = (enum flag_code) value;
2629 if (flag_code == CODE_64BIT)
2630 {
2631 cpu_arch_flags.bitfield.cpu64 = 1;
2632 cpu_arch_flags.bitfield.cpuno64 = 0;
2633 }
2634 else
2635 {
2636 cpu_arch_flags.bitfield.cpu64 = 0;
2637 cpu_arch_flags.bitfield.cpuno64 = 1;
2638 }
2639
2640 stackop_size = '\0';
2641 }
2642
2643 static void
2644 set_code_flag (int value)
2645 {
2646 update_code_flag (value, 0);
2647 }
2648
2649 static void
2650 set_16bit_gcc_code_flag (int new_code_flag)
2651 {
2652 flag_code = (enum flag_code) new_code_flag;
2653 if (flag_code != CODE_16BIT)
2654 abort ();
2655 cpu_arch_flags.bitfield.cpu64 = 0;
2656 cpu_arch_flags.bitfield.cpuno64 = 1;
2657 stackop_size = LONG_MNEM_SUFFIX;
2658 }
2659
2660 static void
2661 set_intel_syntax (int syntax_flag)
2662 {
2663 /* Find out if register prefixing is specified. */
2664 int ask_naked_reg = 0;
2665
2666 SKIP_WHITESPACE ();
2667 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2668 {
2669 char *string;
2670 int e = get_symbol_name (&string);
2671
2672 if (strcmp (string, "prefix") == 0)
2673 ask_naked_reg = 1;
2674 else if (strcmp (string, "noprefix") == 0)
2675 ask_naked_reg = -1;
2676 else
2677 as_bad (_("bad argument to syntax directive."));
2678 (void) restore_line_pointer (e);
2679 }
2680 demand_empty_rest_of_line ();
2681
2682 intel_syntax = syntax_flag;
2683
2684 if (ask_naked_reg == 0)
2685 allow_naked_reg = (intel_syntax
2686 && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
2687 else
2688 allow_naked_reg = (ask_naked_reg < 0);
2689
2690 expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
2691
2692 register_prefix = allow_naked_reg ? "" : "%";
2693 }
2694
2695 static void
2696 set_intel_mnemonic (int mnemonic_flag)
2697 {
2698 intel_mnemonic = mnemonic_flag;
2699 }
2700
2701 static void
2702 set_allow_index_reg (int flag)
2703 {
2704 allow_index_reg = flag;
2705 }
2706
2707 static void
2708 set_check (int what)
2709 {
2710 enum check_kind *kind;
2711 const char *str;
2712
2713 if (what)
2714 {
2715 kind = &operand_check;
2716 str = "operand";
2717 }
2718 else
2719 {
2720 kind = &sse_check;
2721 str = "sse";
2722 }
2723
2724 SKIP_WHITESPACE ();
2725
2726 if (!is_end_of_line[(unsigned char) *input_line_pointer])
2727 {
2728 char *string;
2729 int e = get_symbol_name (&string);
2730
2731 if (strcmp (string, "none") == 0)
2732 *kind = check_none;
2733 else if (strcmp (string, "warning") == 0)
2734 *kind = check_warning;
2735 else if (strcmp (string, "error") == 0)
2736 *kind = check_error;
2737 else
2738 as_bad (_("bad argument to %s_check directive."), str);
2739 (void) restore_line_pointer (e);
2740 }
2741 else
2742 as_bad (_("missing argument for %s_check directive"), str);
2743
2744 demand_empty_rest_of_line ();
2745 }
2746
2747 static void
2748 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
2749 i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
2750 {
2751 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
2752 static const char *arch;
2753
2754 /* Intel MCU is only supported on ELF. */
2755 if (!IS_ELF)
2756 return;
2757
2758 if (!arch)
2759 {
2760 /* Use cpu_arch_name if it is set in md_parse_option. Otherwise
2761 use default_arch. */
2762 arch = cpu_arch_name;
2763 if (!arch)
2764 arch = default_arch;
2765 }
2766
2767 /* If we are targeting Intel MCU, we must enable it. */
2768 if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
2769 == new_flag.bitfield.cpuiamcu)
2770 return;
2771
2772 as_bad (_("`%s' is not supported on `%s'"), name, arch);
2773 #endif
2774 }
2775
2776 static void
2777 extend_cpu_sub_arch_name (const char *name)
2778 {
2779 if (cpu_sub_arch_name)
2780 cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
2781 ".", name, (const char *) NULL);
2782 else
2783 cpu_sub_arch_name = concat (".", name, (const char *) NULL);
2784 }
2785
2786 static void
2787 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
2788 {
2789 typedef struct arch_stack_entry
2790 {
2791 const struct arch_stack_entry *prev;
2792 const char *name;
2793 char *sub_name;
2794 i386_cpu_flags flags;
2795 i386_cpu_flags isa_flags;
2796 enum processor_type isa;
2797 enum flag_code flag_code;
2798 unsigned int vector_size;
2799 char stackop_size;
2800 bool no_cond_jump_promotion;
2801 } arch_stack_entry;
2802 static const arch_stack_entry *arch_stack_top;
2803 char *s;
2804 int e;
2805 const char *string;
2806 unsigned int j = 0;
2807 i386_cpu_flags flags;
2808
2809 SKIP_WHITESPACE ();
2810
2811 if (is_end_of_line[(unsigned char) *input_line_pointer])
2812 {
2813 as_bad (_("missing cpu architecture"));
2814 input_line_pointer++;
2815 return;
2816 }
2817
2818 e = get_symbol_name (&s);
2819 string = s;
2820
2821 if (strcmp (string, "push") == 0)
2822 {
2823 arch_stack_entry *top = XNEW (arch_stack_entry);
2824
2825 top->name = cpu_arch_name;
2826 if (cpu_sub_arch_name)
2827 top->sub_name = xstrdup (cpu_sub_arch_name);
2828 else
2829 top->sub_name = NULL;
2830 top->flags = cpu_arch_flags;
2831 top->isa = cpu_arch_isa;
2832 top->isa_flags = cpu_arch_isa_flags;
2833 top->flag_code = flag_code;
2834 top->vector_size = vector_size;
2835 top->stackop_size = stackop_size;
2836 top->no_cond_jump_promotion = no_cond_jump_promotion;
2837
2838 top->prev = arch_stack_top;
2839 arch_stack_top = top;
2840
2841 (void) restore_line_pointer (e);
2842 demand_empty_rest_of_line ();
2843 return;
2844 }
2845
2846 if (strcmp (string, "pop") == 0)
2847 {
2848 const arch_stack_entry *top = arch_stack_top;
2849
2850 if (!top)
2851 as_bad (_(".arch stack is empty"));
2852 else if (top->flag_code != flag_code
2853 || top->stackop_size != stackop_size)
2854 {
2855 static const unsigned int bits[] = {
2856 [CODE_16BIT] = 16,
2857 [CODE_32BIT] = 32,
2858 [CODE_64BIT] = 64,
2859 };
2860
2861 as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
2862 bits[top->flag_code],
2863 top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
2864 }
2865 else
2866 {
2867 arch_stack_top = top->prev;
2868
2869 cpu_arch_name = top->name;
2870 free (cpu_sub_arch_name);
2871 cpu_sub_arch_name = top->sub_name;
2872 cpu_arch_flags = top->flags;
2873 cpu_arch_isa = top->isa;
2874 cpu_arch_isa_flags = top->isa_flags;
2875 vector_size = top->vector_size;
2876 no_cond_jump_promotion = top->no_cond_jump_promotion;
2877
2878 XDELETE (top);
2879 }
2880
2881 (void) restore_line_pointer (e);
2882 demand_empty_rest_of_line ();
2883 return;
2884 }
2885
2886 if (strcmp (string, "default") == 0)
2887 {
2888 if (strcmp (default_arch, "iamcu") == 0)
2889 string = default_arch;
2890 else
2891 {
2892 static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
2893
2894 cpu_arch_name = NULL;
2895 free (cpu_sub_arch_name);
2896 cpu_sub_arch_name = NULL;
2897 cpu_arch_flags = cpu_unknown_flags;
2898 if (flag_code == CODE_64BIT)
2899 {
2900 cpu_arch_flags.bitfield.cpu64 = 1;
2901 cpu_arch_flags.bitfield.cpuno64 = 0;
2902 }
2903 else
2904 {
2905 cpu_arch_flags.bitfield.cpu64 = 0;
2906 cpu_arch_flags.bitfield.cpuno64 = 1;
2907 }
2908 cpu_arch_isa = PROCESSOR_UNKNOWN;
2909 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
2910 if (!cpu_arch_tune_set)
2911 {
2912 cpu_arch_tune = cpu_arch_isa;
2913 cpu_arch_tune_flags = cpu_arch_isa_flags;
2914 }
2915
2916 vector_size = VSZ_DEFAULT;
2917
2918 j = ARRAY_SIZE (cpu_arch) + 1;
2919 }
2920 }
2921
2922 for (; j < ARRAY_SIZE (cpu_arch); j++)
2923 {
2924 if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
2925 && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
2926 {
2927 if (*string != '.')
2928 {
2929 check_cpu_arch_compatible (string, cpu_arch[j].enable);
2930
2931 if (flag_code == CODE_64BIT && !cpu_arch[j].enable.bitfield.cpulm)
2932 {
2933 as_bad (_("64bit mode not supported on `%s'."),
2934 cpu_arch[j].name);
2935 (void) restore_line_pointer (e);
2936 ignore_rest_of_line ();
2937 return;
2938 }
2939
2940 if (flag_code == CODE_32BIT && !cpu_arch[j].enable.bitfield.cpui386)
2941 {
2942 as_bad (_("32bit mode not supported on `%s'."),
2943 cpu_arch[j].name);
2944 (void) restore_line_pointer (e);
2945 ignore_rest_of_line ();
2946 return;
2947 }
2948
2949 cpu_arch_name = cpu_arch[j].name;
2950 free (cpu_sub_arch_name);
2951 cpu_sub_arch_name = NULL;
2952 cpu_arch_flags = cpu_arch[j].enable;
2953 if (flag_code == CODE_64BIT)
2954 {
2955 cpu_arch_flags.bitfield.cpu64 = 1;
2956 cpu_arch_flags.bitfield.cpuno64 = 0;
2957 }
2958 else
2959 {
2960 cpu_arch_flags.bitfield.cpu64 = 0;
2961 cpu_arch_flags.bitfield.cpuno64 = 1;
2962 }
2963 cpu_arch_isa = cpu_arch[j].type;
2964 cpu_arch_isa_flags = cpu_arch[j].enable;
2965 if (!cpu_arch_tune_set)
2966 {
2967 cpu_arch_tune = cpu_arch_isa;
2968 cpu_arch_tune_flags = cpu_arch_isa_flags;
2969 }
2970
2971 vector_size = VSZ_DEFAULT;
2972
2973 pre_386_16bit_warned = false;
2974 break;
2975 }
2976
2977 if (cpu_flags_all_zero (&cpu_arch[j].enable))
2978 continue;
2979
2980 flags = cpu_flags_or (cpu_arch_flags, cpu_arch[j].enable);
2981
2982 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
2983 {
2984 extend_cpu_sub_arch_name (string + 1);
2985 cpu_arch_flags = flags;
2986 cpu_arch_isa_flags = flags;
2987 }
2988 else
2989 cpu_arch_isa_flags
2990 = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[j].enable);
2991
2992 (void) restore_line_pointer (e);
2993
2994 switch (cpu_arch[j].vsz)
2995 {
2996 default:
2997 break;
2998
2999 case vsz_set:
3000 #ifdef SVR4_COMMENT_CHARS
3001 if (*input_line_pointer == ':' || *input_line_pointer == '/')
3002 #else
3003 if (*input_line_pointer == '/')
3004 #endif
3005 {
3006 ++input_line_pointer;
3007 switch (get_absolute_expression ())
3008 {
3009 case 512: vector_size = VSZ512; break;
3010 case 256: vector_size = VSZ256; break;
3011 case 128: vector_size = VSZ128; break;
3012 default:
3013 as_bad (_("Unrecognized vector size specifier"));
3014 ignore_rest_of_line ();
3015 return;
3016 }
3017 break;
3018 }
3019 /* Fall through. */
3020 case vsz_reset:
3021 vector_size = VSZ_DEFAULT;
3022 break;
3023 }
3024
3025 demand_empty_rest_of_line ();
3026 return;
3027 }
3028 }
3029
3030 if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
3031 {
3032 /* Disable an ISA extension. */
3033 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
3034 if (cpu_arch[j].type == PROCESSOR_NONE
3035 && strcmp (string + 3, cpu_arch[j].name) == 0)
3036 {
3037 flags = cpu_flags_and_not (cpu_arch_flags, cpu_arch[j].disable);
3038 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
3039 {
3040 extend_cpu_sub_arch_name (string + 1);
3041 cpu_arch_flags = flags;
3042 cpu_arch_isa_flags = flags;
3043 }
3044
3045 if (cpu_arch[j].vsz == vsz_set)
3046 vector_size = VSZ_DEFAULT;
3047
3048 (void) restore_line_pointer (e);
3049 demand_empty_rest_of_line ();
3050 return;
3051 }
3052 }
3053
3054 if (j == ARRAY_SIZE (cpu_arch))
3055 as_bad (_("no such architecture: `%s'"), string);
3056
3057 *input_line_pointer = e;
3058
3059 no_cond_jump_promotion = 0;
3060 if (*input_line_pointer == ','
3061 && !is_end_of_line[(unsigned char) input_line_pointer[1]])
3062 {
3063 ++input_line_pointer;
3064 e = get_symbol_name (&s);
3065 string = s;
3066
3067 if (strcmp (string, "nojumps") == 0)
3068 no_cond_jump_promotion = 1;
3069 else if (strcmp (string, "jumps") == 0)
3070 ;
3071 else
3072 as_bad (_("no such architecture modifier: `%s'"), string);
3073
3074 (void) restore_line_pointer (e);
3075 }
3076
3077 demand_empty_rest_of_line ();
3078 }
3079
3080 enum bfd_architecture
3081 i386_arch (void)
3082 {
3083 if (cpu_arch_isa == PROCESSOR_IAMCU)
3084 {
3085 if (OUTPUT_FLAVOR != bfd_target_elf_flavour
3086 || flag_code == CODE_64BIT)
3087 as_fatal (_("Intel MCU is 32bit ELF only"));
3088 return bfd_arch_iamcu;
3089 }
3090 else
3091 return bfd_arch_i386;
3092 }
3093
3094 unsigned long
3095 i386_mach (void)
3096 {
3097 if (startswith (default_arch, "x86_64"))
3098 {
3099 if (default_arch[6] == '\0')
3100 return bfd_mach_x86_64;
3101 else
3102 return bfd_mach_x64_32;
3103 }
3104 else if (!strcmp (default_arch, "i386")
3105 || !strcmp (default_arch, "iamcu"))
3106 {
3107 if (cpu_arch_isa == PROCESSOR_IAMCU)
3108 {
3109 if (OUTPUT_FLAVOR != bfd_target_elf_flavour)
3110 as_fatal (_("Intel MCU is 32bit ELF only"));
3111 return bfd_mach_i386_iamcu;
3112 }
3113 else
3114 return bfd_mach_i386_i386;
3115 }
3116 else
3117 as_fatal (_("unknown architecture"));
3118 }
3119 \f
3120 #include "opcodes/i386-tbl.h"
3121
3122 void
3123 md_begin (void)
3124 {
3125 /* Support pseudo prefixes like {disp32}. */
3126 lex_type ['{'] = LEX_BEGIN_NAME;
3127
3128 /* Initialize op_hash hash table. */
3129 op_hash = str_htab_create ();
3130
3131 {
3132 const insn_template *const *sets = i386_op_sets;
3133 const insn_template *const *end = sets + ARRAY_SIZE (i386_op_sets) - 1;
3134
3135 /* Type checks to compensate for the conversion through void * which
3136 occurs during hash table insertion / lookup. */
3137 (void) sizeof (sets == &current_templates->start);
3138 (void) sizeof (end == &current_templates->end);
3139 for (; sets < end; ++sets)
3140 if (str_hash_insert (op_hash, insn_name (*sets), sets, 0))
3141 as_fatal (_("duplicate %s"), insn_name (*sets));
3142 }
3143
3144 /* Initialize reg_hash hash table. */
3145 reg_hash = str_htab_create ();
3146 {
3147 const reg_entry *regtab;
3148 unsigned int regtab_size = i386_regtab_size;
3149
3150 for (regtab = i386_regtab; regtab_size--; regtab++)
3151 {
3152 switch (regtab->reg_type.bitfield.class)
3153 {
3154 case Reg:
3155 if (regtab->reg_type.bitfield.dword)
3156 {
3157 if (regtab->reg_type.bitfield.instance == Accum)
3158 reg_eax = regtab;
3159 }
3160 else if (regtab->reg_type.bitfield.tbyte)
3161 {
3162 /* There's no point inserting st(<N>) in the hash table, as
3163 parentheses aren't included in register_chars[] anyway. */
3164 if (regtab->reg_type.bitfield.instance != Accum)
3165 continue;
3166 reg_st0 = regtab;
3167 }
3168 break;
3169
3170 case SReg:
3171 switch (regtab->reg_num)
3172 {
3173 case 0: reg_es = regtab; break;
3174 case 2: reg_ss = regtab; break;
3175 case 3: reg_ds = regtab; break;
3176 }
3177 break;
3178
3179 case RegMask:
3180 if (!regtab->reg_num)
3181 reg_k0 = regtab;
3182 break;
3183 }
3184
3185 if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
3186 as_fatal (_("duplicate %s"), regtab->reg_name);
3187 }
3188 }
3189
3190 /* Fill in lexical tables: mnemonic_chars, operand_chars. */
3191 {
3192 int c;
3193 const char *p;
3194
3195 for (c = 0; c < 256; c++)
3196 {
3197 if (ISDIGIT (c) || ISLOWER (c))
3198 {
3199 mnemonic_chars[c] = c;
3200 register_chars[c] = c;
3201 operand_chars[c] = c;
3202 }
3203 else if (ISUPPER (c))
3204 {
3205 mnemonic_chars[c] = TOLOWER (c);
3206 register_chars[c] = mnemonic_chars[c];
3207 operand_chars[c] = c;
3208 }
3209 #ifdef SVR4_COMMENT_CHARS
3210 else if (c == '\\' && strchr (i386_comment_chars, '/'))
3211 operand_chars[c] = c;
3212 #endif
3213
3214 if (c >= 128)
3215 operand_chars[c] = c;
3216 }
3217
3218 mnemonic_chars['_'] = '_';
3219 mnemonic_chars['-'] = '-';
3220 mnemonic_chars['.'] = '.';
3221
3222 for (p = extra_symbol_chars; *p != '\0'; p++)
3223 operand_chars[(unsigned char) *p] = *p;
3224 for (p = operand_special_chars; *p != '\0'; p++)
3225 operand_chars[(unsigned char) *p] = *p;
3226 }
3227
3228 if (flag_code == CODE_64BIT)
3229 {
3230 #if defined (OBJ_COFF) && defined (TE_PE)
3231 x86_dwarf2_return_column = (OUTPUT_FLAVOR == bfd_target_coff_flavour
3232 ? 32 : 16);
3233 #else
3234 x86_dwarf2_return_column = 16;
3235 #endif
3236 x86_cie_data_alignment = -8;
3237 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3238 x86_sframe_cfa_sp_reg = 7;
3239 x86_sframe_cfa_fp_reg = 6;
3240 #endif
3241 }
3242 else
3243 {
3244 x86_dwarf2_return_column = 8;
3245 x86_cie_data_alignment = -4;
3246 }
3247
3248 /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
3249 can be turned into BRANCH_PREFIX frag. */
3250 if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
3251 abort ();
3252 }
3253
3254 void
3255 i386_print_statistics (FILE *file)
3256 {
3257 htab_print_statistics (file, "i386 opcode", op_hash);
3258 htab_print_statistics (file, "i386 register", reg_hash);
3259 }
3260
3261 void
3262 i386_md_end (void)
3263 {
3264 htab_delete (op_hash);
3265 htab_delete (reg_hash);
3266 }
3267 \f
3268 #ifdef DEBUG386
3269
3270 /* Debugging routines for md_assemble. */
3271 static void pte (insn_template *);
3272 static void pt (i386_operand_type);
3273 static void pe (expressionS *);
3274 static void ps (symbolS *);
3275
3276 static void
3277 pi (const char *line, i386_insn *x)
3278 {
3279 unsigned int j;
3280
3281 fprintf (stdout, "%s: template ", line);
3282 pte (&x->tm);
3283 fprintf (stdout, " address: base %s index %s scale %x\n",
3284 x->base_reg ? x->base_reg->reg_name : "none",
3285 x->index_reg ? x->index_reg->reg_name : "none",
3286 x->log2_scale_factor);
3287 fprintf (stdout, " modrm: mode %x reg %x reg/mem %x\n",
3288 x->rm.mode, x->rm.reg, x->rm.regmem);
3289 fprintf (stdout, " sib: base %x index %x scale %x\n",
3290 x->sib.base, x->sib.index, x->sib.scale);
3291 fprintf (stdout, " rex: 64bit %x extX %x extY %x extZ %x\n",
3292 (x->rex & REX_W) != 0,
3293 (x->rex & REX_R) != 0,
3294 (x->rex & REX_X) != 0,
3295 (x->rex & REX_B) != 0);
3296 for (j = 0; j < x->operands; j++)
3297 {
3298 fprintf (stdout, " #%d: ", j + 1);
3299 pt (x->types[j]);
3300 fprintf (stdout, "\n");
3301 if (x->types[j].bitfield.class == Reg
3302 || x->types[j].bitfield.class == RegMMX
3303 || x->types[j].bitfield.class == RegSIMD
3304 || x->types[j].bitfield.class == RegMask
3305 || x->types[j].bitfield.class == SReg
3306 || x->types[j].bitfield.class == RegCR
3307 || x->types[j].bitfield.class == RegDR
3308 || x->types[j].bitfield.class == RegTR
3309 || x->types[j].bitfield.class == RegBND)
3310 fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
3311 if (operand_type_check (x->types[j], imm))
3312 pe (x->op[j].imms);
3313 if (operand_type_check (x->types[j], disp))
3314 pe (x->op[j].disps);
3315 }
3316 }
3317
3318 static void
3319 pte (insn_template *t)
3320 {
3321 static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
3322 static const char *const opc_spc[] = {
3323 NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
3324 "XOP08", "XOP09", "XOP0A",
3325 };
3326 unsigned int j;
3327
3328 fprintf (stdout, " %d operands ", t->operands);
3329 if (opc_pfx[t->opcode_modifier.opcodeprefix])
3330 fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
3331 if (opc_spc[t->opcode_space])
3332 fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
3333 fprintf (stdout, "opcode %x ", t->base_opcode);
3334 if (t->extension_opcode != None)
3335 fprintf (stdout, "ext %x ", t->extension_opcode);
3336 if (t->opcode_modifier.d)
3337 fprintf (stdout, "D");
3338 if (t->opcode_modifier.w)
3339 fprintf (stdout, "W");
3340 fprintf (stdout, "\n");
3341 for (j = 0; j < t->operands; j++)
3342 {
3343 fprintf (stdout, " #%d type ", j + 1);
3344 pt (t->operand_types[j]);
3345 fprintf (stdout, "\n");
3346 }
3347 }
3348
3349 static void
3350 pe (expressionS *e)
3351 {
3352 fprintf (stdout, " operation %d\n", e->X_op);
3353 fprintf (stdout, " add_number %" PRId64 " (%" PRIx64 ")\n",
3354 (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
3355 if (e->X_add_symbol)
3356 {
3357 fprintf (stdout, " add_symbol ");
3358 ps (e->X_add_symbol);
3359 fprintf (stdout, "\n");
3360 }
3361 if (e->X_op_symbol)
3362 {
3363 fprintf (stdout, " op_symbol ");
3364 ps (e->X_op_symbol);
3365 fprintf (stdout, "\n");
3366 }
3367 }
3368
3369 static void
3370 ps (symbolS *s)
3371 {
3372 fprintf (stdout, "%s type %s%s",
3373 S_GET_NAME (s),
3374 S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
3375 segment_name (S_GET_SEGMENT (s)));
3376 }
3377
3378 static struct type_name
3379 {
3380 i386_operand_type mask;
3381 const char *name;
3382 }
3383 const type_names[] =
3384 {
3385 { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
3386 { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
3387 { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
3388 { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
3389 { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
3390 { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
3391 { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
3392 { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
3393 { { .bitfield = { .imm8 = 1 } }, "i8" },
3394 { { .bitfield = { .imm8s = 1 } }, "i8s" },
3395 { { .bitfield = { .imm16 = 1 } }, "i16" },
3396 { { .bitfield = { .imm32 = 1 } }, "i32" },
3397 { { .bitfield = { .imm32s = 1 } }, "i32s" },
3398 { { .bitfield = { .imm64 = 1 } }, "i64" },
3399 { { .bitfield = { .imm1 = 1 } }, "i1" },
3400 { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
3401 { { .bitfield = { .disp8 = 1 } }, "d8" },
3402 { { .bitfield = { .disp16 = 1 } }, "d16" },
3403 { { .bitfield = { .disp32 = 1 } }, "d32" },
3404 { { .bitfield = { .disp64 = 1 } }, "d64" },
3405 { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
3406 { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
3407 { { .bitfield = { .class = RegCR } }, "control reg" },
3408 { { .bitfield = { .class = RegTR } }, "test reg" },
3409 { { .bitfield = { .class = RegDR } }, "debug reg" },
3410 { { .bitfield = { .class = Reg, .tbyte = 1 } }, "FReg" },
3411 { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
3412 { { .bitfield = { .class = SReg } }, "SReg" },
3413 { { .bitfield = { .class = RegMMX } }, "rMMX" },
3414 { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
3415 { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
3416 { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
3417 { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
3418 { { .bitfield = { .class = RegMask } }, "Mask reg" },
3419 };
3420
3421 static void
3422 pt (i386_operand_type t)
3423 {
3424 unsigned int j;
3425 i386_operand_type a;
3426
3427 for (j = 0; j < ARRAY_SIZE (type_names); j++)
3428 {
3429 a = operand_type_and (t, type_names[j].mask);
3430 if (operand_type_equal (&a, &type_names[j].mask))
3431 fprintf (stdout, "%s, ", type_names[j].name);
3432 }
3433 fflush (stdout);
3434 }
3435
3436 #endif /* DEBUG386 */
3437 \f
3438 static bfd_reloc_code_real_type
3439 reloc (unsigned int size,
3440 int pcrel,
3441 int sign,
3442 bfd_reloc_code_real_type other)
3443 {
3444 if (other != NO_RELOC)
3445 {
3446 reloc_howto_type *rel;
3447
3448 if (size == 8)
3449 switch (other)
3450 {
3451 case BFD_RELOC_X86_64_GOT32:
3452 return BFD_RELOC_X86_64_GOT64;
3454 case BFD_RELOC_X86_64_GOTPLT64:
3455 return BFD_RELOC_X86_64_GOTPLT64;
3457 case BFD_RELOC_X86_64_PLTOFF64:
3458 return BFD_RELOC_X86_64_PLTOFF64;
3460 case BFD_RELOC_X86_64_GOTPC32:
3461 other = BFD_RELOC_X86_64_GOTPC64;
3462 break;
3463 case BFD_RELOC_X86_64_GOTPCREL:
3464 other = BFD_RELOC_X86_64_GOTPCREL64;
3465 break;
3466 case BFD_RELOC_X86_64_TPOFF32:
3467 other = BFD_RELOC_X86_64_TPOFF64;
3468 break;
3469 case BFD_RELOC_X86_64_DTPOFF32:
3470 other = BFD_RELOC_X86_64_DTPOFF64;
3471 break;
3472 default:
3473 break;
3474 }
3475
3476 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3477 if (other == BFD_RELOC_SIZE32)
3478 {
3479 if (size == 8)
3480 other = BFD_RELOC_SIZE64;
3481 if (pcrel)
3482 {
3483 as_bad (_("there are no pc-relative size relocations"));
3484 return NO_RELOC;
3485 }
3486 }
3487 #endif
3488
3489 /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless. */
3490 if (size == 4 && (flag_code != CODE_64BIT || disallow_64bit_reloc))
3491 sign = -1;
3492
3493 rel = bfd_reloc_type_lookup (stdoutput, other);
3494 if (!rel)
3495 as_bad (_("unknown relocation (%u)"), other);
3496 else if (size != bfd_get_reloc_size (rel))
3497 as_bad (_("%u-byte relocation cannot be applied to %u-byte field"),
3498 bfd_get_reloc_size (rel),
3499 size);
3500 else if (pcrel && !rel->pc_relative)
3501 as_bad (_("non-pc-relative relocation for pc-relative field"));
3502 else if ((rel->complain_on_overflow == complain_overflow_signed
3503 && !sign)
3504 || (rel->complain_on_overflow == complain_overflow_unsigned
3505 && sign > 0))
3506 as_bad (_("relocated field and relocation type differ in signedness"));
3507 else
3508 return other;
3509 return NO_RELOC;
3510 }
3511
3512 if (pcrel)
3513 {
3514 if (!sign)
3515 as_bad (_("there are no unsigned pc-relative relocations"));
3516 switch (size)
3517 {
3518 case 1: return BFD_RELOC_8_PCREL;
3519 case 2: return BFD_RELOC_16_PCREL;
3520 case 4: return BFD_RELOC_32_PCREL;
3521 case 8: return BFD_RELOC_64_PCREL;
3522 }
3523 as_bad (_("cannot do %u byte pc-relative relocation"), size);
3524 }
3525 else
3526 {
3527 if (sign > 0)
3528 switch (size)
3529 {
3530 case 4: return BFD_RELOC_X86_64_32S;
3531 }
3532 else
3533 switch (size)
3534 {
3535 case 1: return BFD_RELOC_8;
3536 case 2: return BFD_RELOC_16;
3537 case 4: return BFD_RELOC_32;
3538 case 8: return BFD_RELOC_64;
3539 }
3540 as_bad (_("cannot do %s %u byte relocation"),
3541 sign > 0 ? "signed" : "unsigned", size);
3542 }
3543
3544 return NO_RELOC;
3545 }
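
/* Mapping sketch for the common cases (no "other" relocation
   requested): size 4 + pcrel -> BFD_RELOC_32_PCREL; size 4 signed
   -> BFD_RELOC_X86_64_32S; size 4 unsigned -> BFD_RELOC_32;
   size 8 -> BFD_RELOC_64.  Unsupported combinations are diagnosed
   and NO_RELOC is returned.  */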
3546
3547 /* Here we decide which fixups can be adjusted to make them relative to
3548 the beginning of the section instead of the symbol. Basically we need
3549 to make sure that the dynamic relocations are done correctly, so in
3550 some cases we force the original symbol to be used. */
3551
3552 int
3553 tc_i386_fix_adjustable (fixS *fixP ATTRIBUTE_UNUSED)
3554 {
3555 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
3556 if (!IS_ELF)
3557 return 1;
3558
3559 /* Don't adjust pc-relative references to merge sections in 64-bit
3560 mode. */
3561 if (use_rela_relocations
3562 && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
3563 && fixP->fx_pcrel)
3564 return 0;
3565
3566 /* The x86_64 GOTPCREL relocations are represented as 32-bit PC-relative ones
3567 and changed later by validate_fix. */
3568 if (GOT_symbol && fixP->fx_subsy == GOT_symbol
3569 && fixP->fx_r_type == BFD_RELOC_32_PCREL)
3570 return 0;
3571
3572 /* Adjust_reloc_syms doesn't know about the GOT. Need to keep symbol
3573 for size relocations. */
3574 if (fixP->fx_r_type == BFD_RELOC_SIZE32
3575 || fixP->fx_r_type == BFD_RELOC_SIZE64
3576 || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
3577 || fixP->fx_r_type == BFD_RELOC_386_GOT32
3578 || fixP->fx_r_type == BFD_RELOC_386_GOT32X
3579 || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
3580 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
3581 || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
3582 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
3583 || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
3584 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
3585 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
3586 || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
3587 || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
3588 || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
3589 || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
3590 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
3591 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
3592 || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
3593 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
3594 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
3595 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
3596 || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
3597 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
3598 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
3599 || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
3600 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
3601 || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
3602 || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
3603 || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
3604 || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
3605 return 0;
3606 #endif
3607 return 1;
3608 }
3609
3610 static INLINE bool
3611 want_disp32 (const insn_template *t)
3612 {
3613 return flag_code != CODE_64BIT
3614 || i.prefix[ADDR_PREFIX]
3615 || (t->mnem_off == MN_lea
3616 && (!i.types[1].bitfield.qword
3617 || t->opcode_modifier.size == SIZE32));
3618 }
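
/* E.g. (illustrative): in 64-bit code "lea symbol, %eax" produces
   only a 32-bit result, so a plain zero-extended disp32 is wanted
   rather than the sign-extended form; an 0x67 address-size prefix
   forces the same choice for any template.  */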
3619
3620 static int
3621 intel_float_operand (const char *mnemonic)
3622 {
3623 /* Note that the value returned is meaningful only for opcodes with (memory)
3624 operands, hence the code here is free to improperly handle opcodes that
3625 have no operands (for better performance and smaller code). */
3626
3627 if (mnemonic[0] != 'f')
3628 return 0; /* non-math */
3629
3630 switch (mnemonic[1])
3631 {
3632 /* fclex, fdecstp, fdisi, femms, feni, fincstp, finit, fsetpm, and
3633 the fs segment override prefix are not currently handled, because
3634 no call path can make opcodes without operands get here. */
3635 case 'i':
3636 return 2 /* integer op */;
3637 case 'l':
3638 if (mnemonic[2] == 'd' && (mnemonic[3] == 'c' || mnemonic[3] == 'e'))
3639 return 3; /* fldcw/fldenv */
3640 break;
3641 case 'n':
3642 if (mnemonic[2] != 'o' /* fnop */)
3643 return 3; /* non-waiting control op */
3644 break;
3645 case 'r':
3646 if (mnemonic[2] == 's')
3647 return 3; /* frstor/frstpm */
3648 break;
3649 case 's':
3650 if (mnemonic[2] == 'a')
3651 return 3; /* fsave */
3652 if (mnemonic[2] == 't')
3653 {
3654 switch (mnemonic[3])
3655 {
3656 case 'c': /* fstcw */
3657 case 'd': /* fstdw */
3658 case 'e': /* fstenv */
3659 case 's': /* fsts[gw] */
3660 return 3;
3661 }
3662 }
3663 break;
3664 case 'x':
3665 if (mnemonic[2] == 'r' || mnemonic[2] == 's')
3666 return 0; /* fxsave/fxrstor are not really math ops */
3667 break;
3668 }
3669
3670 return 1;
3671 }
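
/* Sample classifications: "fadd" -> 1 (plain math op), "fiadd" -> 2
   (integer operand), "fldcw"/"fnsave"/"frstor" -> 3 (control ops),
   "fxsave" and any non-'f' mnemonic -> 0.  */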
3672
3673 static INLINE void
3674 install_template (const insn_template *t)
3675 {
3676 unsigned int l;
3677
3678 i.tm = *t;
3679
3680 /* Note that for pseudo prefixes this produces a length of 1. But for them
3681 the length isn't interesting at all. */
3682 for (l = 1; l < 4; ++l)
3683 if (!(t->base_opcode >> (8 * l)))
3684 break;
3685
3686 i.opcode_length = l;
3687 }
3688
3689 /* Build the VEX prefix. */
3690
3691 static void
3692 build_vex_prefix (const insn_template *t)
3693 {
3694 unsigned int register_specifier;
3695 unsigned int vector_length;
3696 unsigned int w;
3697
3698 /* Check register specifier. */
3699 if (i.vex.register_specifier)
3700 {
3701 register_specifier =
3702 ~register_number (i.vex.register_specifier) & 0xf;
3703 gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
3704 }
3705 else
3706 register_specifier = 0xf;
3707
3708 /* Use the 2-byte VEX prefix by swapping destination and source operands
3709 if there is more than one register operand. */
3710 if (i.reg_operands > 1
3711 && i.vec_encoding != vex_encoding_vex3
3712 && i.dir_encoding == dir_encoding_default
3713 && i.operands == i.reg_operands
3714 && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
3715 && i.tm.opcode_space == SPACE_0F
3716 && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
3717 && i.rex == REX_B)
3718 {
3719 unsigned int xchg;
3720
3721 swap_2_operands (0, i.operands - 1);
3722
3723 gas_assert (i.rm.mode == 3);
3724
3725 i.rex = REX_R;
3726 xchg = i.rm.regmem;
3727 i.rm.regmem = i.rm.reg;
3728 i.rm.reg = xchg;
3729
3730 if (i.tm.opcode_modifier.d)
3731 i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
3732 ? Opcode_ExtD : Opcode_SIMD_IntD;
3733 else /* Use the next insn. */
3734 install_template (&t[1]);
3735 }
3736
3737 /* Use 2-byte VEX prefix by swapping commutative source operands if there
3738 are no memory operands and at least 3 register ones. */
3739 if (i.reg_operands >= 3
3740 && i.vec_encoding != vex_encoding_vex3
3741 && i.reg_operands == i.operands - i.imm_operands
3742 && i.tm.opcode_modifier.vex
3743 && i.tm.opcode_modifier.commutative
3744 && (i.tm.opcode_modifier.sse2avx
3745 || (optimize > 1 && !i.no_optimize))
3746 && i.rex == REX_B
3747 && i.vex.register_specifier
3748 && !(i.vex.register_specifier->reg_flags & RegRex))
3749 {
3750 unsigned int xchg = i.operands - i.reg_operands;
3751
3752 gas_assert (i.tm.opcode_space == SPACE_0F);
3753 gas_assert (!i.tm.opcode_modifier.sae);
3754 gas_assert (operand_type_equal (&i.types[i.operands - 2],
3755 &i.types[i.operands - 3]));
3756 gas_assert (i.rm.mode == 3);
3757
3758 swap_2_operands (xchg, xchg + 1);
3759
3760 i.rex = 0;
3761 xchg = i.rm.regmem | 8;
3762 i.rm.regmem = ~register_specifier & 0xf;
3763 gas_assert (!(i.rm.regmem & 8));
3764 i.vex.register_specifier += xchg - i.rm.regmem;
3765 register_specifier = ~xchg & 0xf;
3766 }
3767
3768 if (i.tm.opcode_modifier.vex == VEXScalar)
3769 vector_length = avxscalar;
3770 else if (i.tm.opcode_modifier.vex == VEX256)
3771 vector_length = 1;
3772 else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
3773 vector_length = 0;
3774 else
3775 {
3776 unsigned int op;
3777
3778 /* Determine vector length from the last multi-length vector
3779 operand. */
3780 vector_length = 0;
3781 for (op = t->operands; op--;)
3782 if (t->operand_types[op].bitfield.xmmword
3783 && t->operand_types[op].bitfield.ymmword
3784 && i.types[op].bitfield.ymmword)
3785 {
3786 vector_length = 1;
3787 break;
3788 }
3789 }
3790
3791 /* Check the REX.W bit and VEXW. */
3792 if (i.tm.opcode_modifier.vexw == VEXWIG)
3793 w = (vexwig == vexw1 || (i.rex & REX_W)) ? 1 : 0;
3794 else if (i.tm.opcode_modifier.vexw)
3795 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
3796 else
3797 w = (flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1) ? 1 : 0;
3798
3799 /* Use 2-byte VEX prefix if possible. */
3800 if (w == 0
3801 && i.vec_encoding != vex_encoding_vex3
3802 && i.tm.opcode_space == SPACE_0F
3803 && (i.rex & (REX_W | REX_X | REX_B)) == 0)
3804 {
3805 /* 2-byte VEX prefix. */
3806 unsigned int r;
3807
3808 i.vex.length = 2;
3809 i.vex.bytes[0] = 0xc5;
3810
3811 /* Check the REX.R bit. */
3812 r = (i.rex & REX_R) ? 0 : 1;
3813 i.vex.bytes[1] = (r << 7
3814 | register_specifier << 3
3815 | vector_length << 2
3816 | i.tm.opcode_modifier.opcodeprefix);
3817 }
3818 else
3819 {
3820 /* 3-byte VEX prefix. */
3821 i.vex.length = 3;
3822
3823 switch (i.tm.opcode_space)
3824 {
3825 case SPACE_0F:
3826 case SPACE_0F38:
3827 case SPACE_0F3A:
3828 i.vex.bytes[0] = 0xc4;
3829 break;
3830 case SPACE_XOP08:
3831 case SPACE_XOP09:
3832 case SPACE_XOP0A:
3833 i.vex.bytes[0] = 0x8f;
3834 break;
3835 default:
3836 abort ();
3837 }
3838
3839 /* The high 3 bits of the second VEX byte are 1's complement
3840 of RXB bits from REX. */
3841 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3842 | (!dot_insn () ? i.tm.opcode_space
3843 : i.insn_opcode_space);
3844
3845 i.vex.bytes[2] = (w << 7
3846 | register_specifier << 3
3847 | vector_length << 2
3848 | i.tm.opcode_modifier.opcodeprefix);
3849 }
3850 }
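
/* Byte-level example (illustrative): a W0, 128-bit, 0F-space insn
   with neither REX.X nor REX.B gets the two-byte form, e.g.
   "vandps %xmm2, %xmm1, %xmm0" -> c5 f0 54 c2 (R=1, vvvv=~1, L=0,
   pp=0).  Requiring W1, REX.X/B, or a non-0F opcode space forces
   the three-byte c4 (or, for XOP, 8f) form instead.  */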
3851
3852 static INLINE bool
3853 is_evex_encoding (const insn_template *t)
3854 {
3855 return t->opcode_modifier.evex || t->opcode_modifier.disp8memshift
3856 || t->opcode_modifier.broadcast || t->opcode_modifier.masking
3857 || t->opcode_modifier.sae;
3858 }
3859
3860 static INLINE bool
3861 is_any_vex_encoding (const insn_template *t)
3862 {
3863 return t->opcode_modifier.vex || is_evex_encoding (t);
3864 }
3865
3866 static unsigned int
3867 get_broadcast_bytes (const insn_template *t, bool diag)
3868 {
3869 unsigned int op, bytes;
3870 const i386_operand_type *types;
3871
3872 if (i.broadcast.type)
3873 return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
3874
3875 gas_assert (intel_syntax);
3876
3877 for (op = 0; op < t->operands; ++op)
3878 if (t->operand_types[op].bitfield.baseindex)
3879 break;
3880
3881 gas_assert (op < t->operands);
3882
3883 if (t->opcode_modifier.evex
3884 && t->opcode_modifier.evex != EVEXDYN)
3885 switch (i.broadcast.bytes)
3886 {
3887 case 1:
3888 if (t->operand_types[op].bitfield.word)
3889 return 2;
3890 /* Fall through. */
3891 case 2:
3892 if (t->operand_types[op].bitfield.dword)
3893 return 4;
3894 /* Fall through. */
3895 case 4:
3896 if (t->operand_types[op].bitfield.qword)
3897 return 8;
3898 /* Fall through. */
3899 case 8:
3900 if (t->operand_types[op].bitfield.xmmword)
3901 return 16;
3902 if (t->operand_types[op].bitfield.ymmword)
3903 return 32;
3904 if (t->operand_types[op].bitfield.zmmword)
3905 return 64;
3906 /* Fall through. */
3907 default:
3908 abort ();
3909 }
3910
3911 gas_assert (op + 1 < t->operands);
3912
3913 if (t->operand_types[op + 1].bitfield.xmmword
3914 + t->operand_types[op + 1].bitfield.ymmword
3915 + t->operand_types[op + 1].bitfield.zmmword > 1)
3916 {
3917 types = &i.types[op + 1];
3918 diag = false;
3919 }
3920 else /* Ambiguous - guess with a preference to non-AVX512VL forms. */
3921 types = &t->operand_types[op];
3922
3923 if (types->bitfield.zmmword)
3924 bytes = 64;
3925 else if (types->bitfield.ymmword)
3926 bytes = 32;
3927 else
3928 bytes = 16;
3929
3930 if (diag)
3931 as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
3932 insn_name (t), bytes * 8);
3933
3934 return bytes;
3935 }
3936
3937 /* Build the EVEX prefix. */
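/* A sketch of the standard 4-byte EVEX layout as assembled below:
     0x62 | ~R ~X ~B ~R' 0 mmm | W ~vvvv 1 pp | z L'L b ~V' aaa
   where mmm selects the opcode space, pp the implied legacy prefix,
   aaa the mask register, and barred fields are stored inverted.  */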
3938
3939 static void
3940 build_evex_prefix (void)
3941 {
3942 unsigned int register_specifier, w;
3943 rex_byte vrex_used = 0;
3944
3945 /* Check register specifier. */
3946 if (i.vex.register_specifier)
3947 {
3948 gas_assert ((i.vrex & REX_X) == 0);
3949
3950 register_specifier = i.vex.register_specifier->reg_num;
3951 if ((i.vex.register_specifier->reg_flags & RegRex))
3952 register_specifier += 8;
3953 /* The upper 16 registers are encoded in the fourth byte of the
3954 EVEX prefix. */
3955 if (!(i.vex.register_specifier->reg_flags & RegVRex))
3956 i.vex.bytes[3] = 0x8;
3957 register_specifier = ~register_specifier & 0xf;
3958 }
3959 else
3960 {
3961 register_specifier = 0xf;
3962
3963 /* Encode upper 16 vector index register in the fourth byte of
3964 the EVEX prefix. */
3965 if (!(i.vrex & REX_X))
3966 i.vex.bytes[3] = 0x8;
3967 else
3968 vrex_used |= REX_X;
3969 }
3970
3971 /* 4 byte EVEX prefix. */
3972 i.vex.length = 4;
3973 i.vex.bytes[0] = 0x62;
3974
3975   /* The high 3 bits of the second EVEX byte are the 1's complement of
3976      the RXB bits from REX.  */
3977 gas_assert (i.tm.opcode_space >= SPACE_0F);
3978 gas_assert (i.tm.opcode_space <= SPACE_EVEXMAP6);
3979 i.vex.bytes[1] = ((~i.rex & 7) << 5)
3980 | (!dot_insn () ? i.tm.opcode_space
3981 : i.insn_opcode_space);
3982
3983   /* The fifth bit of the second EVEX byte is the 1's complement of the
3984 REX_R bit in VREX. */
3985 if (!(i.vrex & REX_R))
3986 i.vex.bytes[1] |= 0x10;
3987 else
3988 vrex_used |= REX_R;
3989
3990 if ((i.reg_operands + i.imm_operands) == i.operands)
3991 {
3992 /* When all operands are registers, the REX_X bit in REX is not
3993 used. We reuse it to encode the upper 16 registers, which is
3994 indicated by the REX_B bit in VREX. The REX_X bit is encoded
3995        as 1's complement.  */
3996 if ((i.vrex & REX_B))
3997 {
3998 vrex_used |= REX_B;
3999 i.vex.bytes[1] &= ~0x40;
4000 }
4001 }
4002
4003 /* EVEX instructions shouldn't need the REX prefix. */
4004 i.vrex &= ~vrex_used;
4005 gas_assert (i.vrex == 0);
4006
4007 /* Check the REX.W bit and VEXW. */
4008 if (i.tm.opcode_modifier.vexw == VEXWIG)
4009 w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
4010 else if (i.tm.opcode_modifier.vexw)
4011 w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
4012 else
4013 w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;
4014
4015 /* The third byte of the EVEX prefix. */
4016 i.vex.bytes[2] = ((w << 7)
4017 | (register_specifier << 3)
4018 | 4 /* Encode the U bit. */
4019 | i.tm.opcode_modifier.opcodeprefix);
4020
4021 /* The fourth byte of the EVEX prefix. */
4022 /* The zeroing-masking bit. */
4023 if (i.mask.reg && i.mask.zeroing)
4024 i.vex.bytes[3] |= 0x80;
4025
4026   /* EVEX.b is shared with embedded rounding / SAE; without RC it is
       set only for broadcast, and L'L encodes the vector length.  */
4027 if (i.rounding.type == rc_none)
4028 {
4029 /* Encode the vector length. */
4030 unsigned int vec_length;
4031
4032 if (!i.tm.opcode_modifier.evex
4033 || i.tm.opcode_modifier.evex == EVEXDYN)
4034 {
4035 unsigned int op;
4036
4037 /* Determine vector length from the last multi-length vector
4038 operand. */
4039 for (op = i.operands; op--;)
4040 if (i.tm.operand_types[op].bitfield.xmmword
4041 + i.tm.operand_types[op].bitfield.ymmword
4042 + i.tm.operand_types[op].bitfield.zmmword > 1)
4043 {
4044 if (i.types[op].bitfield.zmmword)
4045 {
4046 i.tm.opcode_modifier.evex = EVEX512;
4047 break;
4048 }
4049 else if (i.types[op].bitfield.ymmword)
4050 {
4051 i.tm.opcode_modifier.evex = EVEX256;
4052 break;
4053 }
4054 else if (i.types[op].bitfield.xmmword)
4055 {
4056 i.tm.opcode_modifier.evex = EVEX128;
4057 break;
4058 }
4059 else if ((i.broadcast.type || i.broadcast.bytes)
4060 && op == i.broadcast.operand)
4061 {
4062 switch (get_broadcast_bytes (&i.tm, true))
4063 {
4064 case 64:
4065 i.tm.opcode_modifier.evex = EVEX512;
4066 break;
4067 case 32:
4068 i.tm.opcode_modifier.evex = EVEX256;
4069 break;
4070 case 16:
4071 i.tm.opcode_modifier.evex = EVEX128;
4072 break;
4073 default:
4074 abort ();
4075 }
4076 break;
4077 }
4078 }
4079
4080 if (op >= MAX_OPERANDS)
4081 abort ();
4082 }
4083
4084 switch (i.tm.opcode_modifier.evex)
4085 {
4086 case EVEXLIG: /* LL' is ignored */
4087 vec_length = evexlig << 5;
4088 break;
4089 case EVEX128:
4090 vec_length = 0 << 5;
4091 break;
4092 case EVEX256:
4093 vec_length = 1 << 5;
4094 break;
4095 case EVEX512:
4096 vec_length = 2 << 5;
4097 break;
4098 case EVEX_L3:
4099 if (dot_insn ())
4100 {
4101 vec_length = 3 << 5;
4102 break;
4103 }
4104 /* Fall through. */
4105 default:
4106 abort ();
4107 break;
4108 }
4109 i.vex.bytes[3] |= vec_length;
4110 /* Encode the broadcast bit. */
4111 if (i.broadcast.type || i.broadcast.bytes)
4112 i.vex.bytes[3] |= 0x10;
4113 }
4114 else if (i.rounding.type != saeonly)
4115 i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
4116 else
4117 i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
4118
4119 if (i.mask.reg)
4120 i.vex.bytes[3] |= i.mask.reg->reg_num;
4121 }
4122
4123 static void
4124 process_immext (void)
4125 {
4126 expressionS *exp;
4127
4128 /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
4129 which is coded in the same place as an 8-bit immediate field
4130 would be. Here we fake an 8-bit immediate operand from the
4131 opcode suffix stored in tm.extension_opcode.
4132
4133      AVX instructions also use this encoding for some
4134      3-operand instructions.  */
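  /* As an illustrative example, 3DNow! pfadd is 0F 0F /r with a trailing
     0x9E opcode-suffix byte, which is emitted here as a fake imm8.  */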
4135
4136 gas_assert (i.imm_operands <= 1
4137 && (i.operands <= 2
4138 || (is_any_vex_encoding (&i.tm)
4139 && i.operands <= 4)));
4140
4141 exp = &im_expressions[i.imm_operands++];
4142 i.op[i.operands].imms = exp;
4143 i.types[i.operands].bitfield.imm8 = 1;
4144 i.operands++;
4145 exp->X_op = O_constant;
4146 exp->X_add_number = i.tm.extension_opcode;
4147 i.tm.extension_opcode = None;
4148 }
4149
4150
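/* Check whether the active HLE prefix (xacquire / xrelease) is permitted
   with the current instruction, e.g. (illustrative use)
       xacquire lock incl (%rax)
   Return 1 if it is, 0 (after issuing a diagnostic) otherwise.  */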
4151 static int
4152 check_hle (void)
4153 {
4154 switch (i.tm.opcode_modifier.prefixok)
4155 {
4156 default:
4157 abort ();
4158 case PrefixLock:
4159 case PrefixNone:
4160 case PrefixNoTrack:
4161 case PrefixRep:
4162 as_bad (_("invalid instruction `%s' after `%s'"),
4163 insn_name (&i.tm), i.hle_prefix);
4164 return 0;
4165 case PrefixHLELock:
4166 if (i.prefix[LOCK_PREFIX])
4167 return 1;
4168 as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
4169 return 0;
4170 case PrefixHLEAny:
4171 return 1;
4172 case PrefixHLERelease:
4173 if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
4174 {
4175 as_bad (_("instruction `%s' after `xacquire' not allowed"),
4176 insn_name (&i.tm));
4177 return 0;
4178 }
4179 if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
4180 {
4181 as_bad (_("memory destination needed for instruction `%s'"
4182 " after `xrelease'"), insn_name (&i.tm));
4183 return 0;
4184 }
4185 return 1;
4186 }
4187 }
4188
4189 /* Encode aligned vector move as unaligned vector move (in support of
   -muse-unaligned-vector-move).  */
4190
4191 static void
4192 encode_with_unaligned_vector_move (void)
4193 {
4194 switch (i.tm.base_opcode)
4195 {
4196 case 0x28: /* Load instructions. */
4197 case 0x29: /* Store instructions. */
4198       /* movaps/movapd/vmovaps/vmovapd
	 -> movups/movupd/vmovups/vmovupd.  */
4199 if (i.tm.opcode_space == SPACE_0F
4200 && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
4201 i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
4202 break;
4203 case 0x6f: /* Load instructions. */
4204 case 0x7f: /* Store instructions. */
4205       /* movdqa/vmovdqa/vmovdqa64/vmovdqa32
	 -> movdqu/vmovdqu/vmovdqu64/vmovdqu32.  */
4206 if (i.tm.opcode_space == SPACE_0F
4207 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
4208 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4209 break;
4210 default:
4211 break;
4212 }
4213 }
4214
4215 /* Try the shortest encoding by shortening operand size. */
4216
4217 static void
4218 optimize_encoding (void)
4219 {
4220 unsigned int j;
4221
4222 if (i.tm.mnem_off == MN_lea)
4223 {
4224 /* Optimize: -O:
4225 lea symbol, %rN -> mov $symbol, %rN
4226 lea (%rM), %rN -> mov %rM, %rN
4227 lea (,%rM,1), %rN -> mov %rM, %rN
4228
4229 and in 32-bit mode for 16-bit addressing
4230
4231 lea (%rM), %rN -> movzx %rM, %rN
4232
4233 and in 64-bit mode zap 32-bit addressing in favor of using a
4234 32-bit (or less) destination.
4235 */
4236 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
4237 {
4238 if (!i.op[1].regs->reg_type.bitfield.word)
4239 i.tm.opcode_modifier.size = SIZE32;
4240 i.prefix[ADDR_PREFIX] = 0;
4241 }
4242
4243 if (!i.index_reg && !i.base_reg)
4244 {
4245 /* Handle:
4246 lea symbol, %rN -> mov $symbol, %rN
4247 */
4248 if (flag_code == CODE_64BIT)
4249 {
4250 /* Don't transform a relocation to a 16-bit one. */
4251 if (i.op[0].disps
4252 && i.op[0].disps->X_op != O_constant
4253 && i.op[1].regs->reg_type.bitfield.word)
4254 return;
4255
4256 if (!i.op[1].regs->reg_type.bitfield.qword
4257 || i.tm.opcode_modifier.size == SIZE32)
4258 {
4259 i.tm.base_opcode = 0xb8;
4260 i.tm.opcode_modifier.modrm = 0;
4261 if (!i.op[1].regs->reg_type.bitfield.word)
4262 i.types[0].bitfield.imm32 = 1;
4263 else
4264 {
4265 i.tm.opcode_modifier.size = SIZE16;
4266 i.types[0].bitfield.imm16 = 1;
4267 }
4268 }
4269 else
4270 {
4271 /* Subject to further optimization below. */
4272 i.tm.base_opcode = 0xc7;
4273 i.tm.extension_opcode = 0;
4274 i.types[0].bitfield.imm32s = 1;
4275 i.types[0].bitfield.baseindex = 0;
4276 }
4277 }
4278 /* Outside of 64-bit mode address and operand sizes have to match if
4279 a relocation is involved, as otherwise we wouldn't (currently) or
4280 even couldn't express the relocation correctly. */
4281 else if (i.op[0].disps
4282 && i.op[0].disps->X_op != O_constant
4283 && ((!i.prefix[ADDR_PREFIX])
4284 != (flag_code == CODE_32BIT
4285 ? i.op[1].regs->reg_type.bitfield.dword
4286 : i.op[1].regs->reg_type.bitfield.word)))
4287 return;
4288 /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
4289 destination is going to grow encoding size. */
4290 else if (flag_code == CODE_16BIT
4291 && (optimize <= 1 || optimize_for_space)
4292 && !i.prefix[ADDR_PREFIX]
4293 && i.op[1].regs->reg_type.bitfield.dword)
4294 return;
4295 else
4296 {
4297 i.tm.base_opcode = 0xb8;
4298 i.tm.opcode_modifier.modrm = 0;
4299 if (i.op[1].regs->reg_type.bitfield.dword)
4300 i.types[0].bitfield.imm32 = 1;
4301 else
4302 i.types[0].bitfield.imm16 = 1;
4303
4304 if (i.op[0].disps
4305 && i.op[0].disps->X_op == O_constant
4306 && i.op[1].regs->reg_type.bitfield.dword
4307 /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
4308 GCC 5. */
4309 && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
4310 i.op[0].disps->X_add_number &= 0xffff;
4311 }
4312
4313 i.tm.operand_types[0] = i.types[0];
4314 i.imm_operands = 1;
4315 if (!i.op[0].imms)
4316 {
4317 i.op[0].imms = &im_expressions[0];
4318 i.op[0].imms->X_op = O_absent;
4319 }
4320 }
4321 else if (i.op[0].disps
4322 && (i.op[0].disps->X_op != O_constant
4323 || i.op[0].disps->X_add_number))
4324 return;
4325 else
4326 {
4327 /* Handle:
4328 lea (%rM), %rN -> mov %rM, %rN
4329 lea (,%rM,1), %rN -> mov %rM, %rN
4330 lea (%rM), %rN -> movzx %rM, %rN
4331 */
4332 const reg_entry *addr_reg;
4333
4334 if (!i.index_reg && i.base_reg->reg_num != RegIP)
4335 addr_reg = i.base_reg;
4336 else if (!i.base_reg
4337 && i.index_reg->reg_num != RegIZ
4338 && !i.log2_scale_factor)
4339 addr_reg = i.index_reg;
4340 else
4341 return;
4342
4343 if (addr_reg->reg_type.bitfield.word
4344 && i.op[1].regs->reg_type.bitfield.dword)
4345 {
4346 if (flag_code != CODE_32BIT)
4347 return;
4348 i.tm.opcode_space = SPACE_0F;
4349 i.tm.base_opcode = 0xb7;
4350 }
4351 else
4352 i.tm.base_opcode = 0x8b;
4353
4354 if (addr_reg->reg_type.bitfield.dword
4355 && i.op[1].regs->reg_type.bitfield.qword)
4356 i.tm.opcode_modifier.size = SIZE32;
4357
4358 i.op[0].regs = addr_reg;
4359 i.reg_operands = 2;
4360 }
4361
4362 i.mem_operands = 0;
4363 i.disp_operands = 0;
4364 i.prefix[ADDR_PREFIX] = 0;
4365 i.prefix[SEG_PREFIX] = 0;
4366 i.seg[0] = NULL;
4367 }
4368
4369 if (optimize_for_space
4370 && i.tm.mnem_off == MN_test
4371 && i.reg_operands == 1
4372 && i.imm_operands == 1
4373 && !i.types[1].bitfield.byte
4374 && i.op[0].imms->X_op == O_constant
4375 && fits_in_imm7 (i.op[0].imms->X_add_number))
4376 {
4377 /* Optimize: -Os:
4378 test $imm7, %r64/%r32/%r16 -> test $imm7, %r8
4379 */
4380 unsigned int base_regnum = i.op[1].regs->reg_num;
4381 if (flag_code == CODE_64BIT || base_regnum < 4)
4382 {
4383 i.types[1].bitfield.byte = 1;
4384 /* Ignore the suffix. */
4385 i.suffix = 0;
4386 /* Convert to byte registers. */
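	  /* The offsets below rely on the fixed ordering of entries in the
	     register table, so that subtracting a constant number of
	     entries maps a word/dword/qword register onto its 8-bit
	     counterpart.  */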
4387 if (i.types[1].bitfield.word)
4388 j = 16;
4389 else if (i.types[1].bitfield.dword)
4390 j = 32;
4391 else
4392 j = 48;
4393 if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
4394 j += 8;
4395 i.op[1].regs -= j;
4396 }
4397 }
4398 else if (flag_code == CODE_64BIT
4399 && i.tm.opcode_space == SPACE_BASE
4400 && ((i.types[1].bitfield.qword
4401 && i.reg_operands == 1
4402 && i.imm_operands == 1
4403 && i.op[0].imms->X_op == O_constant
4404 && ((i.tm.base_opcode == 0xb8
4405 && i.tm.extension_opcode == None
4406 && fits_in_unsigned_long (i.op[0].imms->X_add_number))
4407 || (fits_in_imm31 (i.op[0].imms->X_add_number)
4408 && (i.tm.base_opcode == 0x24
4409 || (i.tm.base_opcode == 0x80
4410 && i.tm.extension_opcode == 0x4)
4411 || i.tm.mnem_off == MN_test
4412 || ((i.tm.base_opcode | 1) == 0xc7
4413 && i.tm.extension_opcode == 0x0)))
4414 || (fits_in_imm7 (i.op[0].imms->X_add_number)
4415 && i.tm.base_opcode == 0x83
4416 && i.tm.extension_opcode == 0x4)))
4417 || (i.types[0].bitfield.qword
4418 && ((i.reg_operands == 2
4419 && i.op[0].regs == i.op[1].regs
4420 && (i.tm.mnem_off == MN_xor
4421 || i.tm.mnem_off == MN_sub))
4422 || i.tm.mnem_off == MN_clr))))
4423 {
4424 /* Optimize: -O:
4425 andq $imm31, %r64 -> andl $imm31, %r32
4426 andq $imm7, %r64 -> andl $imm7, %r32
4427 testq $imm31, %r64 -> testl $imm31, %r32
4428 xorq %r64, %r64 -> xorl %r32, %r32
4429 subq %r64, %r64 -> subl %r32, %r32
4430 movq $imm31, %r64 -> movl $imm31, %r32
4431 movq $imm32, %r64 -> movl $imm32, %r32
4432 */
4433 i.tm.opcode_modifier.size = SIZE32;
4434 if (i.imm_operands)
4435 {
4436 i.types[0].bitfield.imm32 = 1;
4437 i.types[0].bitfield.imm32s = 0;
4438 i.types[0].bitfield.imm64 = 0;
4439 }
4440 else
4441 {
4442 i.types[0].bitfield.dword = 1;
4443 i.types[0].bitfield.qword = 0;
4444 }
4445 i.types[1].bitfield.dword = 1;
4446 i.types[1].bitfield.qword = 0;
4447 if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
4448 {
4449 /* Handle
4450 movq $imm31, %r64 -> movl $imm31, %r32
4451 movq $imm32, %r64 -> movl $imm32, %r32
4452 */
4453 i.tm.operand_types[0].bitfield.imm32 = 1;
4454 i.tm.operand_types[0].bitfield.imm32s = 0;
4455 i.tm.operand_types[0].bitfield.imm64 = 0;
4456 if ((i.tm.base_opcode | 1) == 0xc7)
4457 {
4458 /* Handle
4459 movq $imm31, %r64 -> movl $imm31, %r32
4460 */
4461 i.tm.base_opcode = 0xb8;
4462 i.tm.extension_opcode = None;
4463 i.tm.opcode_modifier.w = 0;
4464 i.tm.opcode_modifier.modrm = 0;
4465 }
4466 }
4467 }
4468 else if (optimize > 1
4469 && !optimize_for_space
4470 && i.reg_operands == 2
4471 && i.op[0].regs == i.op[1].regs
4472 && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
4473 && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
4474 {
4475 /* Optimize: -O2:
4476 andb %rN, %rN -> testb %rN, %rN
4477 andw %rN, %rN -> testw %rN, %rN
4478 andq %rN, %rN -> testq %rN, %rN
4479 orb %rN, %rN -> testb %rN, %rN
4480 orw %rN, %rN -> testw %rN, %rN
4481 orq %rN, %rN -> testq %rN, %rN
4482
4483 and outside of 64-bit mode
4484
4485 andl %rN, %rN -> testl %rN, %rN
4486 orl %rN, %rN -> testl %rN, %rN
4487 */
4488 i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
4489 }
4490 else if (i.tm.base_opcode == 0xba
4491 && i.tm.opcode_space == SPACE_0F
4492 && i.reg_operands == 1
4493 && i.op[0].imms->X_op == O_constant
4494 && i.op[0].imms->X_add_number >= 0)
4495 {
4496 /* Optimize: -O:
4497 btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
4498 btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
4499 btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
4500
4501 	 With <BT> one of bts, btr, and btc also:
4502 	 <BT>w $n, %rN -> <BT>l $n, %rN (in 32-bit mode, n < 16)
4503 	 <BT>l $n, %rN -> <BT>w $n, %rN (in 16-bit mode, n < 16)
4504 */
4505 switch (flag_code)
4506 {
4507 case CODE_64BIT:
4508 if (i.tm.extension_opcode != 4)
4509 break;
4510 if (i.types[1].bitfield.qword
4511 && i.op[0].imms->X_add_number < 32
4512 && !(i.op[1].regs->reg_flags & RegRex))
4513 i.tm.opcode_modifier.size = SIZE32;
4514 /* Fall through. */
4515 case CODE_32BIT:
4516 if (i.types[1].bitfield.word
4517 && i.op[0].imms->X_add_number < 16)
4518 i.tm.opcode_modifier.size = SIZE32;
4519 break;
4520 case CODE_16BIT:
4521 if (i.op[0].imms->X_add_number < 16)
4522 i.tm.opcode_modifier.size = SIZE16;
4523 break;
4524 }
4525 }
4526 else if (i.reg_operands == 3
4527 && i.op[0].regs == i.op[1].regs
4528 && !i.types[2].bitfield.xmmword
4529 && (i.tm.opcode_modifier.vex
4530 || ((!i.mask.reg || i.mask.zeroing)
4531 && is_evex_encoding (&i.tm)
4532 && (i.vec_encoding != vex_encoding_evex
4533 || cpu_arch_isa_flags.bitfield.cpuavx512vl
4534 || is_cpu (&i.tm, CpuAVX512VL)
4535 || (i.tm.operand_types[2].bitfield.zmmword
4536 && i.types[2].bitfield.ymmword))))
4537 && i.tm.opcode_space == SPACE_0F
4538 && ((i.tm.base_opcode | 2) == 0x57
4539 || i.tm.base_opcode == 0xdf
4540 || i.tm.base_opcode == 0xef
4541 || (i.tm.base_opcode | 3) == 0xfb
4542 || i.tm.base_opcode == 0x42
4543 || i.tm.base_opcode == 0x47))
4544 {
4545 /* Optimize: -O1:
4546 VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
4547 vpsubq and vpsubw:
4548 EVEX VOP %zmmM, %zmmM, %zmmN
4549 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4550 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4551 EVEX VOP %ymmM, %ymmM, %ymmN
4552 -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
4553 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4554 VEX VOP %ymmM, %ymmM, %ymmN
4555 -> VEX VOP %xmmM, %xmmM, %xmmN
4556 VOP, one of vpandn and vpxor:
4557 VEX VOP %ymmM, %ymmM, %ymmN
4558 -> VEX VOP %xmmM, %xmmM, %xmmN
4559 VOP, one of vpandnd and vpandnq:
4560 EVEX VOP %zmmM, %zmmM, %zmmN
4561 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4562 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4563 EVEX VOP %ymmM, %ymmM, %ymmN
4564 -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
4565 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4566 VOP, one of vpxord and vpxorq:
4567 EVEX VOP %zmmM, %zmmM, %zmmN
4568 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4569 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4570 EVEX VOP %ymmM, %ymmM, %ymmN
4571 -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
4572 -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
4573 VOP, one of kxord and kxorq:
4574 VEX VOP %kM, %kM, %kN
4575 -> VEX kxorw %kM, %kM, %kN
4576 VOP, one of kandnd and kandnq:
4577 VEX VOP %kM, %kM, %kN
4578 -> VEX kandnw %kM, %kM, %kN
4579 */
4580 if (is_evex_encoding (&i.tm))
4581 {
4582 if (i.vec_encoding != vex_encoding_evex)
4583 {
4584 i.tm.opcode_modifier.vex = VEX128;
4585 i.tm.opcode_modifier.vexw = VEXW0;
4586 i.tm.opcode_modifier.evex = 0;
4587 }
4588 else if (optimize > 1)
4589 i.tm.opcode_modifier.evex = EVEX128;
4590 else
4591 return;
4592 }
4593 else if (i.tm.operand_types[0].bitfield.class == RegMask)
4594 {
4595 i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
4596 i.tm.opcode_modifier.vexw = VEXW0;
4597 }
4598 else
4599 i.tm.opcode_modifier.vex = VEX128;
4600
4601 if (i.tm.opcode_modifier.vex)
4602 for (j = 0; j < 3; j++)
4603 {
4604 i.types[j].bitfield.xmmword = 1;
4605 i.types[j].bitfield.ymmword = 0;
4606 }
4607 }
4608 else if (i.vec_encoding != vex_encoding_evex
4609 && !i.types[0].bitfield.zmmword
4610 && !i.types[1].bitfield.zmmword
4611 && !i.mask.reg
4612 && !i.broadcast.type
4613 && !i.broadcast.bytes
4614 && is_evex_encoding (&i.tm)
4615 && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4616 || (i.tm.base_opcode & ~4) == 0xdb
4617 || (i.tm.base_opcode & ~4) == 0xeb)
4618 && i.tm.extension_opcode == None)
4619 {
4620 /* Optimize: -O1:
4621 VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
4622 vmovdqu32 and vmovdqu64:
4623 EVEX VOP %xmmM, %xmmN
4624 -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
4625 EVEX VOP %ymmM, %ymmN
4626 -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
4627 EVEX VOP %xmmM, mem
4628 -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
4629 EVEX VOP %ymmM, mem
4630 -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
4631 EVEX VOP mem, %xmmN
4632 	     -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
4633 EVEX VOP mem, %ymmN
4634 -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
4635 VOP, one of vpand, vpandn, vpor, vpxor:
4636 EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
4637 -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
4638 EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
4639 -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
4640 EVEX VOP{d,q} mem, %xmmM, %xmmN
4641 -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
4642 EVEX VOP{d,q} mem, %ymmM, %ymmN
4643 -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
4644 */
4645 for (j = 0; j < i.operands; j++)
4646 if (operand_type_check (i.types[j], disp)
4647 && i.op[j].disps->X_op == O_constant)
4648 {
4649 /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
4650 has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
4651 bytes, we choose EVEX Disp8 over VEX Disp32. */
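	      /* As an illustration: with a 16-byte memory operand, a
		 displacement of 256 is Disp8 under EVEX (256 >> 4 == 16)
		 but needs Disp32 under VEX, so converting would lose.  */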
4652 int evex_disp8, vex_disp8;
4653 unsigned int memshift = i.memshift;
4654 offsetT n = i.op[j].disps->X_add_number;
4655
4656 evex_disp8 = fits_in_disp8 (n);
4657 i.memshift = 0;
4658 vex_disp8 = fits_in_disp8 (n);
4659 if (evex_disp8 != vex_disp8)
4660 {
4661 i.memshift = memshift;
4662 return;
4663 }
4664
4665 i.types[j].bitfield.disp8 = vex_disp8;
4666 break;
4667 }
4668 if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
4669 && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
4670 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
4671 i.tm.opcode_modifier.vex
4672 = i.types[0].bitfield.ymmword ? VEX256 : VEX128;
4673 i.tm.opcode_modifier.vexw = VEXW0;
4674 /* VPAND, VPOR, and VPXOR are commutative. */
4675 if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
4676 i.tm.opcode_modifier.commutative = 1;
4677 i.tm.opcode_modifier.evex = 0;
4678 i.tm.opcode_modifier.masking = 0;
4679 i.tm.opcode_modifier.broadcast = 0;
4680 i.tm.opcode_modifier.disp8memshift = 0;
4681 i.memshift = 0;
4682 if (j < i.operands)
4683 i.types[j].bitfield.disp8
4684 = fits_in_disp8 (i.op[j].disps->X_add_number);
4685 }
4686 else if (optimize_for_space
4687 && i.tm.base_opcode == 0x29
4688 && i.tm.opcode_space == SPACE_0F38
4689 && i.operands == i.reg_operands
4690 && i.op[0].regs == i.op[1].regs
4691 && (!i.tm.opcode_modifier.vex
4692 || !(i.op[0].regs->reg_flags & RegRex))
4693 && !is_evex_encoding (&i.tm))
4694 {
4695 /* Optimize: -Os:
4696 pcmpeqq %xmmN, %xmmN -> pcmpeqd %xmmN, %xmmN
4697 vpcmpeqq %xmmN, %xmmN, %xmmM -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
4698 vpcmpeqq %ymmN, %ymmN, %ymmM -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
4699 */
4700 i.tm.opcode_space = SPACE_0F;
4701 i.tm.base_opcode = 0x76;
4702 }
4703 else if (((i.tm.base_opcode >= 0x64
4704 && i.tm.base_opcode <= 0x66
4705 && i.tm.opcode_space == SPACE_0F)
4706 || (i.tm.base_opcode == 0x37
4707 && i.tm.opcode_space == SPACE_0F38))
4708 && i.operands == i.reg_operands
4709 && i.op[0].regs == i.op[1].regs
4710 && !is_evex_encoding (&i.tm))
4711 {
4712 /* Optimize: -O:
4713 pcmpgt[bwd] %mmN, %mmN -> pxor %mmN, %mmN
4714 pcmpgt[bwdq] %xmmN, %xmmN -> pxor %xmmN, %xmmN
4715 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
4716 vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
4717 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
4718 vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
4719 */
4720 i.tm.opcode_space = SPACE_0F;
4721 i.tm.base_opcode = 0xef;
4722 if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
4723 {
4724 if (i.operands == 2)
4725 {
4726 gas_assert (i.tm.opcode_modifier.sse2avx);
4727
4728 i.operands = 3;
4729 i.reg_operands = 3;
4730 i.tm.operands = 3;
4731
4732 i.op[2].regs = i.op[0].regs;
4733 i.types[2] = i.types[0];
4734 i.flags[2] = i.flags[0];
4735 i.tm.operand_types[2] = i.tm.operand_types[0];
4736
4737 i.tm.opcode_modifier.sse2avx = 0;
4738 }
4739 i.op[0].regs -= i.op[0].regs->reg_num + 8;
4740 i.op[1].regs = i.op[0].regs;
4741 }
4742 }
4743 else if (optimize_for_space
4744 && i.tm.base_opcode == 0x59
4745 && i.tm.opcode_space == SPACE_0F38
4746 && i.operands == i.reg_operands
4747 && i.tm.opcode_modifier.vex
4748 && !(i.op[0].regs->reg_flags & RegRex)
4749 && i.op[0].regs->reg_type.bitfield.xmmword
4750 && i.vec_encoding != vex_encoding_vex3)
4751 {
4752 /* Optimize: -Os:
4753 vpbroadcastq %xmmN, %xmmM -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
4754 */
4755 i.tm.opcode_space = SPACE_0F;
4756 i.tm.base_opcode = 0x6c;
4757 i.tm.opcode_modifier.vexvvvv = 1;
4758
4759 ++i.operands;
4760 ++i.reg_operands;
4761 ++i.tm.operands;
4762
4763 i.op[2].regs = i.op[0].regs;
4764 i.types[2] = i.types[0];
4765 i.flags[2] = i.flags[0];
4766 i.tm.operand_types[2] = i.tm.operand_types[0];
4767
4768 swap_2_operands (1, 2);
4769 }
4770 }
4771
4772 /* Return non-zero for an instruction that may load from memory (used
   to implement -mlfence-after-load).  */
4773
4774 static int
4775 load_insn_p (void)
4776 {
4777 unsigned int dest;
4778 int any_vex_p = is_any_vex_encoding (&i.tm);
4779 unsigned int base_opcode = i.tm.base_opcode | 1;
4780
4781 if (!any_vex_p)
4782 {
4783 /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
4784 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote. */
4785 if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
4786 return 0;
4787
4788 /* pop. */
4789 if (i.tm.mnem_off == MN_pop)
4790 return 1;
4791 }
4792
4793 if (i.tm.opcode_space == SPACE_BASE)
4794 {
4795 /* popf, popa. */
4796 if (i.tm.base_opcode == 0x9d
4797 || i.tm.base_opcode == 0x61)
4798 return 1;
4799
4800 /* movs, cmps, lods, scas. */
4801 if ((i.tm.base_opcode | 0xb) == 0xaf)
4802 return 1;
4803
4804 /* outs, xlatb. */
4805 if (base_opcode == 0x6f
4806 || i.tm.base_opcode == 0xd7)
4807 return 1;
4808       /* NB: AMD-specific insns with implicit memory operands are
4809 	 intentionally not covered.  */
4810 }
4811
4812 /* No memory operand. */
4813 if (!i.mem_operands)
4814 return 0;
4815
4816 if (any_vex_p)
4817 {
4818 if (i.tm.mnem_off == MN_vldmxcsr)
4819 return 1;
4820 }
4821 else if (i.tm.opcode_space == SPACE_BASE)
4822 {
4823 /* test, not, neg, mul, imul, div, idiv. */
4824 if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
4825 return 1;
4826
4827 /* inc, dec. */
4828 if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
4829 return 1;
4830
4831 /* add, or, adc, sbb, and, sub, xor, cmp. */
4832 if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
4833 return 1;
4834
4835 /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
4836 if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
4837 && i.tm.extension_opcode != 6)
4838 return 1;
4839
4840 /* Check for x87 instructions. */
4841 if ((base_opcode | 6) == 0xdf)
4842 {
4843 /* Skip fst, fstp, fstenv, fstcw. */
4844 if (i.tm.base_opcode == 0xd9
4845 && (i.tm.extension_opcode == 2
4846 || i.tm.extension_opcode == 3
4847 || i.tm.extension_opcode == 6
4848 || i.tm.extension_opcode == 7))
4849 return 0;
4850
4851 /* Skip fisttp, fist, fistp, fstp. */
4852 if (i.tm.base_opcode == 0xdb
4853 && (i.tm.extension_opcode == 1
4854 || i.tm.extension_opcode == 2
4855 || i.tm.extension_opcode == 3
4856 || i.tm.extension_opcode == 7))
4857 return 0;
4858
4859 /* Skip fisttp, fst, fstp, fsave, fstsw. */
4860 if (i.tm.base_opcode == 0xdd
4861 && (i.tm.extension_opcode == 1
4862 || i.tm.extension_opcode == 2
4863 || i.tm.extension_opcode == 3
4864 || i.tm.extension_opcode == 6
4865 || i.tm.extension_opcode == 7))
4866 return 0;
4867
4868 /* Skip fisttp, fist, fistp, fbstp, fistp. */
4869 if (i.tm.base_opcode == 0xdf
4870 && (i.tm.extension_opcode == 1
4871 || i.tm.extension_opcode == 2
4872 || i.tm.extension_opcode == 3
4873 || i.tm.extension_opcode == 6
4874 || i.tm.extension_opcode == 7))
4875 return 0;
4876
4877 return 1;
4878 }
4879 }
4880 else if (i.tm.opcode_space == SPACE_0F)
4881 {
4882 /* bt, bts, btr, btc. */
4883 if (i.tm.base_opcode == 0xba
4884 && (i.tm.extension_opcode | 3) == 7)
4885 return 1;
4886
4887 /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld. */
4888 if (i.tm.base_opcode == 0xc7
4889 && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
4890 && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
4891 || i.tm.extension_opcode == 6))
4892 return 1;
4893
4894 /* fxrstor, ldmxcsr, xrstor. */
4895 if (i.tm.base_opcode == 0xae
4896 && (i.tm.extension_opcode == 1
4897 || i.tm.extension_opcode == 2
4898 || i.tm.extension_opcode == 5))
4899 return 1;
4900
4901 /* lgdt, lidt, lmsw. */
4902 if (i.tm.base_opcode == 0x01
4903 && (i.tm.extension_opcode == 2
4904 || i.tm.extension_opcode == 3
4905 || i.tm.extension_opcode == 6))
4906 return 1;
4907 }
4908
4909 dest = i.operands - 1;
4910
4911 /* Check fake imm8 operand and 3 source operands. */
4912 if ((i.tm.opcode_modifier.immext
4913 || i.reg_operands + i.mem_operands == 4)
4914 && i.types[dest].bitfield.imm8)
4915 dest--;
4916
4917 /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg. */
4918 if (i.tm.opcode_space == SPACE_BASE
4919 && ((base_opcode | 0x38) == 0x39
4920 || (base_opcode | 2) == 0x87))
4921 return 1;
4922
4923 if (i.tm.mnem_off == MN_xadd)
4924 return 1;
4925
4926 /* Check for load instruction. */
4927 return (i.types[dest].bitfield.class != ClassNone
4928 || i.types[dest].bitfield.instance == Accum);
4929 }
4930
4931 /* Output lfence, 0x0f 0xae 0xe8, after instruction.  */
4932
4933 static void
4934 insert_lfence_after (void)
4935 {
4936 if (lfence_after_load && load_insn_p ())
4937 {
4938 /* There are also two REP string instructions that require
4939 special treatment. Specifically, the compare string (CMPS)
4940 and scan string (SCAS) instructions set EFLAGS in a manner
4941 that depends on the data being compared/scanned. When used
4942 with a REP prefix, the number of iterations may therefore
4943 vary depending on this data. If the data is a program secret
4944 chosen by the adversary using an LVI method,
4945 then this data-dependent behavior may leak some aspect
4946 of the secret. */
4947 if (((i.tm.base_opcode | 0x9) == 0xaf)
4948 && i.prefix[REP_PREFIX])
4949 {
4950 as_warn (_("`%s` changes flags which would affect control flow behavior"),
4951 insn_name (&i.tm));
4952 }
4953 char *p = frag_more (3);
4954 *p++ = 0xf;
4955 *p++ = 0xae;
4956 *p = 0xe8;
4957 }
4958 }
4959
4960 /* Output lfence, 0x0f 0xae 0xe8, before instruction.  */
4961
4962 static void
4963 insert_lfence_before (void)
4964 {
4965 char *p;
4966
4967 if (i.tm.opcode_space != SPACE_BASE)
4968 return;
4969
4970 if (i.tm.base_opcode == 0xff
4971 && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
4972 {
4973 /* Insert lfence before indirect branch if needed. */
4974
4975 if (lfence_before_indirect_branch == lfence_branch_none)
4976 return;
4977
4978 if (i.operands != 1)
4979 abort ();
4980
4981 if (i.reg_operands == 1)
4982 {
4983 /* Indirect branch via register. Don't insert lfence with
4984 -mlfence-after-load=yes. */
4985 if (lfence_after_load
4986 || lfence_before_indirect_branch == lfence_branch_memory)
4987 return;
4988 }
4989 else if (i.mem_operands == 1
4990 && lfence_before_indirect_branch != lfence_branch_register)
4991 {
4992 as_warn (_("indirect `%s` with memory operand should be avoided"),
4993 insn_name (&i.tm));
4994 return;
4995 }
4996 else
4997 return;
4998
4999 if (last_insn.kind != last_insn_other
5000 && last_insn.seg == now_seg)
5001 {
5002 as_warn_where (last_insn.file, last_insn.line,
5003 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
5004 last_insn.name, insn_name (&i.tm));
5005 return;
5006 }
5007
5008 p = frag_more (3);
5009 *p++ = 0xf;
5010 *p++ = 0xae;
5011 *p = 0xe8;
5012 return;
5013 }
5014
5015 /* Output or/not/shl and lfence before near ret. */
5016 if (lfence_before_ret != lfence_before_ret_none
5017 && (i.tm.base_opcode | 1) == 0xc3)
5018 {
5019 if (last_insn.kind != last_insn_other
5020 && last_insn.seg == now_seg)
5021 {
5022 as_warn_where (last_insn.file, last_insn.line,
5023 _("`%s` skips -mlfence-before-ret on `%s`"),
5024 last_insn.name, insn_name (&i.tm));
5025 return;
5026 }
5027
5028       /* A near ret ignores the operand size override in 64-bit code.  */
5029 char prefix = flag_code == CODE_64BIT
5030 ? 0x48
5031 : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
5032
5033 if (lfence_before_ret == lfence_before_ret_not)
5034 {
5035 	  /* not: 0xf7 0x14 0x24 (notl (%rsp)), emitted twice so the value is
5036 	     restored; may add prefix for operand size override or 64-bit code.  */
5037 p = frag_more ((prefix ? 2 : 0) + 6 + 3);
5038 if (prefix)
5039 *p++ = prefix;
5040 *p++ = 0xf7;
5041 *p++ = 0x14;
5042 *p++ = 0x24;
5043 if (prefix)
5044 *p++ = prefix;
5045 *p++ = 0xf7;
5046 *p++ = 0x14;
5047 *p++ = 0x24;
5048 }
5049 else
5050 {
5051 p = frag_more ((prefix ? 1 : 0) + 4 + 3);
5052 if (prefix)
5053 *p++ = prefix;
5054 if (lfence_before_ret == lfence_before_ret_or)
5055 {
5056 		  /* or: 0x83 0x0c 0x24 0x00 (orl $0, (%rsp)); may add prefix
5057 		     for operand size override or 64-bit code.  */
5058 *p++ = 0x83;
5059 *p++ = 0x0c;
5060 }
5061 else
5062 {
5063 		  /* shl: 0xc1 0x24 0x24 0x00 (shll $0, (%rsp)); may add prefix
5064 		     for operand size override or 64-bit code.  */
5065 *p++ = 0xc1;
5066 *p++ = 0x24;
5067 }
5068
5069 *p++ = 0x24;
5070 *p++ = 0x0;
5071 }
5072
5073 *p++ = 0xf;
5074 *p++ = 0xae;
5075 *p = 0xe8;
5076 }
5077 }
5078
5079 /* Shared helper for md_assemble() and s_insn(). */
5080 static void init_globals (void)
5081 {
5082 unsigned int j;
5083
5084 memset (&i, '\0', sizeof (i));
5085 i.rounding.type = rc_none;
5086 for (j = 0; j < MAX_OPERANDS; j++)
5087 i.reloc[j] = NO_RELOC;
5088 memset (disp_expressions, '\0', sizeof (disp_expressions));
5089 memset (im_expressions, '\0', sizeof (im_expressions));
5090 save_stack_p = save_stack;
5091 }
5092
5093 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
5094    parsing pass.  Instead of introducing a rarely used new insn attribute this
5095 utilizes a common pattern between affected templates. It is deemed
5096 acceptable that this will lead to unnecessary pass 2 preparations in a
5097 limited set of cases. */
5098 static INLINE bool may_need_pass2 (const insn_template *t)
5099 {
5100 return t->opcode_modifier.sse2avx
5101 /* Note that all SSE2AVX templates have at least one operand. */
5102 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
5103 : (t->opcode_space == SPACE_0F
5104 && (t->base_opcode | 1) == 0xbf)
5105 || (t->opcode_space == SPACE_BASE
5106 && t->base_opcode == 0x63);
5107 }
5108
5109 /* This is the guts of the machine-dependent assembler. LINE points to a
5110 machine dependent instruction. This function is supposed to emit
5111 the frags/bytes it assembles to. */
5112
5113 void
5114 md_assemble (char *line)
5115 {
5116 unsigned int j;
5117 char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
5118 const char *end, *pass1_mnem = NULL;
5119 enum i386_error pass1_err = 0;
5120 const insn_template *t;
5121
5122 /* Initialize globals. */
5123 current_templates = NULL;
5124 retry:
5125 init_globals ();
5126
5127 /* First parse an instruction mnemonic & call i386_operand for the operands.
5128 We assume that the scrubber has arranged it so that line[0] is the valid
5129 start of a (possibly prefixed) mnemonic. */
5130
5131 end = parse_insn (line, mnemonic, false);
5132 if (end == NULL)
5133 {
5134 if (pass1_mnem != NULL)
5135 goto match_error;
5136 if (i.error != no_error)
5137 {
5138 gas_assert (current_templates != NULL);
5139 if (may_need_pass2 (current_templates->start) && !i.suffix)
5140 goto no_match;
5141 /* No point in trying a 2nd pass - it'll only find the same suffix
5142 again. */
5143 mnem_suffix = i.suffix;
5144 goto match_error;
5145 }
5146 return;
5147 }
5148 t = current_templates->start;
5149 if (may_need_pass2 (t))
5150 {
5151 /* Make a copy of the full line in case we need to retry. */
5152 copy = xstrdup (line);
5153 }
5154 line += end - line;
5155 mnem_suffix = i.suffix;
5156
5157 line = parse_operands (line, mnemonic);
5158 this_operand = -1;
5159 if (line == NULL)
5160 {
5161 free (copy);
5162 return;
5163 }
5164
5165 /* Now we've parsed the mnemonic into a set of templates, and have the
5166 operands at hand. */
5167
5168 /* All Intel opcodes have reversed operands except for "bound", "enter",
5169 "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
5170 "rmpadjust", "rmpupdate", and "rmpquery". We also don't reverse
5171 intersegment "jmp" and "call" instructions with 2 immediate operands so
5172 that the immediate segment precedes the offset consistently in Intel and
5173 AT&T modes. */
5174 if (intel_syntax
5175 && i.operands > 1
5176 && (t->mnem_off != MN_bound)
5177 && !startswith (mnemonic, "invlpg")
5178 && !startswith (mnemonic, "monitor")
5179 && !startswith (mnemonic, "mwait")
5180 && (t->mnem_off != MN_pvalidate)
5181 && !startswith (mnemonic, "rmp")
5182 && (t->mnem_off != MN_tpause)
5183 && (t->mnem_off != MN_umwait)
5184 && !(i.operands == 2
5185 && operand_type_check (i.types[0], imm)
5186 && operand_type_check (i.types[1], imm)))
5187 swap_operands ();
5188
5189   /* The order of the immediates should be reversed for the 2-immediate
5190      extrq and insertq instructions.  */
5191 if (i.imm_operands == 2
5192 && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
5193 swap_2_operands (0, 1);
5194
5195 if (i.imm_operands)
5196 optimize_imm ();
5197
5198 if (i.disp_operands && !optimize_disp (t))
5199 return;
5200
5201 /* Next, we find a template that matches the given insn,
5202 making sure the overlap of the given operands types is consistent
5203 with the template operand types. */
5204
5205 if (!(t = match_template (mnem_suffix)))
5206 {
5207 const char *err_msg;
5208
5209 if (copy && !mnem_suffix)
5210 {
5211 line = copy;
5212 copy = NULL;
5213 no_match:
5214 pass1_err = i.error;
5215 pass1_mnem = insn_name (current_templates->start);
5216 goto retry;
5217 }
5218
5219 /* If a non-/only-64bit template (group) was found in pass 1, and if
5220 _some_ template (group) was found in pass 2, squash pass 1's
5221 error. */
5222 if (pass1_err == unsupported_64bit)
5223 pass1_mnem = NULL;
5224
5225 match_error:
5226 free (copy);
5227
5228 switch (pass1_mnem ? pass1_err : i.error)
5229 {
5230 default:
5231 abort ();
5232 case operand_size_mismatch:
5233 err_msg = _("operand size mismatch");
5234 break;
5235 case operand_type_mismatch:
5236 err_msg = _("operand type mismatch");
5237 break;
5238 case register_type_mismatch:
5239 err_msg = _("register type mismatch");
5240 break;
5241 case number_of_operands_mismatch:
5242 err_msg = _("number of operands mismatch");
5243 break;
5244 case invalid_instruction_suffix:
5245 err_msg = _("invalid instruction suffix");
5246 break;
5247 case bad_imm4:
5248 err_msg = _("constant doesn't fit in 4 bits");
5249 break;
5250 case unsupported_with_intel_mnemonic:
5251 err_msg = _("unsupported with Intel mnemonic");
5252 break;
5253 case unsupported_syntax:
5254 err_msg = _("unsupported syntax");
5255 break;
5256 case unsupported:
5257 as_bad (_("unsupported instruction `%s'"),
5258 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5259 return;
5260 case unsupported_on_arch:
5261 as_bad (_("`%s' is not supported on `%s%s'"),
5262 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5263 cpu_arch_name ? cpu_arch_name : default_arch,
5264 cpu_sub_arch_name ? cpu_sub_arch_name : "");
5265 return;
5266 case unsupported_64bit:
5267 if (ISLOWER (mnem_suffix))
5268 {
5269 if (flag_code == CODE_64BIT)
5270 as_bad (_("`%s%c' is not supported in 64-bit mode"),
5271 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5272 mnem_suffix);
5273 else
5274 as_bad (_("`%s%c' is only supported in 64-bit mode"),
5275 pass1_mnem ? pass1_mnem : insn_name (current_templates->start),
5276 mnem_suffix);
5277 }
5278 else
5279 {
5280 if (flag_code == CODE_64BIT)
5281 as_bad (_("`%s' is not supported in 64-bit mode"),
5282 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5283 else
5284 as_bad (_("`%s' is only supported in 64-bit mode"),
5285 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5286 }
5287 return;
5288 case invalid_sib_address:
5289 err_msg = _("invalid SIB address");
5290 break;
5291 case invalid_vsib_address:
5292 err_msg = _("invalid VSIB address");
5293 break;
5294 case invalid_vector_register_set:
5295 err_msg = _("mask, index, and destination registers must be distinct");
5296 break;
5297 case invalid_tmm_register_set:
5298 err_msg = _("all tmm registers must be distinct");
5299 break;
5300 case invalid_dest_and_src_register_set:
5301 err_msg = _("destination and source registers must be distinct");
5302 break;
5303 case unsupported_vector_index_register:
5304 err_msg = _("unsupported vector index register");
5305 break;
5306 case unsupported_broadcast:
5307 err_msg = _("unsupported broadcast");
5308 break;
5309 case broadcast_needed:
5310 err_msg = _("broadcast is needed for operand of such type");
5311 break;
5312 case unsupported_masking:
5313 err_msg = _("unsupported masking");
5314 break;
5315 case mask_not_on_destination:
5316 err_msg = _("mask not on destination operand");
5317 break;
5318 case no_default_mask:
5319 err_msg = _("default mask isn't allowed");
5320 break;
5321 case unsupported_rc_sae:
5322 err_msg = _("unsupported static rounding/sae");
5323 break;
5324 case invalid_register_operand:
5325 err_msg = _("invalid register operand");
5326 break;
5327 }
5328 as_bad (_("%s for `%s'"), err_msg,
5329 pass1_mnem ? pass1_mnem : insn_name (current_templates->start));
5330 return;
5331 }
5332
5333 free (copy);
5334
5335 if (sse_check != check_none
5336 /* The opcode space check isn't strictly needed; it's there only to
5337 bypass the logic below when easily possible. */
5338 && t->opcode_space >= SPACE_0F
5339 && t->opcode_space <= SPACE_0F3A
5340 && !is_cpu (&i.tm, CpuSSE4a)
5341 && !is_any_vex_encoding (t))
5342 {
5343 bool simd = false;
5344
5345 for (j = 0; j < t->operands; ++j)
5346 {
5347 if (t->operand_types[j].bitfield.class == RegMMX)
5348 break;
5349 if (t->operand_types[j].bitfield.class == RegSIMD)
5350 simd = true;
5351 }
5352
5353 if (j >= t->operands && simd)
5354 (sse_check == check_warning
5355 ? as_warn
5356 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
5357 }
5358
5359 if (i.tm.opcode_modifier.fwait)
5360 if (!add_prefix (FWAIT_OPCODE))
5361 return;
5362
5363 /* Check if REP prefix is OK. */
5364 if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep)
5365 {
5366 as_bad (_("invalid instruction `%s' after `%s'"),
5367 insn_name (&i.tm), i.rep_prefix);
5368 return;
5369 }
5370
5371 /* Check for lock without a lockable instruction. Destination operand
5372 must be memory unless it is xchg (0x86). */
5373 if (i.prefix[LOCK_PREFIX])
5374 {
5375 if (i.tm.opcode_modifier.prefixok < PrefixLock
5376 || i.mem_operands == 0
5377 || (i.tm.base_opcode != 0x86
5378 && !(i.flags[i.operands - 1] & Operand_Mem)))
5379 {
5380 as_bad (_("expecting lockable instruction after `lock'"));
5381 return;
5382 }
5383
5384       /* Zap the redundant prefix from XCHG when optimizing; XCHG with a
	  memory operand asserts LOCK implicitly anyway.  */
5385 if (i.tm.base_opcode == 0x86 && optimize && !i.no_optimize)
5386 i.prefix[LOCK_PREFIX] = 0;
5387 }
5388
5389 if (is_any_vex_encoding (&i.tm)
5390 || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
5391 || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
5392 {
5393 /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns. */
5394 if (i.prefix[DATA_PREFIX])
5395 {
5396 as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
5397 return;
5398 }
5399
5400 /* Don't allow e.g. KMOV in TLS code sequences. */
5401 for (j = i.imm_operands; j < i.operands; ++j)
5402 switch (i.reloc[j])
5403 {
5404 case BFD_RELOC_386_TLS_GOTIE:
5405 case BFD_RELOC_386_TLS_LE_32:
5406 case BFD_RELOC_X86_64_GOTTPOFF:
5407 case BFD_RELOC_X86_64_TLSLD:
5408 as_bad (_("TLS relocation cannot be used with `%s'"), insn_name (&i.tm));
5409 return;
5410 default:
5411 break;
5412 }
5413 }
5414
5415 /* Check if HLE prefix is OK. */
5416 if (i.hle_prefix && !check_hle ())
5417 return;
5418
5419 /* Check BND prefix. */
5420 if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
5421 as_bad (_("expecting valid branch instruction after `bnd'"));
5422
5423 /* Check NOTRACK prefix. */
5424 if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
5425 as_bad (_("expecting indirect branch instruction after `notrack'"));
5426
5427 if (is_cpu (&i.tm, CpuMPX))
5428 {
5429 if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
5430 as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
5431 else if (flag_code != CODE_16BIT
5432 ? i.prefix[ADDR_PREFIX]
5433 : i.mem_operands && !i.prefix[ADDR_PREFIX])
5434 as_bad (_("16-bit address isn't allowed in MPX instructions"));
5435 }
5436
5437 /* Insert BND prefix. */
5438 if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
5439 {
5440 if (!i.prefix[BND_PREFIX])
5441 add_prefix (BND_PREFIX_OPCODE);
5442 else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
5443 {
5444 as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
5445 i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
5446 }
5447 }
5448
5449 /* Check string instruction segment overrides. */
5450 if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
5451 {
5452 gas_assert (i.mem_operands);
5453 if (!check_string ())
5454 return;
5455 i.disp_operands = 0;
5456 }
5457
5458 /* The memory operand of (%dx) should be only used with input/output
5459 instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee). */
5460 if (i.input_output_operand
5461 && ((i.tm.base_opcode | 0x82) != 0xee
5462 || i.tm.opcode_space != SPACE_BASE))
5463 {
5464 as_bad (_("input/output port address isn't allowed with `%s'"),
5465 insn_name (&i.tm));
5466 return;
5467 }
5468
5469 if (optimize && !i.no_optimize && i.tm.opcode_modifier.optimize)
5470 optimize_encoding ();
5471
5472 if (use_unaligned_vector_move)
5473 encode_with_unaligned_vector_move ();
5474
5475 if (!process_suffix ())
5476 return;
5477
5478 /* Check if IP-relative addressing requirements can be satisfied. */
5479 if (is_cpu (&i.tm, CpuPREFETCHI)
5480 && !(i.base_reg && i.base_reg->reg_num == RegIP))
5481 as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
5482
5483 /* Update operand types and check extended states. */
5484 for (j = 0; j < i.operands; j++)
5485 {
5486 i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
5487 switch (i.tm.operand_types[j].bitfield.class)
5488 {
5489 default:
5490 break;
5491 case RegMMX:
5492 i.xstate |= xstate_mmx;
5493 break;
5494 case RegMask:
5495 i.xstate |= xstate_mask;
5496 break;
5497 case RegSIMD:
5498 if (i.tm.operand_types[j].bitfield.tmmword)
5499 i.xstate |= xstate_tmm;
5500 else if (i.tm.operand_types[j].bitfield.zmmword
5501 && vector_size >= VSZ512)
5502 i.xstate |= xstate_zmm;
5503 else if (i.tm.operand_types[j].bitfield.ymmword
5504 && vector_size >= VSZ256)
5505 i.xstate |= xstate_ymm;
5506 else if (i.tm.operand_types[j].bitfield.xmmword)
5507 i.xstate |= xstate_xmm;
5508 break;
5509 }
5510 }
5511
5512 /* Make still unresolved immediate matches conform to size of immediate
5513 given in i.suffix. */
5514 if (!finalize_imm ())
5515 return;
5516
5517 if (i.types[0].bitfield.imm1)
5518     i.imm_operands = 0;	/* kludge for shift insns, whose implicit $1
				   is encoded in the opcode itself.  */
5519
5520 /* For insns with operands there are more diddles to do to the opcode. */
5521 if (i.operands)
5522 {
5523 if (!process_operands ())
5524 return;
5525 }
5526 else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
5527 {
5528 /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc. */
5529 as_warn (_("translating to `%sp'"), insn_name (&i.tm));
5530 }
5531
5532 if (is_any_vex_encoding (&i.tm))
5533 {
5534 if (!cpu_arch_flags.bitfield.cpui286)
5535 {
5536 as_bad (_("instruction `%s' isn't supported outside of protected mode."),
5537 insn_name (&i.tm));
5538 return;
5539 }
5540
5541 /* Check for explicit REX prefix. */
5542 if (i.prefix[REX_PREFIX] || i.rex_encoding)
5543 {
5544 as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
5545 return;
5546 }
5547
5548 if (i.tm.opcode_modifier.vex)
5549 build_vex_prefix (t);
5550 else
5551 build_evex_prefix ();
5552
5553 /* The individual REX.RXBW bits got consumed. */
5554 i.rex &= REX_OPCODE;
5555 }
5556
5557   /* Handle conversion of 'int $3' --> special int3 insn; int3 has its
     own one-byte opcode (0xcc).  */
5558 if (i.tm.mnem_off == MN_int
5559 && i.op[0].imms->X_add_number == 3)
5560 {
5561 i.tm.base_opcode = INT3_OPCODE;
5562 i.imm_operands = 0;
5563 }
5564
5565 if ((i.tm.opcode_modifier.jump == JUMP
5566 || i.tm.opcode_modifier.jump == JUMP_BYTE
5567 || i.tm.opcode_modifier.jump == JUMP_DWORD)
5568 && i.op[0].disps->X_op == O_constant)
5569 {
5570 /* Convert "jmp constant" (and "call constant") to a jump (call) to
5571 the absolute address given by the constant. Since ix86 jumps and
5572 calls are pc relative, we need to generate a reloc. */
5573 i.op[0].disps->X_add_symbol = &abs_symbol;
5574 i.op[0].disps->X_op = O_symbol;
5575 }
5576
5577 /* For 8 bit registers we need an empty rex prefix. Also if the
5578 instruction already has a prefix, we need to convert old
5579 registers to new ones. */
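  /* As an illustration, "movb %sil, %al" needs a bare REX (0x40): without
     REX the very same encoding of %sil denotes %dh, and conversely %ah,
     %ch, %dh, %bh cannot be encoded in a REX-prefixed insn at all.  */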
5580
5581 if ((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte
5582 && (i.op[0].regs->reg_flags & RegRex64) != 0)
5583 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte
5584 && (i.op[1].regs->reg_flags & RegRex64) != 0)
5585 || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
5586 || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
5587 && i.rex != 0))
5588 {
5589 int x;
5590
5591 i.rex |= REX_OPCODE;
5592 for (x = 0; x < 2; x++)
5593 {
5594 /* Look for 8 bit operand that uses old registers. */
5595 if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
5596 && (i.op[x].regs->reg_flags & RegRex64) == 0)
5597 {
5598 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5599 	      /* In case it is a "hi" register, give up.  */
5600 if (i.op[x].regs->reg_num > 3)
5601 as_bad (_("can't encode register '%s%s' in an "
5602 "instruction requiring REX prefix."),
5603 register_prefix, i.op[x].regs->reg_name);
5604
5605 /* Otherwise it is equivalent to the extended register.
5606 Since the encoding doesn't change this is merely
5607 cosmetic cleanup for debug output. */
5608
5609 i.op[x].regs = i.op[x].regs + 8;
5610 }
5611 }
5612 }
5613
5614 if (i.rex == 0 && i.rex_encoding)
5615 {
5616       /* Check if we can add a REX_OPCODE byte.  Look for an 8-bit
5617 	 operand that uses a legacy register.  If it is a "hi" register,
5618 	 don't add the REX_OPCODE byte.  */
5619 int x;
5620 for (x = 0; x < 2; x++)
5621 if (i.types[x].bitfield.class == Reg
5622 && i.types[x].bitfield.byte
5623 && (i.op[x].regs->reg_flags & RegRex64) == 0
5624 && i.op[x].regs->reg_num > 3)
5625 {
5626 gas_assert (!(i.op[x].regs->reg_flags & RegRex));
5627 i.rex_encoding = false;
5628 break;
5629 }
5630
5631 if (i.rex_encoding)
5632 i.rex = REX_OPCODE;
5633 }
5634
5635 if (i.rex != 0)
5636 add_prefix (REX_OPCODE | i.rex);
5637
5638 insert_lfence_before ();
5639
5640 /* We are ready to output the insn. */
5641 output_insn ();
5642
5643 insert_lfence_after ();
5644
5645 last_insn.seg = now_seg;
5646
5647 if (i.tm.opcode_modifier.isprefix)
5648 {
5649 last_insn.kind = last_insn_prefix;
5650 last_insn.name = insn_name (&i.tm);
5651 last_insn.file = as_where (&last_insn.line);
5652 }
5653 else
5654 last_insn.kind = last_insn_other;
5655 }
5656
5657 /* The Q suffix is generally valid only in 64-bit mode, with very few
5658 exceptions: fild, fistp, fisttp, and cmpxchg8b. Note that for fild
5659 and fisttp only one of their two templates is matched below: That's
5660 sufficient since other relevant attributes are the same between both
5661 respective templates. */
5662 static INLINE bool q_suffix_allowed (const insn_template *t)
5663 {
5664 return flag_code == CODE_64BIT
5665 || (t->opcode_space == SPACE_BASE
5666 && t->base_opcode == 0xdf
5667 && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
5668 || t->mnem_off == MN_cmpxchg8b;
5669 }
5670
5671 static const char *
5672 parse_insn (const char *line, char *mnemonic, bool prefix_only)
5673 {
5674 const char *l = line, *token_start = l;
5675 char *mnem_p;
5676 bool pass1 = !current_templates;
5677 int supported;
5678 const insn_template *t;
5679 char *dot_p = NULL;
5680
5681 while (1)
5682 {
5683 mnem_p = mnemonic;
5684 /* Pseudo-prefixes start with an opening figure brace. */
5685 if ((*mnem_p = *l) == '{')
5686 {
5687 ++mnem_p;
5688 ++l;
5689 }
5690 while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
5691 {
5692 if (*mnem_p == '.')
5693 dot_p = mnem_p;
5694 mnem_p++;
5695 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5696 {
5697 too_long:
5698 as_bad (_("no such instruction: `%s'"), token_start);
5699 return NULL;
5700 }
5701 l++;
5702 }
5703 /* Pseudo-prefixes end with a closing figure brace. */
5704 if (*mnemonic == '{' && *l == '}')
5705 {
5706 *mnem_p++ = *l++;
5707 if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
5708 goto too_long;
5709 *mnem_p = '\0';
5710
5711 /* Point l at the closing brace if there's no other separator. */
5712 if (*l != END_OF_INSN && !is_space_char (*l)
5713 && *l != PREFIX_SEPARATOR)
5714 --l;
5715 }
5716 else if (!is_space_char (*l)
5717 && *l != END_OF_INSN
5718 && (intel_syntax
5719 || (*l != PREFIX_SEPARATOR && *l != ',')))
5720 {
5721 if (prefix_only)
5722 break;
5723 as_bad (_("invalid character %s in mnemonic"),
5724 output_invalid (*l));
5725 return NULL;
5726 }
5727 if (token_start == l)
5728 {
5729 if (!intel_syntax && *l == PREFIX_SEPARATOR)
5730 as_bad (_("expecting prefix; got nothing"));
5731 else
5732 as_bad (_("expecting mnemonic; got nothing"));
5733 return NULL;
5734 }
5735
5736 /* Look up instruction (or prefix) via hash table. */
5737 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5738
5739 if (*l != END_OF_INSN
5740 && (!is_space_char (*l) || l[1] != END_OF_INSN)
5741 && current_templates
5742 && current_templates->start->opcode_modifier.isprefix)
5743 {
5744 if (!cpu_flags_check_cpu64 (current_templates->start->cpu))
5745 {
5746 as_bad ((flag_code != CODE_64BIT
5747 ? _("`%s' is only supported in 64-bit mode")
5748 : _("`%s' is not supported in 64-bit mode")),
5749 insn_name (current_templates->start));
5750 return NULL;
5751 }
5752 /* If we are in 16-bit mode, do not allow addr16 or data16.
5753 Similarly, in 32-bit mode, do not allow addr32 or data32. */
5754 if ((current_templates->start->opcode_modifier.size == SIZE16
5755 || current_templates->start->opcode_modifier.size == SIZE32)
5756 && flag_code != CODE_64BIT
5757 && ((current_templates->start->opcode_modifier.size == SIZE32)
5758 ^ (flag_code == CODE_16BIT)))
5759 {
5760 as_bad (_("redundant %s prefix"),
5761 insn_name (current_templates->start));
5762 return NULL;
5763 }
5764
5765 if (current_templates->start->base_opcode == PSEUDO_PREFIX)
5766 {
5767 /* Handle pseudo prefixes. */
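		  /* Illustrative uses: "{vex3} vzeroupper",
		     "{disp32} jmp .L1", "{load} movl %eax, %ebx".  */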
5768 switch (current_templates->start->extension_opcode)
5769 {
5770 case Prefix_Disp8:
5771 /* {disp8} */
5772 i.disp_encoding = disp_encoding_8bit;
5773 break;
5774 case Prefix_Disp16:
5775 /* {disp16} */
5776 i.disp_encoding = disp_encoding_16bit;
5777 break;
5778 case Prefix_Disp32:
5779 /* {disp32} */
5780 i.disp_encoding = disp_encoding_32bit;
5781 break;
5782 case Prefix_Load:
5783 /* {load} */
5784 i.dir_encoding = dir_encoding_load;
5785 break;
5786 case Prefix_Store:
5787 /* {store} */
5788 i.dir_encoding = dir_encoding_store;
5789 break;
5790 case Prefix_VEX:
5791 /* {vex} */
5792 i.vec_encoding = vex_encoding_vex;
5793 break;
5794 case Prefix_VEX3:
5795 /* {vex3} */
5796 i.vec_encoding = vex_encoding_vex3;
5797 break;
5798 case Prefix_EVEX:
5799 /* {evex} */
5800 i.vec_encoding = vex_encoding_evex;
5801 break;
5802 case Prefix_REX:
5803 /* {rex} */
5804 i.rex_encoding = true;
5805 break;
5806 case Prefix_NoOptimize:
5807 /* {nooptimize} */
5808 i.no_optimize = true;
5809 break;
5810 default:
5811 abort ();
5812 }
5813 }
5814 else
5815 {
5816 /* Add prefix, checking for repeated prefixes. */
5817 switch (add_prefix (current_templates->start->base_opcode))
5818 {
5819 case PREFIX_EXIST:
5820 return NULL;
5821 case PREFIX_DS:
5822 if (is_cpu (current_templates->start, CpuIBT))
5823 i.notrack_prefix = insn_name (current_templates->start);
5824 break;
5825 case PREFIX_REP:
5826 if (is_cpu (current_templates->start, CpuHLE))
5827 i.hle_prefix = insn_name (current_templates->start);
5828 else if (is_cpu (current_templates->start, CpuMPX))
5829 i.bnd_prefix = insn_name (current_templates->start);
5830 else
5831 i.rep_prefix = insn_name (current_templates->start);
5832 break;
5833 default:
5834 break;
5835 }
5836 }
5837 /* Skip past PREFIX_SEPARATOR and reset token_start. */
5838 token_start = ++l;
5839 }
5840 else
5841 break;
5842 }
5843
5844 if (prefix_only)
5845 return token_start;
5846
5847 if (!current_templates)
5848 {
5849 /* Deprecated functionality (new code should use pseudo-prefixes instead):
5850 Check if we should swap operands or force a 32-bit displacement in
5851 the encoding. */
5852 if (mnem_p - 2 == dot_p && dot_p[1] == 's')
5853 i.dir_encoding = dir_encoding_swap;
5854 else if (mnem_p - 3 == dot_p
5855 && dot_p[1] == 'd'
5856 && dot_p[2] == '8')
5857 i.disp_encoding = disp_encoding_8bit;
5858 else if (mnem_p - 4 == dot_p
5859 && dot_p[1] == 'd'
5860 && dot_p[2] == '3'
5861 && dot_p[3] == '2')
5862 i.disp_encoding = disp_encoding_32bit;
5863 else
5864 goto check_suffix;
5865 mnem_p = dot_p;
5866 *dot_p = '\0';
5867 current_templates = (const templates *) str_hash_find (op_hash, mnemonic);
5868 }
5869
5870 if (!current_templates || !pass1)
5871 {
5872 current_templates = NULL;
5873
5874 check_suffix:
5875 if (mnem_p > mnemonic)
5876 {
5877 /* See if we can get a match by trimming off a suffix. */
5878 switch (mnem_p[-1])
5879 {
5880 case WORD_MNEM_SUFFIX:
5881 if (intel_syntax && (intel_float_operand (mnemonic) & 2))
5882 i.suffix = SHORT_MNEM_SUFFIX;
5883 else
5884 /* Fall through. */
5885 case BYTE_MNEM_SUFFIX:
5886 case QWORD_MNEM_SUFFIX:
5887 i.suffix = mnem_p[-1];
5888 mnem_p[-1] = '\0';
5889 current_templates
5890 = (const templates *) str_hash_find (op_hash, mnemonic);
5891 break;
5892 case SHORT_MNEM_SUFFIX:
5893 case LONG_MNEM_SUFFIX:
5894 if (!intel_syntax)
5895 {
5896 i.suffix = mnem_p[-1];
5897 mnem_p[-1] = '\0';
5898 current_templates
5899 = (const templates *) str_hash_find (op_hash, mnemonic);
5900 }
5901 break;
5902
5903 /* Intel Syntax. */
5904 case 'd':
5905 if (intel_syntax)
5906 {
5907 if (intel_float_operand (mnemonic) == 1)
5908 i.suffix = SHORT_MNEM_SUFFIX;
5909 else
5910 i.suffix = LONG_MNEM_SUFFIX;
5911 mnem_p[-1] = '\0';
5912 current_templates
5913 = (const templates *) str_hash_find (op_hash, mnemonic);
5914 }
5915 /* For compatibility reasons accept MOVSD and CMPSD without
5916 operands even in AT&T mode. */
5917 else if (*l == END_OF_INSN
5918 || (is_space_char (*l) && l[1] == END_OF_INSN))
5919 {
5920 mnem_p[-1] = '\0';
5921 current_templates
5922 = (const templates *) str_hash_find (op_hash, mnemonic);
5923 if (current_templates != NULL
5924 /* MOVS or CMPS */
5925 && (current_templates->start->base_opcode | 2) == 0xa6
5926 && current_templates->start->opcode_space
5927 == SPACE_BASE
5928 && mnem_p[-2] == 's')
5929 {
5930 as_warn (_("found `%sd'; assuming `%sl' was meant"),
5931 mnemonic, mnemonic);
5932 i.suffix = LONG_MNEM_SUFFIX;
5933 }
5934 else
5935 {
5936 current_templates = NULL;
5937 mnem_p[-1] = 'd';
5938 }
5939 }
5940 break;
5941 }
5942 }
5943
5944 if (!current_templates)
5945 {
5946 if (pass1)
5947 as_bad (_("no such instruction: `%s'"), token_start);
5948 return NULL;
5949 }
5950 }
5951
5952 if (current_templates->start->opcode_modifier.jump == JUMP
5953 || current_templates->start->opcode_modifier.jump == JUMP_BYTE)
5954 {
5955 /* Check for a branch hint. We allow ",pt" and ",pn" for
5956 predict taken and predict not taken respectively.
5957 I'm not sure that branch hints actually do anything on loop
5958 and jcxz insns (JumpByte) for current Pentium4 chips. They
5959 may work in the future and it doesn't hurt to accept them
5960 now. */
5961 if (l[0] == ',' && l[1] == 'p')
5962 {
5963 if (l[2] == 't')
5964 {
5965 if (!add_prefix (DS_PREFIX_OPCODE))
5966 return NULL;
5967 l += 3;
5968 }
5969 else if (l[2] == 'n')
5970 {
5971 if (!add_prefix (CS_PREFIX_OPCODE))
5972 return NULL;
5973 l += 3;
5974 }
5975 }
5976 }
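/* For example, "jne,pt .L1" gets a 0x3e (%ds) prefix ahead of the branch
and "jne,pn .L1" gets 0x2e (%cs): the taken / not-taken hint encodings
introduced with the Pentium 4. */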
5977 /* Any other comma loses. */
5978 if (*l == ',')
5979 {
5980 as_bad (_("invalid character %s in mnemonic"),
5981 output_invalid (*l));
5982 return NULL;
5983 }
5984
5985 /* Check if instruction is supported on specified architecture. */
5986 supported = 0;
5987 for (t = current_templates->start; t < current_templates->end; ++t)
5988 {
5989 supported |= cpu_flags_match (t);
5990
5991 if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
5992 supported &= ~CPU_FLAGS_64BIT_MATCH;
5993
5994 if (supported == CPU_FLAGS_PERFECT_MATCH)
5995 return l;
5996 }
5997
5998 if (pass1)
5999 {
6000 if (supported & CPU_FLAGS_64BIT_MATCH)
6001 i.error = unsupported_on_arch;
6002 else
6003 i.error = unsupported_64bit;
6004 }
6005
6006 return NULL;
6007 }
6008
6009 static char *
6010 parse_operands (char *l, const char *mnemonic)
6011 {
6012 char *token_start;
6013
6014 /* 1 if operand is pending after ','. */
6015 unsigned int expecting_operand = 0;
6016
6017 while (*l != END_OF_INSN)
6018 {
6019 /* Non-zero if operand parens not balanced. */
6020 unsigned int paren_not_balanced = 0;
6021 /* True if inside double quotes. */
6022 bool in_quotes = false;
6023
6024 /* Skip optional white space before operand. */
6025 if (is_space_char (*l))
6026 ++l;
6027 if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
6028 {
6029 as_bad (_("invalid character %s before operand %d"),
6030 output_invalid (*l),
6031 i.operands + 1);
6032 return NULL;
6033 }
6034 token_start = l; /* After white space. */
6035 while (in_quotes || paren_not_balanced || *l != ',')
6036 {
6037 if (*l == END_OF_INSN)
6038 {
6039 if (in_quotes)
6040 {
6041 as_bad (_("unbalanced double quotes in operand %d."),
6042 i.operands + 1);
6043 return NULL;
6044 }
6045 if (paren_not_balanced)
6046 {
6047 know (!intel_syntax);
6048 as_bad (_("unbalanced parenthesis in operand %d."),
6049 i.operands + 1);
6050 return NULL;
6051 }
6052 else
6053 break; /* we are done */
6054 }
6055 else if (*l == '\\' && l[1] == '"')
6056 ++l;
6057 else if (*l == '"')
6058 in_quotes = !in_quotes;
6059 else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l))
6060 {
6061 as_bad (_("invalid character %s in operand %d"),
6062 output_invalid (*l),
6063 i.operands + 1);
6064 return NULL;
6065 }
6066 if (!intel_syntax && !in_quotes)
6067 {
6068 if (*l == '(')
6069 ++paren_not_balanced;
6070 if (*l == ')')
6071 --paren_not_balanced;
6072 }
6073 l++;
6074 }
6075 if (l != token_start)
6076 { /* Yes, we've read in another operand. */
6077 unsigned int operand_ok;
6078 this_operand = i.operands++;
6079 if (i.operands > MAX_OPERANDS)
6080 {
6081 as_bad (_("spurious operands; (%d operands/instruction max)"),
6082 MAX_OPERANDS);
6083 return NULL;
6084 }
6085 i.types[this_operand].bitfield.unspecified = 1;
6086 /* Now parse operand adding info to 'i' as we go along. */
6087 END_STRING_AND_SAVE (l);
6088
6089 if (i.mem_operands > 1)
6090 {
6091 as_bad (_("too many memory references for `%s'"),
6092 mnemonic);
6093 return 0;
6094 }
6095
6096 if (intel_syntax)
6097 operand_ok =
6098 i386_intel_operand (token_start,
6099 intel_float_operand (mnemonic));
6100 else
6101 operand_ok = i386_att_operand (token_start);
6102
6103 RESTORE_END_STRING (l);
6104 if (!operand_ok)
6105 return NULL;
6106 }
6107 else
6108 {
6109 if (expecting_operand)
6110 {
6111 expecting_operand_after_comma:
6112 as_bad (_("expecting operand after ','; got nothing"));
6113 return NULL;
6114 }
6115 if (*l == ',')
6116 {
6117 as_bad (_("expecting operand before ','; got nothing"));
6118 return NULL;
6119 }
6120 }
6121
6122 /* Now *l must be either ',' or END_OF_INSN. */
6123 if (*l == ',')
6124 {
6125 if (*++l == END_OF_INSN)
6126 {
6127 /* The comma is not followed by an operand; complain. */
6128 goto expecting_operand_after_comma;
6129 }
6130 expecting_operand = 1;
6131 }
6132 }
6133 return l;
6134 }
6135
6136 static void
6137 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
6138 {
6139 union i386_op temp_op;
6140 i386_operand_type temp_type;
6141 unsigned int temp_flags;
6142 enum bfd_reloc_code_real temp_reloc;
6143
6144 temp_type = i.types[xchg2];
6145 i.types[xchg2] = i.types[xchg1];
6146 i.types[xchg1] = temp_type;
6147
6148 temp_flags = i.flags[xchg2];
6149 i.flags[xchg2] = i.flags[xchg1];
6150 i.flags[xchg1] = temp_flags;
6151
6152 temp_op = i.op[xchg2];
6153 i.op[xchg2] = i.op[xchg1];
6154 i.op[xchg1] = temp_op;
6155
6156 temp_reloc = i.reloc[xchg2];
6157 i.reloc[xchg2] = i.reloc[xchg1];
6158 i.reloc[xchg1] = temp_reloc;
6159
6160 temp_flags = i.imm_bits[xchg2];
6161 i.imm_bits[xchg2] = i.imm_bits[xchg1];
6162 i.imm_bits[xchg1] = temp_flags;
6163
6164 if (i.mask.reg)
6165 {
6166 if (i.mask.operand == xchg1)
6167 i.mask.operand = xchg2;
6168 else if (i.mask.operand == xchg2)
6169 i.mask.operand = xchg1;
6170 }
6171 if (i.broadcast.type || i.broadcast.bytes)
6172 {
6173 if (i.broadcast.operand == xchg1)
6174 i.broadcast.operand = xchg2;
6175 else if (i.broadcast.operand == xchg2)
6176 i.broadcast.operand = xchg1;
6177 }
6178 }
6179
6180 static void
6181 swap_operands (void)
6182 {
6183 switch (i.operands)
6184 {
6185 case 5:
6186 case 4:
6187 swap_2_operands (1, i.operands - 2);
6188 /* Fall through. */
6189 case 3:
6190 case 2:
6191 swap_2_operands (0, i.operands - 1);
6192 break;
6193 default:
6194 abort ();
6195 }
6196
6197 if (i.mem_operands == 2)
6198 {
6199 const reg_entry *temp_seg;
6200 temp_seg = i.seg[0];
6201 i.seg[0] = i.seg[1];
6202 i.seg[1] = temp_seg;
6203 }
6204 }
6205
6206 /* Try to ensure constant immediates are represented in the smallest
6207 opcode possible. */
6208 static void
6209 optimize_imm (void)
6210 {
6211 char guess_suffix = 0;
6212 int op;
6213
6214 if (i.suffix)
6215 guess_suffix = i.suffix;
6216 else if (i.reg_operands)
6217 {
6218 /* Figure out a suffix from the last register operand specified.
6219 We can't do this properly yet, i.e. excluding special register
6220 instances, but the following works for instructions with
6221 immediates. In any case, we can't set i.suffix yet. */
6222 for (op = i.operands; --op >= 0;)
6223 if (i.types[op].bitfield.class != Reg)
6224 continue;
6225 else if (i.types[op].bitfield.byte)
6226 {
6227 guess_suffix = BYTE_MNEM_SUFFIX;
6228 break;
6229 }
6230 else if (i.types[op].bitfield.word)
6231 {
6232 guess_suffix = WORD_MNEM_SUFFIX;
6233 break;
6234 }
6235 else if (i.types[op].bitfield.dword)
6236 {
6237 guess_suffix = LONG_MNEM_SUFFIX;
6238 break;
6239 }
6240 else if (i.types[op].bitfield.qword)
6241 {
6242 guess_suffix = QWORD_MNEM_SUFFIX;
6243 break;
6244 }
6245 }
6246 else if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
6247 guess_suffix = WORD_MNEM_SUFFIX;
6248 else if (flag_code != CODE_64BIT || !(i.prefix[REX_PREFIX] & REX_W))
6249 guess_suffix = LONG_MNEM_SUFFIX;
6250
6251 for (op = i.operands; --op >= 0;)
6252 if (operand_type_check (i.types[op], imm))
6253 {
6254 switch (i.op[op].imms->X_op)
6255 {
6256 case O_constant:
6257 /* If a suffix is given, this operand may be shortened. */
6258 switch (guess_suffix)
6259 {
6260 case LONG_MNEM_SUFFIX:
6261 i.types[op].bitfield.imm32 = 1;
6262 i.types[op].bitfield.imm64 = 1;
6263 break;
6264 case WORD_MNEM_SUFFIX:
6265 i.types[op].bitfield.imm16 = 1;
6266 i.types[op].bitfield.imm32 = 1;
6267 i.types[op].bitfield.imm32s = 1;
6268 i.types[op].bitfield.imm64 = 1;
6269 break;
6270 case BYTE_MNEM_SUFFIX:
6271 i.types[op].bitfield.imm8 = 1;
6272 i.types[op].bitfield.imm8s = 1;
6273 i.types[op].bitfield.imm16 = 1;
6274 i.types[op].bitfield.imm32 = 1;
6275 i.types[op].bitfield.imm32s = 1;
6276 i.types[op].bitfield.imm64 = 1;
6277 break;
6278 }
6279
6280 /* If this operand is at most 16 bits, convert it
6281 to a signed 16 bit number before trying to see
6282 whether it will fit in an even smaller size.
6283 This allows a 16-bit operand such as $0xffe0 to
6284 be recognised as within Imm8S range. */
6285 if ((i.types[op].bitfield.imm16)
6286 && fits_in_unsigned_word (i.op[op].imms->X_add_number))
6287 {
6288 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6289 ^ 0x8000) - 0x8000);
6290 }
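/* Worked example: $0xffe0 with a 'w' register operand becomes
(0xffe0 ^ 0x8000) - 0x8000 = 0x7fe0 - 0x8000 = -0x20, which
smallest_imm_type() below then also classifies as Imm8S. */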
6291 #ifdef BFD64
6292 /* Store 32-bit immediate in 64-bit for 64-bit BFD. */
6293 if ((i.types[op].bitfield.imm32)
6294 && fits_in_unsigned_long (i.op[op].imms->X_add_number))
6295 {
6296 i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
6297 ^ ((offsetT) 1 << 31))
6298 - ((offsetT) 1 << 31));
6299 }
6300 #endif
6301 i.types[op]
6302 = operand_type_or (i.types[op],
6303 smallest_imm_type (i.op[op].imms->X_add_number));
6304
6305 /* We must avoid matching Imm32 templates when only a
6306 64-bit immediate is available. */
6307 if (guess_suffix == QWORD_MNEM_SUFFIX)
6308 i.types[op].bitfield.imm32 = 0;
6309 break;
6310
6311 case O_absent:
6312 case O_register:
6313 abort ();
6314
6315 /* Symbols and expressions. */
6316 default:
6317 /* Convert symbolic operand to proper sizes for matching, but don't
6318 prevent matching a set of insns that only supports sizes other
6319 than those matching the insn suffix. */
6320 {
6321 i386_operand_type mask, allowed;
6322 const insn_template *t = current_templates->start;
6323
6324 operand_type_set (&mask, 0);
6325 switch (guess_suffix)
6326 {
6327 case QWORD_MNEM_SUFFIX:
6328 mask.bitfield.imm64 = 1;
6329 mask.bitfield.imm32s = 1;
6330 break;
6331 case LONG_MNEM_SUFFIX:
6332 mask.bitfield.imm32 = 1;
6333 break;
6334 case WORD_MNEM_SUFFIX:
6335 mask.bitfield.imm16 = 1;
6336 break;
6337 case BYTE_MNEM_SUFFIX:
6338 mask.bitfield.imm8 = 1;
6339 break;
6340 default:
6341 break;
6342 }
6343
6344 allowed = operand_type_and (t->operand_types[op], mask);
6345 while (++t < current_templates->end)
6346 {
6347 allowed = operand_type_or (allowed, t->operand_types[op]);
6348 allowed = operand_type_and (allowed, mask);
6349 }
6350
6351 if (!operand_type_all_zero (&allowed))
6352 i.types[op] = operand_type_and (i.types[op], mask);
6353 }
6354 break;
6355 }
6356 }
6357 }
6358
6359 /* Try to use the smallest displacement type too. */
6360 static bool
6361 optimize_disp (const insn_template *t)
6362 {
6363 unsigned int op;
6364
6365 if (!want_disp32 (t)
6366 && (!t->opcode_modifier.jump
6367 || i.jumpabsolute || i.types[0].bitfield.baseindex))
6368 {
6369 for (op = 0; op < i.operands; ++op)
6370 {
6371 const expressionS *exp = i.op[op].disps;
6372
6373 if (!operand_type_check (i.types[op], disp))
6374 continue;
6375
6376 if (exp->X_op != O_constant)
6377 continue;
6378
6379 /* Since the displacement is sign-extended to 64 bits, don't allow
6380 disp32 if it is out of range. */
6381 if (fits_in_signed_long (exp->X_add_number))
6382 continue;
6383
6384 i.types[op].bitfield.disp32 = 0;
6385 if (i.types[op].bitfield.baseindex)
6386 {
6387 as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
6388 (uint64_t) exp->X_add_number);
6389 return false;
6390 }
6391 }
6392 }
6393
6394 /* Don't optimize the displacement for movabs since it only takes a
6395 64-bit displacement. */
6396 if (i.disp_encoding > disp_encoding_8bit
6397 || (flag_code == CODE_64BIT && t->mnem_off == MN_movabs))
6398 return true;
6399
6400 for (op = i.operands; op-- > 0;)
6401 if (operand_type_check (i.types[op], disp))
6402 {
6403 if (i.op[op].disps->X_op == O_constant)
6404 {
6405 offsetT op_disp = i.op[op].disps->X_add_number;
6406
6407 if (!op_disp && i.types[op].bitfield.baseindex)
6408 {
6409 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6410 i.op[op].disps = NULL;
6411 i.disp_operands--;
6412 continue;
6413 }
6414
6415 if (i.types[op].bitfield.disp16
6416 && fits_in_unsigned_word (op_disp))
6417 {
6418 /* If this operand is at most 16 bits, convert it
6419 to a signed 16-bit number and don't use a 64-bit
6420 displacement. */
6421 op_disp = ((op_disp ^ 0x8000) - 0x8000);
6422 i.types[op].bitfield.disp64 = 0;
6423 }
6424
6425 #ifdef BFD64
6426 /* Optimize 64-bit displacement to 32-bit for 64-bit BFD. */
6427 if ((flag_code != CODE_64BIT
6428 ? i.types[op].bitfield.disp32
6429 : want_disp32 (t)
6430 && (!t->opcode_modifier.jump
6431 || i.jumpabsolute || i.types[op].bitfield.baseindex))
6432 && fits_in_unsigned_long (op_disp))
6433 {
6434 /* If this operand is at most 32 bits, convert it
6435 to a signed 32-bit number and don't use a 64-bit
6436 displacement. */
6437 op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
6438 i.types[op].bitfield.disp64 = 0;
6439 i.types[op].bitfield.disp32 = 1;
6440 }
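/* Worked example: 0xfffffff0 becomes (0xfffffff0 ^ 0x80000000)
- 0x80000000 = 0x7ffffff0 - 0x80000000 = -0x10, i.e. the value is
normalized to its sign-extended 32-bit form. */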
6441
6442 if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
6443 {
6444 i.types[op].bitfield.disp64 = 0;
6445 i.types[op].bitfield.disp32 = 1;
6446 }
6447 #endif
6448 if ((i.types[op].bitfield.disp32
6449 || i.types[op].bitfield.disp16)
6450 && fits_in_disp8 (op_disp))
6451 i.types[op].bitfield.disp8 = 1;
6452
6453 i.op[op].disps->X_add_number = op_disp;
6454 }
6455 else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
6456 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
6457 {
6458 fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
6459 i.op[op].disps, 0, i.reloc[op]);
6460 i.types[op] = operand_type_and_not (i.types[op], anydisp);
6461 }
6462 else
6463 /* We only support 64-bit displacements for constants. */
6464 i.types[op].bitfield.disp64 = 0;
6465 }
6466
6467 return true;
6468 }
6469
6470 /* Return 1 if there is a match in broadcast bytes between operand
6471 GIVEN and instruction template T. */
6472
6473 static INLINE int
6474 match_broadcast_size (const insn_template *t, unsigned int given)
6475 {
6476 return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
6477 && i.types[given].bitfield.byte)
6478 || (t->opcode_modifier.broadcast == WORD_BROADCAST
6479 && i.types[given].bitfield.word)
6480 || (t->opcode_modifier.broadcast == DWORD_BROADCAST
6481 && i.types[given].bitfield.dword)
6482 || (t->opcode_modifier.broadcast == QWORD_BROADCAST
6483 && i.types[given].bitfield.qword));
6484 }
6485
6486 /* Check if operands are valid for the instruction. */
6487
6488 static int
6489 check_VecOperands (const insn_template *t)
6490 {
6491 unsigned int op;
6492 i386_cpu_flags cpu;
6493
6494 /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
6495 any one operand are implicitly requiring AVX512VL support if the actual
6496 operand size is YMMword or XMMword. Since this function runs after
6497 template matching, there's no need to check for YMMword/XMMword in
6498 the template. */
6499 cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
6500 if (!cpu_flags_all_zero (&cpu)
6501 && !is_cpu (t, CpuAVX512VL)
6502 && !cpu_arch_flags.bitfield.cpuavx512vl)
6503 {
6504 for (op = 0; op < t->operands; ++op)
6505 {
6506 if (t->operand_types[op].bitfield.zmmword
6507 && (i.types[op].bitfield.ymmword
6508 || i.types[op].bitfield.xmmword))
6509 {
6510 i.error = unsupported;
6511 return 1;
6512 }
6513 }
6514 }
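/* For instance, without AVX512VL enabled something like
"vrcp14ps %ymm1, %ymm2" is rejected here: its template also allows
ZMMword, so the YMMword use implies the missing AVX512VL. */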
6515
6516 /* Somewhat similarly, templates specifying both AVX and AVX2
6517 require AVX2 support if the actual operand size is YMMword. */
6518 if (is_cpu (t, CpuAVX) && is_cpu (t, CpuAVX2)
6519 && !cpu_arch_flags.bitfield.cpuavx2)
6520 {
6521 for (op = 0; op < t->operands; ++op)
6522 {
6523 if (t->operand_types[op].bitfield.xmmword
6524 && i.types[op].bitfield.ymmword)
6525 {
6526 i.error = unsupported;
6527 return 1;
6528 }
6529 }
6530 }
6531
6532 /* Without a VSIB byte, we can't have a vector register as index. */
6533 if (!t->opcode_modifier.sib
6534 && i.index_reg
6535 && (i.index_reg->reg_type.bitfield.xmmword
6536 || i.index_reg->reg_type.bitfield.ymmword
6537 || i.index_reg->reg_type.bitfield.zmmword))
6538 {
6539 i.error = unsupported_vector_index_register;
6540 return 1;
6541 }
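/* E.g. "mov (%rax,%xmm1), %eax" ends up here: only VSIB-encoded insns
(gathers and scatters) may use a vector register as index. */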
6542
6543 /* Check if default mask is allowed. */
6544 if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
6545 && (!i.mask.reg || i.mask.reg->reg_num == 0))
6546 {
6547 i.error = no_default_mask;
6548 return 1;
6549 }
6550
6551 /* With a VSIB byte, we need a vector register as index, and all vector
6552 registers must be distinct. */
6553 if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
6554 {
6555 if (!i.index_reg
6556 || !((t->opcode_modifier.sib == VECSIB128
6557 && i.index_reg->reg_type.bitfield.xmmword)
6558 || (t->opcode_modifier.sib == VECSIB256
6559 && i.index_reg->reg_type.bitfield.ymmword)
6560 || (t->opcode_modifier.sib == VECSIB512
6561 && i.index_reg->reg_type.bitfield.zmmword)))
6562 {
6563 i.error = invalid_vsib_address;
6564 return 1;
6565 }
6566
6567 gas_assert (i.reg_operands == 2 || i.mask.reg);
6568 if (i.reg_operands == 2 && !i.mask.reg)
6569 {
6570 gas_assert (i.types[0].bitfield.class == RegSIMD);
6571 gas_assert (i.types[0].bitfield.xmmword
6572 || i.types[0].bitfield.ymmword);
6573 gas_assert (i.types[2].bitfield.class == RegSIMD);
6574 gas_assert (i.types[2].bitfield.xmmword
6575 || i.types[2].bitfield.ymmword);
6576 if (operand_check == check_none)
6577 return 0;
6578 if (register_number (i.op[0].regs)
6579 != register_number (i.index_reg)
6580 && register_number (i.op[2].regs)
6581 != register_number (i.index_reg)
6582 && register_number (i.op[0].regs)
6583 != register_number (i.op[2].regs))
6584 return 0;
6585 if (operand_check == check_error)
6586 {
6587 i.error = invalid_vector_register_set;
6588 return 1;
6589 }
6590 as_warn (_("mask, index, and destination registers should be distinct"));
6591 }
6592 else if (i.reg_operands == 1 && i.mask.reg)
6593 {
6594 if (i.types[1].bitfield.class == RegSIMD
6595 && (i.types[1].bitfield.xmmword
6596 || i.types[1].bitfield.ymmword
6597 || i.types[1].bitfield.zmmword)
6598 && (register_number (i.op[1].regs)
6599 == register_number (i.index_reg)))
6600 {
6601 if (operand_check == check_error)
6602 {
6603 i.error = invalid_vector_register_set;
6604 return 1;
6605 }
6606 if (operand_check != check_none)
6607 as_warn (_("index and destination registers should be distinct"));
6608 }
6609 }
6610 }
6611
6612 /* For AMX instructions with 3 TMM register operands, all operands
6613 must be distinct. */
6614 if (i.reg_operands == 3
6615 && t->operand_types[0].bitfield.tmmword
6616 && (i.op[0].regs == i.op[1].regs
6617 || i.op[0].regs == i.op[2].regs
6618 || i.op[1].regs == i.op[2].regs))
6619 {
6620 i.error = invalid_tmm_register_set;
6621 return 1;
6622 }
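/* E.g. "tdpbssd %tmm1, %tmm1, %tmm2" is rejected here, as the AMX
multiply insns #UD when any two TMM operands coincide. */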
6623
6624 /* Some special instructions require that the destination be distinct
6625 from the source registers. */
6626 if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
6627 {
6628 unsigned int dest_reg = i.operands - 1;
6629
6630 know (i.operands >= 3);
6631
6632 /* #UD if dest_reg == src1_reg or dest_reg == src2_reg. */
6633 if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
6634 || (i.reg_operands > 2
6635 && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
6636 {
6637 i.error = invalid_dest_and_src_register_set;
6638 return 1;
6639 }
6640 }
6641
6642 /* Check if broadcast is supported by the instruction and is applied
6643 to the memory operand. */
6644 if (i.broadcast.type || i.broadcast.bytes)
6645 {
6646 i386_operand_type type, overlap;
6647
6648 /* Check if specified broadcast is supported in this instruction,
6649 and its broadcast bytes match the memory operand. */
6650 op = i.broadcast.operand;
6651 if (!t->opcode_modifier.broadcast
6652 || !(i.flags[op] & Operand_Mem)
6653 || (!i.types[op].bitfield.unspecified
6654 && !match_broadcast_size (t, op)))
6655 {
6656 bad_broadcast:
6657 i.error = unsupported_broadcast;
6658 return 1;
6659 }
6660
6661 operand_type_set (&type, 0);
6662 switch (get_broadcast_bytes (t, false))
6663 {
6664 case 2:
6665 type.bitfield.word = 1;
6666 break;
6667 case 4:
6668 type.bitfield.dword = 1;
6669 break;
6670 case 8:
6671 type.bitfield.qword = 1;
6672 break;
6673 case 16:
6674 type.bitfield.xmmword = 1;
6675 break;
6676 case 32:
6677 if (vector_size < VSZ256)
6678 goto bad_broadcast;
6679 type.bitfield.ymmword = 1;
6680 break;
6681 case 64:
6682 if (vector_size < VSZ512)
6683 goto bad_broadcast;
6684 type.bitfield.zmmword = 1;
6685 break;
6686 default:
6687 goto bad_broadcast;
6688 }
6689
6690 overlap = operand_type_and (type, t->operand_types[op]);
6691 if (t->operand_types[op].bitfield.class == RegSIMD
6692 && t->operand_types[op].bitfield.byte
6693 + t->operand_types[op].bitfield.word
6694 + t->operand_types[op].bitfield.dword
6695 + t->operand_types[op].bitfield.qword > 1)
6696 {
6697 overlap.bitfield.xmmword = 0;
6698 overlap.bitfield.ymmword = 0;
6699 overlap.bitfield.zmmword = 0;
6700 }
6701 if (operand_type_all_zero (&overlap))
6702 goto bad_broadcast;
6703
6704 if (t->opcode_modifier.checkoperandsize)
6705 {
6706 unsigned int j;
6707
6708 type.bitfield.baseindex = 1;
6709 for (j = 0; j < i.operands; ++j)
6710 {
6711 if (j != op
6712 && !operand_type_register_match(i.types[j],
6713 t->operand_types[j],
6714 type,
6715 t->operand_types[op]))
6716 goto bad_broadcast;
6717 }
6718 }
6719 }
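/* Example: for "vaddps (%rax){1to16}, %zmm1, %zmm2" the broadcast element
is a dword, so get_broadcast_bytes() yields 64 (16 * 4) and the ZMMword
bit set above overlaps the template's ZMMword operand. */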
6720 /* If broadcast is supported by this instruction, we need to check that
6721 an operand of one-element size isn't specified without broadcast. */
6722 else if (t->opcode_modifier.broadcast && i.mem_operands)
6723 {
6724 /* Find memory operand. */
6725 for (op = 0; op < i.operands; op++)
6726 if (i.flags[op] & Operand_Mem)
6727 break;
6728 gas_assert (op < i.operands);
6729 /* Check size of the memory operand. */
6730 if (match_broadcast_size (t, op))
6731 {
6732 i.error = broadcast_needed;
6733 return 1;
6734 }
6735 }
6736 else
6737 op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning. */
6738
6739 /* Check if requested masking is supported. */
6740 if (i.mask.reg)
6741 {
6742 if (!t->opcode_modifier.masking)
6743 {
6744 i.error = unsupported_masking;
6745 return 1;
6746 }
6747
6748 /* Common rules for masking:
6749 - mask register destinations permit only zeroing-masking, without
6750 that actually being expressed by a {z} operand suffix or EVEX.z,
6751 - memory destinations allow only merging-masking,
6752 - scatter/gather insns (i.e. ones using vSIB) only allow merging-
6753 masking. */
6754 if (i.mask.zeroing
6755 && (t->operand_types[t->operands - 1].bitfield.class == RegMask
6756 || (i.flags[t->operands - 1] & Operand_Mem)
6757 || t->opcode_modifier.sib))
6758 {
6759 i.error = unsupported_masking;
6760 return 1;
6761 }
6762 }
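/* E.g. zeroing-masking on a gather, as in
"vgatherdps (%rax,%zmm1,4), %zmm2{%k1}{z}", is rejected: vSIB insns
permit merging-masking only. */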
6763
6764 /* Check if masking is applied to dest operand. */
6765 if (i.mask.reg && (i.mask.operand != i.operands - 1))
6766 {
6767 i.error = mask_not_on_destination;
6768 return 1;
6769 }
6770
6771 /* Check RC/SAE. */
6772 if (i.rounding.type != rc_none)
6773 {
6774 if (!t->opcode_modifier.sae
6775 || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
6776 || i.mem_operands)
6777 {
6778 i.error = unsupported_rc_sae;
6779 return 1;
6780 }
6781
6782 /* Non-EVEX.LIG forms need to have a ZMM register as at least one
6783 operand. */
6784 if (t->opcode_modifier.evex != EVEXLIG)
6785 {
6786 for (op = 0; op < t->operands; ++op)
6787 if (i.types[op].bitfield.zmmword)
6788 break;
6789 if (op >= t->operands)
6790 {
6791 i.error = operand_size_mismatch;
6792 return 1;
6793 }
6794 }
6795 }
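/* E.g. "vaddps {rz-sae}, %zmm1, %zmm2, %zmm3" is accepted, while the
same with %ymm operands fails the ZMMword check right above. */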
6796
6797 /* Check the special Imm4 cases; must be the first operand. */
6798 if (is_cpu (t, CpuXOP) && t->operands == 5)
6799 {
6800 if (i.op[0].imms->X_op != O_constant
6801 || !fits_in_imm4 (i.op[0].imms->X_add_number))
6802 {
6803 i.error = bad_imm4;
6804 return 1;
6805 }
6806
6807 /* Turn off Imm<N> so that update_imm won't complain. */
6808 operand_type_set (&i.types[0], 0);
6809 }
6810
6811 /* Check vector Disp8 operand. */
6812 if (t->opcode_modifier.disp8memshift
6813 && i.disp_encoding <= disp_encoding_8bit)
6814 {
6815 if (i.broadcast.type || i.broadcast.bytes)
6816 i.memshift = t->opcode_modifier.broadcast - 1;
6817 else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
6818 i.memshift = t->opcode_modifier.disp8memshift;
6819 else
6820 {
6821 const i386_operand_type *type = NULL, *fallback = NULL;
6822
6823 i.memshift = 0;
6824 for (op = 0; op < i.operands; op++)
6825 if (i.flags[op] & Operand_Mem)
6826 {
6827 if (t->opcode_modifier.evex == EVEXLIG)
6828 i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
6829 else if (t->operand_types[op].bitfield.xmmword
6830 + t->operand_types[op].bitfield.ymmword
6831 + t->operand_types[op].bitfield.zmmword <= 1)
6832 type = &t->operand_types[op];
6833 else if (!i.types[op].bitfield.unspecified)
6834 type = &i.types[op];
6835 else /* Ambiguities get resolved elsewhere. */
6836 fallback = &t->operand_types[op];
6837 }
6838 else if (i.types[op].bitfield.class == RegSIMD
6839 && t->opcode_modifier.evex != EVEXLIG)
6840 {
6841 if (i.types[op].bitfield.zmmword)
6842 i.memshift = 6;
6843 else if (i.types[op].bitfield.ymmword && i.memshift < 5)
6844 i.memshift = 5;
6845 else if (i.types[op].bitfield.xmmword && i.memshift < 4)
6846 i.memshift = 4;
6847 }
6848
6849 if (!type && !i.memshift)
6850 type = fallback;
6851 if (type)
6852 {
6853 if (type->bitfield.zmmword)
6854 i.memshift = 6;
6855 else if (type->bitfield.ymmword)
6856 i.memshift = 5;
6857 else if (type->bitfield.xmmword)
6858 i.memshift = 4;
6859 }
6860
6861 /* For the check in fits_in_disp8(). */
6862 if (i.memshift == 0)
6863 i.memshift = -1;
6864 }
6865
6866 for (op = 0; op < i.operands; op++)
6867 if (operand_type_check (i.types[op], disp)
6868 && i.op[op].disps->X_op == O_constant)
6869 {
6870 if (fits_in_disp8 (i.op[op].disps->X_add_number))
6871 {
6872 i.types[op].bitfield.disp8 = 1;
6873 return 0;
6874 }
6875 i.types[op].bitfield.disp8 = 0;
6876 }
6877 }
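/* Worked example for the scaling above: in "vmovaps 128(%rax), %zmm0"
the memory operand is a ZMMword, so i.memshift is 6 and 128 == 2 << 6
can be encoded as a compressed disp8 of 2; a displacement of 0x44,
not being a multiple of 64, cannot. */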
6878
6879 i.memshift = 0;
6880
6881 return 0;
6882 }
6883
6884 /* Check if encoding requirements are met by the instruction. */
6885
6886 static int
6887 VEX_check_encoding (const insn_template *t)
6888 {
6889 if (i.vec_encoding == vex_encoding_error)
6890 {
6891 i.error = unsupported;
6892 return 1;
6893 }
6894
6895 /* Vector size restrictions. */
6896 if ((vector_size < VSZ512
6897 && (t->opcode_modifier.evex == EVEX512
6898 || t->opcode_modifier.vsz >= VSZ512))
6899 || (vector_size < VSZ256
6900 && (t->opcode_modifier.evex == EVEX256
6901 || t->opcode_modifier.vex == VEX256
6902 || t->opcode_modifier.vsz >= VSZ256)))
6903 {
6904 i.error = unsupported;
6905 return 1;
6906 }
6907
6908 if (i.vec_encoding == vex_encoding_evex)
6909 {
6910 /* This instruction must be encoded with EVEX prefix. */
6911 if (!is_evex_encoding (t))
6912 {
6913 i.error = unsupported;
6914 return 1;
6915 }
6916 return 0;
6917 }
6918
6919 if (!t->opcode_modifier.vex)
6920 {
6921 /* This instruction template doesn't have VEX prefix. */
6922 if (i.vec_encoding != vex_encoding_default)
6923 {
6924 i.error = unsupported;
6925 return 1;
6926 }
6927 return 0;
6928 }
6929
6930 return 0;
6931 }
6932
6933 /* Helper function for the progress() macro in match_template(). */
6934 static INLINE enum i386_error progress (enum i386_error new,
6935 enum i386_error last,
6936 unsigned int line, unsigned int *line_p)
6937 {
6938 if (line <= *line_p)
6939 return last;
6940 *line_p = line;
6941 return new;
6942 }
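/* Hence, of all the templates tried in match_template() below, the error
eventually reported is the one recorded by whichever check got furthest,
i.e. the one from the closest-matching template. */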
6943
6944 static const insn_template *
6945 match_template (char mnem_suffix)
6946 {
6947 /* Points to template once we've found it. */
6948 const insn_template *t;
6949 i386_operand_type overlap0, overlap1, overlap2, overlap3;
6950 i386_operand_type overlap4;
6951 unsigned int found_reverse_match;
6952 i386_operand_type operand_types [MAX_OPERANDS];
6953 int addr_prefix_disp;
6954 unsigned int j, size_match, check_register, errline = __LINE__;
6955 enum i386_error specific_error = number_of_operands_mismatch;
6956 #define progress(err) progress (err, specific_error, __LINE__, &errline)
6957
6958 #if MAX_OPERANDS != 5
6959 # error "MAX_OPERANDS must be 5."
6960 #endif
6961
6962 found_reverse_match = 0;
6963 addr_prefix_disp = -1;
6964
6965 for (t = current_templates->start; t < current_templates->end; t++)
6966 {
6967 addr_prefix_disp = -1;
6968 found_reverse_match = 0;
6969
6970 /* Must have right number of operands. */
6971 if (i.operands != t->operands)
6972 continue;
6973
6974 /* Check processor support. */
6975 specific_error = progress (unsupported);
6976 if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
6977 continue;
6978
6979 /* Check AT&T mnemonic. */
6980 specific_error = progress (unsupported_with_intel_mnemonic);
6981 if (intel_mnemonic && t->opcode_modifier.attmnemonic)
6982 continue;
6983
6984 /* Check AT&T/Intel syntax. */
6985 specific_error = progress (unsupported_syntax);
6986 if ((intel_syntax && t->opcode_modifier.attsyntax)
6987 || (!intel_syntax && t->opcode_modifier.intelsyntax))
6988 continue;
6989
6990 /* Check Intel64/AMD64 ISA. */
6991 switch (isa64)
6992 {
6993 default:
6994 /* Default: Don't accept Intel64. */
6995 if (t->opcode_modifier.isa64 == INTEL64)
6996 continue;
6997 break;
6998 case amd64:
6999 /* -mamd64: Don't accept Intel64 and Intel64 only. */
7000 if (t->opcode_modifier.isa64 >= INTEL64)
7001 continue;
7002 break;
7003 case intel64:
7004 /* -mintel64: Don't accept AMD64. */
7005 if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
7006 continue;
7007 break;
7008 }
7009
7010 /* Check the suffix. */
7011 specific_error = progress (invalid_instruction_suffix);
7012 if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
7013 || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
7014 || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
7015 || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
7016 || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
7017 continue;
7018
7019 specific_error = progress (operand_size_mismatch);
7020 size_match = operand_size_match (t);
7021 if (!size_match)
7022 continue;
7023
7024 /* This is intentionally not
7025
7026 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
7027
7028 as the case of a missing * on the operand is accepted (perhaps with
7029 a warning, issued further down). */
7030 specific_error = progress (operand_type_mismatch);
7031 if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
7032 continue;
7033
7034 /* In Intel syntax, normally we can check for memory operand size when
7035 there is no mnemonic suffix. But jmp and call have 2 different
7036 encodings with Dword memory operand size. Skip the "near" one
7037 (permitting a register operand) when "far" was requested. */
7038 if (i.far_branch
7039 && t->opcode_modifier.jump == JUMP_ABSOLUTE
7040 && t->operand_types[0].bitfield.class == Reg)
7041 continue;
7042
7043 for (j = 0; j < MAX_OPERANDS; j++)
7044 operand_types[j] = t->operand_types[j];
7045
7046 /* In general, don't allow 32-bit operands on pre-386. */
7047 specific_error = progress (mnem_suffix ? invalid_instruction_suffix
7048 : operand_size_mismatch);
7049 j = i.imm_operands + (t->operands > i.imm_operands + 1);
7050 if (i.suffix == LONG_MNEM_SUFFIX
7051 && !cpu_arch_flags.bitfield.cpui386
7052 && (intel_syntax
7053 ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
7054 && !intel_float_operand (insn_name (t)))
7055 : intel_float_operand (insn_name (t)) != 2)
7056 && (t->operands == i.imm_operands
7057 || (operand_types[i.imm_operands].bitfield.class != RegMMX
7058 && operand_types[i.imm_operands].bitfield.class != RegSIMD
7059 && operand_types[i.imm_operands].bitfield.class != RegMask)
7060 || (operand_types[j].bitfield.class != RegMMX
7061 && operand_types[j].bitfield.class != RegSIMD
7062 && operand_types[j].bitfield.class != RegMask))
7063 && !t->opcode_modifier.sib)
7064 continue;
7065
7066 /* Do not verify operands when there are none. */
7067 if (!t->operands)
7068 {
7069 if (VEX_check_encoding (t))
7070 {
7071 specific_error = progress (i.error);
7072 continue;
7073 }
7074
7075 /* We've found a match; break out of loop. */
7076 break;
7077 }
7078
7079 if (!t->opcode_modifier.jump
7080 || t->opcode_modifier.jump == JUMP_ABSOLUTE)
7081 {
7082 /* There should be only one Disp operand. */
7083 for (j = 0; j < MAX_OPERANDS; j++)
7084 if (operand_type_check (operand_types[j], disp))
7085 break;
7086 if (j < MAX_OPERANDS)
7087 {
7088 bool override = (i.prefix[ADDR_PREFIX] != 0);
7089
7090 addr_prefix_disp = j;
7091
7092 /* An address size prefix will turn a Disp64 operand into Disp32 and
7093 a Disp32/Disp16 one into Disp16/Disp32 respectively. */
7094 switch (flag_code)
7095 {
7096 case CODE_16BIT:
7097 override = !override;
7098 /* Fall through. */
7099 case CODE_32BIT:
7100 if (operand_types[j].bitfield.disp32
7101 && operand_types[j].bitfield.disp16)
7102 {
7103 operand_types[j].bitfield.disp16 = override;
7104 operand_types[j].bitfield.disp32 = !override;
7105 }
7106 gas_assert (!operand_types[j].bitfield.disp64);
7107 break;
7108
7109 case CODE_64BIT:
7110 if (operand_types[j].bitfield.disp64)
7111 {
7112 gas_assert (!operand_types[j].bitfield.disp32);
7113 operand_types[j].bitfield.disp32 = override;
7114 operand_types[j].bitfield.disp64 = !override;
7115 }
7116 operand_types[j].bitfield.disp16 = 0;
7117 break;
7118 }
7119 }
7120 }
7121
7122 /* We check register size if needed. */
7123 if (t->opcode_modifier.checkoperandsize)
7124 {
7125 check_register = (1 << t->operands) - 1;
7126 if (i.broadcast.type || i.broadcast.bytes)
7127 check_register &= ~(1 << i.broadcast.operand);
7128 }
7129 else
7130 check_register = 0;
7131
7132 overlap0 = operand_type_and (i.types[0], operand_types[0]);
7133 switch (t->operands)
7134 {
7135 case 1:
7136 if (!operand_type_match (overlap0, i.types[0]))
7137 continue;
7138
7139 /* Allow the ModR/M encoding to be requested by using the {load} or
7140 {store} pseudo prefix on an applicable insn. */
7141 if (!t->opcode_modifier.modrm
7142 && i.reg_operands == 1
7143 && ((i.dir_encoding == dir_encoding_load
7144 && t->mnem_off != MN_pop)
7145 || (i.dir_encoding == dir_encoding_store
7146 && t->mnem_off != MN_push))
7147 /* Avoid BSWAP. */
7148 && t->mnem_off != MN_bswap)
7149 continue;
7150 break;
7151
7152 case 2:
7153 /* xchg %eax, %eax is a special case. It is an alias for nop
7154 only in 32bit mode and we can use opcode 0x90. In 64bit
7155 mode, we can't use 0x90 for xchg %eax, %eax since it should
7156 zero-extend %eax to %rax. */
7157 if (t->base_opcode == 0x90
7158 && t->opcode_space == SPACE_BASE)
7159 {
7160 if (flag_code == CODE_64BIT
7161 && i.types[0].bitfield.instance == Accum
7162 && i.types[0].bitfield.dword
7163 && i.types[1].bitfield.instance == Accum)
7164 continue;
7165
7166 /* Allow the ModR/M encoding to be requested by using the
7167 {load} or {store} pseudo prefix. */
7168 if (i.dir_encoding == dir_encoding_load
7169 || i.dir_encoding == dir_encoding_store)
7170 continue;
7171 }
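/* That is, in 64-bit mode "xchg %eax, %eax" is encoded via ModR/M
(0x87 0xc0) rather than as 0x90, which is "nop" and would fail to
zero-extend %eax into %rax. */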
7172
7173 if (t->base_opcode == MOV_AX_DISP32
7174 && t->opcode_space == SPACE_BASE
7175 && t->mnem_off != MN_movabs)
7176 {
7177 /* Force 0x8b encoding for "mov foo@GOT, %eax". */
7178 if (i.reloc[0] == BFD_RELOC_386_GOT32)
7179 continue;
7180
7181 /* xrelease mov %eax, <disp> is another special case. It must not
7182 match the accumulator-only encoding of mov. */
7183 if (i.hle_prefix)
7184 continue;
7185
7186 /* Allow the ModR/M encoding to be requested by using a suitable
7187 {load} or {store} pseudo prefix. */
7188 if (i.dir_encoding == (i.types[0].bitfield.instance == Accum
7189 ? dir_encoding_store
7190 : dir_encoding_load)
7191 && !i.types[0].bitfield.disp64
7192 && !i.types[1].bitfield.disp64)
7193 continue;
7194 }
7195
7196 /* Allow the ModR/M encoding to be requested by using the {load} or
7197 {store} pseudo prefix on an applicable insn. */
7198 if (!t->opcode_modifier.modrm
7199 && i.reg_operands == 1
7200 && i.imm_operands == 1
7201 && (i.dir_encoding == dir_encoding_load
7202 || i.dir_encoding == dir_encoding_store)
7203 && t->opcode_space == SPACE_BASE)
7204 {
7205 if (t->base_opcode == 0xb0 /* mov $imm, %reg */
7206 && i.dir_encoding == dir_encoding_store)
7207 continue;
7208
7209 if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
7210 && (t->base_opcode != 0x3c /* cmp $imm, %acc */
7211 || i.dir_encoding == dir_encoding_load))
7212 continue;
7213
7214 if (t->base_opcode == 0xa8 /* test $imm, %acc */
7215 && i.dir_encoding == dir_encoding_load)
7216 continue;
7217 }
7218 /* Fall through. */
7219
7220 case 3:
7221 if (!(size_match & MATCH_STRAIGHT))
7222 goto check_reverse;
7223 /* Reverse direction of operands if swapping is possible in the first
7224 place (operands need to be symmetric) and
7225 - the load form is requested, and the template is a store form,
7226 - the store form is requested, and the template is a load form,
7227 - the non-default (swapped) form is requested. */
7228 overlap1 = operand_type_and (operand_types[0], operand_types[1]);
7229 if (t->opcode_modifier.d && i.reg_operands == i.operands
7230 && !operand_type_all_zero (&overlap1))
7231 switch (i.dir_encoding)
7232 {
7233 case dir_encoding_load:
7234 if (operand_type_check (operand_types[i.operands - 1], anymem)
7235 || t->opcode_modifier.regmem)
7236 goto check_reverse;
7237 break;
7238
7239 case dir_encoding_store:
7240 if (!operand_type_check (operand_types[i.operands - 1], anymem)
7241 && !t->opcode_modifier.regmem)
7242 goto check_reverse;
7243 break;
7244
7245 case dir_encoding_swap:
7246 goto check_reverse;
7247
7248 case dir_encoding_default:
7249 break;
7250 }
7251 /* If we want store form, we skip the current load. */
7252 if ((i.dir_encoding == dir_encoding_store
7253 || i.dir_encoding == dir_encoding_swap)
7254 && i.mem_operands == 0
7255 && t->opcode_modifier.load)
7256 continue;
7257 /* Fall through. */
7258 case 4:
7259 case 5:
7260 overlap1 = operand_type_and (i.types[1], operand_types[1]);
7261 if (!operand_type_match (overlap0, i.types[0])
7262 || !operand_type_match (overlap1, i.types[1])
7263 || ((check_register & 3) == 3
7264 && !operand_type_register_match (i.types[0],
7265 operand_types[0],
7266 i.types[1],
7267 operand_types[1])))
7268 {
7269 specific_error = progress (i.error);
7270
7271 /* Check if other direction is valid ... */
7272 if (!t->opcode_modifier.d)
7273 continue;
7274
7275 check_reverse:
7276 if (!(size_match & MATCH_REVERSE))
7277 continue;
7278 /* Try reversing direction of operands. */
7279 j = is_cpu (t, CpuFMA4)
7280 || is_cpu (t, CpuXOP) ? 1 : i.operands - 1;
7281 overlap0 = operand_type_and (i.types[0], operand_types[j]);
7282 overlap1 = operand_type_and (i.types[j], operand_types[0]);
7283 overlap2 = operand_type_and (i.types[1], operand_types[1]);
7284 gas_assert (t->operands != 3 || !check_register);
7285 if (!operand_type_match (overlap0, i.types[0])
7286 || !operand_type_match (overlap1, i.types[j])
7287 || (t->operands == 3
7288 && !operand_type_match (overlap2, i.types[1]))
7289 || (check_register
7290 && !operand_type_register_match (i.types[0],
7291 operand_types[j],
7292 i.types[j],
7293 operand_types[0])))
7294 {
7295 /* Does not match either direction. */
7296 specific_error = progress (i.error);
7297 continue;
7298 }
7299 /* found_reverse_match holds which variant of D
7300 we've found. */
7301 if (!t->opcode_modifier.d)
7302 found_reverse_match = 0;
7303 else if (operand_types[0].bitfield.tbyte)
7304 {
7305 if (t->opcode_modifier.operandconstraint != UGH)
7306 found_reverse_match = Opcode_FloatD;
7307 else
7308 found_reverse_match = ~0;
7309 /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped. */
7310 if ((t->extension_opcode & 4)
7311 && (intel_syntax || intel_mnemonic))
7312 found_reverse_match |= Opcode_FloatR;
7313 }
7314 else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
7315 {
7316 found_reverse_match = Opcode_VexW;
7317 goto check_operands_345;
7318 }
7319 else if (t->opcode_space != SPACE_BASE
7320 && (t->opcode_space != SPACE_0F
7321 /* MOV to/from CR/DR/TR, as an exception, follow
7322 the base opcode space encoding model. */
7323 || (t->base_opcode | 7) != 0x27))
7324 found_reverse_match = (t->base_opcode & 0xee) != 0x6e
7325 ? Opcode_ExtD : Opcode_SIMD_IntD;
7326 else if (!t->opcode_modifier.commutative)
7327 found_reverse_match = Opcode_D;
7328 else
7329 found_reverse_match = ~0;
7330 }
7331 else
7332 {
7333 /* Found a forward 2 operand match here. */
7334 check_operands_345:
7335 switch (t->operands)
7336 {
7337 case 5:
7338 overlap4 = operand_type_and (i.types[4], operand_types[4]);
7339 if (!operand_type_match (overlap4, i.types[4])
7340 || !operand_type_register_match (i.types[3],
7341 operand_types[3],
7342 i.types[4],
7343 operand_types[4]))
7344 {
7345 specific_error = progress (i.error);
7346 continue;
7347 }
7348 /* Fall through. */
7349 case 4:
7350 overlap3 = operand_type_and (i.types[3], operand_types[3]);
7351 if (!operand_type_match (overlap3, i.types[3])
7352 || ((check_register & 0xa) == 0xa
7353 && !operand_type_register_match (i.types[1],
7354 operand_types[1],
7355 i.types[3],
7356 operand_types[3]))
7357 || ((check_register & 0xc) == 0xc
7358 && !operand_type_register_match (i.types[2],
7359 operand_types[2],
7360 i.types[3],
7361 operand_types[3])))
7362 {
7363 specific_error = progress (i.error);
7364 continue;
7365 }
7366 /* Fall through. */
7367 case 3:
7368 overlap2 = operand_type_and (i.types[2], operand_types[2]);
7369 if (!operand_type_match (overlap2, i.types[2])
7370 || ((check_register & 5) == 5
7371 && !operand_type_register_match (i.types[0],
7372 operand_types[0],
7373 i.types[2],
7374 operand_types[2]))
7375 || ((check_register & 6) == 6
7376 && !operand_type_register_match (i.types[1],
7377 operand_types[1],
7378 i.types[2],
7379 operand_types[2])))
7380 {
7381 specific_error = progress (i.error);
7382 continue;
7383 }
7384 break;
7385 }
7386 }
7387 /* Found either forward/reverse 2, 3 or 4 operand match here:
7388 slip through to break. */
7389 }
7390
7391 /* Check if VEX/EVEX encoding requirements can be satisfied. */
7392 if (VEX_check_encoding (t))
7393 {
7394 specific_error = progress (i.error);
7395 continue;
7396 }
7397
7398 /* Check if vector operands are valid. */
7399 if (check_VecOperands (t))
7400 {
7401 specific_error = progress (i.error);
7402 continue;
7403 }
7404
7405 /* We've found a match; break out of loop. */
7406 break;
7407 }
7408
7409 #undef progress
7410
7411 if (t == current_templates->end)
7412 {
7413 /* We found no match. */
7414 i.error = specific_error;
7415 return NULL;
7416 }
7417
7418 if (!quiet_warnings)
7419 {
7420 if (!intel_syntax
7421 && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
7422 as_warn (_("indirect %s without `*'"), insn_name (t));
7423
7424 if (t->opcode_modifier.isprefix
7425 && t->opcode_modifier.mnemonicsize == IGNORESIZE)
7426 {
7427 /* Warn them that a data or address size prefix doesn't
7428 affect assembly of the next line of code. */
7429 as_warn (_("stand-alone `%s' prefix"), insn_name (t));
7430 }
7431 }
7432
7433 /* Copy the template we found. */
7434 install_template (t);
7435
7436 if (addr_prefix_disp != -1)
7437 i.tm.operand_types[addr_prefix_disp]
7438 = operand_types[addr_prefix_disp];
7439
7440 switch (found_reverse_match)
7441 {
7442 case 0:
7443 break;
7444
7445 case Opcode_FloatR:
7446 case Opcode_FloatR | Opcode_FloatD:
7447 i.tm.extension_opcode ^= Opcode_FloatR >> 3;
7448 found_reverse_match &= Opcode_FloatD;
7449
7450 /* Fall through. */
7451 default:
7452 /* If we found a reverse match we must alter the opcode direction
7453 bit and clear/flip the regmem modifier one. found_reverse_match
7454 holds bits to change (different for int & float insns). */
7455
7456 i.tm.base_opcode ^= found_reverse_match;
7457
7458 /* Certain SIMD insns have their load forms specified in the opcode
7459 table, and hence we need to _set_ RegMem instead of clearing it.
7460 We need to avoid setting the bit though on insns like KMOVW. */
7461 i.tm.opcode_modifier.regmem
7462 = i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
7463 && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
7464 && !i.tm.opcode_modifier.regmem;
7465
7466 /* Fall through. */
7467 case ~0:
7468 i.tm.operand_types[0] = operand_types[i.operands - 1];
7469 i.tm.operand_types[i.operands - 1] = operand_types[0];
7470 break;
7471
7472 case Opcode_VexW:
7473 /* Only the first two register operands need reversing, alongside
7474 flipping VEX.W. */
7475 i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
7476
7477 j = i.tm.operand_types[0].bitfield.imm8;
7478 i.tm.operand_types[j] = operand_types[j + 1];
7479 i.tm.operand_types[j + 1] = operand_types[j];
7480 break;
7481 }
7482
7483 return t;
7484 }
7485
7486 static int
7487 check_string (void)
7488 {
7489 unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
7490 unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
7491
7492 if (i.seg[op] != NULL && i.seg[op] != reg_es)
7493 {
7494 as_bad (_("`%s' operand %u must use `%ses' segment"),
7495 insn_name (&i.tm),
7496 intel_syntax ? i.tm.operands - es_op : es_op + 1,
7497 register_prefix);
7498 return 0;
7499 }
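/* E.g. "stosb %al, %ds:(%edi)" triggers the error above, since the
%edi-based operand of the string insns is architecturally fixed to
the %es segment. */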
7500
7501 /* There's only ever one segment override allowed per instruction.
7502 This instruction possibly has a legal segment override on the
7503 second operand, so copy the segment to where non-string
7504 instructions store it, allowing common code. */
7505 i.seg[op] = i.seg[1];
7506
7507 return 1;
7508 }
7509
7510 static int
7511 process_suffix (void)
7512 {
7513 bool is_movx = false;
7514
7515 /* If matched instruction specifies an explicit instruction mnemonic
7516 suffix, use it. */
7517 if (i.tm.opcode_modifier.size == SIZE16)
7518 i.suffix = WORD_MNEM_SUFFIX;
7519 else if (i.tm.opcode_modifier.size == SIZE32)
7520 i.suffix = LONG_MNEM_SUFFIX;
7521 else if (i.tm.opcode_modifier.size == SIZE64)
7522 i.suffix = QWORD_MNEM_SUFFIX;
7523 else if (i.reg_operands
7524 && (i.operands > 1 || i.types[0].bitfield.class == Reg)
7525 && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
7526 {
7527 unsigned int numop = i.operands;
7528
7529 /* MOVSX/MOVZX */
7530 is_movx = (i.tm.opcode_space == SPACE_0F
7531 && (i.tm.base_opcode | 8) == 0xbe)
7532 || (i.tm.opcode_space == SPACE_BASE
7533 && i.tm.base_opcode == 0x63
7534 && is_cpu (&i.tm, Cpu64));
7535
7536 /* movsx/movzx want only their source operand considered here, for the
7537 ambiguity checking below. The suffix will be replaced afterwards
7538 to represent the destination (register). */
7539 if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
7540 --i.operands;
7541
7542 /* crc32 needs REX.W set regardless of suffix / source operand size. */
7543 if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
7544 i.rex |= REX_W;
7545
7546 /* If there's no instruction mnemonic suffix we try to invent one
7547 based on GPR operands. */
7548 if (!i.suffix)
7549 {
7550 /* We take i.suffix from the last register operand specified.
7551 The destination register type is more significant than the
7552 source register type. crc32 in SSE4.2 prefers the source
7553 register type. */
7554 unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
7555
7556 while (op--)
7557 if (i.tm.operand_types[op].bitfield.instance == InstanceNone
7558 || i.tm.operand_types[op].bitfield.instance == Accum)
7559 {
7560 if (i.types[op].bitfield.class != Reg)
7561 continue;
7562 if (i.types[op].bitfield.byte)
7563 i.suffix = BYTE_MNEM_SUFFIX;
7564 else if (i.types[op].bitfield.word)
7565 i.suffix = WORD_MNEM_SUFFIX;
7566 else if (i.types[op].bitfield.dword)
7567 i.suffix = LONG_MNEM_SUFFIX;
7568 else if (i.types[op].bitfield.qword)
7569 i.suffix = QWORD_MNEM_SUFFIX;
7570 else
7571 continue;
7572 break;
7573 }
7574
7575 /* As an exception, movsx/movzx silently default to a byte source
7576 in AT&T mode. */
7577 if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
7578 i.suffix = BYTE_MNEM_SUFFIX;
7579 }
7580 else if (i.suffix == BYTE_MNEM_SUFFIX)
7581 {
7582 if (!check_byte_reg ())
7583 return 0;
7584 }
7585 else if (i.suffix == LONG_MNEM_SUFFIX)
7586 {
7587 if (!check_long_reg ())
7588 return 0;
7589 }
7590 else if (i.suffix == QWORD_MNEM_SUFFIX)
7591 {
7592 if (!check_qword_reg ())
7593 return 0;
7594 }
7595 else if (i.suffix == WORD_MNEM_SUFFIX)
7596 {
7597 if (!check_word_reg ())
7598 return 0;
7599 }
7600 else if (intel_syntax
7601 && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
7602 /* Do nothing if the instruction is going to ignore the prefix. */
7603 ;
7604 else
7605 abort ();
7606
7607 /* Undo the movsx/movzx change done above. */
7608 i.operands = numop;
7609 }
7610 else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
7611 && !i.suffix)
7612 {
7613 i.suffix = stackop_size;
7614 if (stackop_size == LONG_MNEM_SUFFIX)
7615 {
7616 /* stackop_size is set to LONG_MNEM_SUFFIX for the
7617 .code16gcc directive to support 16-bit mode with
7618 32-bit address. For IRET without a suffix, generate
7619 16-bit IRET (opcode 0xcf) to return from an interrupt
7620 handler. */
7621 if (i.tm.base_opcode == 0xcf)
7622 {
7623 i.suffix = WORD_MNEM_SUFFIX;
7624 as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
7625 }
7626 /* Warn about changed behavior for segment register push/pop. */
7627 else if ((i.tm.base_opcode | 1) == 0x07)
7628 as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
7629 insn_name (&i.tm));
7630 }
7631 }
7632 else if (!i.suffix
7633 && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
7634 || i.tm.opcode_modifier.jump == JUMP_BYTE
7635 || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
7636 || (i.tm.opcode_space == SPACE_0F
7637 && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
7638 && i.tm.extension_opcode <= 3)))
7639 {
7640 switch (flag_code)
7641 {
7642 case CODE_64BIT:
7643 if (!i.tm.opcode_modifier.no_qsuf)
7644 {
7645 if (i.tm.opcode_modifier.jump == JUMP_BYTE
7646 || i.tm.opcode_modifier.no_lsuf)
7647 i.suffix = QWORD_MNEM_SUFFIX;
7648 break;
7649 }
7650 /* Fall through. */
7651 case CODE_32BIT:
7652 if (!i.tm.opcode_modifier.no_lsuf)
7653 i.suffix = LONG_MNEM_SUFFIX;
7654 break;
7655 case CODE_16BIT:
7656 if (!i.tm.opcode_modifier.no_wsuf)
7657 i.suffix = WORD_MNEM_SUFFIX;
7658 break;
7659 }
7660 }
7661
7662 if (!i.suffix
7663 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7664 /* Also cover lret/retf/iret in 64-bit mode. */
7665 || (flag_code == CODE_64BIT
7666 && !i.tm.opcode_modifier.no_lsuf
7667 && !i.tm.opcode_modifier.no_qsuf))
7668 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7669 /* Explicit sizing prefixes are assumed to disambiguate insns. */
7670 && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
7671 /* Accept FLDENV et al without suffix. */
7672 && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
7673 {
7674 unsigned int suffixes, evex = 0;
7675
7676 suffixes = !i.tm.opcode_modifier.no_bsuf;
7677 if (!i.tm.opcode_modifier.no_wsuf)
7678 suffixes |= 1 << 1;
7679 if (!i.tm.opcode_modifier.no_lsuf)
7680 suffixes |= 1 << 2;
7681 if (!i.tm.opcode_modifier.no_ssuf)
7682 suffixes |= 1 << 4;
7683 if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
7684 suffixes |= 1 << 5;
7685
7686 /* For [XYZ]MMWORD operands inspect operand sizes. While generally
7687 also suitable for AT&T syntax mode, it was requested that this be
7688 restricted to just Intel syntax. */
7689 if (intel_syntax && is_any_vex_encoding (&i.tm)
7690 && !i.broadcast.type && !i.broadcast.bytes)
7691 {
7692 unsigned int op;
7693
7694 for (op = 0; op < i.tm.operands; ++op)
7695 {
7696 if (vector_size < VSZ512)
7697 {
7698 i.tm.operand_types[op].bitfield.zmmword = 0;
7699 if (vector_size < VSZ256)
7700 {
7701 i.tm.operand_types[op].bitfield.ymmword = 0;
7702 if (i.tm.operand_types[op].bitfield.xmmword
7703 && (i.tm.opcode_modifier.evex == EVEXDYN
7704 || (!i.tm.opcode_modifier.evex
7705 && is_evex_encoding (&i.tm))))
7706 i.tm.opcode_modifier.evex = EVEX128;
7707 }
7708 else if (i.tm.operand_types[op].bitfield.ymmword
7709 && !i.tm.operand_types[op].bitfield.xmmword
7710 && (i.tm.opcode_modifier.evex == EVEXDYN
7711 || (!i.tm.opcode_modifier.evex
7712 && is_evex_encoding (&i.tm))))
7713 i.tm.opcode_modifier.evex = EVEX256;
7714 }
7715 else if (is_evex_encoding (&i.tm)
7716 && !cpu_arch_flags.bitfield.cpuavx512vl)
7717 {
7718 if (i.tm.operand_types[op].bitfield.ymmword)
7719 i.tm.operand_types[op].bitfield.xmmword = 0;
7720 if (i.tm.operand_types[op].bitfield.zmmword)
7721 i.tm.operand_types[op].bitfield.ymmword = 0;
7722 if (!i.tm.opcode_modifier.evex
7723 || i.tm.opcode_modifier.evex == EVEXDYN)
7724 i.tm.opcode_modifier.evex = EVEX512;
7725 }
7726
7727 if (i.tm.operand_types[op].bitfield.xmmword
7728 + i.tm.operand_types[op].bitfield.ymmword
7729 + i.tm.operand_types[op].bitfield.zmmword < 2)
7730 continue;
7731
7732 /* Any properly sized operand disambiguates the insn. */
7733 if (i.types[op].bitfield.xmmword
7734 || i.types[op].bitfield.ymmword
7735 || i.types[op].bitfield.zmmword)
7736 {
7737 suffixes &= ~(7 << 6);
7738 evex = 0;
7739 break;
7740 }
7741
7742 if ((i.flags[op] & Operand_Mem)
7743 && i.tm.operand_types[op].bitfield.unspecified)
7744 {
7745 if (i.tm.operand_types[op].bitfield.xmmword)
7746 suffixes |= 1 << 6;
7747 if (i.tm.operand_types[op].bitfield.ymmword)
7748 suffixes |= 1 << 7;
7749 if (i.tm.operand_types[op].bitfield.zmmword)
7750 suffixes |= 1 << 8;
7751 if (is_evex_encoding (&i.tm))
7752 evex = EVEX512;
7753 }
7754 }
7755 }
7756
7757 /* Are multiple suffixes / operand sizes allowed? */
7758 if (suffixes & (suffixes - 1))
7759 {
7760 if (intel_syntax
7761 && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
7762 || operand_check == check_error))
7763 {
7764 as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
7765 return 0;
7766 }
7767 if (operand_check == check_error)
7768 {
7769 as_bad (_("no instruction mnemonic suffix given and "
7770 "no register operands; can't size `%s'"), insn_name (&i.tm));
7771 return 0;
7772 }
7773 if (operand_check == check_warning)
7774 as_warn (_("%s; using default for `%s'"),
7775 intel_syntax
7776 ? _("ambiguous operand size")
7777 : _("no instruction mnemonic suffix given and "
7778 "no register operands"),
7779 insn_name (&i.tm));
7780
7781 if (i.tm.opcode_modifier.floatmf)
7782 i.suffix = SHORT_MNEM_SUFFIX;
7783 else if (is_movx)
7784 /* handled below */;
7785 else if (evex)
7786 i.tm.opcode_modifier.evex = evex;
7787 else if (flag_code == CODE_16BIT)
7788 i.suffix = WORD_MNEM_SUFFIX;
7789 else if (!i.tm.opcode_modifier.no_lsuf)
7790 i.suffix = LONG_MNEM_SUFFIX;
7791 else
7792 i.suffix = QWORD_MNEM_SUFFIX;
7793 }
7794 }
7795
7796 if (is_movx)
7797 {
7798 /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
7799 In AT&T syntax, if there is no suffix (warned about above), the default
7800 will be byte extension. */
7801 if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
7802 i.tm.base_opcode |= 1;
7803
7804 /* For further processing, the suffix should represent the destination
7805 (register). This is already the case when one was used with
7806 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
7807 no suffix to begin with. */
7808 if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
7809 {
7810 if (i.types[1].bitfield.word)
7811 i.suffix = WORD_MNEM_SUFFIX;
7812 else if (i.types[1].bitfield.qword)
7813 i.suffix = QWORD_MNEM_SUFFIX;
7814 else
7815 i.suffix = LONG_MNEM_SUFFIX;
7816
7817 i.tm.opcode_modifier.w = 0;
7818 }
7819 }
7820
7821 if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
7822 i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
7823 != (i.tm.operand_types[1].bitfield.class == Reg);
7824
7825 /* Change the opcode based on the operand size given by i.suffix. */
7826 switch (i.suffix)
7827 {
7828 /* Size floating point instruction. */
7829 case LONG_MNEM_SUFFIX:
7830 if (i.tm.opcode_modifier.floatmf)
7831 {
7832 i.tm.base_opcode ^= 4;
7833 break;
7834 }
7835 /* fall through */
7836 case WORD_MNEM_SUFFIX:
7837 case QWORD_MNEM_SUFFIX:
7838 /* It's not a byte; select word/dword operation. */
7839 if (i.tm.opcode_modifier.w)
7840 {
7841 if (i.short_form)
7842 i.tm.base_opcode |= 8;
7843 else
7844 i.tm.base_opcode |= 1;
7845 }
7846 /* fall through */
7847 case SHORT_MNEM_SUFFIX:
7848 /* Now select between word & dword operations via the operand
7849 size prefix, except for instructions that will ignore this
7850 prefix anyway. */
7851 if (i.suffix != QWORD_MNEM_SUFFIX
7852 && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
7853 && !i.tm.opcode_modifier.floatmf
7854 && !is_any_vex_encoding (&i.tm)
7855 && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
7856 || (flag_code == CODE_64BIT
7857 && i.tm.opcode_modifier.jump == JUMP_BYTE)))
7858 {
7859 unsigned int prefix = DATA_PREFIX_OPCODE;
7860
7861 if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
7862 prefix = ADDR_PREFIX_OPCODE;
7863
7864 if (!add_prefix (prefix))
7865 return 0;
7866 }
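/* For instance (illustrative): in 32-bit code "addw $1, %ax" gets the
   0x66 data size prefix here, while "jecxz target" in 64-bit code gets
   the 0x67 address size prefix instead, since its implicit operand is
   address-sized. */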
7867
7868 /* Set REX.W for a 64-bit operand size. */
7869 if (i.suffix == QWORD_MNEM_SUFFIX
7870 && flag_code == CODE_64BIT
7871 && !i.tm.opcode_modifier.norex64
7872 && !i.tm.opcode_modifier.vexw
7873 /* Special case for xchg %rax,%rax: it is a NOP and doesn't
7874 need rex64. */
7875 && ! (i.operands == 2
7876 && i.tm.base_opcode == 0x90
7877 && i.tm.opcode_space == SPACE_BASE
7878 && i.types[0].bitfield.instance == Accum
7879 && i.types[0].bitfield.qword
7880 && i.types[1].bitfield.instance == Accum))
7881 i.rex |= REX_W;
7882
7883 break;
7884
7885 case 0:
7886 /* Select word/dword/qword operation with explicit data sizing prefix
7887 when there are no suitable register operands. */
7888 if (i.tm.opcode_modifier.w
7889 && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
7890 && (!i.reg_operands
7891 || (i.reg_operands == 1
7892 /* ShiftCount */
7893 && (i.tm.operand_types[0].bitfield.instance == RegC
7894 /* InOutPortReg */
7895 || i.tm.operand_types[0].bitfield.instance == RegD
7896 || i.tm.operand_types[1].bitfield.instance == RegD
7897 || i.tm.mnem_off == MN_crc32))))
7898 i.tm.base_opcode |= 1;
7899 break;
7900 }
7901
7902 if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
7903 {
7904 gas_assert (!i.suffix);
7905 gas_assert (i.reg_operands);
7906
7907 if (i.tm.operand_types[0].bitfield.instance == Accum
7908 || i.operands == 1)
7909 {
7910 /* The address size override prefix changes the size of the
7911 first operand. */
7912 if (flag_code == CODE_64BIT
7913 && i.op[0].regs->reg_type.bitfield.word)
7914 {
7915 as_bad (_("16-bit addressing unavailable for `%s'"),
7916 insn_name (&i.tm));
7917 return 0;
7918 }
7919
7920 if ((flag_code == CODE_32BIT
7921 ? i.op[0].regs->reg_type.bitfield.word
7922 : i.op[0].regs->reg_type.bitfield.dword)
7923 && !add_prefix (ADDR_PREFIX_OPCODE))
7924 return 0;
7925 }
7926 else
7927 {
7928 /* Check for invalid register operands when the address size
7929 override prefix changes the size of register operands. */
7930 unsigned int op;
7931 enum { need_word, need_dword, need_qword } need;
7932
7933 /* Check the register operand for the address size prefix if the
7934 memory operand has no real registers: e.g. a bare symbol, a DISP,
7935 or the bogus (x32-only) symbol(%rip) when symbol(%eip) is meant. */
7936 if (i.mem_operands == 1
7937 && i.reg_operands == 1
7938 && i.operands == 2
7939 && i.types[1].bitfield.class == Reg
7940 && (flag_code == CODE_32BIT
7941 ? i.op[1].regs->reg_type.bitfield.word
7942 : i.op[1].regs->reg_type.bitfield.dword)
7943 && ((i.base_reg == NULL && i.index_reg == NULL)
7944 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
7945 || (x86_elf_abi == X86_64_X32_ABI
7946 && i.base_reg
7947 && i.base_reg->reg_num == RegIP
7948 && i.base_reg->reg_type.bitfield.qword))
7949 #else
7950 || 0)
7951 #endif
7952 && !add_prefix (ADDR_PREFIX_OPCODE))
7953 return 0;
7954
7955 if (flag_code == CODE_32BIT)
7956 need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
7957 else if (i.prefix[ADDR_PREFIX])
7958 need = need_dword;
7959 else
7960 need = flag_code == CODE_64BIT ? need_qword : need_word;
7961
7962 for (op = 0; op < i.operands; op++)
7963 {
7964 if (i.types[op].bitfield.class != Reg)
7965 continue;
7966
7967 switch (need)
7968 {
7969 case need_word:
7970 if (i.op[op].regs->reg_type.bitfield.word)
7971 continue;
7972 break;
7973 case need_dword:
7974 if (i.op[op].regs->reg_type.bitfield.dword)
7975 continue;
7976 break;
7977 case need_qword:
7978 if (i.op[op].regs->reg_type.bitfield.qword)
7979 continue;
7980 break;
7981 }
7982
7983 as_bad (_("invalid register operand size for `%s'"),
7984 insn_name (&i.tm));
7985 return 0;
7986 }
7987 }
7988 }
7989
7990 return 1;
7991 }
7992
7993 static int
7994 check_byte_reg (void)
7995 {
7996 int op;
7997
7998 for (op = i.operands; --op >= 0;)
7999 {
8000 /* Skip non-register operands. */
8001 if (i.types[op].bitfield.class != Reg)
8002 continue;
8003
8004 /* If this is an eight bit register, it's OK. Anything else of
8005 class Reg is rejected below, except for the special cases
8006 checked next. */
8007 if (i.types[op].bitfield.byte)
8008 continue;
8009
8010 /* I/O port address operands are OK too. */
8011 if (i.tm.operand_types[op].bitfield.instance == RegD
8012 && i.tm.operand_types[op].bitfield.word)
8013 continue;
8014
8015 /* crc32 only wants its source operand checked here. */
8016 if (i.tm.mnem_off == MN_crc32 && op != 0)
8017 continue;
8018
8019 /* Any other register is bad. */
8020 as_bad (_("`%s%s' not allowed with `%s%c'"),
8021 register_prefix, i.op[op].regs->reg_name,
8022 insn_name (&i.tm), i.suffix);
8023 return 0;
8024 }
8025 return 1;
8026 }
8027
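/* An illustrative case for the checks below: "addl %ax, %ecx" is
   rejected with "incorrect register `%ax' used with `l' suffix",
   while "movzbl %al, %ecx" passes because its template asks for a
   byte source. */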
8028 static int
8029 check_long_reg (void)
8030 {
8031 int op;
8032
8033 for (op = i.operands; --op >= 0;)
8034 /* Skip non-register operands. */
8035 if (i.types[op].bitfield.class != Reg)
8036 continue;
8037 /* Reject eight bit registers, except where the template requires
8038 them (e.g. movzb). */
8039 else if (i.types[op].bitfield.byte
8040 && (i.tm.operand_types[op].bitfield.class == Reg
8041 || i.tm.operand_types[op].bitfield.instance == Accum)
8042 && (i.tm.operand_types[op].bitfield.word
8043 || i.tm.operand_types[op].bitfield.dword))
8044 {
8045 as_bad (_("`%s%s' not allowed with `%s%c'"),
8046 register_prefix,
8047 i.op[op].regs->reg_name,
8048 insn_name (&i.tm),
8049 i.suffix);
8050 return 0;
8051 }
8052 /* Error if the e prefix on a general reg is missing. */
8053 else if (i.types[op].bitfield.word
8054 && (i.tm.operand_types[op].bitfield.class == Reg
8055 || i.tm.operand_types[op].bitfield.instance == Accum)
8056 && i.tm.operand_types[op].bitfield.dword)
8057 {
8058 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
8059 register_prefix, i.op[op].regs->reg_name,
8060 i.suffix);
8061 return 0;
8062 }
8063 /* Error if the r prefix on a general reg is present. */
8064 else if (i.types[op].bitfield.qword
8065 && (i.tm.operand_types[op].bitfield.class == Reg
8066 || i.tm.operand_types[op].bitfield.instance == Accum)
8067 && i.tm.operand_types[op].bitfield.dword)
8068 {
8069 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
8070 register_prefix, i.op[op].regs->reg_name, i.suffix);
8071 return 0;
8072 }
8073 return 1;
8074 }
8075
8076 static int
8077 check_qword_reg (void)
8078 {
8079 int op;
8080
8081 for (op = i.operands; --op >= 0; )
8082 /* Skip non-register operands. */
8083 if (i.types[op].bitfield.class != Reg)
8084 continue;
8085 /* Reject eight bit registers, except where the template requires
8086 them (e.g. movzb). */
8087 else if (i.types[op].bitfield.byte
8088 && (i.tm.operand_types[op].bitfield.class == Reg
8089 || i.tm.operand_types[op].bitfield.instance == Accum)
8090 && (i.tm.operand_types[op].bitfield.word
8091 || i.tm.operand_types[op].bitfield.dword))
8092 {
8093 as_bad (_("`%s%s' not allowed with `%s%c'"),
8094 register_prefix,
8095 i.op[op].regs->reg_name,
8096 insn_name (&i.tm),
8097 i.suffix);
8098 return 0;
8099 }
8100 /* Error if the r prefix on a general reg is missing. */
8101 else if ((i.types[op].bitfield.word
8102 || i.types[op].bitfield.dword)
8103 && (i.tm.operand_types[op].bitfield.class == Reg
8104 || i.tm.operand_types[op].bitfield.instance == Accum)
8105 && i.tm.operand_types[op].bitfield.qword)
8106 {
8107 /* Prohibit these changes in 64-bit mode, since the
8108 lowering is more complicated. */
8109 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
8110 register_prefix, i.op[op].regs->reg_name, i.suffix);
8111 return 0;
8112 }
8113 return 1;
8114 }
8115
8116 static int
8117 check_word_reg (void)
8118 {
8119 int op;
8120 for (op = i.operands; --op >= 0;)
8121 /* Skip non-register operands. */
8122 if (i.types[op].bitfield.class != Reg)
8123 continue;
8124 /* Reject eight bit registers, except where the template requires
8125 them (e.g. movzb). */
8126 else if (i.types[op].bitfield.byte
8127 && (i.tm.operand_types[op].bitfield.class == Reg
8128 || i.tm.operand_types[op].bitfield.instance == Accum)
8129 && (i.tm.operand_types[op].bitfield.word
8130 || i.tm.operand_types[op].bitfield.dword))
8131 {
8132 as_bad (_("`%s%s' not allowed with `%s%c'"),
8133 register_prefix,
8134 i.op[op].regs->reg_name,
8135 insn_name (&i.tm),
8136 i.suffix);
8137 return 0;
8138 }
8139 /* Error if the e or r prefix on a general reg is present. */
8140 else if ((i.types[op].bitfield.dword
8141 || i.types[op].bitfield.qword)
8142 && (i.tm.operand_types[op].bitfield.class == Reg
8143 || i.tm.operand_types[op].bitfield.instance == Accum)
8144 && i.tm.operand_types[op].bitfield.word)
8145 {
8146 as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
8147 register_prefix, i.op[op].regs->reg_name,
8148 i.suffix);
8149 return 0;
8150 }
8151 return 1;
8152 }
8153
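/* A sketch of what update_imm resolves (illustrative): after matching,
   "add $0x1234, %ax" leaves an Imm16|Imm32|Imm32S overlap; per the
   logic below it narrows to Imm16 when 16-bit operand size is in
   effect, and to Imm32S otherwise. */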
8154 static int
8155 update_imm (unsigned int j)
8156 {
8157 i386_operand_type overlap = i.types[j];
8158
8159 if (i.tm.operand_types[j].bitfield.imm8
8160 && i.tm.operand_types[j].bitfield.imm8s
8161 && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
8162 {
8163 /* This combination is used on 8-bit immediates where e.g. $~0 is
8164 desirable to permit. We're past operand type matching, so simply
8165 put things back in the shape they were before introducing the
8166 distinction between Imm8, Imm8S, and Imm8|Imm8S. */
8167 overlap.bitfield.imm8s = 0;
8168 }
8169
8170 if (overlap.bitfield.imm8
8171 + overlap.bitfield.imm8s
8172 + overlap.bitfield.imm16
8173 + overlap.bitfield.imm32
8174 + overlap.bitfield.imm32s
8175 + overlap.bitfield.imm64 > 1)
8176 {
8177 static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
8178 static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
8179 static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
8180 static const i386_operand_type imm16_32 = { .bitfield =
8181 { .imm16 = 1, .imm32 = 1 }
8182 };
8183 static const i386_operand_type imm16_32s = { .bitfield =
8184 { .imm16 = 1, .imm32s = 1 }
8185 };
8186 static const i386_operand_type imm16_32_32s = { .bitfield =
8187 { .imm16 = 1, .imm32 = 1, .imm32s = 1 }
8188 };
8189
8190 if (i.suffix)
8191 {
8192 i386_operand_type temp;
8193
8194 operand_type_set (&temp, 0);
8195 if (i.suffix == BYTE_MNEM_SUFFIX)
8196 {
8197 temp.bitfield.imm8 = overlap.bitfield.imm8;
8198 temp.bitfield.imm8s = overlap.bitfield.imm8s;
8199 }
8200 else if (i.suffix == WORD_MNEM_SUFFIX)
8201 temp.bitfield.imm16 = overlap.bitfield.imm16;
8202 else if (i.suffix == QWORD_MNEM_SUFFIX)
8203 {
8204 temp.bitfield.imm64 = overlap.bitfield.imm64;
8205 temp.bitfield.imm32s = overlap.bitfield.imm32s;
8206 }
8207 else
8208 temp.bitfield.imm32 = overlap.bitfield.imm32;
8209 overlap = temp;
8210 }
8211 else if (operand_type_equal (&overlap, &imm16_32_32s)
8212 || operand_type_equal (&overlap, &imm16_32)
8213 || operand_type_equal (&overlap, &imm16_32s))
8214 {
8215 if ((flag_code == CODE_16BIT) ^ (i.prefix[DATA_PREFIX] != 0))
8216 overlap = imm16;
8217 else
8218 overlap = imm32s;
8219 }
8220 else if (i.prefix[REX_PREFIX] & REX_W)
8221 overlap = operand_type_and (overlap, imm32s);
8222 else if (i.prefix[DATA_PREFIX])
8223 overlap = operand_type_and (overlap,
8224 flag_code != CODE_16BIT ? imm16 : imm32);
8225 if (overlap.bitfield.imm8
8226 + overlap.bitfield.imm8s
8227 + overlap.bitfield.imm16
8228 + overlap.bitfield.imm32
8229 + overlap.bitfield.imm32s
8230 + overlap.bitfield.imm64 != 1)
8231 {
8232 as_bad (_("no instruction mnemonic suffix given; "
8233 "can't determine immediate size"));
8234 return 0;
8235 }
8236 }
8237 i.types[j] = overlap;
8238
8239 return 1;
8240 }
8241
8242 static int
8243 finalize_imm (void)
8244 {
8245 unsigned int j, n;
8246
8247 /* Update the first 2 immediate operands. */
8248 n = i.operands > 2 ? 2 : i.operands;
8249 if (n)
8250 {
8251 for (j = 0; j < n; j++)
8252 if (update_imm (j) == 0)
8253 return 0;
8254
8255 /* The 3rd operand can't be an immediate operand. */
8256 gas_assert (operand_type_check (i.types[2], imm) == 0);
8257 }
8258
8259 return 1;
8260 }
8261
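/* Illustrative uses of set_rex_vrex: %r9 carries RegRex, so passing it
   with REX_B ORs REX_B into i.rex; %xmm17 carries RegVRex as well,
   which is recorded in i.vrex for the extended EVEX register bits. */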
8262 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
8263 bool do_sse2avx)
8264 {
8265 if (r->reg_flags & RegRex)
8266 {
8267 if (i.rex & rex_bit)
8268 as_bad (_("same type of prefix used twice"));
8269 i.rex |= rex_bit;
8270 }
8271 else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
8272 {
8273 gas_assert (i.vex.register_specifier == r);
8274 i.vex.register_specifier += 8;
8275 }
8276
8277 if (r->reg_flags & RegVRex)
8278 i.vrex |= rex_bit;
8279 }
8280
8281 static int
8282 process_operands (void)
8283 {
8284 /* Default segment register this instruction will use for memory
8285 accesses. NULL means unknown. This is only for optimizing out
8286 unnecessary segment overrides. */
8287 const reg_entry *default_seg = NULL;
8288
8289 /* We only need to check those implicit registers for instructions
8290 with 3 operands or less. */
8291 if (i.operands <= 3)
8292 for (unsigned int j = 0; j < i.operands; j++)
8293 if (i.types[j].bitfield.instance != InstanceNone)
8294 i.reg_operands--;
8295
8296 if (i.tm.opcode_modifier.sse2avx)
8297 {
8298 /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
8299 need converting. */
8300 i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
8301 i.prefix[REX_PREFIX] = 0;
8302 i.rex_encoding = 0;
8303 }
8304 /* ImmExt should be processed after SSE2AVX. */
8305 else if (i.tm.opcode_modifier.immext)
8306 process_immext ();
8307
8308 /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
8309 not ModR/M.rm. To avoid special casing this in build_modrm_byte(), fake a
8310 new destination operand here, while converting the source one to register
8311 number 0. */
8312 if (i.tm.mnem_off == MN_tilezero)
8313 {
8314 i.op[1].regs = i.op[0].regs;
8315 i.op[0].regs -= i.op[0].regs->reg_num;
8316 i.types[1] = i.types[0];
8317 i.tm.operand_types[1] = i.tm.operand_types[0];
8318 i.flags[1] = i.flags[0];
8319 i.operands++;
8320 i.reg_operands++;
8321 i.tm.operands++;
8322 }
8323
8324 if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
8325 {
8326 static const i386_operand_type regxmm = {
8327 .bitfield = { .class = RegSIMD, .xmmword = 1 }
8328 };
8329 unsigned int dupl = i.operands;
8330 unsigned int dest = dupl - 1;
8331 unsigned int j;
8332
8333 /* The destination must be an xmm register. */
8334 gas_assert (i.reg_operands
8335 && MAX_OPERANDS > dupl
8336 && operand_type_equal (&i.types[dest], &regxmm));
8337
8338 if (i.tm.operand_types[0].bitfield.instance == Accum
8339 && i.tm.operand_types[0].bitfield.xmmword)
8340 {
8341 /* Keep xmm0 for instructions with VEX prefix and 3
8342 sources. */
8343 i.tm.operand_types[0].bitfield.instance = InstanceNone;
8344 i.tm.operand_types[0].bitfield.class = RegSIMD;
8345 i.reg_operands++;
8346 goto duplicate;
8347 }
8348
8349 if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
8350 {
8351 gas_assert ((MAX_OPERANDS - 1) > dupl);
8352
8353 /* Add the implicit xmm0 for instructions with VEX prefix
8354 and 3 sources. */
8355 for (j = i.operands; j > 0; j--)
8356 {
8357 i.op[j] = i.op[j - 1];
8358 i.types[j] = i.types[j - 1];
8359 i.tm.operand_types[j] = i.tm.operand_types[j - 1];
8360 i.flags[j] = i.flags[j - 1];
8361 }
8362 i.op[0].regs
8363 = (const reg_entry *) str_hash_find (reg_hash, "xmm0");
8364 i.types[0] = regxmm;
8365 i.tm.operand_types[0] = regxmm;
8366
8367 i.operands += 2;
8368 i.reg_operands += 2;
8369 i.tm.operands += 2;
8370
8371 dupl++;
8372 dest++;
8373 i.op[dupl] = i.op[dest];
8374 i.types[dupl] = i.types[dest];
8375 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8376 i.flags[dupl] = i.flags[dest];
8377 }
8378 else
8379 {
8380 duplicate:
8381 i.operands++;
8382 i.reg_operands++;
8383 i.tm.operands++;
8384
8385 i.op[dupl] = i.op[dest];
8386 i.types[dupl] = i.types[dest];
8387 i.tm.operand_types[dupl] = i.tm.operand_types[dest];
8388 i.flags[dupl] = i.flags[dest];
8389 }
8390
8391 if (i.tm.opcode_modifier.immext)
8392 process_immext ();
8393 }
8394 else if (i.tm.operand_types[0].bitfield.instance == Accum
8395 && i.tm.opcode_modifier.modrm)
8396 {
8397 unsigned int j;
8398
8399 for (j = 1; j < i.operands; j++)
8400 {
8401 i.op[j - 1] = i.op[j];
8402 i.types[j - 1] = i.types[j];
8403
8404 /* We need to adjust fields in i.tm since they are used by
8405 build_modrm_byte. */
8406 i.tm.operand_types [j - 1] = i.tm.operand_types [j];
8407
8408 i.flags[j - 1] = i.flags[j];
8409 }
8410
8411 /* No adjustment to i.reg_operands: This was already done at the top
8412 of the function. */
8413 i.operands--;
8414 i.tm.operands--;
8415 }
8416 else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_QUAD_GROUP)
8417 {
8418 unsigned int regnum, first_reg_in_group, last_reg_in_group;
8419
8420 /* The second operand must be {x,y,z}mmN, where N is a multiple of 4. */
8421 gas_assert (i.operands >= 2 && i.types[1].bitfield.class == RegSIMD);
8422 regnum = register_number (i.op[1].regs);
8423 first_reg_in_group = regnum & ~3;
8424 last_reg_in_group = first_reg_in_group + 3;
8425 if (regnum != first_reg_in_group)
8426 as_warn (_("source register `%s%s' implicitly denotes"
8427 " `%s%.3s%u' to `%s%.3s%u' source group in `%s'"),
8428 register_prefix, i.op[1].regs->reg_name,
8429 register_prefix, i.op[1].regs->reg_name, first_reg_in_group,
8430 register_prefix, i.op[1].regs->reg_name, last_reg_in_group,
8431 insn_name (&i.tm));
8432 }
8433 else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
8434 {
8435 /* The imul $imm, %reg instruction is converted into
8436 imul $imm, %reg, %reg, and the clr %reg instruction
8437 is converted into xor %reg, %reg. */
8438
8439 unsigned int first_reg_op;
8440
8441 if (operand_type_check (i.types[0], reg))
8442 first_reg_op = 0;
8443 else
8444 first_reg_op = 1;
8445 /* Pretend we saw the extra register operand. */
8446 gas_assert (i.reg_operands == 1
8447 && i.op[first_reg_op + 1].regs == 0);
8448 i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
8449 i.types[first_reg_op + 1] = i.types[first_reg_op];
8450 i.operands++;
8451 i.reg_operands++;
8452 }
8453
8454 if (i.tm.opcode_modifier.modrm)
8455 {
8456 /* The opcode is completed (modulo i.tm.extension_opcode which
8457 must be put into the modrm byte). Now, we make the modrm and
8458 index base bytes based on all the info we've collected. */
8459
8460 default_seg = build_modrm_byte ();
8461
8462 if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
8463 {
8464 /* Warn about some common errors, but press on regardless. */
8465 if (i.operands == 2)
8466 {
8467 /* Reversed arguments on faddp or fmulp. */
8468 as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
8469 register_prefix, i.op[!intel_syntax].regs->reg_name,
8470 register_prefix, i.op[intel_syntax].regs->reg_name);
8471 }
8472 else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
8473 {
8474 /* Extraneous `l' suffix on fp insn. */
8475 as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
8476 register_prefix, i.op[0].regs->reg_name);
8477 }
8478 }
8479 }
8480 else if (i.types[0].bitfield.class == SReg && !dot_insn ())
8481 {
8482 if (flag_code != CODE_64BIT
8483 ? i.tm.base_opcode == POP_SEG_SHORT
8484 && i.op[0].regs->reg_num == 1
8485 : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
8486 && i.op[0].regs->reg_num < 4)
8487 {
8488 as_bad (_("you can't `%s %s%s'"),
8489 insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
8490 return 0;
8491 }
8492 if (i.op[0].regs->reg_num > 3
8493 && i.tm.opcode_space == SPACE_BASE )
8494 {
8495 i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
8496 i.tm.opcode_space = SPACE_0F;
8497 }
8498 i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
8499 }
8500 else if (i.tm.opcode_space == SPACE_BASE
8501 && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
8502 {
8503 default_seg = reg_ds;
8504 }
8505 else if (i.tm.opcode_modifier.isstring)
8506 {
8507 /* For the string instructions that allow a segment override
8508 on one of their operands, the default segment is ds. */
8509 default_seg = reg_ds;
8510 }
8511 else if (i.short_form)
8512 {
8513 /* The register operand is in the 1st or 2nd non-immediate operand. */
8514 const reg_entry *r = i.op[i.imm_operands].regs;
8515
8516 if (!dot_insn ()
8517 && r->reg_type.bitfield.instance == Accum
8518 && i.op[i.imm_operands + 1].regs)
8519 r = i.op[i.imm_operands + 1].regs;
8520 /* Register goes in low 3 bits of opcode. */
8521 i.tm.base_opcode |= r->reg_num;
8522 set_rex_vrex (r, REX_B, false);
8523
8524 if (dot_insn () && i.reg_operands == 2)
8525 {
8526 gas_assert (is_any_vex_encoding (&i.tm)
8527 || i.vec_encoding != vex_encoding_default);
8528 i.vex.register_specifier = i.op[i.operands - 1].regs;
8529 }
8530 }
8531 else if (i.reg_operands == 1
8532 && !i.flags[i.operands - 1]
8533 && i.tm.operand_types[i.operands - 1].bitfield.instance
8534 == InstanceNone)
8535 {
8536 gas_assert (is_any_vex_encoding (&i.tm)
8537 || i.vec_encoding != vex_encoding_default);
8538 i.vex.register_specifier = i.op[i.operands - 1].regs;
8539 }
8540
8541 if ((i.seg[0] || i.prefix[SEG_PREFIX])
8542 && i.tm.mnem_off == MN_lea)
8543 {
8544 if (!quiet_warnings)
8545 as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
8546 if (optimize && !i.no_optimize)
8547 {
8548 i.seg[0] = NULL;
8549 i.prefix[SEG_PREFIX] = 0;
8550 }
8551 }
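/* E.g. "lea %fs:4(%eax), %ebx" triggers the warning above; since lea
   performs no memory access, the override is dropped when optimizing. */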
8552
8553 /* If a segment was explicitly specified, and the specified segment
8554 is neither the default nor the one already recorded from a prefix,
8555 use an opcode prefix to select it. If we never figured out what
8556 the default segment is, then default_seg will be NULL at this
8557 point, and the specified segment prefix will always be used. */
8558 if (i.seg[0]
8559 && i.seg[0] != default_seg
8560 && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
8561 {
8562 if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
8563 return 0;
8564 }
8565 return 1;
8566 }
8567
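/* A worked example of the encoding done below (illustrative):
   "mov 4(%ebp,%esi,2), %eax" assembles to 8b 44 75 04 -- opcode 0x8b,
   ModR/M 0x44 (mod=01, reg=000 i.e. %eax, rm=100 i.e. SIB follows),
   SIB 0x75 (scale=01 i.e. *2, index=110 i.e. %esi, base=101 i.e. %ebp)
   and the 8-bit displacement 0x04. */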
8568 static const reg_entry *
8569 build_modrm_byte (void)
8570 {
8571 const reg_entry *default_seg = NULL;
8572 unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
8573 /* Compensate for kludge in md_assemble(). */
8574 + i.tm.operand_types[0].bitfield.imm1;
8575 unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
8576 unsigned int v, op, reg_slot = ~0;
8577
8578 /* Accumulator (in particular %st), shift count (%cl), and the like
8579 need to be skipped just like immediate operands are. */
8580 if (i.tm.operand_types[source].bitfield.instance)
8581 ++source;
8582 while (i.tm.operand_types[dest].bitfield.instance)
8583 --dest;
8584
8585 for (op = source; op < i.operands; ++op)
8586 if (i.tm.operand_types[op].bitfield.baseindex)
8587 break;
8588
8589 if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None) == 4)
8590 {
8591 expressionS *exp;
8592
8593 /* There are 3 kinds of instructions:
8594 1. 5 operands: 4 register operands or 3 register operands
8595 plus 1 memory operand plus one Imm4 operand, VexXDS, and
8596 VexW0 or VexW1. The destination must be either XMM, YMM or
8597 ZMM register.
8598 2. 4 operands: 4 register operands or 3 register operands
8599 plus 1 memory operand, with VexXDS.
8600 3. Other equivalent combinations when coming from s_insn(). */
8601 gas_assert (i.tm.opcode_modifier.vexvvvv
8602 && i.tm.opcode_modifier.vexw);
8603 gas_assert (dot_insn ()
8604 || i.tm.operand_types[dest].bitfield.class == RegSIMD);
8605
8606 /* Of the first two non-immediate operands, the one whose template
8607 does not allow a memory operand is encoded in the immediate operand. */
8608 if (source == op)
8609 reg_slot = source + 1;
8610 else
8611 reg_slot = source++;
8612
8613 if (!dot_insn ())
8614 {
8615 gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
8616 gas_assert (!(i.op[reg_slot].regs->reg_flags & RegVRex));
8617 }
8618 else
8619 gas_assert (i.tm.operand_types[reg_slot].bitfield.class != ClassNone);
8620
8621 if (i.imm_operands == 0)
8622 {
8623 /* When there is no immediate operand, generate an 8bit
8624 immediate operand to encode the first operand. */
8625 exp = &im_expressions[i.imm_operands++];
8626 i.op[i.operands].imms = exp;
8627 i.types[i.operands].bitfield.imm8 = 1;
8628 i.operands++;
8629
8630 exp->X_op = O_constant;
8631 }
8632 else
8633 {
8634 gas_assert (i.imm_operands == 1);
8635 gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
8636 gas_assert (!i.tm.opcode_modifier.immext);
8637
8638 /* Turn on Imm8 again so that output_imm will generate it. */
8639 i.types[0].bitfield.imm8 = 1;
8640
8641 exp = i.op[0].imms;
8642 }
8643 exp->X_add_number |= register_number (i.op[reg_slot].regs)
8644 << (3 + !(is_evex_encoding (&i.tm)
8645 || i.vec_encoding == vex_encoding_evex));
8646 }
8647
8648 for (v = source + 1; v < dest; ++v)
8649 if (v != reg_slot)
8650 break;
8651 if (v >= dest)
8652 v = ~0;
8653 if (i.tm.extension_opcode != None)
8654 {
8655 if (dest != source)
8656 v = dest;
8657 dest = ~0;
8658 }
8659 gas_assert (source < dest);
8660 if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
8661 && source != op)
8662 {
8663 unsigned int tmp = source;
8664
8665 source = v;
8666 v = tmp;
8667 }
8668
8669 if (v < MAX_OPERANDS)
8670 {
8671 gas_assert (i.tm.opcode_modifier.vexvvvv);
8672 i.vex.register_specifier = i.op[v].regs;
8673 }
8674
8675 if (op < i.operands)
8676 {
8677 if (i.mem_operands)
8678 {
8679 unsigned int fake_zero_displacement = 0;
8680
8681 gas_assert (i.flags[op] & Operand_Mem);
8682
8683 if (i.tm.opcode_modifier.sib)
8684 {
8685 /* The index register of VSIB shouldn't be RegIZ. */
8686 if (i.tm.opcode_modifier.sib != SIBMEM
8687 && i.index_reg->reg_num == RegIZ)
8688 abort ();
8689
8690 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8691 if (!i.base_reg)
8692 {
8693 i.sib.base = NO_BASE_REGISTER;
8694 i.sib.scale = i.log2_scale_factor;
8695 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8696 i.types[op].bitfield.disp32 = 1;
8697 }
8698
8699 /* Since the mandatory SIB always has an index register, the
8700 code logic remains unchanged. The non-mandatory SIB
8701 without an index register is allowed and will be handled
8702 later. */
8703 if (i.index_reg)
8704 {
8705 if (i.index_reg->reg_num == RegIZ)
8706 i.sib.index = NO_INDEX_REGISTER;
8707 else
8708 i.sib.index = i.index_reg->reg_num;
8709 set_rex_vrex (i.index_reg, REX_X, false);
8710 }
8711 }
8712
8713 default_seg = reg_ds;
8714
8715 if (i.base_reg == 0)
8716 {
8717 i.rm.mode = 0;
8718 if (!i.disp_operands)
8719 fake_zero_displacement = 1;
8720 if (i.index_reg == 0)
8721 {
8722 /* This checks for both VSIB and the mandatory non-vector SIB. */
8723 gas_assert (!i.tm.opcode_modifier.sib
8724 || i.tm.opcode_modifier.sib == SIBMEM);
8725 /* Operand is just <disp> */
8726 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8727 if (flag_code == CODE_64BIT)
8728 {
8729 /* 64bit mode replaces 32bit absolute addressing
8730 with RIP relative addressing, so absolute
8731 addressing has to be encoded via one of the
8732 redundant SIB forms. */
8733 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8734 i.sib.base = NO_BASE_REGISTER;
8735 i.sib.index = NO_INDEX_REGISTER;
8736 i.types[op].bitfield.disp32 = 1;
8737 }
8738 else if ((flag_code == CODE_16BIT)
8739 ^ (i.prefix[ADDR_PREFIX] != 0))
8740 {
8741 i.rm.regmem = NO_BASE_REGISTER_16;
8742 i.types[op].bitfield.disp16 = 1;
8743 }
8744 else
8745 {
8746 i.rm.regmem = NO_BASE_REGISTER;
8747 i.types[op].bitfield.disp32 = 1;
8748 }
8749 }
8750 else if (!i.tm.opcode_modifier.sib)
8751 {
8752 /* !i.base_reg && i.index_reg */
8753 if (i.index_reg->reg_num == RegIZ)
8754 i.sib.index = NO_INDEX_REGISTER;
8755 else
8756 i.sib.index = i.index_reg->reg_num;
8757 i.sib.base = NO_BASE_REGISTER;
8758 i.sib.scale = i.log2_scale_factor;
8759 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8760 i.types[op] = operand_type_and_not (i.types[op], anydisp);
8761 i.types[op].bitfield.disp32 = 1;
8762 if ((i.index_reg->reg_flags & RegRex) != 0)
8763 i.rex |= REX_X;
8764 }
8765 }
8766 /* RIP addressing for 64bit mode. */
8767 else if (i.base_reg->reg_num == RegIP)
8768 {
8769 gas_assert (!i.tm.opcode_modifier.sib);
8770 i.rm.regmem = NO_BASE_REGISTER;
8771 i.types[op].bitfield.disp8 = 0;
8772 i.types[op].bitfield.disp16 = 0;
8773 i.types[op].bitfield.disp32 = 1;
8774 i.types[op].bitfield.disp64 = 0;
8775 i.flags[op] |= Operand_PCrel;
8776 if (! i.disp_operands)
8777 fake_zero_displacement = 1;
8778 }
8779 else if (i.base_reg->reg_type.bitfield.word)
8780 {
8781 gas_assert (!i.tm.opcode_modifier.sib);
8782 switch (i.base_reg->reg_num)
8783 {
8784 case 3: /* (%bx) */
8785 if (i.index_reg == 0)
8786 i.rm.regmem = 7;
8787 else /* (%bx,%si) -> 0, or (%bx,%di) -> 1 */
8788 i.rm.regmem = i.index_reg->reg_num - 6;
8789 break;
8790 case 5: /* (%bp) */
8791 default_seg = reg_ss;
8792 if (i.index_reg == 0)
8793 {
8794 i.rm.regmem = 6;
8795 if (operand_type_check (i.types[op], disp) == 0)
8796 {
8797 /* fake (%bp) into 0(%bp) */
8798 if (i.disp_encoding == disp_encoding_16bit)
8799 i.types[op].bitfield.disp16 = 1;
8800 else
8801 i.types[op].bitfield.disp8 = 1;
8802 fake_zero_displacement = 1;
8803 }
8804 }
8805 else /* (%bp,%si) -> 2, or (%bp,%di) -> 3 */
8806 i.rm.regmem = i.index_reg->reg_num - 6 + 2;
8807 break;
8808 default: /* (%si) -> 4 or (%di) -> 5 */
8809 i.rm.regmem = i.base_reg->reg_num - 6 + 4;
8810 }
8811 if (!fake_zero_displacement
8812 && !i.disp_operands
8813 && i.disp_encoding)
8814 {
8815 fake_zero_displacement = 1;
8816 if (i.disp_encoding == disp_encoding_8bit)
8817 i.types[op].bitfield.disp8 = 1;
8818 else
8819 i.types[op].bitfield.disp16 = 1;
8820 }
8821 i.rm.mode = mode_from_disp_size (i.types[op]);
8822 }
8823 else /* i.base_reg and 32/64 bit mode */
8824 {
8825 if (operand_type_check (i.types[op], disp))
8826 {
8827 i.types[op].bitfield.disp16 = 0;
8828 i.types[op].bitfield.disp64 = 0;
8829 i.types[op].bitfield.disp32 = 1;
8830 }
8831
8832 if (!i.tm.opcode_modifier.sib)
8833 i.rm.regmem = i.base_reg->reg_num;
8834 if ((i.base_reg->reg_flags & RegRex) != 0)
8835 i.rex |= REX_B;
8836 i.sib.base = i.base_reg->reg_num;
8837 /* x86-64 ignores REX prefix bit here to avoid decoder
8838 complications. */
8839 if (!(i.base_reg->reg_flags & RegRex)
8840 && (i.base_reg->reg_num == EBP_REG_NUM
8841 || i.base_reg->reg_num == ESP_REG_NUM))
8842 default_seg = reg_ss;
8843 if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
8844 {
8845 fake_zero_displacement = 1;
8846 if (i.disp_encoding == disp_encoding_32bit)
8847 i.types[op].bitfield.disp32 = 1;
8848 else
8849 i.types[op].bitfield.disp8 = 1;
8850 }
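/* E.g. "(%ebp)" has no mod=00 encoding (rm/base 101 means disp32
   there), so it is emitted as "0(%ebp)" with mod=01 and a zero
   disp8. */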
8851 i.sib.scale = i.log2_scale_factor;
8852 if (i.index_reg == 0)
8853 {
8854 /* Only check for VSIB. */
8855 gas_assert (i.tm.opcode_modifier.sib != VECSIB128
8856 && i.tm.opcode_modifier.sib != VECSIB256
8857 && i.tm.opcode_modifier.sib != VECSIB512);
8858
8859 /* <disp>(%esp) becomes two byte modrm with no index
8860 register. We've already stored the code for esp
8861 in i.rm.regmem, i.e. ESCAPE_TO_TWO_BYTE_ADDRESSING.
8862 Any base register besides %esp will not use the
8863 extra modrm byte. */
8864 i.sib.index = NO_INDEX_REGISTER;
8865 }
8866 else if (!i.tm.opcode_modifier.sib)
8867 {
8868 if (i.index_reg->reg_num == RegIZ)
8869 i.sib.index = NO_INDEX_REGISTER;
8870 else
8871 i.sib.index = i.index_reg->reg_num;
8872 i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
8873 if ((i.index_reg->reg_flags & RegRex) != 0)
8874 i.rex |= REX_X;
8875 }
8876
8877 if (i.disp_operands
8878 && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
8879 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
8880 i.rm.mode = 0;
8881 else
8882 {
8883 if (!fake_zero_displacement
8884 && !i.disp_operands
8885 && i.disp_encoding)
8886 {
8887 fake_zero_displacement = 1;
8888 if (i.disp_encoding == disp_encoding_8bit)
8889 i.types[op].bitfield.disp8 = 1;
8890 else
8891 i.types[op].bitfield.disp32 = 1;
8892 }
8893 i.rm.mode = mode_from_disp_size (i.types[op]);
8894 }
8895 }
8896
8897 if (fake_zero_displacement)
8898 {
8899 /* Fakes a zero displacement assuming that i.types[op]
8900 holds the correct displacement size. */
8901 expressionS *exp;
8902
8903 gas_assert (i.op[op].disps == 0);
8904 exp = &disp_expressions[i.disp_operands++];
8905 i.op[op].disps = exp;
8906 exp->X_op = O_constant;
8907 exp->X_add_number = 0;
8908 exp->X_add_symbol = (symbolS *) 0;
8909 exp->X_op_symbol = (symbolS *) 0;
8910 }
8911 }
8912 else
8913 {
8914 i.rm.mode = 3;
8915 i.rm.regmem = i.op[op].regs->reg_num;
8916 set_rex_vrex (i.op[op].regs, REX_B, false);
8917 }
8918
8919 if (op == dest)
8920 dest = ~0;
8921 if (op == source)
8922 source = ~0;
8923 }
8924 else
8925 {
8926 i.rm.mode = 3;
8927 if (!i.tm.opcode_modifier.regmem)
8928 {
8929 gas_assert (source < MAX_OPERANDS);
8930 i.rm.regmem = i.op[source].regs->reg_num;
8931 set_rex_vrex (i.op[source].regs, REX_B,
8932 dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
8933 source = ~0;
8934 }
8935 else
8936 {
8937 gas_assert (dest < MAX_OPERANDS);
8938 i.rm.regmem = i.op[dest].regs->reg_num;
8939 set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
8940 dest = ~0;
8941 }
8942 }
8943
8944 /* Fill in i.rm.reg field with extension opcode (if any) or the
8945 appropriate register. */
8946 if (i.tm.extension_opcode != None)
8947 i.rm.reg = i.tm.extension_opcode;
8948 else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
8949 {
8950 i.rm.reg = i.op[dest].regs->reg_num;
8951 set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
8952 }
8953 else
8954 {
8955 gas_assert (source < MAX_OPERANDS);
8956 i.rm.reg = i.op[source].regs->reg_num;
8957 set_rex_vrex (i.op[source].regs, REX_R, false);
8958 }
8959
8960 if (flag_code != CODE_64BIT && (i.rex & REX_R))
8961 {
8962 gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
8963 i.rex &= ~REX_R;
8964 add_prefix (LOCK_PREFIX_OPCODE);
8965 }
8966
8967 return default_seg;
8968 }
8969
8970 static INLINE void
8971 frag_opcode_byte (unsigned char byte)
8972 {
8973 if (now_seg != absolute_section)
8974 FRAG_APPEND_1_CHAR (byte);
8975 else
8976 ++abs_section_offset;
8977 }
8978
8979 static unsigned int
8980 flip_code16 (unsigned int code16)
8981 {
8982 gas_assert (i.tm.operands == 1);
8983
8984 return !(i.prefix[REX_PREFIX] & REX_W)
8985 && (code16 ? i.tm.operand_types[0].bitfield.disp32
8986 : i.tm.operand_types[0].bitfield.disp16)
8987 ? CODE16 : 0;
8988 }
8989
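/* Illustrative: "jne target" (target yet unresolved) is emitted here
   as the short form 75 <rel8> inside a relaxable frag; md_convert_frag
   later widens it to 0f 85 <rel32> if the byte displacement turns out
   not to fit. */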
8990 static void
8991 output_branch (void)
8992 {
8993 char *p;
8994 int size;
8995 int code16;
8996 int prefix;
8997 relax_substateT subtype;
8998 symbolS *sym;
8999 offsetT off;
9000
9001 if (now_seg == absolute_section)
9002 {
9003 as_bad (_("relaxable branches not supported in absolute section"));
9004 return;
9005 }
9006
9007 code16 = flag_code == CODE_16BIT ? CODE16 : 0;
9008 size = i.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
9009
9010 prefix = 0;
9011 if (i.prefix[DATA_PREFIX] != 0)
9012 {
9013 prefix = 1;
9014 i.prefixes -= 1;
9015 code16 ^= flip_code16(code16);
9016 }
9017 /* Pentium4 branch hints. */
9018 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
9019 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
9020 {
9021 prefix++;
9022 i.prefixes--;
9023 }
9024 if (i.prefix[REX_PREFIX] != 0)
9025 {
9026 prefix++;
9027 i.prefixes--;
9028 }
9029
9030 /* BND prefixed jump. */
9031 if (i.prefix[BND_PREFIX] != 0)
9032 {
9033 prefix++;
9034 i.prefixes--;
9035 }
9036
9037 if (i.prefixes != 0)
9038 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9039
9040 /* It's always a symbol; end the frag and set up for relaxation.
9041 Make sure there is enough room in this frag for the largest
9042 instruction we may generate in md_convert_frag. This is 2
9043 bytes for the opcode and room for the prefix and largest
9044 displacement. */
9045 frag_grow (prefix + 2 + 4);
9046 /* Prefix and 1 opcode byte go in fr_fix. */
9047 p = frag_more (prefix + 1);
9048 if (i.prefix[DATA_PREFIX] != 0)
9049 *p++ = DATA_PREFIX_OPCODE;
9050 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
9051 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
9052 *p++ = i.prefix[SEG_PREFIX];
9053 if (i.prefix[BND_PREFIX] != 0)
9054 *p++ = BND_PREFIX_OPCODE;
9055 if (i.prefix[REX_PREFIX] != 0)
9056 *p++ = i.prefix[REX_PREFIX];
9057 *p = i.tm.base_opcode;
9058
9059 if ((unsigned char) *p == JUMP_PC_RELATIVE)
9060 subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
9061 else if (cpu_arch_flags.bitfield.cpui386)
9062 subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
9063 else
9064 subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
9065 subtype |= code16;
9066
9067 sym = i.op[0].disps->X_add_symbol;
9068 off = i.op[0].disps->X_add_number;
9069
9070 if (i.op[0].disps->X_op != O_constant
9071 && i.op[0].disps->X_op != O_symbol)
9072 {
9073 /* Handle complex expressions. */
9074 sym = make_expr_symbol (i.op[0].disps);
9075 off = 0;
9076 }
9077
9078 frag_now->tc_frag_data.code64 = flag_code == CODE_64BIT;
9079
9080 /* 1 possible extra opcode + 4 byte displacement go in var part.
9081 Pass reloc in fr_var. */
9082 frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
9083 }
9084
9085 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9086 /* Return TRUE iff PLT32 relocation should be used for branching to
9087 symbol S. */
9088
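/* Illustrative: for "call foo" in 64-bit code with foo undefined at
   assembly time, output_jump below uses R_X86_64_PLT32 rather than
   R_X86_64_PC32; the linker may still resolve it to a direct branch
   if foo turns out to be local. */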
9089 static bool
9090 need_plt32_p (symbolS *s)
9091 {
9092 /* PLT32 relocation is ELF only. */
9093 if (!IS_ELF)
9094 return false;
9095
9096 #ifdef TE_SOLARIS
9097 /* Don't emit PLT32 relocation on Solaris: neither the native linker
9098 nor krtld supports it. */
9099 return false;
9100 #endif
9101
9102 /* Since there is no need to prepare for a PLT branch on x86-64, we
9103 can generate R_X86_64_PLT32, instead of R_X86_64_PC32, and use it
9104 as a marker for 32-bit PC-relative branches. */
9105 if (!object_64bit)
9106 return false;
9107
9108 if (s == NULL)
9109 return false;
9110
9111 /* Weak or undefined symbols need a PLT32 relocation. */
9112 if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
9113 return true;
9114
9115 /* A non-global symbol doesn't need a PLT32 relocation. */
9116 if (! S_IS_EXTERNAL (s))
9117 return false;
9118
9119 /* Other global symbols need a PLT32 relocation. NB: Symbols with
9120 non-default visibility are treated as normal global symbols,
9121 so that the PLT32 relocation can be used as a marker for 32-bit
9122 PC-relative branches. This is useful for linker relaxation. */
9123 return true;
9124 }
9125 #endif
9126
9127 static void
9128 output_jump (void)
9129 {
9130 char *p;
9131 int size;
9132 fixS *fixP;
9133 bfd_reloc_code_real_type jump_reloc = i.reloc[0];
9134
9135 if (i.tm.opcode_modifier.jump == JUMP_BYTE)
9136 {
9137 /* This is a loop or jecxz type instruction. */
9138 size = 1;
9139 if (i.prefix[ADDR_PREFIX] != 0)
9140 {
9141 frag_opcode_byte (ADDR_PREFIX_OPCODE);
9142 i.prefixes -= 1;
9143 }
9144 /* Pentium4 branch hints. */
9145 if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
9146 || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
9147 {
9148 frag_opcode_byte (i.prefix[SEG_PREFIX]);
9149 i.prefixes--;
9150 }
9151 }
9152 else
9153 {
9154 int code16;
9155
9156 code16 = 0;
9157 if (flag_code == CODE_16BIT)
9158 code16 = CODE16;
9159
9160 if (i.prefix[DATA_PREFIX] != 0)
9161 {
9162 frag_opcode_byte (DATA_PREFIX_OPCODE);
9163 i.prefixes -= 1;
9164 code16 ^= flip_code16(code16);
9165 }
9166
9167 size = 4;
9168 if (code16)
9169 size = 2;
9170 }
9171
9172 /* BND prefixed jump. */
9173 if (i.prefix[BND_PREFIX] != 0)
9174 {
9175 frag_opcode_byte (i.prefix[BND_PREFIX]);
9176 i.prefixes -= 1;
9177 }
9178
9179 if (i.prefix[REX_PREFIX] != 0)
9180 {
9181 frag_opcode_byte (i.prefix[REX_PREFIX]);
9182 i.prefixes -= 1;
9183 }
9184
9185 if (i.prefixes != 0)
9186 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9187
9188 if (now_seg == absolute_section)
9189 {
9190 abs_section_offset += i.opcode_length + size;
9191 return;
9192 }
9193
9194 p = frag_more (i.opcode_length + size);
9195 switch (i.opcode_length)
9196 {
9197 case 2:
9198 *p++ = i.tm.base_opcode >> 8;
9199 /* Fall through. */
9200 case 1:
9201 *p++ = i.tm.base_opcode;
9202 break;
9203 default:
9204 abort ();
9205 }
9206
9207 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9208 if (flag_code == CODE_64BIT && size == 4
9209 && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
9210 && need_plt32_p (i.op[0].disps->X_add_symbol))
9211 jump_reloc = BFD_RELOC_X86_64_PLT32;
9212 #endif
9213
9214 jump_reloc = reloc (size, 1, 1, jump_reloc);
9215
9216 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9217 i.op[0].disps, 1, jump_reloc);
9218
9219 /* All jumps handled here are signed, but don't unconditionally use a
9220 signed limit check for 32 and 16 bit jumps as we want to allow wrap
9221 around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
9222 respectively. */
9223 switch (size)
9224 {
9225 case 1:
9226 fixP->fx_signed = 1;
9227 break;
9228
9229 case 2:
9230 if (i.tm.mnem_off == MN_xbegin)
9231 fixP->fx_signed = 1;
9232 break;
9233
9234 case 4:
9235 if (flag_code == CODE_64BIT)
9236 fixP->fx_signed = 1;
9237 break;
9238 }
9239 }
9240
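/* Illustrative: in 32-bit code, "ljmp $0x10, $0x2000" comes through
   here as ea 00 20 00 00 10 00 -- the opcode 0xea, the 4-byte offset,
   then the 2-byte segment selector. */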
9241 static void
9242 output_interseg_jump (void)
9243 {
9244 char *p;
9245 int size;
9246 int prefix;
9247 int code16;
9248
9249 code16 = 0;
9250 if (flag_code == CODE_16BIT)
9251 code16 = CODE16;
9252
9253 prefix = 0;
9254 if (i.prefix[DATA_PREFIX] != 0)
9255 {
9256 prefix = 1;
9257 i.prefixes -= 1;
9258 code16 ^= CODE16;
9259 }
9260
9261 gas_assert (!i.prefix[REX_PREFIX]);
9262
9263 size = 4;
9264 if (code16)
9265 size = 2;
9266
9267 if (i.prefixes != 0)
9268 as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
9269
9270 if (now_seg == absolute_section)
9271 {
9272 abs_section_offset += prefix + 1 + 2 + size;
9273 return;
9274 }
9275
9276 /* 1 byte for the opcode; "size" bytes for the offset; 2 for the segment. */
9277 p = frag_more (prefix + 1 + 2 + size);
9278
9279 if (i.prefix[DATA_PREFIX] != 0)
9280 *p++ = DATA_PREFIX_OPCODE;
9281
9282 if (i.prefix[REX_PREFIX] != 0)
9283 *p++ = i.prefix[REX_PREFIX];
9284
9285 *p++ = i.tm.base_opcode;
9286 if (i.op[1].imms->X_op == O_constant)
9287 {
9288 offsetT n = i.op[1].imms->X_add_number;
9289
9290 if (size == 2
9291 && !fits_in_unsigned_word (n)
9292 && !fits_in_signed_word (n))
9293 {
9294 as_bad (_("16-bit jump out of range"));
9295 return;
9296 }
9297 md_number_to_chars (p, n, size);
9298 }
9299 else
9300 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
9301 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
9302
9303 p += size;
9304 if (i.op[0].imms->X_op == O_constant)
9305 md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
9306 else
9307 fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
9308 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
9309 }
9310
9311 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9312 void
9313 x86_cleanup (void)
9314 {
9315 char *p;
9316 asection *seg = now_seg;
9317 subsegT subseg = now_subseg;
9318 asection *sec;
9319 unsigned int alignment, align_size_1;
9320 unsigned int isa_1_descsz, feature_2_descsz, descsz;
9321 unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
9322 unsigned int padding;
9323
9324 if (!IS_ELF || !x86_used_note)
9325 return;
9326
9327 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
9328
9329 /* The .note.gnu.property section layout:
9330
9331 Field Length Contents
9332 ---- ---- ----
9333 n_namsz 4 4
9334 n_descsz 4 The note descriptor size
9335 n_type 4 NT_GNU_PROPERTY_TYPE_0
9336 n_name 4 "GNU"
9337 n_desc n_descsz The program property array
9338 .... .... ....
9339 */
9340
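/* Working the sizes below through for ELFCLASS64 (illustrative):
   isa_1_descsz_raw = 12, aligned to 8 gives isa_1_descsz = 16;
   feature_2_descsz_raw = 16 + 12 = 28, aligned gives 32; thus
   descsz = 32 and the note data occupies 4 * 4 + 32 = 48 bytes. */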
9341 /* Create the .note.gnu.property section. */
9342 sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
9343 bfd_set_section_flags (sec,
9344 (SEC_ALLOC
9345 | SEC_LOAD
9346 | SEC_DATA
9347 | SEC_HAS_CONTENTS
9348 | SEC_READONLY));
9349
9350 if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
9351 {
9352 align_size_1 = 7;
9353 alignment = 3;
9354 }
9355 else
9356 {
9357 align_size_1 = 3;
9358 alignment = 2;
9359 }
9360
9361 bfd_set_section_alignment (sec, alignment);
9362 elf_section_type (sec) = SHT_NOTE;
9363
9364 /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
9365 + 4-byte data */
9366 isa_1_descsz_raw = 4 + 4 + 4;
9367 /* Align GNU_PROPERTY_X86_ISA_1_USED. */
9368 isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
9369
9370 feature_2_descsz_raw = isa_1_descsz;
9371 /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
9372 + 4-byte data */
9373 feature_2_descsz_raw += 4 + 4 + 4;
9374 /* Align GNU_PROPERTY_X86_FEATURE_2_USED. */
9375 feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
9376 & ~align_size_1);
9377
9378 descsz = feature_2_descsz;
9379 /* Section size: n_namsz + n_descsz + n_type + n_name + n_desc. */
9380 p = frag_more (4 + 4 + 4 + 4 + descsz);
9381
9382 /* Write n_namsz. */
9383 md_number_to_chars (p, (valueT) 4, 4);
9384
9385 /* Write n_descsz. */
9386 md_number_to_chars (p + 4, (valueT) descsz, 4);
9387
9388 /* Write n_type. */
9389 md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
9390
9391 /* Write n_name. */
9392 memcpy (p + 4 * 3, "GNU", 4);
9393
9394 /* Write 4-byte type. */
9395 md_number_to_chars (p + 4 * 4,
9396 (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
9397
9398 /* Write 4-byte data size. */
9399 md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
9400
9401 /* Write 4-byte data. */
9402 md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
9403
9404 /* Zero out the padding. */
9405 padding = isa_1_descsz - isa_1_descsz_raw;
9406 if (padding)
9407 memset (p + 4 * 7, 0, padding);
9408
9409 /* Write 4-byte type. */
9410 md_number_to_chars (p + isa_1_descsz + 4 * 4,
9411 (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
9412
9413 /* Write 4-byte data size. */
9414 md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
9415
9416 /* Write 4-byte data. */
9417 md_number_to_chars (p + isa_1_descsz + 4 * 6,
9418 (valueT) x86_feature_2_used, 4);
9419
9420 /* Zero out the padding. */
9421 padding = feature_2_descsz - feature_2_descsz_raw;
9422 if (padding)
9423 memset (p + isa_1_descsz + 4 * 7, 0, padding);
9424
9425 /* We probably can't restore the current segment, for there likely
9426 isn't one yet... */
9427 if (seg && subseg)
9428 subseg_set (seg, subseg);
9429 }
9430
9431 bool
9432 x86_support_sframe_p (void)
9433 {
9434 /* At this time, SFrame stack tracing is supported for the AMD64 ABI only. */
9435 return (x86_elf_abi == X86_64_ABI);
9436 }
9437
9438 bool
9439 x86_sframe_ra_tracking_p (void)
9440 {
9441 /* In AMD64, the return address is always stored on the stack at a fixed
9442 offset from the CFA (provided via x86_sframe_cfa_ra_offset ()).
9443 Do not track it explicitly via an SFrame Frame Row Entry. */
9444 return false;
9445 }
9446
9447 offsetT
9448 x86_sframe_cfa_ra_offset (void)
9449 {
9450 gas_assert (x86_elf_abi == X86_64_ABI);
9451 return (offsetT) -8;
9452 }
9453
9454 unsigned char
9455 x86_sframe_get_abi_arch (void)
9456 {
9457 unsigned char sframe_abi_arch = 0;
9458
9459 if (x86_support_sframe_p ())
9460 {
9461 gas_assert (!target_big_endian);
9462 sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
9463 }
9464
9465 return sframe_abi_arch;
9466 }
9467
9468 #endif
9469
9470 static unsigned int
9471 encoding_length (const fragS *start_frag, offsetT start_off,
9472 const char *frag_now_ptr)
9473 {
9474 unsigned int len = 0;
9475
9476 if (start_frag != frag_now)
9477 {
9478 const fragS *fr = start_frag;
9479
9480 do {
9481 len += fr->fr_fix;
9482 fr = fr->fr_next;
9483 } while (fr && fr != frag_now);
9484 }
9485
9486 return len - start_off + (frag_now_ptr - frag_now->fr_literal);
9487 }
9488
9489 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
9490 be macro-fused with conditional jumps.
9491 NB: If TEST/AND/CMP/ADD/SUB/INC/DEC uses a RIP relative address,
9492 or is one of the following formats:
9493
9494 cmp m, imm
9495 add m, imm
9496 sub m, imm
9497 test m, imm
9498 and m, imm
9499 inc m
9500 dec m
9501
9502 it is not fusible. */
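/* E.g. "cmp %rax, %rbx; jne 1f" may macro-fuse, while
   "cmpl $1, (%rax); jne 1f" (memory plus immediate) may not, which is
   what the checks below implement. */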
9503
9504 static int
9505 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
9506 {
9507 /* No RIP address. */
9508 if (i.base_reg && i.base_reg->reg_num == RegIP)
9509 return 0;
9510
9511 /* No opcodes outside of base encoding space. */
9512 if (i.tm.opcode_space != SPACE_BASE)
9513 return 0;
9514
9515 /* add, sub without add/sub m, imm. */
9516 if (i.tm.base_opcode <= 5
9517 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
9518 || ((i.tm.base_opcode | 3) == 0x83
9519 && (i.tm.extension_opcode == 0x5
9520 || i.tm.extension_opcode == 0x0)))
9521 {
9522 *mf_cmp_p = mf_cmp_alu_cmp;
9523 return !(i.mem_operands && i.imm_operands);
9524 }
9525
9526 /* and without and m, imm. */
9527 if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
9528 || ((i.tm.base_opcode | 3) == 0x83
9529 && i.tm.extension_opcode == 0x4))
9530 {
9531 *mf_cmp_p = mf_cmp_test_and;
9532 return !(i.mem_operands && i.imm_operands);
9533 }
9534
9535 /* test without test m, imm. */
9536 if ((i.tm.base_opcode | 1) == 0x85
9537 || (i.tm.base_opcode | 1) == 0xa9
9538 || ((i.tm.base_opcode | 1) == 0xf7
9539 && i.tm.extension_opcode == 0))
9540 {
9541 *mf_cmp_p = mf_cmp_test_and;
9542 return !(i.mem_operands && i.imm_operands);
9543 }
9544
9545 /* cmp without cmp m, imm. */
9546 if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
9547 || ((i.tm.base_opcode | 3) == 0x83
9548 && (i.tm.extension_opcode == 0x7)))
9549 {
9550 *mf_cmp_p = mf_cmp_alu_cmp;
9551 return !(i.mem_operands && i.imm_operands);
9552 }
9553
9554 /* inc, dec without inc/dec m. */
9555 if ((is_cpu (&i.tm, CpuNo64)
9556 && (i.tm.base_opcode | 0xf) == 0x4f)
9557 || ((i.tm.base_opcode | 1) == 0xff
9558 && i.tm.extension_opcode <= 0x1))
9559 {
9560 *mf_cmp_p = mf_cmp_incdec;
9561 return !i.mem_operands;
9562 }
9563
9564 return 0;
9565 }
9566
9567 /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */
9568
9569 static int
9570 add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
9571 {
9572 /* NB: This doesn't work with COND_JUMP86 without i386. */
9573 if (!align_branch_power
9574 || now_seg == absolute_section
9575 || !cpu_arch_flags.bitfield.cpui386
9576 || !(align_branch & align_branch_fused_bit))
9577 return 0;
9578
9579 if (maybe_fused_with_jcc_p (mf_cmp_p))
9580 {
9581 if (last_insn.kind == last_insn_other
9582 || last_insn.seg != now_seg)
9583 return 1;
9584 if (flag_debug)
9585 as_warn_where (last_insn.file, last_insn.line,
9586 _("`%s` skips -malign-branch-boundary on `%s`"),
9587 last_insn.name, insn_name (&i.tm));
9588 }
9589
9590 return 0;
9591 }
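/* Illustrative: with -malign-branch-boundary=32 and
   -malign-branch=fused+jcc, a fusible "cmp; jne" pair that might cross
   a 32-byte boundary gets a FUSED_JCC_PADDING frag so that padding can
   be inserted ahead of it during relaxation. */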
9592
9593 /* Return 1 if a BRANCH_PREFIX frag should be generated. */
9594
9595 static int
9596 add_branch_prefix_frag_p (void)
9597 {
9598 /* NB: This doesn't work with COND_JUMP86 without i386. Don't add a
9599 prefix to PadLock instructions, since they include prefixes in their opcode. */
9600 if (!align_branch_power
9601 || !align_branch_prefix_size
9602 || now_seg == absolute_section
9603 || is_cpu (&i.tm, CpuPadLock)
9604 || !cpu_arch_flags.bitfield.cpui386)
9605 return 0;
9606
9607 /* Don't add a prefix to an insn that is itself a prefix, or to one
9608 without operands, as a segment prefix would be special then. */
9609 if (!i.operands || i.tm.opcode_modifier.isprefix)
9610 return 0;
9611
9612 if (last_insn.kind == last_insn_other
9613 || last_insn.seg != now_seg)
9614 return 1;
9615
9616 if (flag_debug)
9617 as_warn_where (last_insn.file, last_insn.line,
9618 _("`%s` skips -malign-branch-boundary on `%s`"),
9619 last_insn.name, insn_name (&i.tm));
9620
9621 return 0;
9622 }
9623
9624 /* Return 1 if a BRANCH_PADDING frag should be generated. */
9625
9626 static int
9627 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
9628 enum mf_jcc_kind *mf_jcc_p)
9629 {
9630 int add_padding;
9631
9632 /* NB: This doesn't work with COND_JUMP86 without i386. */
9633 if (!align_branch_power
9634 || now_seg == absolute_section
9635 || !cpu_arch_flags.bitfield.cpui386
9636 || i.tm.opcode_space != SPACE_BASE)
9637 return 0;
9638
9639 add_padding = 0;
9640
9641 /* Check for jcc and direct jmp. */
9642 if (i.tm.opcode_modifier.jump == JUMP)
9643 {
9644 if (i.tm.base_opcode == JUMP_PC_RELATIVE)
9645 {
9646 *branch_p = align_branch_jmp;
9647 add_padding = align_branch & align_branch_jmp_bit;
9648 }
9649 else
9650 {
9651 /* Because J<cc> and JN<cc> share the same group in the macro-fusible
9652 table, ignore the lowest bit. */
9653 *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
9654 *branch_p = align_branch_jcc;
9655 if ((align_branch & align_branch_jcc_bit))
9656 add_padding = 1;
9657 }
9658 }
9659 else if ((i.tm.base_opcode | 1) == 0xc3)
9660 {
9661 /* Near ret. */
9662 *branch_p = align_branch_ret;
9663 if ((align_branch & align_branch_ret_bit))
9664 add_padding = 1;
9665 }
9666 else
9667 {
9668 /* Check for indirect jmp, direct and indirect calls. */
9669 if (i.tm.base_opcode == 0xe8)
9670 {
9671 /* Direct call. */
9672 *branch_p = align_branch_call;
9673 if ((align_branch & align_branch_call_bit))
9674 add_padding = 1;
9675 }
9676 else if (i.tm.base_opcode == 0xff
9677 && (i.tm.extension_opcode == 2
9678 || i.tm.extension_opcode == 4))
9679 {
9680 /* Indirect call and jmp. */
9681 *branch_p = align_branch_indirect;
9682 if ((align_branch & align_branch_indirect_bit))
9683 add_padding = 1;
9684 }
9685
9686 if (add_padding
9687 && i.disp_operands
9688 && tls_get_addr
9689 && (i.op[0].disps->X_op == O_symbol
9690 || (i.op[0].disps->X_op == O_subtract
9691 && i.op[0].disps->X_op_symbol == GOT_symbol)))
9692 {
9693 symbolS *s = i.op[0].disps->X_add_symbol;
9694 /* No padding to call to global or undefined tls_get_addr. */
9695 if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
9696 && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
9697 return 0;
9698 }
9699 }
9700
9701 if (add_padding
9702 && last_insn.kind != last_insn_other
9703 && last_insn.seg == now_seg)
9704 {
9705 if (flag_debug)
9706 as_warn_where (last_insn.file, last_insn.line,
9707 _("`%s` skips -malign-branch-boundary on `%s`"),
9708 last_insn.name, insn_name (&i.tm));
9709 return 0;
9710 }
9711
9712 return add_padding;
9713 }
9714
9715 static void
9716 output_insn (void)
9717 {
9718 fragS *insn_start_frag;
9719 offsetT insn_start_off;
9720 fragS *fragP = NULL;
9721 enum align_branch_kind branch = align_branch_none;
9722 /* The initializer is arbitrary just to avoid uninitialized error.
9723 it's actually either assigned in add_branch_padding_frag_p
9724 or never be used. */
9725 enum mf_jcc_kind mf_jcc = mf_jcc_jo;
9726
9727 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
9728 if (IS_ELF && x86_used_note && now_seg != absolute_section)
9729 {
9730 if ((i.xstate & xstate_tmm) == xstate_tmm
9731 || is_cpu (&i.tm, CpuAMX_TILE))
9732 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;
9733
9734 if (is_cpu (&i.tm, Cpu8087)
9735 || is_cpu (&i.tm, Cpu287)
9736 || is_cpu (&i.tm, Cpu387)
9737 || is_cpu (&i.tm, Cpu687)
9738 || is_cpu (&i.tm, CpuFISTTP))
9739 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;
9740
9741 if ((i.xstate & xstate_mmx)
9742 || i.tm.mnem_off == MN_emms
9743 || i.tm.mnem_off == MN_femms)
9744 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;
9745
9746 if (i.index_reg)
9747 {
9748 if (i.index_reg->reg_type.bitfield.zmmword)
9749 i.xstate |= xstate_zmm;
9750 else if (i.index_reg->reg_type.bitfield.ymmword)
9751 i.xstate |= xstate_ymm;
9752 else if (i.index_reg->reg_type.bitfield.xmmword)
9753 i.xstate |= xstate_xmm;
9754 }
9755
9756 /* vzeroall / vzeroupper */
9757 if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
9758 i.xstate |= xstate_ymm;
9759
9760 if ((i.xstate & xstate_xmm)
9761 /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
9762 || (i.tm.base_opcode == 0xae
9763 && (is_cpu (&i.tm, CpuSSE)
9764 || is_cpu (&i.tm, CpuAVX)))
9765 || is_cpu (&i.tm, CpuWideKL)
9766 || is_cpu (&i.tm, CpuKL))
9767 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;
9768
9769 if ((i.xstate & xstate_ymm) == xstate_ymm)
9770 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
9771 if ((i.xstate & xstate_zmm) == xstate_zmm)
9772 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
9773 if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
9774 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
9775 if (is_cpu (&i.tm, CpuFXSR))
9776 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
9777 if (is_cpu (&i.tm, CpuXsave))
9778 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
9779 if (is_cpu (&i.tm, CpuXsaveopt))
9780 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
9781 if (is_cpu (&i.tm, CpuXSAVEC))
9782 x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;
9783
9784 if (x86_feature_2_used
9785 || is_cpu (&i.tm, CpuCMOV)
9786 || is_cpu (&i.tm, CpuSYSCALL)
9787 || i.tm.mnem_off == MN_cmpxchg8b)
9788 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
9789 if (is_cpu (&i.tm, CpuSSE3)
9790 || is_cpu (&i.tm, CpuSSSE3)
9791 || is_cpu (&i.tm, CpuSSE4_1)
9792 || is_cpu (&i.tm, CpuSSE4_2)
9793 || is_cpu (&i.tm, CpuCX16)
9794 || is_cpu (&i.tm, CpuPOPCNT)
9795 /* LAHF-SAHF insns in 64-bit mode. */
9796 || (flag_code == CODE_64BIT
9797 && (i.tm.base_opcode | 1) == 0x9f
9798 && i.tm.opcode_space == SPACE_BASE))
9799 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
9800 if (is_cpu (&i.tm, CpuAVX)
9801 || is_cpu (&i.tm, CpuAVX2)
9802 /* Any VEX encoded insns execpt for AVX512F, AVX512BW, AVX512DQ,
9803 XOP, FMA4, LPW, TBM, and AMX. */
9804 || (i.tm.opcode_modifier.vex
9805 && !is_cpu (&i.tm, CpuAVX512F)
9806 && !is_cpu (&i.tm, CpuAVX512BW)
9807 && !is_cpu (&i.tm, CpuAVX512DQ)
9808 && !is_cpu (&i.tm, CpuXOP)
9809 && !is_cpu (&i.tm, CpuFMA4)
9810 && !is_cpu (&i.tm, CpuLWP)
9811 && !is_cpu (&i.tm, CpuTBM)
9812 && !(x86_feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
9813 || is_cpu (&i.tm, CpuF16C)
9814 || is_cpu (&i.tm, CpuFMA)
9815 || is_cpu (&i.tm, CpuLZCNT)
9816 || is_cpu (&i.tm, CpuMovbe)
9817 || is_cpu (&i.tm, CpuXSAVES)
9818 || (x86_feature_2_used
9819 & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
9820 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
9821 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
9822 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
9823 if (is_cpu (&i.tm, CpuAVX512F)
9824 || is_cpu (&i.tm, CpuAVX512BW)
9825 || is_cpu (&i.tm, CpuAVX512DQ)
9826 || is_cpu (&i.tm, CpuAVX512VL)
9827 /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
9828 AVX512-4FMAPS, and AVX512-4VNNIW. */
9829 || (i.tm.opcode_modifier.evex
9830 && !is_cpu (&i.tm, CpuAVX512ER)
9831 && !is_cpu (&i.tm, CpuAVX512PF)
9832 && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
9833 && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
9834 x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
9835 }
9836 #endif
9837
9838 /* Tie dwarf2 debug info to the address at the start of the insn.
9839 We can't do this after the insn has been output as the current
9840 frag may have been closed off. eg. by frag_var. */
9841 dwarf2_emit_insn (0);
9842
9843 insn_start_frag = frag_now;
9844 insn_start_off = frag_now_fix ();
9845
9846 if (add_branch_padding_frag_p (&branch, &mf_jcc))
9847 {
9848 char *p;
9849 /* Branch can be 8 bytes. Leave some room for prefixes. */
9850 unsigned int max_branch_padding_size = 14;
9851
9852 /* Align section to boundary. */
9853 record_alignment (now_seg, align_branch_power);
9854
9855 /* Make room for padding. */
9856 frag_grow (max_branch_padding_size);
9857
9858 /* Start of the padding. */
9859 p = frag_more (0);
9860
9861 fragP = frag_now;
9862
9863 frag_var (rs_machine_dependent, max_branch_padding_size, 0,
9864 ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
9865 NULL, 0, p);
9866
9867 fragP->tc_frag_data.mf_type = mf_jcc;
9868 fragP->tc_frag_data.branch_type = branch;
9869 fragP->tc_frag_data.max_bytes = max_branch_padding_size;
9870 }
9871
9872 if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
9873 && !pre_386_16bit_warned)
9874 {
9875 as_warn (_("use .code16 to ensure correct addressing mode"));
9876 pre_386_16bit_warned = true;
9877 }
9878
9879 /* Output jumps. */
9880 if (i.tm.opcode_modifier.jump == JUMP)
9881 output_branch ();
9882 else if (i.tm.opcode_modifier.jump == JUMP_BYTE
9883 || i.tm.opcode_modifier.jump == JUMP_DWORD)
9884 output_jump ();
9885 else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
9886 output_interseg_jump ();
9887 else
9888 {
9889 /* Output normal instructions here. */
9890 char *p;
9891 unsigned char *q;
9892 unsigned int j;
9893 enum mf_cmp_kind mf_cmp;
9894
9895 if (avoid_fence
9896 && (i.tm.base_opcode == 0xaee8
9897 || i.tm.base_opcode == 0xaef0
9898 || i.tm.base_opcode == 0xaef8))
9899 {
9900 /* Encode lfence, mfence, and sfence as
9901 f0 83 04 24 00 lock addl $0x0, (%{re}sp). */
9902 if (flag_code == CODE_16BIT)
9903 as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
9904 else if (omit_lock_prefix)
9905 as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
9906 insn_name (&i.tm));
9907 else if (now_seg != absolute_section)
9908 {
9909 offsetT val = 0x240483f0ULL;
9910
9911 p = frag_more (5);
9912 md_number_to_chars (p, val, 5);
9913 }
9914 else
9915 abs_section_offset += 5;
9916 return;
9917 }
9918
9919 /* Some processors fail on LOCK prefix. This options makes
9920 assembler ignore LOCK prefix and serves as a workaround. */
9921 if (omit_lock_prefix)
9922 {
9923 if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
9924 && i.tm.opcode_modifier.isprefix)
9925 return;
9926 i.prefix[LOCK_PREFIX] = 0;
9927 }
9928
9929 if (branch)
9930 /* Skip if this is a branch. */
9931 ;
9932 else if (add_fused_jcc_padding_frag_p (&mf_cmp))
9933 {
9934 /* Make room for padding. */
9935 frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
9936 p = frag_more (0);
9937
9938 fragP = frag_now;
9939
9940 frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
9941 ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
9942 NULL, 0, p);
9943
9944 fragP->tc_frag_data.mf_type = mf_cmp;
9945 fragP->tc_frag_data.branch_type = align_branch_fused;
9946 fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
9947 }
9948 else if (add_branch_prefix_frag_p ())
9949 {
9950 unsigned int max_prefix_size = align_branch_prefix_size;
9951
9952 /* Make room for padding. */
9953 frag_grow (max_prefix_size);
9954 p = frag_more (0);
9955
9956 fragP = frag_now;
9957
9958 frag_var (rs_machine_dependent, max_prefix_size, 0,
9959 ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
9960 NULL, 0, p);
9961
9962 fragP->tc_frag_data.max_bytes = max_prefix_size;
9963 }
9964
9965 /* Since the VEX/EVEX prefix contains the implicit prefix, we
9966 don't need the explicit prefix. */
9967 if (!is_any_vex_encoding (&i.tm))
9968 {
9969 switch (i.tm.opcode_modifier.opcodeprefix)
9970 {
9971 case PREFIX_0X66:
9972 add_prefix (0x66);
9973 break;
9974 case PREFIX_0XF2:
9975 add_prefix (0xf2);
9976 break;
9977 case PREFIX_0XF3:
9978 if (!is_cpu (&i.tm, CpuPadLock)
9979 || (i.prefix[REP_PREFIX] != 0xf3))
9980 add_prefix (0xf3);
9981 break;
9982 case PREFIX_NONE:
9983 switch (i.opcode_length)
9984 {
9985 case 2:
9986 break;
9987 case 1:
9988 /* Check for pseudo prefixes. */
9989 if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
9990 break;
9991 as_bad_where (insn_start_frag->fr_file,
9992 insn_start_frag->fr_line,
9993 _("pseudo prefix without instruction"));
9994 return;
9995 default:
9996 abort ();
9997 }
9998 break;
9999 default:
10000 abort ();
10001 }
10002
10003 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
10004 /* For x32, add a dummy REX_OPCODE prefix for mov/add with
10005 R_X86_64_GOTTPOFF relocation so that linker can safely
10006 perform IE->LE optimization. A dummy REX_OPCODE prefix
10007 is also needed for lea with R_X86_64_GOTPC32_TLSDESC
10008 relocation for GDesc -> IE/LE optimization. */
10009 if (x86_elf_abi == X86_64_X32_ABI
10010 && i.operands == 2
10011 && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
10012 || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
10013 && i.prefix[REX_PREFIX] == 0)
10014 add_prefix (REX_OPCODE);
10015 #endif
10016
10017 /* The prefix bytes. */
10018 for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
10019 if (*q)
10020 frag_opcode_byte (*q);
10021 }
10022 else
10023 {
10024 for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
10025 if (*q)
10026 switch (j)
10027 {
10028 case SEG_PREFIX:
10029 case ADDR_PREFIX:
10030 frag_opcode_byte (*q);
10031 break;
10032 default:
10033 /* There should be no other prefixes for instructions
10034 with VEX prefix. */
10035 abort ();
10036 }
10037
10038 /* For EVEX instructions i.vrex should become 0 after
10039 build_evex_prefix. For VEX instructions upper 16 registers
10040 aren't available, so VREX should be 0. */
10041 if (i.vrex)
10042 abort ();
10043 /* Now the VEX prefix. */
10044 if (now_seg != absolute_section)
10045 {
10046 p = frag_more (i.vex.length);
10047 for (j = 0; j < i.vex.length; j++)
10048 p[j] = i.vex.bytes[j];
10049 }
10050 else
10051 abs_section_offset += i.vex.length;
10052 }
10053
10054 /* Now the opcode; be careful about word order here! */
10055 j = i.opcode_length;
10056 if (!i.vex.length)
10057 switch (i.tm.opcode_space)
10058 {
10059 case SPACE_BASE:
10060 break;
10061 case SPACE_0F:
10062 ++j;
10063 break;
10064 case SPACE_0F38:
10065 case SPACE_0F3A:
10066 j += 2;
10067 break;
10068 default:
10069 abort ();
10070 }
10071
10072 if (now_seg == absolute_section)
10073 abs_section_offset += j;
10074 else if (j == 1)
10075 {
10076 FRAG_APPEND_1_CHAR (i.tm.base_opcode);
10077 }
10078 else
10079 {
10080 p = frag_more (j);
10081 if (!i.vex.length
10082 && i.tm.opcode_space != SPACE_BASE)
10083 {
10084 *p++ = 0x0f;
10085 if (i.tm.opcode_space != SPACE_0F)
10086 *p++ = i.tm.opcode_space == SPACE_0F38
10087 ? 0x38 : 0x3a;
10088 }
10089
10090 switch (i.opcode_length)
10091 {
10092 case 2:
10093 /* Put out high byte first: can't use md_number_to_chars! */
10094 *p++ = (i.tm.base_opcode >> 8) & 0xff;
10095 /* Fall through. */
10096 case 1:
10097 *p = i.tm.base_opcode & 0xff;
10098 break;
10099 default:
10100 abort ();
10101 break;
10102 }
10103
10104 }
10105
10106 /* Now the modrm byte and sib byte (if present). */
10107 if (i.tm.opcode_modifier.modrm)
10108 {
10109 frag_opcode_byte ((i.rm.regmem << 0)
10110 | (i.rm.reg << 3)
10111 | (i.rm.mode << 6));
10112 /* If i.rm.regmem == ESP (4)
10113 && i.rm.mode != (Register mode)
10114 && not 16 bit
10115 ==> need second modrm byte. */
10116 if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
10117 && i.rm.mode != 3
10118 && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
10119 frag_opcode_byte ((i.sib.base << 0)
10120 | (i.sib.index << 3)
10121 | (i.sib.scale << 6));
10122 }
10123
10124 if (i.disp_operands)
10125 output_disp (insn_start_frag, insn_start_off);
10126
10127 if (i.imm_operands)
10128 output_imm (insn_start_frag, insn_start_off);
10129
10130 /*
10131 * frag_now_fix () returning plain abs_section_offset when we're in the
10132 * absolute section, and abs_section_offset not getting updated as data
10133 * gets added to the frag breaks the logic below.
10134 */
10135 if (now_seg != absolute_section)
10136 {
10137 j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
10138 if (j > 15)
10139 as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
10140 j);
10141 else if (fragP)
10142 {
10143 /* NB: Don't add prefix with GOTPC relocation since
10144 output_disp() above depends on the fixed encoding
10145 length. Can't add prefix with TLS relocation since
10146 it breaks TLS linker optimization. */
10147 unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
10148 /* Prefix count on the current instruction. */
10149 unsigned int count = i.vex.length;
10150 unsigned int k;
10151 for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
10152 /* REX byte is encoded in VEX/EVEX prefix. */
10153 if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
10154 count++;
10155
10156 /* Count prefixes for extended opcode maps. */
10157 if (!i.vex.length)
10158 switch (i.tm.opcode_space)
10159 {
10160 case SPACE_BASE:
10161 break;
10162 case SPACE_0F:
10163 count++;
10164 break;
10165 case SPACE_0F38:
10166 case SPACE_0F3A:
10167 count += 2;
10168 break;
10169 default:
10170 abort ();
10171 }
10172
10173 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
10174 == BRANCH_PREFIX)
10175 {
10176 /* Set the maximum prefix size in BRANCH_PREFIX
10177 frag. */
10178 if (fragP->tc_frag_data.max_bytes > max)
10179 fragP->tc_frag_data.max_bytes = max;
10180 if (fragP->tc_frag_data.max_bytes > count)
10181 fragP->tc_frag_data.max_bytes -= count;
10182 else
10183 fragP->tc_frag_data.max_bytes = 0;
10184 }
10185 else
10186 {
10187 /* Remember the maximum prefix size in FUSED_JCC_PADDING
10188 frag. */
10189 unsigned int max_prefix_size;
10190 if (align_branch_prefix_size > max)
10191 max_prefix_size = max;
10192 else
10193 max_prefix_size = align_branch_prefix_size;
10194 if (max_prefix_size > count)
10195 fragP->tc_frag_data.max_prefix_length
10196 = max_prefix_size - count;
10197 }
10198
10199 /* Use existing segment prefix if possible. Use CS
10200 segment prefix in 64-bit mode. In 32-bit mode, use SS
10201 segment prefix with ESP/EBP base register and use DS
10202 segment prefix without ESP/EBP base register. */
10203 if (i.prefix[SEG_PREFIX])
10204 fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
10205 else if (flag_code == CODE_64BIT)
10206 fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
10207 else if (i.base_reg
10208 && (i.base_reg->reg_num == 4
10209 || i.base_reg->reg_num == 5))
10210 fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
10211 else
10212 fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
10213 }
10214 }
10215 }
10216
10217 /* NB: Don't work with COND_JUMP86 without i386. */
10218 if (align_branch_power
10219 && now_seg != absolute_section
10220 && cpu_arch_flags.bitfield.cpui386)
10221 {
10222 /* Terminate each frag so that we can add prefix and check for
10223 fused jcc. */
10224 frag_wane (frag_now);
10225 frag_new (0);
10226 }
10227
10228 #ifdef DEBUG386
10229 if (flag_debug)
10230 {
10231 pi ("" /*line*/, &i);
10232 }
10233 #endif /* DEBUG386 */
10234 }
10235
10236 /* Return the size of the displacement operand N. */
10237
10238 static int
10239 disp_size (unsigned int n)
10240 {
10241 int size = 4;
10242
10243 if (i.types[n].bitfield.disp64)
10244 size = 8;
10245 else if (i.types[n].bitfield.disp8)
10246 size = 1;
10247 else if (i.types[n].bitfield.disp16)
10248 size = 2;
10249 return size;
10250 }
10251
10252 /* Return the size of the immediate operand N. */
10253
10254 static int
10255 imm_size (unsigned int n)
10256 {
10257 int size = 4;
10258 if (i.types[n].bitfield.imm64)
10259 size = 8;
10260 else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
10261 size = 1;
10262 else if (i.types[n].bitfield.imm16)
10263 size = 2;
10264 return size;
10265 }
10266
10267 static void
10268 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
10269 {
10270 char *p;
10271 unsigned int n;
10272
10273 for (n = 0; n < i.operands; n++)
10274 {
10275 if (operand_type_check (i.types[n], disp))
10276 {
10277 int size = disp_size (n);
10278
10279 if (now_seg == absolute_section)
10280 abs_section_offset += size;
10281 else if (i.op[n].disps->X_op == O_constant)
10282 {
10283 offsetT val = i.op[n].disps->X_add_number;
10284
10285 val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
10286 size);
10287 p = frag_more (size);
10288 md_number_to_chars (p, val, size);
10289 }
10290 else
10291 {
10292 enum bfd_reloc_code_real reloc_type;
10293 bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
10294 bool sign = (flag_code == CODE_64BIT && size == 4
10295 && (!want_disp32 (&i.tm)
10296 || (i.tm.opcode_modifier.jump && !i.jumpabsolute
10297 && !i.types[n].bitfield.baseindex)))
10298 || pcrel;
10299 fixS *fixP;
10300
10301 /* We can't have 8 bit displacement here. */
10302 gas_assert (!i.types[n].bitfield.disp8);
10303
10304 /* The PC relative address is computed relative
10305 to the instruction boundary, so in case immediate
10306 fields follows, we need to adjust the value. */
10307 if (pcrel && i.imm_operands)
10308 {
10309 unsigned int n1;
10310 int sz = 0;
10311
10312 for (n1 = 0; n1 < i.operands; n1++)
10313 if (operand_type_check (i.types[n1], imm))
10314 {
10315 /* Only one immediate is allowed for PC
10316 relative address, except with .insn. */
10317 gas_assert (sz == 0 || dot_insn ());
10318 sz += imm_size (n1);
10319 }
10320 /* We should find at least one immediate. */
10321 gas_assert (sz != 0);
10322 i.op[n].disps->X_add_number -= sz;
10323 }
10324
10325 p = frag_more (size);
10326 reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
10327 if (GOT_symbol
10328 && GOT_symbol == i.op[n].disps->X_add_symbol
10329 && (((reloc_type == BFD_RELOC_32
10330 || reloc_type == BFD_RELOC_X86_64_32S
10331 || (reloc_type == BFD_RELOC_64
10332 && object_64bit))
10333 && (i.op[n].disps->X_op == O_symbol
10334 || (i.op[n].disps->X_op == O_add
10335 && ((symbol_get_value_expression
10336 (i.op[n].disps->X_op_symbol)->X_op)
10337 == O_subtract))))
10338 || reloc_type == BFD_RELOC_32_PCREL))
10339 {
10340 if (!object_64bit)
10341 {
10342 reloc_type = BFD_RELOC_386_GOTPC;
10343 i.has_gotpc_tls_reloc = true;
10344 i.op[n].disps->X_add_number +=
10345 encoding_length (insn_start_frag, insn_start_off, p);
10346 }
10347 else if (reloc_type == BFD_RELOC_64)
10348 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10349 else
10350 /* Don't do the adjustment for x86-64, as there
10351 the pcrel addressing is relative to the _next_
10352 insn, and that is taken care of in other code. */
10353 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10354 }
10355 else if (align_branch_power)
10356 {
10357 switch (reloc_type)
10358 {
10359 case BFD_RELOC_386_TLS_GD:
10360 case BFD_RELOC_386_TLS_LDM:
10361 case BFD_RELOC_386_TLS_IE:
10362 case BFD_RELOC_386_TLS_IE_32:
10363 case BFD_RELOC_386_TLS_GOTIE:
10364 case BFD_RELOC_386_TLS_GOTDESC:
10365 case BFD_RELOC_386_TLS_DESC_CALL:
10366 case BFD_RELOC_X86_64_TLSGD:
10367 case BFD_RELOC_X86_64_TLSLD:
10368 case BFD_RELOC_X86_64_GOTTPOFF:
10369 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
10370 case BFD_RELOC_X86_64_TLSDESC_CALL:
10371 i.has_gotpc_tls_reloc = true;
10372 default:
10373 break;
10374 }
10375 }
10376 fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
10377 size, i.op[n].disps, pcrel,
10378 reloc_type);
10379
10380 if (flag_code == CODE_64BIT && size == 4 && pcrel
10381 && !i.prefix[ADDR_PREFIX])
10382 fixP->fx_signed = 1;
10383
10384 /* Check for "call/jmp *mem", "mov mem, %reg",
10385 "test %reg, mem" and "binop mem, %reg" where binop
10386 is one of adc, add, and, cmp, or, sbb, sub, xor
10387 instructions without data prefix. Always generate
10388 R_386_GOT32X for "sym*GOT" operand in 32-bit mode. */
10389 if (i.prefix[DATA_PREFIX] == 0
10390 && (generate_relax_relocations
10391 || (!object_64bit
10392 && i.rm.mode == 0
10393 && i.rm.regmem == 5))
10394 && (i.rm.mode == 2
10395 || (i.rm.mode == 0 && i.rm.regmem == 5))
10396 && i.tm.opcode_space == SPACE_BASE
10397 && ((i.operands == 1
10398 && i.tm.base_opcode == 0xff
10399 && (i.rm.reg == 2 || i.rm.reg == 4))
10400 || (i.operands == 2
10401 && (i.tm.base_opcode == 0x8b
10402 || i.tm.base_opcode == 0x85
10403 || (i.tm.base_opcode & ~0x38) == 0x03))))
10404 {
10405 if (object_64bit)
10406 {
10407 fixP->fx_tcbit = i.rex != 0;
10408 if (i.base_reg
10409 && (i.base_reg->reg_num == RegIP))
10410 fixP->fx_tcbit2 = 1;
10411 }
10412 else
10413 fixP->fx_tcbit2 = 1;
10414 }
10415 }
10416 }
10417 }
10418 }
10419
10420 static void
10421 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
10422 {
10423 char *p;
10424 unsigned int n;
10425
10426 for (n = 0; n < i.operands; n++)
10427 {
10428 if (operand_type_check (i.types[n], imm))
10429 {
10430 int size = imm_size (n);
10431
10432 if (now_seg == absolute_section)
10433 abs_section_offset += size;
10434 else if (i.op[n].imms->X_op == O_constant)
10435 {
10436 offsetT val;
10437
10438 val = offset_in_range (i.op[n].imms->X_add_number,
10439 size);
10440 p = frag_more (size);
10441 md_number_to_chars (p, val, size);
10442 }
10443 else
10444 {
10445 /* Not absolute_section.
10446 Need a 32-bit fixup (don't support 8bit
10447 non-absolute imms). Try to support other
10448 sizes ... */
10449 enum bfd_reloc_code_real reloc_type;
10450 int sign;
10451
10452 if (i.types[n].bitfield.imm32s
10453 && (i.suffix == QWORD_MNEM_SUFFIX
10454 || (!i.suffix && i.tm.opcode_modifier.no_lsuf)
10455 || dot_insn ()))
10456 sign = 1;
10457 else
10458 sign = 0;
10459
10460 p = frag_more (size);
10461 reloc_type = reloc (size, 0, sign, i.reloc[n]);
10462
10463 /* This is tough to explain. We end up with this one if we
10464 * have operands that look like
10465 * "_GLOBAL_OFFSET_TABLE_+[.-.L284]". The goal here is to
10466 * obtain the absolute address of the GOT, and it is strongly
10467 * preferable from a performance point of view to avoid using
10468 * a runtime relocation for this. The actual sequence of
10469 * instructions often look something like:
10470 *
10471 * call .L66
10472 * .L66:
10473 * popl %ebx
10474 * addl $_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
10475 *
10476 * The call and pop essentially return the absolute address
10477 * of the label .L66 and store it in %ebx. The linker itself
10478 * will ultimately change the first operand of the addl so
10479 * that %ebx points to the GOT, but to keep things simple, the
10480 * .o file must have this operand set so that it generates not
10481 * the absolute address of .L66, but the absolute address of
10482 * itself. This allows the linker itself simply treat a GOTPC
10483 * relocation as asking for a pcrel offset to the GOT to be
10484 * added in, and the addend of the relocation is stored in the
10485 * operand field for the instruction itself.
10486 *
10487 * Our job here is to fix the operand so that it would add
10488 * the correct offset so that %ebx would point to itself. The
10489 * thing that is tricky is that .-.L66 will point to the
10490 * beginning of the instruction, so we need to further modify
10491 * the operand so that it will point to itself. There are
10492 * other cases where you have something like:
10493 *
10494 * .long $_GLOBAL_OFFSET_TABLE_+[.-.L66]
10495 *
10496 * and here no correction would be required. Internally in
10497 * the assembler we treat operands of this form as not being
10498 * pcrel since the '.' is explicitly mentioned, and I wonder
10499 * whether it would simplify matters to do it this way. Who
10500 * knows. In earlier versions of the PIC patches, the
10501 * pcrel_adjust field was used to store the correction, but
10502 * since the expression is not pcrel, I felt it would be
10503 * confusing to do it this way. */
10504
10505 if ((reloc_type == BFD_RELOC_32
10506 || reloc_type == BFD_RELOC_X86_64_32S
10507 || reloc_type == BFD_RELOC_64)
10508 && GOT_symbol
10509 && GOT_symbol == i.op[n].imms->X_add_symbol
10510 && (i.op[n].imms->X_op == O_symbol
10511 || (i.op[n].imms->X_op == O_add
10512 && ((symbol_get_value_expression
10513 (i.op[n].imms->X_op_symbol)->X_op)
10514 == O_subtract))))
10515 {
10516 if (!object_64bit)
10517 reloc_type = BFD_RELOC_386_GOTPC;
10518 else if (size == 4)
10519 reloc_type = BFD_RELOC_X86_64_GOTPC32;
10520 else if (size == 8)
10521 reloc_type = BFD_RELOC_X86_64_GOTPC64;
10522 i.has_gotpc_tls_reloc = true;
10523 i.op[n].imms->X_add_number +=
10524 encoding_length (insn_start_frag, insn_start_off, p);
10525 }
10526 fix_new_exp (frag_now, p - frag_now->fr_literal, size,
10527 i.op[n].imms, 0, reloc_type);
10528 }
10529 }
10530 }
10531 }
10532 \f
10533 /* x86_cons_fix_new is called via the expression parsing code when a
10534 reloc is needed. We use this hook to get the correct .got reloc. */
10535 static int cons_sign = -1;
10536
10537 void
10538 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
10539 expressionS *exp, bfd_reloc_code_real_type r)
10540 {
10541 r = reloc (len, 0, cons_sign, r);
10542
10543 #ifdef TE_PE
10544 if (exp->X_op == O_secrel)
10545 {
10546 exp->X_op = O_symbol;
10547 r = BFD_RELOC_32_SECREL;
10548 }
10549 else if (exp->X_op == O_secidx)
10550 r = BFD_RELOC_16_SECIDX;
10551 #endif
10552
10553 fix_new_exp (frag, off, len, exp, 0, r);
10554 }
10555
10556 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
10557 purpose of the `.dc.a' internal pseudo-op. */
10558
10559 int
10560 x86_address_bytes (void)
10561 {
10562 if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
10563 return 4;
10564 return stdoutput->arch_info->bits_per_address / 8;
10565 }
10566
10567 #if (!(defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) || defined (OBJ_MACH_O)) \
10568 || defined (LEX_AT)) && !defined (TE_PE)
10569 # define lex_got(reloc, adjust, types) NULL
10570 #else
10571 /* Parse operands of the form
10572 <symbol>@GOTOFF+<nnn>
10573 and similar .plt or .got references.
10574
10575 If we find one, set up the correct relocation in RELOC and copy the
10576 input string, minus the `@GOTOFF' into a malloc'd buffer for
10577 parsing by the calling routine. Return this buffer, and if ADJUST
10578 is non-null set it to the length of the string we removed from the
10579 input line. Otherwise return NULL. */
10580 static char *
10581 lex_got (enum bfd_reloc_code_real *rel,
10582 int *adjust,
10583 i386_operand_type *types)
10584 {
10585 /* Some of the relocations depend on the size of what field is to
10586 be relocated. But in our callers i386_immediate and i386_displacement
10587 we don't yet know the operand size (this will be set by insn
10588 matching). Hence we record the word32 relocation here,
10589 and adjust the reloc according to the real size in reloc(). */
10590 static const struct
10591 {
10592 const char *str;
10593 int len;
10594 const enum bfd_reloc_code_real rel[2];
10595 const i386_operand_type types64;
10596 bool need_GOT_symbol;
10597 }
10598 gotrel[] =
10599 {
10600
10601 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
10602 { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
10603 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
10604 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
10605 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
10606 { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
10607 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
10608 { .imm64 = 1, .disp64 = 1 } }
10609
10610 #ifndef TE_PE
10611 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
10612 { STRING_COMMA_LEN ("SIZE"), { BFD_RELOC_SIZE32,
10613 BFD_RELOC_SIZE32 },
10614 { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
10615 #endif
10616 { STRING_COMMA_LEN ("PLTOFF"), { _dummy_first_bfd_reloc_code_real,
10617 BFD_RELOC_X86_64_PLTOFF64 },
10618 { .bitfield = { .imm64 = 1 } }, true },
10619 { STRING_COMMA_LEN ("PLT"), { BFD_RELOC_386_PLT32,
10620 BFD_RELOC_X86_64_PLT32 },
10621 OPERAND_TYPE_IMM32_32S_DISP32, false },
10622 { STRING_COMMA_LEN ("GOTPLT"), { _dummy_first_bfd_reloc_code_real,
10623 BFD_RELOC_X86_64_GOTPLT64 },
10624 OPERAND_TYPE_IMM64_DISP64, true },
10625 { STRING_COMMA_LEN ("GOTOFF"), { BFD_RELOC_386_GOTOFF,
10626 BFD_RELOC_X86_64_GOTOFF64 },
10627 OPERAND_TYPE_IMM64_DISP64, true },
10628 { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
10629 BFD_RELOC_X86_64_GOTPCREL },
10630 OPERAND_TYPE_IMM32_32S_DISP32, true },
10631 { STRING_COMMA_LEN ("TLSGD"), { BFD_RELOC_386_TLS_GD,
10632 BFD_RELOC_X86_64_TLSGD },
10633 OPERAND_TYPE_IMM32_32S_DISP32, true },
10634 { STRING_COMMA_LEN ("TLSLDM"), { BFD_RELOC_386_TLS_LDM,
10635 _dummy_first_bfd_reloc_code_real },
10636 OPERAND_TYPE_NONE, true },
10637 { STRING_COMMA_LEN ("TLSLD"), { _dummy_first_bfd_reloc_code_real,
10638 BFD_RELOC_X86_64_TLSLD },
10639 OPERAND_TYPE_IMM32_32S_DISP32, true },
10640 { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
10641 BFD_RELOC_X86_64_GOTTPOFF },
10642 OPERAND_TYPE_IMM32_32S_DISP32, true },
10643 { STRING_COMMA_LEN ("TPOFF"), { BFD_RELOC_386_TLS_LE_32,
10644 BFD_RELOC_X86_64_TPOFF32 },
10645 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10646 { STRING_COMMA_LEN ("NTPOFF"), { BFD_RELOC_386_TLS_LE,
10647 _dummy_first_bfd_reloc_code_real },
10648 OPERAND_TYPE_NONE, true },
10649 { STRING_COMMA_LEN ("DTPOFF"), { BFD_RELOC_386_TLS_LDO_32,
10650 BFD_RELOC_X86_64_DTPOFF32 },
10651 OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
10652 { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
10653 _dummy_first_bfd_reloc_code_real },
10654 OPERAND_TYPE_NONE, true },
10655 { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
10656 _dummy_first_bfd_reloc_code_real },
10657 OPERAND_TYPE_NONE, true },
10658 { STRING_COMMA_LEN ("GOT"), { BFD_RELOC_386_GOT32,
10659 BFD_RELOC_X86_64_GOT32 },
10660 OPERAND_TYPE_IMM32_32S_64_DISP32, true },
10661 { STRING_COMMA_LEN ("TLSDESC"), { BFD_RELOC_386_TLS_GOTDESC,
10662 BFD_RELOC_X86_64_GOTPC32_TLSDESC },
10663 OPERAND_TYPE_IMM32_32S_DISP32, true },
10664 { STRING_COMMA_LEN ("TLSCALL"), { BFD_RELOC_386_TLS_DESC_CALL,
10665 BFD_RELOC_X86_64_TLSDESC_CALL },
10666 OPERAND_TYPE_IMM32_32S_DISP32, true },
10667 #else /* TE_PE */
10668 { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
10669 BFD_RELOC_32_SECREL },
10670 OPERAND_TYPE_IMM32_32S_64_DISP32_64, false },
10671 #endif
10672
10673 #undef OPERAND_TYPE_IMM32_32S_DISP32
10674 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
10675 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
10676 #undef OPERAND_TYPE_IMM64_DISP64
10677
10678 };
10679 char *cp;
10680 unsigned int j;
10681
10682 #if defined (OBJ_MAYBE_ELF) && !defined (TE_PE)
10683 if (!IS_ELF)
10684 return NULL;
10685 #endif
10686
10687 for (cp = input_line_pointer; *cp != '@'; cp++)
10688 if (is_end_of_line[(unsigned char) *cp] || *cp == ',')
10689 return NULL;
10690
10691 for (j = 0; j < ARRAY_SIZE (gotrel); j++)
10692 {
10693 int len = gotrel[j].len;
10694 if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
10695 {
10696 if (gotrel[j].rel[object_64bit] != 0)
10697 {
10698 int first, second;
10699 char *tmpbuf, *past_reloc;
10700
10701 *rel = gotrel[j].rel[object_64bit];
10702
10703 if (types)
10704 {
10705 if (flag_code != CODE_64BIT)
10706 {
10707 types->bitfield.imm32 = 1;
10708 types->bitfield.disp32 = 1;
10709 }
10710 else
10711 *types = gotrel[j].types64;
10712 }
10713
10714 if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
10715 GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
10716
10717 /* The length of the first part of our input line. */
10718 first = cp - input_line_pointer;
10719
10720 /* The second part goes from after the reloc token until
10721 (and including) an end_of_line char or comma. */
10722 past_reloc = cp + 1 + len;
10723 cp = past_reloc;
10724 while (!is_end_of_line[(unsigned char) *cp] && *cp != ',')
10725 ++cp;
10726 second = cp + 1 - past_reloc;
10727
10728 /* Allocate and copy string. The trailing NUL shouldn't
10729 be necessary, but be safe. */
10730 tmpbuf = XNEWVEC (char, first + second + 2);
10731 memcpy (tmpbuf, input_line_pointer, first);
10732 if (second != 0 && *past_reloc != ' ')
10733 /* Replace the relocation token with ' ', so that
10734 errors like foo@GOTOFF1 will be detected. */
10735 tmpbuf[first++] = ' ';
10736 else
10737 /* Increment length by 1 if the relocation token is
10738 removed. */
10739 len++;
10740 if (adjust)
10741 *adjust = len;
10742 memcpy (tmpbuf + first, past_reloc, second);
10743 tmpbuf[first + second] = '\0';
10744 return tmpbuf;
10745 }
10746
10747 as_bad (_("@%s reloc is not supported with %d-bit output format"),
10748 gotrel[j].str, 1 << (5 + object_64bit));
10749 return NULL;
10750 }
10751 }
10752
10753 /* Might be a symbol version string. Don't as_bad here. */
10754 return NULL;
10755 }
10756 #endif
10757
10758 bfd_reloc_code_real_type
10759 x86_cons (expressionS *exp, int size)
10760 {
10761 bfd_reloc_code_real_type got_reloc = NO_RELOC;
10762
10763 intel_syntax = -intel_syntax;
10764 exp->X_md = 0;
10765 expr_mode = expr_operator_none;
10766
10767 #if ((defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)) \
10768 && !defined (LEX_AT)) \
10769 || defined (TE_PE)
10770 if (size == 4 || (object_64bit && size == 8))
10771 {
10772 /* Handle @GOTOFF and the like in an expression. */
10773 char *save;
10774 char *gotfree_input_line;
10775 int adjust = 0;
10776
10777 save = input_line_pointer;
10778 gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
10779 if (gotfree_input_line)
10780 input_line_pointer = gotfree_input_line;
10781
10782 expression (exp);
10783
10784 if (gotfree_input_line)
10785 {
10786 /* expression () has merrily parsed up to the end of line,
10787 or a comma - in the wrong buffer. Transfer how far
10788 input_line_pointer has moved to the right buffer. */
10789 input_line_pointer = (save
10790 + (input_line_pointer - gotfree_input_line)
10791 + adjust);
10792 free (gotfree_input_line);
10793 if (exp->X_op == O_constant
10794 || exp->X_op == O_absent
10795 || exp->X_op == O_illegal
10796 || exp->X_op == O_register
10797 || exp->X_op == O_big)
10798 {
10799 char c = *input_line_pointer;
10800 *input_line_pointer = 0;
10801 as_bad (_("missing or invalid expression `%s'"), save);
10802 *input_line_pointer = c;
10803 }
10804 else if ((got_reloc == BFD_RELOC_386_PLT32
10805 || got_reloc == BFD_RELOC_X86_64_PLT32)
10806 && exp->X_op != O_symbol)
10807 {
10808 char c = *input_line_pointer;
10809 *input_line_pointer = 0;
10810 as_bad (_("invalid PLT expression `%s'"), save);
10811 *input_line_pointer = c;
10812 }
10813 }
10814 }
10815 else
10816 #endif
10817 expression (exp);
10818
10819 intel_syntax = -intel_syntax;
10820
10821 if (intel_syntax)
10822 i386_intel_simplify (exp);
10823
10824 /* If not 64bit, massage value, to account for wraparound when !BFD64. */
10825 if (size <= 4 && expr_mode == expr_operator_present
10826 && exp->X_op == O_constant && !object_64bit)
10827 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
10828
10829 return got_reloc;
10830 }
10831
10832 static void
10833 signed_cons (int size)
10834 {
10835 if (object_64bit)
10836 cons_sign = 1;
10837 cons (size);
10838 cons_sign = -1;
10839 }
10840
10841 static void
10842 s_insn (int dummy ATTRIBUTE_UNUSED)
10843 {
10844 char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer, *ptr;
10845 char *saved_ilp = find_end_of_line (line, false), saved_char;
10846 const char *end;
10847 unsigned int j;
10848 valueT val;
10849 bool vex = false, xop = false, evex = false;
10850 static const templates tt = { &i.tm, &i.tm + 1 };
10851
10852 init_globals ();
10853
10854 saved_char = *saved_ilp;
10855 *saved_ilp = 0;
10856
10857 end = parse_insn (line, mnemonic, true);
10858 if (end == NULL)
10859 {
10860 bad:
10861 *saved_ilp = saved_char;
10862 ignore_rest_of_line ();
10863 i.tm.mnem_off = 0;
10864 return;
10865 }
10866 line += end - line;
10867
10868 current_templates = &tt;
10869 i.tm.mnem_off = MN__insn;
10870 i.tm.extension_opcode = None;
10871
10872 if (startswith (line, "VEX")
10873 && (line[3] == '.' || is_space_char (line[3])))
10874 {
10875 vex = true;
10876 line += 3;
10877 }
10878 else if (startswith (line, "XOP") && ISDIGIT (line[3]))
10879 {
10880 char *e;
10881 unsigned long n = strtoul (line + 3, &e, 16);
10882
10883 if (e == line + 5 && n >= 0x08 && n <= 0x1f
10884 && (*e == '.' || is_space_char (*e)))
10885 {
10886 xop = true;
10887 /* Arrange for build_vex_prefix() to emit 0x8f. */
10888 i.tm.opcode_space = SPACE_XOP08;
10889 i.insn_opcode_space = n;
10890 line = e;
10891 }
10892 }
10893 else if (startswith (line, "EVEX")
10894 && (line[4] == '.' || is_space_char (line[4])))
10895 {
10896 evex = true;
10897 line += 4;
10898 }
10899
10900 if (vex || xop
10901 ? i.vec_encoding == vex_encoding_evex
10902 : evex
10903 ? i.vec_encoding == vex_encoding_vex
10904 || i.vec_encoding == vex_encoding_vex3
10905 : i.vec_encoding != vex_encoding_default)
10906 {
10907 as_bad (_("pseudo-prefix conflicts with encoding specifier"));
10908 goto bad;
10909 }
10910
10911 if (line > end && i.vec_encoding == vex_encoding_default)
10912 i.vec_encoding = evex ? vex_encoding_evex : vex_encoding_vex;
10913
10914 if (i.vec_encoding != vex_encoding_default)
10915 {
10916 /* Only address size and segment override prefixes are permitted with
10917 VEX/XOP/EVEX encodings. */
10918 const unsigned char *p = i.prefix;
10919
10920 for (j = 0; j < ARRAY_SIZE (i.prefix); ++j, ++p)
10921 {
10922 if (!*p)
10923 continue;
10924
10925 switch (j)
10926 {
10927 case SEG_PREFIX:
10928 case ADDR_PREFIX:
10929 break;
10930 default:
10931 as_bad (_("illegal prefix used with VEX/XOP/EVEX"));
10932 goto bad;
10933 }
10934 }
10935 }
10936
10937 if (line > end && *line == '.')
10938 {
10939 /* Length specifier (VEX.L, XOP.L, EVEX.L'L). */
10940 switch (line[1])
10941 {
10942 case 'L':
10943 switch (line[2])
10944 {
10945 case '0':
10946 if (evex)
10947 i.tm.opcode_modifier.evex = EVEX128;
10948 else
10949 i.tm.opcode_modifier.vex = VEX128;
10950 break;
10951
10952 case '1':
10953 if (evex)
10954 i.tm.opcode_modifier.evex = EVEX256;
10955 else
10956 i.tm.opcode_modifier.vex = VEX256;
10957 break;
10958
10959 case '2':
10960 if (evex)
10961 i.tm.opcode_modifier.evex = EVEX512;
10962 break;
10963
10964 case '3':
10965 if (evex)
10966 i.tm.opcode_modifier.evex = EVEX_L3;
10967 break;
10968
10969 case 'I':
10970 if (line[3] == 'G')
10971 {
10972 if (evex)
10973 i.tm.opcode_modifier.evex = EVEXLIG;
10974 else
10975 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
10976 ++line;
10977 }
10978 break;
10979 }
10980
10981 if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
10982 line += 3;
10983 break;
10984
10985 case '1':
10986 if (line[2] == '2' && line[3] == '8')
10987 {
10988 if (evex)
10989 i.tm.opcode_modifier.evex = EVEX128;
10990 else
10991 i.tm.opcode_modifier.vex = VEX128;
10992 line += 4;
10993 }
10994 break;
10995
10996 case '2':
10997 if (line[2] == '5' && line[3] == '6')
10998 {
10999 if (evex)
11000 i.tm.opcode_modifier.evex = EVEX256;
11001 else
11002 i.tm.opcode_modifier.vex = VEX256;
11003 line += 4;
11004 }
11005 break;
11006
11007 case '5':
11008 if (evex && line[2] == '1' && line[3] == '2')
11009 {
11010 i.tm.opcode_modifier.evex = EVEX512;
11011 line += 4;
11012 }
11013 break;
11014 }
11015 }
11016
11017 if (line > end && *line == '.')
11018 {
11019 /* embedded prefix (VEX.pp, XOP.pp, EVEX.pp). */
11020 switch (line[1])
11021 {
11022 case 'N':
11023 if (line[2] == 'P')
11024 line += 3;
11025 break;
11026
11027 case '6':
11028 if (line[2] == '6')
11029 {
11030 i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
11031 line += 3;
11032 }
11033 break;
11034
11035 case 'F': case 'f':
11036 if (line[2] == '3')
11037 {
11038 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
11039 line += 3;
11040 }
11041 else if (line[2] == '2')
11042 {
11043 i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
11044 line += 3;
11045 }
11046 break;
11047 }
11048 }
11049
11050 if (line > end && !xop && *line == '.')
11051 {
11052 /* Encoding space (VEX.mmmmm, EVEX.mmmm). */
11053 switch (line[1])
11054 {
11055 case '0':
11056 if (TOUPPER (line[2]) != 'F')
11057 break;
11058 if (line[3] == '.' || is_space_char (line[3]))
11059 {
11060 i.insn_opcode_space = SPACE_0F;
11061 line += 3;
11062 }
11063 else if (line[3] == '3'
11064 && (line[4] == '8' || TOUPPER (line[4]) == 'A')
11065 && (line[5] == '.' || is_space_char (line[5])))
11066 {
11067 i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
11068 line += 5;
11069 }
11070 break;
11071
11072 case 'M':
11073 if (ISDIGIT (line[2]) && line[2] != '0')
11074 {
11075 char *e;
11076 unsigned long n = strtoul (line + 2, &e, 10);
11077
11078 if (n <= (evex ? 15 : 31)
11079 && (*e == '.' || is_space_char (*e)))
11080 {
11081 i.insn_opcode_space = n;
11082 line = e;
11083 }
11084 }
11085 break;
11086 }
11087 }
11088
11089 if (line > end && *line == '.' && line[1] == 'W')
11090 {
11091 /* VEX.W, XOP.W, EVEX.W */
11092 switch (line[2])
11093 {
11094 case '0':
11095 i.tm.opcode_modifier.vexw = VEXW0;
11096 break;
11097
11098 case '1':
11099 i.tm.opcode_modifier.vexw = VEXW1;
11100 break;
11101
11102 case 'I':
11103 if (line[3] == 'G')
11104 {
11105 i.tm.opcode_modifier.vexw = VEXWIG;
11106 ++line;
11107 }
11108 break;
11109 }
11110
11111 if (i.tm.opcode_modifier.vexw)
11112 line += 3;
11113 }
11114
11115 if (line > end && *line && !is_space_char (*line))
11116 {
11117 /* Improve diagnostic a little. */
11118 if (*line == '.' && line[1] && !is_space_char (line[1]))
11119 ++line;
11120 goto done;
11121 }
11122
11123 /* Before processing the opcode expression, find trailing "+r" or
11124 "/<digit>" specifiers. */
11125 for (ptr = line; ; ++ptr)
11126 {
11127 unsigned long n;
11128 char *e;
11129
11130 ptr = strpbrk (ptr, "+/,");
11131 if (ptr == NULL || *ptr == ',')
11132 break;
11133
11134 if (*ptr == '+' && ptr[1] == 'r'
11135 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
11136 {
11137 *ptr = ' ';
11138 ptr[1] = ' ';
11139 i.short_form = true;
11140 break;
11141 }
11142
11143 if (*ptr == '/' && ISDIGIT (ptr[1])
11144 && (n = strtoul (ptr + 1, &e, 8)) < 8
11145 && e == ptr + 2
11146 && (ptr[2] == ',' || (is_space_char (ptr[2]) && ptr[3] == ',')))
11147 {
11148 *ptr = ' ';
11149 ptr[1] = ' ';
11150 i.tm.extension_opcode = n;
11151 i.tm.opcode_modifier.modrm = 1;
11152 break;
11153 }
11154 }
11155
11156 input_line_pointer = line;
11157 val = get_absolute_expression ();
11158 line = input_line_pointer;
11159
11160 if (i.short_form && (val & 7))
11161 as_warn ("`+r' assumes low three opcode bits to be clear");
11162
11163 for (j = 1; j < sizeof(val); ++j)
11164 if (!(val >> (j * 8)))
11165 break;
11166
11167 /* Trim off a prefix if present. */
11168 if (j > 1 && !vex && !xop && !evex)
11169 {
11170 uint8_t byte = val >> ((j - 1) * 8);
11171
11172 switch (byte)
11173 {
11174 case DATA_PREFIX_OPCODE:
11175 case REPE_PREFIX_OPCODE:
11176 case REPNE_PREFIX_OPCODE:
11177 if (!add_prefix (byte))
11178 goto bad;
11179 val &= ((uint64_t)1 << (--j * 8)) - 1;
11180 break;
11181 }
11182 }
11183
11184 /* Trim off encoding space. */
11185 if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
11186 {
11187 uint8_t byte = val >> ((--j - 1) * 8);
11188
11189 i.insn_opcode_space = SPACE_0F;
11190 switch (byte & -(j > 1))
11191 {
11192 case 0x38:
11193 i.insn_opcode_space = SPACE_0F38;
11194 --j;
11195 break;
11196 case 0x3a:
11197 i.insn_opcode_space = SPACE_0F3A;
11198 --j;
11199 break;
11200 }
11201 i.tm.opcode_space = i.insn_opcode_space;
11202 val &= ((uint64_t)1 << (j * 8)) - 1;
11203 }
11204 if (!i.tm.opcode_space && (vex || evex))
11205 /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
11206 Also avoid hitting abort() there or in build_evex_prefix(). */
11207 i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
11208 : SPACE_0F38;
11209
11210 if (j > 2)
11211 {
11212 as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
11213 goto bad;
11214 }
11215 i.opcode_length = j;
11216
11217 /* Handle operands, if any. */
11218 if (*line == ',')
11219 {
11220 i386_operand_type combined;
11221 expressionS *disp_exp = NULL;
11222 bool changed;
11223
11224 i.memshift = -1;
11225
11226 ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
11227 this_operand = -1;
11228 if (!ptr)
11229 goto bad;
11230 line = ptr;
11231
11232 if (!i.operands)
11233 {
11234 as_bad (_("expecting operand after ','; got nothing"));
11235 goto done;
11236 }
11237
11238 if (i.mem_operands > 1)
11239 {
11240 as_bad (_("too many memory references for `%s'"),
11241 &i386_mnemonics[MN__insn]);
11242 goto done;
11243 }
11244
11245 /* Are we to emit ModR/M encoding? */
11246 if (!i.short_form
11247 && (i.mem_operands
11248 || i.reg_operands > (i.vec_encoding != vex_encoding_default)
11249 || i.tm.extension_opcode != None))
11250 i.tm.opcode_modifier.modrm = 1;
11251
11252 if (!i.tm.opcode_modifier.modrm
11253 && (i.reg_operands
11254 > i.short_form + 0U + (i.vec_encoding != vex_encoding_default)
11255 || i.mem_operands))
11256 {
11257 as_bad (_("too many register/memory operands"));
11258 goto done;
11259 }
11260
11261 /* Enforce certain constraints on operands. */
11262 switch (i.reg_operands + i.mem_operands
11263 + (i.tm.extension_opcode != None))
11264 {
11265 case 0:
11266 if (i.short_form)
11267 {
11268 as_bad (_("too few register/memory operands"));
11269 goto done;
11270 }
11271 /* Fall through. */
11272 case 1:
11273 if (i.tm.opcode_modifier.modrm)
11274 {
11275 as_bad (_("too few register/memory operands"));
11276 goto done;
11277 }
11278 break;
11279
11280 case 2:
11281 break;
11282
11283 case 4:
11284 if (i.imm_operands
11285 && (i.op[0].imms->X_op != O_constant
11286 || !fits_in_imm4 (i.op[0].imms->X_add_number)))
11287 {
11288 as_bad (_("constant doesn't fit in %d bits"), evex ? 3 : 4);
11289 goto done;
11290 }
11291 /* Fall through. */
11292 case 3:
11293 if (i.vec_encoding != vex_encoding_default)
11294 {
11295 i.tm.opcode_modifier.vexvvvv = 1;
11296 break;
11297 }
11298 /* Fall through. */
11299 default:
11300 as_bad (_("too many register/memory operands"));
11301 goto done;
11302 }
11303
11304 /* Bring operands into canonical order (imm, mem, reg). */
11305 do
11306 {
11307 changed = false;
11308
11309 for (j = 1; j < i.operands; ++j)
11310 {
11311 if ((!operand_type_check (i.types[j - 1], imm)
11312 && operand_type_check (i.types[j], imm))
11313 || (i.types[j - 1].bitfield.class != ClassNone
11314 && i.types[j].bitfield.class == ClassNone))
11315 {
11316 swap_2_operands (j - 1, j);
11317 changed = true;
11318 }
11319 }
11320 }
11321 while (changed);
11322
11323 /* For Intel syntax swap the order of register operands. */
11324 if (intel_syntax)
11325 switch (i.reg_operands)
11326 {
11327 case 0:
11328 case 1:
11329 break;
11330
11331 case 4:
11332 swap_2_operands (i.imm_operands + i.mem_operands + 1, i.operands - 2);
11333 /* Fall through. */
11334 case 3:
11335 case 2:
11336 swap_2_operands (i.imm_operands + i.mem_operands, i.operands - 1);
11337 break;
11338
11339 default:
11340 abort ();
11341 }
11342
11343 /* Enforce constraints when using VSIB. */
11344 if (i.index_reg
11345 && (i.index_reg->reg_type.bitfield.xmmword
11346 || i.index_reg->reg_type.bitfield.ymmword
11347 || i.index_reg->reg_type.bitfield.zmmword))
11348 {
11349 if (i.vec_encoding == vex_encoding_default)
11350 {
11351 as_bad (_("VSIB unavailable with legacy encoding"));
11352 goto done;
11353 }
11354
11355 if (i.vec_encoding == vex_encoding_evex
11356 && i.reg_operands > 1)
11357 {
11358 /* We could allow two register operands, encoding the 2nd one in
11359 an 8-bit immediate like for 4-register-operand insns, but that
11360 would require ugly fiddling with process_operands() and/or
11361 build_modrm_byte(). */
11362 as_bad (_("too many register operands with VSIB"));
11363 goto done;
11364 }
11365
11366 i.tm.opcode_modifier.sib = 1;
11367 }
11368
11369 /* Establish operand size encoding. */
11370 operand_type_set (&combined, 0);
11371
11372 for (j = i.imm_operands; j < i.operands; ++j)
11373 {
11374 i.types[j].bitfield.instance = InstanceNone;
11375
11376 if (operand_type_check (i.types[j], disp))
11377 {
11378 i.types[j].bitfield.baseindex = 1;
11379 disp_exp = i.op[j].disps;
11380 }
11381
11382 if (evex && i.types[j].bitfield.baseindex)
11383 {
11384 unsigned int n = i.memshift;
11385
11386 if (i.types[j].bitfield.byte)
11387 n = 0;
11388 else if (i.types[j].bitfield.word)
11389 n = 1;
11390 else if (i.types[j].bitfield.dword)
11391 n = 2;
11392 else if (i.types[j].bitfield.qword)
11393 n = 3;
11394 else if (i.types[j].bitfield.xmmword)
11395 n = 4;
11396 else if (i.types[j].bitfield.ymmword)
11397 n = 5;
11398 else if (i.types[j].bitfield.zmmword)
11399 n = 6;
11400
11401 if (i.memshift < 32 && n != i.memshift)
11402 as_warn ("conflicting memory operand size specifiers");
11403 i.memshift = n;
11404 }
11405
11406 if ((i.broadcast.type || i.broadcast.bytes)
11407 && j == i.broadcast.operand)
11408 continue;
11409
11410 combined = operand_type_or (combined, i.types[j]);
11411 combined.bitfield.class = ClassNone;
11412 }
11413
11414 switch ((i.broadcast.type ? i.broadcast.type : 1)
11415 << (i.memshift < 32 ? i.memshift : 0))
11416 {
11417 case 64: combined.bitfield.zmmword = 1; break;
11418 case 32: combined.bitfield.ymmword = 1; break;
11419 case 16: combined.bitfield.xmmword = 1; break;
11420 case 8: combined.bitfield.qword = 1; break;
11421 case 4: combined.bitfield.dword = 1; break;
11422 }
11423
11424 if (i.vec_encoding == vex_encoding_default)
11425 {
11426 if (flag_code == CODE_64BIT && combined.bitfield.qword)
11427 i.rex |= REX_W;
11428 else if ((flag_code == CODE_16BIT ? combined.bitfield.dword
11429 : combined.bitfield.word)
11430 && !add_prefix (DATA_PREFIX_OPCODE))
11431 goto done;
11432 }
11433 else if (!i.tm.opcode_modifier.vexw)
11434 {
11435 if (flag_code == CODE_64BIT)
11436 {
11437 if (combined.bitfield.qword)
11438 i.tm.opcode_modifier.vexw = VEXW1;
11439 else if (combined.bitfield.dword)
11440 i.tm.opcode_modifier.vexw = VEXW0;
11441 }
11442
11443 if (!i.tm.opcode_modifier.vexw)
11444 i.tm.opcode_modifier.vexw = VEXWIG;
11445 }
11446
11447 if (vex || xop)
11448 {
11449 if (!i.tm.opcode_modifier.vex)
11450 {
11451 if (combined.bitfield.ymmword)
11452 i.tm.opcode_modifier.vex = VEX256;
11453 else if (combined.bitfield.xmmword)
11454 i.tm.opcode_modifier.vex = VEX128;
11455 }
11456 }
11457 else if (evex)
11458 {
11459 if (!i.tm.opcode_modifier.evex)
11460 {
11461 /* Do _not_ consider AVX512VL here. */
11462 if (i.rounding.type != rc_none || combined.bitfield.zmmword)
11463 i.tm.opcode_modifier.evex = EVEX512;
11464 else if (combined.bitfield.ymmword)
11465 i.tm.opcode_modifier.evex = EVEX256;
11466 else if (combined.bitfield.xmmword)
11467 i.tm.opcode_modifier.evex = EVEX128;
11468 }
11469
11470 if (i.memshift >= 32)
11471 {
11472 unsigned int n = 0;
11473
11474 switch (i.tm.opcode_modifier.evex)
11475 {
11476 case EVEX512: n = 64; break;
11477 case EVEX256: n = 32; break;
11478 case EVEX128: n = 16; break;
11479 }
11480
11481 if (i.broadcast.type)
11482 n /= i.broadcast.type;
11483
11484 if (n > 0)
11485 for (i.memshift = 0; !(n & 1); n >>= 1)
11486 ++i.memshift;
11487 else if (disp_exp != NULL && disp_exp->X_op == O_constant
11488 && disp_exp->X_add_number != 0
11489 && i.disp_encoding != disp_encoding_32bit)
11490 {
11491 if (!quiet_warnings)
11492 as_warn ("cannot determine memory operand size");
11493 i.disp_encoding = disp_encoding_32bit;
11494 }
11495 }
11496 }
11497
11498 if (i.memshift >= 32)
11499 i.memshift = 0;
11500 else if (!evex)
11501 i.vec_encoding = vex_encoding_error;
11502
11503 if (i.disp_operands && !optimize_disp (&i.tm))
11504 goto done;
11505
11506 /* Establish size for immediate operands. */
11507 for (j = 0; j < i.imm_operands; ++j)
11508 {
11509 expressionS *expP = i.op[j].imms;
11510
11511 gas_assert (operand_type_check (i.types[j], imm));
11512 operand_type_set (&i.types[j], 0);
11513
11514 if (i.imm_bits[j] > 32)
11515 i.types[j].bitfield.imm64 = 1;
11516 else if (i.imm_bits[j] > 16)
11517 {
11518 if (flag_code == CODE_64BIT && (i.flags[j] & Operand_Signed))
11519 i.types[j].bitfield.imm32s = 1;
11520 else
11521 i.types[j].bitfield.imm32 = 1;
11522 }
11523 else if (i.imm_bits[j] > 8)
11524 i.types[j].bitfield.imm16 = 1;
11525 else if (i.imm_bits[j] > 0)
11526 {
11527 if (i.flags[j] & Operand_Signed)
11528 i.types[j].bitfield.imm8s = 1;
11529 else
11530 i.types[j].bitfield.imm8 = 1;
11531 }
11532 else if (expP->X_op == O_constant)
11533 {
11534 i.types[j] = smallest_imm_type (expP->X_add_number);
11535 i.types[j].bitfield.imm1 = 0;
11536 /* Oddly enough imm_size() checks imm64 first, so the bit needs
11537 zapping since smallest_imm_type() sets it unconditionally. */
11538 if (flag_code != CODE_64BIT)
11539 {
11540 i.types[j].bitfield.imm64 = 0;
11541 i.types[j].bitfield.imm32s = 0;
11542 i.types[j].bitfield.imm32 = 1;
11543 }
11544 else if (i.types[j].bitfield.imm32 || i.types[j].bitfield.imm32s)
11545 i.types[j].bitfield.imm64 = 0;
11546 }
11547 else
11548 /* Non-constant expressions are sized heuristically. */
11549 switch (flag_code)
11550 {
11551 case CODE_64BIT: i.types[j].bitfield.imm32s = 1; break;
11552 case CODE_32BIT: i.types[j].bitfield.imm32 = 1; break;
11553 case CODE_16BIT: i.types[j].bitfield.imm16 = 1; break;
11554 }
11555 }
11556
11557 for (j = 0; j < i.operands; ++j)
11558 i.tm.operand_types[j] = i.types[j];
11559
11560 process_operands ();
11561 }
11562
11563 /* Don't set opcode until after processing operands, to avoid any
11564 potential special casing there. */
11565 i.tm.base_opcode |= val;
11566
11567 if (i.vec_encoding == vex_encoding_error
11568 || (i.vec_encoding != vex_encoding_evex
11569 ? i.broadcast.type || i.broadcast.bytes
11570 || i.rounding.type != rc_none
11571 || i.mask.reg
11572 : (i.broadcast.type || i.broadcast.bytes)
11573 && i.rounding.type != rc_none))
11574 {
11575 as_bad (_("conflicting .insn operands"));
11576 goto done;
11577 }
11578
11579 if (vex || xop)
11580 {
11581 if (!i.tm.opcode_modifier.vex)
11582 i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
11583
11584 build_vex_prefix (NULL);
11585 i.rex &= REX_OPCODE;
11586 }
11587 else if (evex)
11588 {
11589 if (!i.tm.opcode_modifier.evex)
11590 i.tm.opcode_modifier.evex = EVEXLIG;
11591
11592 build_evex_prefix ();
11593 i.rex &= REX_OPCODE;
11594 }
11595 else if (i.rex != 0)
11596 add_prefix (REX_OPCODE | i.rex);
11597
11598 output_insn ();
11599
11600 done:
11601 *saved_ilp = saved_char;
11602 input_line_pointer = line;
11603
11604 demand_empty_rest_of_line ();
11605
11606 /* Make sure dot_insn() won't yield "true" anymore. */
11607 i.tm.mnem_off = 0;
11608 }
11609
11610 #ifdef TE_PE
11611 static void
11612 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
11613 {
11614 expressionS exp;
11615
11616 do
11617 {
11618 expression (&exp);
11619 if (exp.X_op == O_symbol)
11620 exp.X_op = O_secrel;
11621
11622 emit_expr (&exp, 4);
11623 }
11624 while (*input_line_pointer++ == ',');
11625
11626 input_line_pointer--;
11627 demand_empty_rest_of_line ();
11628 }
11629
11630 static void
11631 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
11632 {
11633 expressionS exp;
11634
11635 do
11636 {
11637 expression (&exp);
11638 if (exp.X_op == O_symbol)
11639 exp.X_op = O_secidx;
11640
11641 emit_expr (&exp, 2);
11642 }
11643 while (*input_line_pointer++ == ',');
11644
11645 input_line_pointer--;
11646 demand_empty_rest_of_line ();
11647 }
11648 #endif
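/* Usage sketch for the PE-only directives above ("foo" is a
   hypothetical symbol):

       .secrel32 foo        # 4-byte section-relative offset (O_secrel)
       .secidx foo          # 2-byte section index (O_secidx)
*/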
11649
11650 /* Handle Rounding Control / SAE specifiers. */
11651
11652 static char *
11653 RC_SAE_specifier (const char *pstr)
11654 {
11655 unsigned int j;
11656
11657 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
11658 {
11659 if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
11660 {
11661 if (i.rounding.type != rc_none)
11662 {
11663 as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
11664 return NULL;
11665 }
11666
11667 i.rounding.type = RC_NamesTable[j].type;
11668
11669 return (char *)(pstr + RC_NamesTable[j].len);
11670 }
11671 }
11672
11673 return NULL;
11674 }
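/* The specifiers matched above come from RC_NamesTable; in source they
   look like (illustrative):

       {rn-sae}  {rd-sae}  {ru-sae}  {rz-sae}  {sae}
*/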
11675
11676 /* Handle Vector operations. */
11677
11678 static char *
11679 check_VecOperations (char *op_string)
11680 {
11681 const reg_entry *mask;
11682 const char *saved;
11683 char *end_op;
11684
11685 while (*op_string)
11686 {
11687 saved = op_string;
11688 if (*op_string == '{')
11689 {
11690 op_string++;
11691
11692 /* Check broadcasts. */
11693 if (startswith (op_string, "1to"))
11694 {
11695 unsigned int bcst_type;
11696
11697 if (i.broadcast.type)
11698 goto duplicated_vec_op;
11699
11700 op_string += 3;
11701 if (*op_string == '8')
11702 bcst_type = 8;
11703 else if (*op_string == '4')
11704 bcst_type = 4;
11705 else if (*op_string == '2')
11706 bcst_type = 2;
11707 else if (*op_string == '1'
11708 && *(op_string+1) == '6')
11709 {
11710 bcst_type = 16;
11711 op_string++;
11712 }
11713 else if (*op_string == '3'
11714 && *(op_string+1) == '2')
11715 {
11716 bcst_type = 32;
11717 op_string++;
11718 }
11719 else
11720 {
11721 as_bad (_("Unsupported broadcast: `%s'"), saved);
11722 return NULL;
11723 }
11724 op_string++;
11725
11726 i.broadcast.type = bcst_type;
11727 i.broadcast.operand = this_operand;
11728
11729 /* For .insn a data size specifier may be appended. */
11730 if (dot_insn () && *op_string == ':')
11731 goto dot_insn_modifier;
11732 }
11733 /* Check .insn special cases. */
11734 else if (dot_insn () && *op_string == ':')
11735 {
11736 dot_insn_modifier:
11737 switch (op_string[1])
11738 {
11739 unsigned long n;
11740
11741 case 'd':
11742 if (i.memshift < 32)
11743 goto duplicated_vec_op;
11744
11745 n = strtoul (op_string + 2, &end_op, 0);
11746 if (n)
11747 for (i.memshift = 0; !(n & 1); n >>= 1)
11748 ++i.memshift;
11749 if (i.memshift < 32 && n == 1)
11750 op_string = end_op;
11751 break;
11752
11753 case 's': case 'u':
11754 /* This isn't really a "vector" operation, but a sign/size
11755 specifier for immediate operands of .insn. Note that AT&T
11756 syntax handles the same in i386_immediate(). */
11757 if (!intel_syntax)
11758 break;
11759
11760 if (i.imm_bits[this_operand])
11761 goto duplicated_vec_op;
11762
11763 n = strtoul (op_string + 2, &end_op, 0);
11764 if (n && n <= (flag_code == CODE_64BIT ? 64 : 32))
11765 {
11766 i.imm_bits[this_operand] = n;
11767 if (op_string[1] == 's')
11768 i.flags[this_operand] |= Operand_Signed;
11769 op_string = end_op;
11770 }
11771 break;
11772 }
11773 }
11774 /* Check masking operation. */
11775 else if ((mask = parse_register (op_string, &end_op)) != NULL)
11776 {
11777 if (mask == &bad_reg)
11778 return NULL;
11779
11780 /* k0 can't be used for write mask. */
11781 if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
11782 {
11783 as_bad (_("`%s%s' can't be used for write mask"),
11784 register_prefix, mask->reg_name);
11785 return NULL;
11786 }
11787
11788 if (!i.mask.reg)
11789 {
11790 i.mask.reg = mask;
11791 i.mask.operand = this_operand;
11792 }
11793 else if (i.mask.reg->reg_num)
11794 goto duplicated_vec_op;
11795 else
11796 {
11797 i.mask.reg = mask;
11798
11799 /* Only "{z}" is allowed here. No need to check
11800 zeroing mask explicitly. */
11801 if (i.mask.operand != (unsigned int) this_operand)
11802 {
11803 as_bad (_("invalid write mask `%s'"), saved);
11804 return NULL;
11805 }
11806 }
11807
11808 op_string = end_op;
11809 }
11810 /* Check zeroing-flag for masking operation. */
11811 else if (*op_string == 'z')
11812 {
11813 if (!i.mask.reg)
11814 {
11815 i.mask.reg = reg_k0;
11816 i.mask.zeroing = 1;
11817 i.mask.operand = this_operand;
11818 }
11819 else
11820 {
11821 if (i.mask.zeroing)
11822 {
11823 duplicated_vec_op:
11824 as_bad (_("duplicated `%s'"), saved);
11825 return NULL;
11826 }
11827
11828 i.mask.zeroing = 1;
11829
11830 /* Only "{%k}" is allowed here. No need to check mask
11831 register explicitly. */
11832 if (i.mask.operand != (unsigned int) this_operand)
11833 {
11834 as_bad (_("invalid zeroing-masking `%s'"),
11835 saved);
11836 return NULL;
11837 }
11838 }
11839
11840 op_string++;
11841 }
11842 else if (intel_syntax
11843 && (op_string = RC_SAE_specifier (op_string)) != NULL)
11844 i.rounding.modifier = true;
11845 else
11846 goto unknown_vec_op;
11847
11848 if (*op_string != '}')
11849 {
11850 as_bad (_("missing `}' in `%s'"), saved);
11851 return NULL;
11852 }
11853 op_string++;
11854
11855 /* Strip whitespace since the addition of pseudo prefixes
11856 changed how the scrubber treats '{'. */
11857 if (is_space_char (*op_string))
11858 ++op_string;
11859
11860 continue;
11861 }
11862 unknown_vec_op:
11863 /* We don't know this one. */
11864 as_bad (_("unknown vector operation: `%s'"), saved);
11865 return NULL;
11866 }
11867
11868 if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
11869 {
11870 as_bad (_("zeroing-masking only allowed with write mask"));
11871 return NULL;
11872 }
11873
11874 return op_string;
11875 }
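/* A few operand forms the parser above accepts (illustrative only):

       (%rax){1to8}          broadcast, i.broadcast.type == 8
       %zmm1{%k2}            merging-masking with mask register k2
       %zmm1{%k2}{z}         zeroing-masking
*/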
11876
11877 static int
11878 i386_immediate (char *imm_start)
11879 {
11880 char *save_input_line_pointer;
11881 char *gotfree_input_line;
11882 segT exp_seg = 0;
11883 expressionS *exp;
11884 i386_operand_type types;
11885
11886 operand_type_set (&types, ~0);
11887
11888 if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
11889 {
11890 as_bad (_("at most %d immediate operands are allowed"),
11891 MAX_IMMEDIATE_OPERANDS);
11892 return 0;
11893 }
11894
11895 exp = &im_expressions[i.imm_operands++];
11896 i.op[this_operand].imms = exp;
11897
11898 if (is_space_char (*imm_start))
11899 ++imm_start;
11900
11901 save_input_line_pointer = input_line_pointer;
11902 input_line_pointer = imm_start;
11903
11904 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
11905 if (gotfree_input_line)
11906 input_line_pointer = gotfree_input_line;
11907
11908 expr_mode = expr_operator_none;
11909 exp_seg = expression (exp);
11910
11911 /* For .insn immediates there may be a size specifier. */
11912 if (dot_insn () && *input_line_pointer == '{' && input_line_pointer[1] == ':'
11913 && (input_line_pointer[2] == 's' || input_line_pointer[2] == 'u'))
11914 {
11915 char *e;
11916 unsigned long n = strtoul (input_line_pointer + 3, &e, 0);
11917
11918 if (*e == '}' && n && n <= (flag_code == CODE_64BIT ? 64 : 32))
11919 {
11920 i.imm_bits[this_operand] = n;
11921 if (input_line_pointer[2] == 's')
11922 i.flags[this_operand] |= Operand_Signed;
11923 input_line_pointer = e + 1;
11924 }
11925 }
11926
11927 SKIP_WHITESPACE ();
11928 if (*input_line_pointer)
11929 as_bad (_("junk `%s' after expression"), input_line_pointer);
11930
11931 input_line_pointer = save_input_line_pointer;
11932 if (gotfree_input_line)
11933 {
11934 free (gotfree_input_line);
11935
11936 if (exp->X_op == O_constant)
11937 exp->X_op = O_illegal;
11938 }
11939
11940 if (exp_seg == reg_section)
11941 {
11942 as_bad (_("illegal immediate register operand %s"), imm_start);
11943 return 0;
11944 }
11945
11946 return i386_finalize_immediate (exp_seg, exp, types, imm_start);
11947 }
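/* Sketch of the .insn-only immediate size specifier parsed above
   (values are illustrative):

       $0x12{:u8}        force an 8-bit unsigned immediate
       $-1{:s16}         force a 16-bit signed immediate (Operand_Signed)
*/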
11948
11949 static int
11950 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
11951 i386_operand_type types, const char *imm_start)
11952 {
11953 if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
11954 {
11955 if (imm_start)
11956 as_bad (_("missing or invalid immediate expression `%s'"),
11957 imm_start);
11958 return 0;
11959 }
11960 else if (exp->X_op == O_constant)
11961 {
11962 /* Size it properly later. */
11963 i.types[this_operand].bitfield.imm64 = 1;
11964
11965 /* If not 64bit, sign/zero extend val, to account for wraparound
11966 when !BFD64. */
11967 if (expr_mode == expr_operator_present
11968 && flag_code != CODE_64BIT && !object_64bit)
11969 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
11970 }
11971 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
11972 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
11973 && exp_seg != absolute_section
11974 && exp_seg != text_section
11975 && exp_seg != data_section
11976 && exp_seg != bss_section
11977 && exp_seg != undefined_section
11978 && !bfd_is_com_section (exp_seg))
11979 {
11980 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
11981 return 0;
11982 }
11983 #endif
11984 else
11985 {
11986 /* This is an address. The size of the address will be
11987 determined later, depending on destination register,
11988 suffix, or the default for the section. */
11989 i.types[this_operand].bitfield.imm8 = 1;
11990 i.types[this_operand].bitfield.imm16 = 1;
11991 i.types[this_operand].bitfield.imm32 = 1;
11992 i.types[this_operand].bitfield.imm32s = 1;
11993 i.types[this_operand].bitfield.imm64 = 1;
11994 i.types[this_operand] = operand_type_and (i.types[this_operand],
11995 types);
11996 }
11997
11998 return 1;
11999 }
12000
12001 static char *
12002 i386_scale (char *scale)
12003 {
12004 offsetT val;
12005 char *save = input_line_pointer;
12006
12007 input_line_pointer = scale;
12008 val = get_absolute_expression ();
12009
12010 switch (val)
12011 {
12012 case 1:
12013 i.log2_scale_factor = 0;
12014 break;
12015 case 2:
12016 i.log2_scale_factor = 1;
12017 break;
12018 case 4:
12019 i.log2_scale_factor = 2;
12020 break;
12021 case 8:
12022 i.log2_scale_factor = 3;
12023 break;
12024 default:
12025 {
12026 char sep = *input_line_pointer;
12027
12028 *input_line_pointer = '\0';
12029 as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
12030 scale);
12031 *input_line_pointer = sep;
12032 input_line_pointer = save;
12033 return NULL;
12034 }
12035 }
12036 if (i.log2_scale_factor != 0 && i.index_reg == 0)
12037 {
12038 as_warn (_("scale factor of %d without an index register"),
12039 1 << i.log2_scale_factor);
12040 i.log2_scale_factor = 0;
12041 }
12042 scale = input_line_pointer;
12043 input_line_pointer = save;
12044 return scale;
12045 }
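/* Scale parsing example: in "(%eax,%ebx,4)" the "4" handled above
   yields i.log2_scale_factor == 2; "(,%ebx,2)" yields 1; a scale
   without an index register is warned about and reset to 0.  */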
12046
12047 static int
12048 i386_displacement (char *disp_start, char *disp_end)
12049 {
12050 expressionS *exp;
12051 segT exp_seg = 0;
12052 char *save_input_line_pointer;
12053 char *gotfree_input_line;
12054 int override;
12055 i386_operand_type bigdisp, types = anydisp;
12056 int ret;
12057
12058 if (i.disp_operands == MAX_MEMORY_OPERANDS)
12059 {
12060 as_bad (_("at most %d displacement operands are allowed"),
12061 MAX_MEMORY_OPERANDS);
12062 return 0;
12063 }
12064
12065 operand_type_set (&bigdisp, 0);
12066 if (i.jumpabsolute
12067 || i.types[this_operand].bitfield.baseindex
12068 || (current_templates->start->opcode_modifier.jump != JUMP
12069 && current_templates->start->opcode_modifier.jump != JUMP_DWORD))
12070 {
12071 i386_addressing_mode ();
12072 override = (i.prefix[ADDR_PREFIX] != 0);
12073 if (flag_code == CODE_64BIT)
12074 {
12075 bigdisp.bitfield.disp32 = 1;
12076 if (!override)
12077 bigdisp.bitfield.disp64 = 1;
12078 }
12079 else if ((flag_code == CODE_16BIT) ^ override)
12080 bigdisp.bitfield.disp16 = 1;
12081 else
12082 bigdisp.bitfield.disp32 = 1;
12083 }
12084 else
12085 {
12086 /* For PC-relative branches, the width of the displacement may be
12087 dependent upon data size, but is never dependent upon address size.
12088 Also make sure to not unintentionally match against a non-PC-relative
12089 branch template. */
12090 static templates aux_templates;
12091 const insn_template *t = current_templates->start;
12092 bool has_intel64 = false;
12093
12094 aux_templates.start = t;
12095 while (++t < current_templates->end)
12096 {
12097 if (t->opcode_modifier.jump
12098 != current_templates->start->opcode_modifier.jump)
12099 break;
12100 if ((t->opcode_modifier.isa64 >= INTEL64))
12101 has_intel64 = true;
12102 }
12103 if (t < current_templates->end)
12104 {
12105 aux_templates.end = t;
12106 current_templates = &aux_templates;
12107 }
12108
12109 override = (i.prefix[DATA_PREFIX] != 0);
12110 if (flag_code == CODE_64BIT)
12111 {
12112 if ((override || i.suffix == WORD_MNEM_SUFFIX)
12113 && (!intel64 || !has_intel64))
12114 bigdisp.bitfield.disp16 = 1;
12115 else
12116 bigdisp.bitfield.disp32 = 1;
12117 }
12118 else
12119 {
12120 if (!override)
12121 override = (i.suffix == (flag_code != CODE_16BIT
12122 ? WORD_MNEM_SUFFIX
12123 : LONG_MNEM_SUFFIX));
12124 bigdisp.bitfield.disp32 = 1;
12125 if ((flag_code == CODE_16BIT) ^ override)
12126 {
12127 bigdisp.bitfield.disp32 = 0;
12128 bigdisp.bitfield.disp16 = 1;
12129 }
12130 }
12131 }
12132 i.types[this_operand] = operand_type_or (i.types[this_operand],
12133 bigdisp);
12134
12135 exp = &disp_expressions[i.disp_operands];
12136 i.op[this_operand].disps = exp;
12137 i.disp_operands++;
12138 save_input_line_pointer = input_line_pointer;
12139 input_line_pointer = disp_start;
12140 END_STRING_AND_SAVE (disp_end);
12141
12142 #ifndef GCC_ASM_O_HACK
12143 #define GCC_ASM_O_HACK 0
12144 #endif
12145 #if GCC_ASM_O_HACK
12146 END_STRING_AND_SAVE (disp_end + 1);
12147 if (i.types[this_operand].bitfield.baseindex
12148 && disp_end[-1] == '+')
12149 {
12150 /* This hack is to avoid a warning when using the "o"
12151 constraint within gcc asm statements.
12152 For instance:
12153
12154 #define _set_tssldt_desc(n,addr,limit,type) \
12155 __asm__ __volatile__ ( \
12156 "movw %w2,%0\n\t" \
12157 "movw %w1,2+%0\n\t" \
12158 "rorl $16,%1\n\t" \
12159 "movb %b1,4+%0\n\t" \
12160 "movb %4,5+%0\n\t" \
12161 "movb $0,6+%0\n\t" \
12162 "movb %h1,7+%0\n\t" \
12163 "rorl $16,%1" \
12164 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
12165
12166 This works great except that the output assembler ends
12167 up looking a bit weird if it turns out that there is
12168 no offset. You end up producing code that looks like:
12169
12170 #APP
12171 movw $235,(%eax)
12172 movw %dx,2+(%eax)
12173 rorl $16,%edx
12174 movb %dl,4+(%eax)
12175 movb $137,5+(%eax)
12176 movb $0,6+(%eax)
12177 movb %dh,7+(%eax)
12178 rorl $16,%edx
12179 #NO_APP
12180
12181 So here we provide the missing zero. */
12182
12183 *disp_end = '0';
12184 }
12185 #endif
12186 gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
12187 if (gotfree_input_line)
12188 input_line_pointer = gotfree_input_line;
12189
12190 expr_mode = expr_operator_none;
12191 exp_seg = expression (exp);
12192
12193 SKIP_WHITESPACE ();
12194 if (*input_line_pointer)
12195 as_bad (_("junk `%s' after expression"), input_line_pointer);
12196 #if GCC_ASM_O_HACK
12197 RESTORE_END_STRING (disp_end + 1);
12198 #endif
12199 input_line_pointer = save_input_line_pointer;
12200 if (gotfree_input_line)
12201 {
12202 free (gotfree_input_line);
12203
12204 if (exp->X_op == O_constant || exp->X_op == O_register)
12205 exp->X_op = O_illegal;
12206 }
12207
12208 ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
12209
12210 RESTORE_END_STRING (disp_end);
12211
12212 return ret;
12213 }
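/* Branch displacement sizing sketch (64-bit code, illustrative):
   "jmp foo" gets Disp32; with a data-size override or a "w" suffix
   (and no Intel64-only template in the group) it gets Disp16 instead,
   per the flag_code == CODE_64BIT logic above.  */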
12214
12215 static int
12216 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
12217 i386_operand_type types, const char *disp_start)
12218 {
12219 int ret = 1;
12220
12221 /* We do this to make sure that the section symbol is in
12222 the symbol table. We will ultimately change the relocation
12223 to be relative to the beginning of the section. */
12224 if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
12225 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
12226 || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
12227 {
12228 if (exp->X_op != O_symbol)
12229 goto inv_disp;
12230
12231 if (S_IS_LOCAL (exp->X_add_symbol)
12232 && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
12233 && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
12234 section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
12235 exp->X_op = O_subtract;
12236 exp->X_op_symbol = GOT_symbol;
12237 if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
12238 i.reloc[this_operand] = BFD_RELOC_32_PCREL;
12239 else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
12240 i.reloc[this_operand] = BFD_RELOC_64;
12241 else
12242 i.reloc[this_operand] = BFD_RELOC_32;
12243 }
12244
12245 else if (exp->X_op == O_absent
12246 || exp->X_op == O_illegal
12247 || exp->X_op == O_big)
12248 {
12249 inv_disp:
12250 as_bad (_("missing or invalid displacement expression `%s'"),
12251 disp_start);
12252 ret = 0;
12253 }
12254
12255 else if (exp->X_op == O_constant)
12256 {
12257 /* Sizing gets taken care of by optimize_disp().
12258
12259 If not 64bit, sign/zero extend val, to account for wraparound
12260 when !BFD64. */
12261 if (expr_mode == expr_operator_present
12262 && flag_code != CODE_64BIT && !object_64bit)
12263 exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
12264 }
12265
12266 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
12267 else if (OUTPUT_FLAVOR == bfd_target_aout_flavour
12268 && exp_seg != absolute_section
12269 && exp_seg != text_section
12270 && exp_seg != data_section
12271 && exp_seg != bss_section
12272 && exp_seg != undefined_section
12273 && !bfd_is_com_section (exp_seg))
12274 {
12275 as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
12276 ret = 0;
12277 }
12278 #endif
12279
12280 else if (current_templates->start->opcode_modifier.jump == JUMP_BYTE)
12281 i.types[this_operand].bitfield.disp8 = 1;
12282
12283 /* Check if this is a displacement only operand. */
12284 if (!i.types[this_operand].bitfield.baseindex)
12285 i.types[this_operand] =
12286 operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
12287 operand_type_and (i.types[this_operand], types));
12288
12289 return ret;
12290 }
12291
12292 /* Return the active addressing mode, taking address override and
12293 registers forming the address into consideration. Update the
12294 address override prefix if necessary. */
12295
12296 static enum flag_code
12297 i386_addressing_mode (void)
12298 {
12299 enum flag_code addr_mode;
12300
12301 if (i.prefix[ADDR_PREFIX])
12302 addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
12303 else if (flag_code == CODE_16BIT
12304 && is_cpu (current_templates->start, CpuMPX)
12305 /* Avoid replacing the "16-bit addressing not allowed" diagnostic
12306 from md_assemble() with "is not a valid base/index expression"
12307 when there is a base and/or index. */
12308 && !i.types[this_operand].bitfield.baseindex)
12309 {
12310 /* MPX insn memory operands with neither base nor index must be forced
12311 to use 32-bit addressing in 16-bit mode. */
12312 addr_mode = CODE_32BIT;
12313 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
12314 ++i.prefixes;
12315 gas_assert (!i.types[this_operand].bitfield.disp16);
12316 gas_assert (!i.types[this_operand].bitfield.disp32);
12317 }
12318 else
12319 {
12320 addr_mode = flag_code;
12321
12322 #if INFER_ADDR_PREFIX
12323 if (i.mem_operands == 0)
12324 {
12325 /* Infer address prefix from the first memory operand. */
12326 const reg_entry *addr_reg = i.base_reg;
12327
12328 if (addr_reg == NULL)
12329 addr_reg = i.index_reg;
12330
12331 if (addr_reg)
12332 {
12333 if (addr_reg->reg_type.bitfield.dword)
12334 addr_mode = CODE_32BIT;
12335 else if (flag_code != CODE_64BIT
12336 && addr_reg->reg_type.bitfield.word)
12337 addr_mode = CODE_16BIT;
12338
12339 if (addr_mode != flag_code)
12340 {
12341 i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
12342 i.prefixes += 1;
12343 /* Change the size of any displacement too. At most one
12344 of Disp16 or Disp32 is set.
12345 FIXME. There doesn't seem to be any real need for
12346 separate Disp16 and Disp32 flags. The same goes for
12347 Imm16 and Imm32. Removing them would probably clean
12348 up the code quite a lot. */
12349 if (flag_code != CODE_64BIT
12350 && (i.types[this_operand].bitfield.disp16
12351 || i.types[this_operand].bitfield.disp32))
12352 {
12353 static const i386_operand_type disp16_32 = {
12354 .bitfield = { .disp16 = 1, .disp32 = 1 }
12355 };
12356
12357 i.types[this_operand]
12358 = operand_type_xor (i.types[this_operand], disp16_32);
12359 }
12360 }
12361 }
12362 }
12363 #endif
12364 }
12365
12366 return addr_mode;
12367 }
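/* Example of the inference above: assembling "mov (%ebx), %eax" in
   64-bit code picks CODE_32BIT from the dword base register and adds
   the 0x67 address-size prefix; "mov (%bx), %ax" in 32-bit code
   likewise switches to CODE_16BIT.  */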
12368
12369 /* Make sure the memory operand we've been dealt is valid.
12370 Return 1 on success, 0 on a failure. */
12371
12372 static int
12373 i386_index_check (const char *operand_string)
12374 {
12375 const char *kind = "base/index";
12376 enum flag_code addr_mode = i386_addressing_mode ();
12377 const insn_template *t = current_templates->end - 1;
12378
12379 if (t->opcode_modifier.isstring)
12380 {
12381 /* Memory operands of string insns are special in that they only allow
12382 a single register (rDI, rSI, or rBX) as their memory address. */
12383 const reg_entry *expected_reg;
12384 static const char di_si[][2][4] =
12385 {
12386 { "esi", "edi" },
12387 { "si", "di" },
12388 { "rsi", "rdi" }
12389 };
12390 static const char bx[][4] = { "ebx", "bx", "rbx" };
12391
12392 kind = "string address";
12393
12394 if (t->opcode_modifier.prefixok == PrefixRep)
12395 {
12396 int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
12397 int op = 0;
12398
12399 if (!t->operand_types[0].bitfield.baseindex
12400 || ((!i.mem_operands != !intel_syntax)
12401 && t->operand_types[1].bitfield.baseindex))
12402 op = 1;
12403 expected_reg
12404 = (const reg_entry *) str_hash_find (reg_hash,
12405 di_si[addr_mode][op == es_op]);
12406 }
12407 else
12408 expected_reg
12409 = (const reg_entry *)str_hash_find (reg_hash, bx[addr_mode]);
12410
12411 if (i.base_reg != expected_reg
12412 || i.index_reg
12413 || operand_type_check (i.types[this_operand], disp))
12414 {
12415 /* The second memory operand must have the same size as
12416 the first one. */
12417 if (i.mem_operands
12418 && i.base_reg
12419 && !((addr_mode == CODE_64BIT
12420 && i.base_reg->reg_type.bitfield.qword)
12421 || (addr_mode == CODE_32BIT
12422 ? i.base_reg->reg_type.bitfield.dword
12423 : i.base_reg->reg_type.bitfield.word)))
12424 goto bad_address;
12425
12426 as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
12427 operand_string,
12428 intel_syntax ? '[' : '(',
12429 register_prefix,
12430 expected_reg->reg_name,
12431 intel_syntax ? ']' : ')');
12432 return 1;
12433 }
12434 else
12435 return 1;
12436
12437 bad_address:
12438 as_bad (_("`%s' is not a valid %s expression"),
12439 operand_string, kind);
12440 return 0;
12441 }
12442 else
12443 {
12444 t = current_templates->start;
12445
12446 if (addr_mode != CODE_16BIT)
12447 {
12448 /* 32-bit/64-bit checks. */
12449 if (i.disp_encoding == disp_encoding_16bit)
12450 {
12451 bad_disp:
12452 as_bad (_("invalid `%s' prefix"),
12453 addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
12454 return 0;
12455 }
12456
12457 if ((i.base_reg
12458 && ((addr_mode == CODE_64BIT
12459 ? !i.base_reg->reg_type.bitfield.qword
12460 : !i.base_reg->reg_type.bitfield.dword)
12461 || (i.index_reg && i.base_reg->reg_num == RegIP)
12462 || i.base_reg->reg_num == RegIZ))
12463 || (i.index_reg
12464 && !i.index_reg->reg_type.bitfield.xmmword
12465 && !i.index_reg->reg_type.bitfield.ymmword
12466 && !i.index_reg->reg_type.bitfield.zmmword
12467 && ((addr_mode == CODE_64BIT
12468 ? !i.index_reg->reg_type.bitfield.qword
12469 : !i.index_reg->reg_type.bitfield.dword)
12470 || !i.index_reg->reg_type.bitfield.baseindex)))
12471 goto bad_address;
12472
12473 /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
12474 if (t->mnem_off == MN_bndmk
12475 || t->mnem_off == MN_bndldx
12476 || t->mnem_off == MN_bndstx
12477 || t->opcode_modifier.sib == SIBMEM)
12478 {
12479 /* They cannot use RIP-relative addressing. */
12480 if (i.base_reg && i.base_reg->reg_num == RegIP)
12481 {
12482 as_bad (_("`%s' cannot be used here"), operand_string);
12483 return 0;
12484 }
12485
12486 /* bndldx and bndstx ignore their scale factor. */
12487 if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
12488 && i.log2_scale_factor)
12489 as_warn (_("register scaling is being ignored here"));
12490 }
12491 }
12492 else
12493 {
12494 /* 16-bit checks. */
12495 if (i.disp_encoding == disp_encoding_32bit)
12496 goto bad_disp;
12497
12498 if ((i.base_reg
12499 && (!i.base_reg->reg_type.bitfield.word
12500 || !i.base_reg->reg_type.bitfield.baseindex))
12501 || (i.index_reg
12502 && (!i.index_reg->reg_type.bitfield.word
12503 || !i.index_reg->reg_type.bitfield.baseindex
12504 || !(i.base_reg
12505 && i.base_reg->reg_num < 6
12506 && i.index_reg->reg_num >= 6
12507 && i.log2_scale_factor == 0))))
12508 goto bad_address;
12509 }
12510 }
12511 return 1;
12512 }
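/* The 16-bit checks above implement the classic base/index pairs,
   e.g. "(%bx,%si)" is accepted while "(%si,%bx)" and any scale factor
   other than 1 are rejected (illustrative of the reg_num and
   log2_scale_factor tests).  */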
12513
12514 /* Handle vector immediates. */
12515
12516 static int
12517 RC_SAE_immediate (const char *imm_start)
12518 {
12519 const char *pstr = imm_start;
12520
12521 if (*pstr != '{')
12522 return 0;
12523
12524 pstr = RC_SAE_specifier (pstr + 1);
12525 if (pstr == NULL)
12526 return 0;
12527
12528 if (*pstr++ != '}')
12529 {
12530 as_bad (_("Missing '}': '%s'"), imm_start);
12531 return 0;
12532 }
12533 /* RC/SAE immediate string should contain nothing more. */
12534 if (*pstr != 0)
12535 {
12536 as_bad (_("Junk after '}': '%s'"), imm_start);
12537 return 0;
12538 }
12539
12540 /* Internally this doesn't count as an operand. */
12541 --i.operands;
12542
12543 return 1;
12544 }
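/* RC/SAE pseudo-operand example (AT&T syntax, illustrative):

       vaddps {rn-sae}, %zmm0, %zmm1, %zmm2

   The "{rn-sae}" is parsed here and then dropped from i.operands,
   since it only selects EVEX rounding/SAE encoding bits.  */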
12545
12546 static INLINE bool starts_memory_operand (char c)
12547 {
12548 return ISDIGIT (c)
12549 || is_name_beginner (c)
12550 || strchr ("([\"+-!~", c);
12551 }
12552
12553 /* Parse OPERAND_STRING into the i386_insn structure I. Returns zero
12554 on error. */
12555
12556 static int
12557 i386_att_operand (char *operand_string)
12558 {
12559 const reg_entry *r;
12560 char *end_op;
12561 char *op_string = operand_string;
12562
12563 if (is_space_char (*op_string))
12564 ++op_string;
12565
12566 /* We check for an absolute prefix (differentiating,
12567 for example, 'jmp pc_relative_label' from 'jmp *absolute_label'). */
12568 if (*op_string == ABSOLUTE_PREFIX
12569 && current_templates->start->opcode_modifier.jump)
12570 {
12571 ++op_string;
12572 if (is_space_char (*op_string))
12573 ++op_string;
12574 i.jumpabsolute = true;
12575 }
12576
12577 /* Check if operand is a register. */
12578 if ((r = parse_register (op_string, &end_op)) != NULL)
12579 {
12580 i386_operand_type temp;
12581
12582 if (r == &bad_reg)
12583 return 0;
12584
12585 /* Check for a segment override by searching for ':' after a
12586 segment register. */
12587 op_string = end_op;
12588 if (is_space_char (*op_string))
12589 ++op_string;
12590 if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
12591 {
12592 i.seg[i.mem_operands] = r;
12593
12594 /* Skip the ':' and whitespace. */
12595 ++op_string;
12596 if (is_space_char (*op_string))
12597 ++op_string;
12598
12599 /* Handle case of %es:*foo. */
12600 if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
12601 && current_templates->start->opcode_modifier.jump)
12602 {
12603 ++op_string;
12604 if (is_space_char (*op_string))
12605 ++op_string;
12606 i.jumpabsolute = true;
12607 }
12608
12609 if (!starts_memory_operand (*op_string))
12610 {
12611 as_bad (_("bad memory operand `%s'"), op_string);
12612 return 0;
12613 }
12614 goto do_memory_reference;
12615 }
12616
12617 /* Handle vector operations. */
12618 if (*op_string == '{')
12619 {
12620 op_string = check_VecOperations (op_string);
12621 if (op_string == NULL)
12622 return 0;
12623 }
12624
12625 if (*op_string)
12626 {
12627 as_bad (_("junk `%s' after register"), op_string);
12628 return 0;
12629 }
12630
12631 /* Reject pseudo registers for .insn. */
12632 if (dot_insn () && r->reg_type.bitfield.class == ClassNone)
12633 {
12634 as_bad (_("`%s%s' cannot be used here"),
12635 register_prefix, r->reg_name);
12636 return 0;
12637 }
12638
12639 temp = r->reg_type;
12640 temp.bitfield.baseindex = 0;
12641 i.types[this_operand] = operand_type_or (i.types[this_operand],
12642 temp);
12643 i.types[this_operand].bitfield.unspecified = 0;
12644 i.op[this_operand].regs = r;
12645 i.reg_operands++;
12646
12647 /* A GPR may follow an RC or SAE immediate only if a (vector) register
12648 operand was also present earlier on. */
12649 if (i.rounding.type != rc_none && temp.bitfield.class == Reg
12650 && i.reg_operands == 1)
12651 {
12652 unsigned int j;
12653
12654 for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
12655 if (i.rounding.type == RC_NamesTable[j].type)
12656 break;
12657 as_bad (_("`%s': misplaced `{%s}'"),
12658 insn_name (current_templates->start), RC_NamesTable[j].name);
12659 return 0;
12660 }
12661 }
12662 else if (*op_string == REGISTER_PREFIX)
12663 {
12664 as_bad (_("bad register name `%s'"), op_string);
12665 return 0;
12666 }
12667 else if (*op_string == IMMEDIATE_PREFIX)
12668 {
12669 ++op_string;
12670 if (i.jumpabsolute)
12671 {
12672 as_bad (_("immediate operand illegal with absolute jump"));
12673 return 0;
12674 }
12675 if (!i386_immediate (op_string))
12676 return 0;
12677 if (i.rounding.type != rc_none)
12678 {
12679 as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
12680 insn_name (current_templates->start));
12681 return 0;
12682 }
12683 }
12684 else if (RC_SAE_immediate (operand_string))
12685 {
12686 /* If it is a RC or SAE immediate, do the necessary placement check:
12687 Only another immediate or a GPR may precede it. */
12688 if (i.mem_operands || i.reg_operands + i.imm_operands > 1
12689 || (i.reg_operands == 1
12690 && i.op[0].regs->reg_type.bitfield.class != Reg))
12691 {
12692 as_bad (_("`%s': misplaced `%s'"),
12693 insn_name (current_templates->start), operand_string);
12694 return 0;
12695 }
12696 }
12697 else if (starts_memory_operand (*op_string))
12698 {
12699 /* This is a memory reference of some sort. */
12700 char *base_string;
12701
12702 /* Start and end of displacement string expression (if found). */
12703 char *displacement_string_start;
12704 char *displacement_string_end;
12705
12706 do_memory_reference:
12707 /* Check for base index form. We detect the base index form by
12708 looking for a ')' at the end of the operand, searching
12709 for the '(' matching it, and finding a REGISTER_PREFIX or ','
12710 after the '('. */
12711 base_string = op_string + strlen (op_string);
12712
12713 /* Handle vector operations. */
12714 --base_string;
12715 if (is_space_char (*base_string))
12716 --base_string;
12717
12718 if (*base_string == '}')
12719 {
12720 char *vop_start = NULL;
12721
12722 while (base_string-- > op_string)
12723 {
12724 if (*base_string == '"')
12725 break;
12726 if (*base_string != '{')
12727 continue;
12728
12729 vop_start = base_string;
12730
12731 --base_string;
12732 if (is_space_char (*base_string))
12733 --base_string;
12734
12735 if (*base_string != '}')
12736 break;
12737
12738 vop_start = NULL;
12739 }
12740
12741 if (!vop_start)
12742 {
12743 as_bad (_("unbalanced curly braces"));
12744 return 0;
12745 }
12746
12747 if (check_VecOperations (vop_start) == NULL)
12748 return 0;
12749 }
12750
12751 /* If we only have a displacement, set up for it to be parsed later. */
12752 displacement_string_start = op_string;
12753 displacement_string_end = base_string + 1;
12754
12755 if (*base_string == ')')
12756 {
12757 char *temp_string;
12758 unsigned int parens_not_balanced = 0;
12759 bool in_quotes = false;
12760
12761 /* We've already checked that the numbers of left & right ()'s are
12762 equal, and that there's a matching set of double quotes. */
12763 end_op = base_string;
12764 for (temp_string = op_string; temp_string < end_op; temp_string++)
12765 {
12766 if (*temp_string == '\\' && temp_string[1] == '"')
12767 ++temp_string;
12768 else if (*temp_string == '"')
12769 in_quotes = !in_quotes;
12770 else if (!in_quotes)
12771 {
12772 if (*temp_string == '(' && !parens_not_balanced++)
12773 base_string = temp_string;
12774 if (*temp_string == ')')
12775 --parens_not_balanced;
12776 }
12777 }
12778
12779 temp_string = base_string;
12780
12781 /* Skip past '(' and whitespace. */
12782 gas_assert (*base_string == '(');
12783 ++base_string;
12784 if (is_space_char (*base_string))
12785 ++base_string;
12786
12787 if (*base_string == ','
12788 || ((i.base_reg = parse_register (base_string, &end_op))
12789 != NULL))
12790 {
12791 displacement_string_end = temp_string;
12792
12793 i.types[this_operand].bitfield.baseindex = 1;
12794
12795 if (i.base_reg)
12796 {
12797 if (i.base_reg == &bad_reg)
12798 return 0;
12799 base_string = end_op;
12800 if (is_space_char (*base_string))
12801 ++base_string;
12802 }
12803
12804 /* There may be an index reg or scale factor here. */
12805 if (*base_string == ',')
12806 {
12807 ++base_string;
12808 if (is_space_char (*base_string))
12809 ++base_string;
12810
12811 if ((i.index_reg = parse_register (base_string, &end_op))
12812 != NULL)
12813 {
12814 if (i.index_reg == &bad_reg)
12815 return 0;
12816 base_string = end_op;
12817 if (is_space_char (*base_string))
12818 ++base_string;
12819 if (*base_string == ',')
12820 {
12821 ++base_string;
12822 if (is_space_char (*base_string))
12823 ++base_string;
12824 }
12825 else if (*base_string != ')')
12826 {
12827 as_bad (_("expecting `,' or `)' "
12828 "after index register in `%s'"),
12829 operand_string);
12830 return 0;
12831 }
12832 }
12833 else if (*base_string == REGISTER_PREFIX)
12834 {
12835 end_op = strchr (base_string, ',');
12836 if (end_op)
12837 *end_op = '\0';
12838 as_bad (_("bad register name `%s'"), base_string);
12839 return 0;
12840 }
12841
12842 /* Check for scale factor. */
12843 if (*base_string != ')')
12844 {
12845 char *end_scale = i386_scale (base_string);
12846
12847 if (!end_scale)
12848 return 0;
12849
12850 base_string = end_scale;
12851 if (is_space_char (*base_string))
12852 ++base_string;
12853 if (*base_string != ')')
12854 {
12855 as_bad (_("expecting `)' "
12856 "after scale factor in `%s'"),
12857 operand_string);
12858 return 0;
12859 }
12860 }
12861 else if (!i.index_reg)
12862 {
12863 as_bad (_("expecting index register or scale factor "
12864 "after `,'; got '%c'"),
12865 *base_string);
12866 return 0;
12867 }
12868 }
12869 else if (*base_string != ')')
12870 {
12871 as_bad (_("expecting `,' or `)' "
12872 "after base register in `%s'"),
12873 operand_string);
12874 return 0;
12875 }
12876 }
12877 else if (*base_string == REGISTER_PREFIX)
12878 {
12879 end_op = strchr (base_string, ',');
12880 if (end_op)
12881 *end_op = '\0';
12882 as_bad (_("bad register name `%s'"), base_string);
12883 return 0;
12884 }
12885 }
12886
12887 /* If there's an expression beginning the operand, parse it,
12888 assuming displacement_string_start and
12889 displacement_string_end are meaningful. */
12890 if (displacement_string_start != displacement_string_end)
12891 {
12892 if (!i386_displacement (displacement_string_start,
12893 displacement_string_end))
12894 return 0;
12895 }
12896
12897 /* Special case for (%dx) while doing input/output op. */
12898 if (i.base_reg
12899 && i.base_reg->reg_type.bitfield.instance == RegD
12900 && i.base_reg->reg_type.bitfield.word
12901 && i.index_reg == 0
12902 && i.log2_scale_factor == 0
12903 && i.seg[i.mem_operands] == 0
12904 && !operand_type_check (i.types[this_operand], disp))
12905 {
12906 i.types[this_operand] = i.base_reg->reg_type;
12907 i.input_output_operand = true;
12908 return 1;
12909 }
12910
12911 if (i386_index_check (operand_string) == 0)
12912 return 0;
12913 i.flags[this_operand] |= Operand_Mem;
12914 i.mem_operands++;
12915 }
12916 else
12917 {
12918 /* It's not a memory operand; argh! */
12919 as_bad (_("invalid char %s beginning operand %d `%s'"),
12920 output_invalid (*op_string),
12921 this_operand + 1,
12922 op_string);
12923 return 0;
12924 }
12925 return 1; /* Normal return. */
12926 }
12927 \f
12928 /* Calculate the maximum variable size (i.e., excluding fr_fix)
12929 that an rs_machine_dependent frag may reach. */
12930
12931 unsigned int
12932 i386_frag_max_var (fragS *frag)
12933 {
12934 /* The only relaxable frags are for jumps.
12935 Unconditional jumps can grow by 4 bytes and others by 5 bytes. */
12936 gas_assert (frag->fr_type == rs_machine_dependent);
12937 return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
12938 }
12939
12940 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
12941 static int
12942 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
12943 {
12944 /* STT_GNU_IFUNC symbol must go through PLT. */
12945 if ((symbol_get_bfdsym (fr_symbol)->flags
12946 & BSF_GNU_INDIRECT_FUNCTION) != 0)
12947 return 0;
12948
12949 if (!S_IS_EXTERNAL (fr_symbol))
12950 /* Symbol may be weak or local. */
12951 return !S_IS_WEAK (fr_symbol);
12952
12953 /* Global symbols with non-default visibility can't be preempted. */
12954 if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
12955 return 1;
12956
12957 if (fr_var != NO_RELOC)
12958 switch ((enum bfd_reloc_code_real) fr_var)
12959 {
12960 case BFD_RELOC_386_PLT32:
12961 case BFD_RELOC_X86_64_PLT32:
12962 /* Symbol with PLT relocation may be preempted. */
12963 return 0;
12964 default:
12965 abort ();
12966 }
12967
12968 /* Global symbols with default visibility in a shared library may be
12969 preempted by another definition. */
12970 return !shared;
12971 }
12972 #endif
12973
12974 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
12975 Note: this also works for Skylake and Cascadelake.
12976 ---------------------------------------------------------------------
12977 | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND |
12978 | ------ | ----------- | ------- | -------- |
12979 | Jo | N | N | Y |
12980 | Jno | N | N | Y |
12981 | Jc/Jb | Y | N | Y |
12982 | Jae/Jnb | Y | N | Y |
12983 | Je/Jz | Y | Y | Y |
12984 | Jne/Jnz | Y | Y | Y |
12985 | Jna/Jbe | Y | N | Y |
12986 | Ja/Jnbe | Y | N | Y |
12987 | Js | N | N | Y |
12988 | Jns | N | N | Y |
12989 | Jp/Jpe | N | N | Y |
12990 | Jnp/Jpo | N | N | Y |
12991 | Jl/Jnge | Y | Y | Y |
12992 | Jge/Jnl | Y | Y | Y |
12993 | Jle/Jng | Y | Y | Y |
12994 | Jg/Jnle | Y | Y | Y |
12995 --------------------------------------------------------------------- */
12996 static int
12997 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
12998 {
12999 if (mf_cmp == mf_cmp_alu_cmp)
13000 return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
13001 || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
13002 if (mf_cmp == mf_cmp_incdec)
13003 return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
13004 || mf_jcc == mf_jcc_jle);
13005 if (mf_cmp == mf_cmp_test_and)
13006 return 1;
13007 return 0;
13008 }
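/* Reading the table: "cmp %eax, %ebx; jz 1f" is fusible (ADD/SUB/CMP
   column, Je/Jz row == Y) whereas "inc %eax; jc 1f" is not (INC/DEC
   column, Jc/Jb row == N), matching the mf_cmp_incdec check above.  */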
13009
13010 /* Return the next non-empty frag. */
13011
13012 static fragS *
13013 i386_next_non_empty_frag (fragS *fragP)
13014 {
13015 /* There may be a frag with a ".fill 0" when there is no room in
13016 the current frag for frag_grow in output_insn. */
13017 for (fragP = fragP->fr_next;
13018 (fragP != NULL
13019 && fragP->fr_type == rs_fill
13020 && fragP->fr_fix == 0);
13021 fragP = fragP->fr_next)
13022 ;
13023 return fragP;
13024 }
13025
13026 /* Return the next jcc frag after BRANCH_PADDING. */
13027
13028 static fragS *
13029 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
13030 {
13031 fragS *branch_fragP;
13032 if (!pad_fragP)
13033 return NULL;
13034
13035 if (pad_fragP->fr_type == rs_machine_dependent
13036 && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
13037 == BRANCH_PADDING))
13038 {
13039 branch_fragP = i386_next_non_empty_frag (pad_fragP);
13040 if (branch_fragP->fr_type != rs_machine_dependent)
13041 return NULL;
13042 if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
13043 && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
13044 pad_fragP->tc_frag_data.mf_type))
13045 return branch_fragP;
13046 }
13047
13048 return NULL;
13049 }
13050
13051 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags. */
13052
13053 static void
13054 i386_classify_machine_dependent_frag (fragS *fragP)
13055 {
13056 fragS *cmp_fragP;
13057 fragS *pad_fragP;
13058 fragS *branch_fragP;
13059 fragS *next_fragP;
13060 unsigned int max_prefix_length;
13061
13062 if (fragP->tc_frag_data.classified)
13063 return;
13064
13065 /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING. Convert
13066 FUSED_JCC_PADDING and merge BRANCH_PADDING. */
13067 for (next_fragP = fragP;
13068 next_fragP != NULL;
13069 next_fragP = next_fragP->fr_next)
13070 {
13071 next_fragP->tc_frag_data.classified = 1;
13072 if (next_fragP->fr_type == rs_machine_dependent)
13073 switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
13074 {
13075 case BRANCH_PADDING:
13076 /* The BRANCH_PADDING frag must be followed by a branch
13077 frag. */
13078 branch_fragP = i386_next_non_empty_frag (next_fragP);
13079 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
13080 break;
13081 case FUSED_JCC_PADDING:
13082 /* Check if this is a fused jcc:
13083 FUSED_JCC_PADDING
13084 CMP like instruction
13085 BRANCH_PADDING
13086 COND_JUMP
13087 */
13088 cmp_fragP = i386_next_non_empty_frag (next_fragP);
13089 pad_fragP = i386_next_non_empty_frag (cmp_fragP);
13090 branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
13091 if (branch_fragP)
13092 {
13093 /* The BRANCH_PADDING frag is merged with the
13094 FUSED_JCC_PADDING frag. */
13095 next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
13096 /* CMP like instruction size. */
13097 next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
13098 frag_wane (pad_fragP);
13099 /* Skip to branch_fragP. */
13100 next_fragP = branch_fragP;
13101 }
13102 else if (next_fragP->tc_frag_data.max_prefix_length)
13103 {
13104 /* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
13105 a fused jcc. */
13106 next_fragP->fr_subtype
13107 = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
13108 next_fragP->tc_frag_data.max_bytes
13109 = next_fragP->tc_frag_data.max_prefix_length;
13110 /* This will be updated in the BRANCH_PREFIX scan. */
13111 next_fragP->tc_frag_data.max_prefix_length = 0;
13112 }
13113 else
13114 frag_wane (next_fragP);
13115 break;
13116 }
13117 }
13118
13119 /* Stop if there is no BRANCH_PREFIX. */
13120 if (!align_branch_prefix_size)
13121 return;
13122
13123 /* Scan for BRANCH_PREFIX. */
13124 for (; fragP != NULL; fragP = fragP->fr_next)
13125 {
13126 if (fragP->fr_type != rs_machine_dependent
13127 || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
13128 != BRANCH_PREFIX))
13129 continue;
13130
13131 /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
13132 COND_JUMP_PREFIX. */
13133 max_prefix_length = 0;
13134 for (next_fragP = fragP;
13135 next_fragP != NULL;
13136 next_fragP = next_fragP->fr_next)
13137 {
13138 if (next_fragP->fr_type == rs_fill)
13139 /* Skip rs_fill frags. */
13140 continue;
13141 else if (next_fragP->fr_type != rs_machine_dependent)
13142 /* Stop for all other frags. */
13143 break;
13144
13145 /* rs_machine_dependent frags. */
13146 if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13147 == BRANCH_PREFIX)
13148 {
13149 /* Count BRANCH_PREFIX frags. */
13150 if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
13151 {
13152 max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
13153 frag_wane (next_fragP);
13154 }
13155 else
13156 max_prefix_length
13157 += next_fragP->tc_frag_data.max_bytes;
13158 }
13159 else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13160 == BRANCH_PADDING)
13161 || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13162 == FUSED_JCC_PADDING))
13163 {
13164 /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING. */
13165 fragP->tc_frag_data.u.padding_fragP = next_fragP;
13166 break;
13167 }
13168 else
13169 /* Stop for other rs_machine_dependent frags. */
13170 break;
13171 }
13172
13173 fragP->tc_frag_data.max_prefix_length = max_prefix_length;
13174
13175 /* Skip to the next frag. */
13176 fragP = next_fragP;
13177 }
13178 }
13179
13180 /* Compute padding size for
13181
13182 FUSED_JCC_PADDING
13183 CMP like instruction
13184 BRANCH_PADDING
13185 COND_JUMP/UNCOND_JUMP
13186
13187 or
13188
13189 BRANCH_PADDING
13190 COND_JUMP/UNCOND_JUMP
13191 */
13192
13193 static int
13194 i386_branch_padding_size (fragS *fragP, offsetT address)
13195 {
13196 unsigned int offset, size, padding_size;
13197 fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
13198
13199 /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag. */
13200 if (!address)
13201 address = fragP->fr_address;
13202 address += fragP->fr_fix;
13203
13204 /* CMP like instruction size. */
13205 size = fragP->tc_frag_data.cmp_size;
13206
13207 /* The base size of the branch frag. */
13208 size += branch_fragP->fr_fix;
13209
13210 /* Add opcode and displacement bytes for the rs_machine_dependent
13211 branch frag. */
13212 if (branch_fragP->fr_type == rs_machine_dependent)
13213 size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
13214
13215 /* Check if branch is within boundary and doesn't end at the last
13216 byte. */
13217 offset = address & ((1U << align_branch_power) - 1);
13218 if ((offset + size) >= (1U << align_branch_power))
13219 /* Padding needed to avoid crossing boundary. */
13220 padding_size = (1U << align_branch_power) - offset;
13221 else
13222 /* No padding needed. */
13223 padding_size = 0;
13224
13225 /* The return value may be saved in tc_frag_data.length, which is
13226 an unsigned byte. */
13227 if (!fits_in_unsigned_byte (padding_size))
13228 abort ();
13229
13230 return padding_size;
13231 }
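/* Worked example: with align_branch_power == 5 (32-byte boundary), a
   4-byte branch whose frag starts at offset 30 within the window gives
   offset + size == 34 >= 32, so padding_size == 32 - 30 == 2.  */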
13232
13233 /* i386_generic_table_relax_frag()
13234
13235 Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
13236 grow/shrink padding to align branch frags. Hand others to
13237 relax_frag(). */
13238
13239 long
13240 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
13241 {
13242 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
13243 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
13244 {
13245 long padding_size = i386_branch_padding_size (fragP, 0);
13246 long grow = padding_size - fragP->tc_frag_data.length;
13247
13248 /* When the BRANCH_PREFIX frag is used, the computed address
13249 must match the actual address and there should be no padding. */
13250 if (fragP->tc_frag_data.padding_address
13251 && (fragP->tc_frag_data.padding_address != fragP->fr_address
13252 || padding_size))
13253 abort ();
13254
13255 /* Update the padding size. */
13256 if (grow)
13257 fragP->tc_frag_data.length = padding_size;
13258
13259 return grow;
13260 }
13261 else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
13262 {
13263 fragS *padding_fragP, *next_fragP;
13264 long padding_size, left_size, last_size;
13265
13266 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
13267 if (!padding_fragP)
13268 /* Use the padding set by the leading BRANCH_PREFIX frag. */
13269 return (fragP->tc_frag_data.length
13270 - fragP->tc_frag_data.last_length);
13271
13272 /* Compute the relative address of the padding frag the very first
13273 time through, when the BRANCH_PREFIX frag sizes are zero. */
13274 if (!fragP->tc_frag_data.padding_address)
13275 fragP->tc_frag_data.padding_address
13276 = padding_fragP->fr_address - (fragP->fr_address - stretch);
13277
13278 /* First update the last length from the previous iteration. */
13279 left_size = fragP->tc_frag_data.prefix_length;
13280 for (next_fragP = fragP;
13281 next_fragP != padding_fragP;
13282 next_fragP = next_fragP->fr_next)
13283 if (next_fragP->fr_type == rs_machine_dependent
13284 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13285 == BRANCH_PREFIX))
13286 {
13287 if (left_size)
13288 {
13289 int max = next_fragP->tc_frag_data.max_bytes;
13290 if (max)
13291 {
13292 int size;
13293 if (max > left_size)
13294 size = left_size;
13295 else
13296 size = max;
13297 left_size -= size;
13298 next_fragP->tc_frag_data.last_length = size;
13299 }
13300 }
13301 else
13302 next_fragP->tc_frag_data.last_length = 0;
13303 }
13304
13305 /* Check the padding size for the padding frag. */
13306 padding_size = i386_branch_padding_size
13307 (padding_fragP, (fragP->fr_address
13308 + fragP->tc_frag_data.padding_address));
13309
13310 last_size = fragP->tc_frag_data.prefix_length;
13311 /* Check if there is a change from the last iteration. */
13312 if (padding_size == last_size)
13313 {
13314 /* Update the expected address of the padding frag. */
13315 padding_fragP->tc_frag_data.padding_address
13316 = (fragP->fr_address + padding_size
13317 + fragP->tc_frag_data.padding_address);
13318 return 0;
13319 }
13320
13321 if (padding_size > fragP->tc_frag_data.max_prefix_length)
13322 {
13323 /* No padding if there is insufficient room. Clear the
13324 expected address of the padding frag. */
13325 padding_fragP->tc_frag_data.padding_address = 0;
13326 padding_size = 0;
13327 }
13328 else
13329 /* Store the expected address of the padding frag. */
13330 padding_fragP->tc_frag_data.padding_address
13331 = (fragP->fr_address + padding_size
13332 + fragP->tc_frag_data.padding_address);
13333
13334 fragP->tc_frag_data.prefix_length = padding_size;
13335
13336 /* Update the length for the current iteration. */
13337 left_size = padding_size;
13338 for (next_fragP = fragP;
13339 next_fragP != padding_fragP;
13340 next_fragP = next_fragP->fr_next)
13341 if (next_fragP->fr_type == rs_machine_dependent
13342 && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
13343 == BRANCH_PREFIX))
13344 {
13345 if (left_size)
13346 {
13347 int max = next_fragP->tc_frag_data.max_bytes;
13348 if (max)
13349 {
13350 int size;
13351 if (max > left_size)
13352 size = left_size;
13353 else
13354 size = max;
13355 left_size -= size;
13356 next_fragP->tc_frag_data.length = size;
13357 }
13358 }
13359 else
13360 next_fragP->tc_frag_data.length = 0;
13361 }
13362
13363 return (fragP->tc_frag_data.length
13364 - fragP->tc_frag_data.last_length);
13365 }
13366 return relax_frag (segment, fragP, stretch);
13367 }
13368
13369 /* md_estimate_size_before_relax()
13370
13371 Called just before relax() for rs_machine_dependent frags. The x86
13372 assembler uses these frags to handle variable size jump
13373 instructions.
13374
13375 Any symbol that is now undefined will not become defined.
13376 Return the correct fr_subtype in the frag.
13377 Return the initial "guess for variable size of frag" to caller.
13378 The guess is actually the growth beyond the fixed part. Whatever
13379 we do to grow the fixed or variable part contributes to our
13380 returned value. */
13381
13382 int
13383 md_estimate_size_before_relax (fragS *fragP, segT segment)
13384 {
13385 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
13386 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
13387 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
13388 {
13389 i386_classify_machine_dependent_frag (fragP);
13390 return fragP->tc_frag_data.length;
13391 }
13392
13393 /* We've already got fragP->fr_subtype right; all we have to do is
13394 check for un-relaxable symbols. On an ELF system, we can't relax
13395 an externally visible symbol, because it may be overridden by a
13396 shared library. */
13397 if (S_GET_SEGMENT (fragP->fr_symbol) != segment
13398 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13399 || (IS_ELF
13400 && !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
13401 fragP->fr_var))
13402 #endif
13403 #if defined (OBJ_COFF) && defined (TE_PE)
13404 || (OUTPUT_FLAVOR == bfd_target_coff_flavour
13405 && S_IS_WEAK (fragP->fr_symbol))
13406 #endif
13407 )
13408 {
13409 /* Symbol is undefined in this segment, or we need to keep a
13410 reloc so that weak symbols can be overridden. */
13411 int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
13412 enum bfd_reloc_code_real reloc_type;
13413 unsigned char *opcode;
13414 int old_fr_fix;
13415 fixS *fixP = NULL;
13416
13417 if (fragP->fr_var != NO_RELOC)
13418 reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
13419 else if (size == 2)
13420 reloc_type = BFD_RELOC_16_PCREL;
13421 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13422 else if (fragP->tc_frag_data.code64 && fragP->fr_offset == 0
13423 && need_plt32_p (fragP->fr_symbol))
13424 reloc_type = BFD_RELOC_X86_64_PLT32;
13425 #endif
13426 else
13427 reloc_type = BFD_RELOC_32_PCREL;
13428
13429 old_fr_fix = fragP->fr_fix;
13430 opcode = (unsigned char *) fragP->fr_opcode;
13431
13432 switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
13433 {
13434 case UNCOND_JUMP:
13435 /* Make jmp (0xeb) a (d)word displacement jump. */
13436 opcode[0] = 0xe9;
13437 fragP->fr_fix += size;
13438 fixP = fix_new (fragP, old_fr_fix, size,
13439 fragP->fr_symbol,
13440 fragP->fr_offset, 1,
13441 reloc_type);
13442 break;
13443
13444 case COND_JUMP86:
13445 if (size == 2
13446 && (!no_cond_jump_promotion || fragP->fr_var != NO_RELOC))
13447 {
13448 /* Negate the condition, and branch past an
13449 unconditional jump. */
13450 opcode[0] ^= 1;
13451 opcode[1] = 3;
13452 /* Insert an unconditional jump. */
13453 opcode[2] = 0xe9;
13454 /* We added two extra opcode bytes, and have a two byte
13455 offset. */
13456 fragP->fr_fix += 2 + 2;
13457 fix_new (fragP, old_fr_fix + 2, 2,
13458 fragP->fr_symbol,
13459 fragP->fr_offset, 1,
13460 reloc_type);
13461 break;
13462 }
13463 /* Fall through. */
13464
13465 case COND_JUMP:
13466 if (no_cond_jump_promotion && fragP->fr_var == NO_RELOC)
13467 {
13468 fragP->fr_fix += 1;
13469 fixP = fix_new (fragP, old_fr_fix, 1,
13470 fragP->fr_symbol,
13471 fragP->fr_offset, 1,
13472 BFD_RELOC_8_PCREL);
13473 fixP->fx_signed = 1;
13474 break;
13475 }
13476
13477 /* This changes the byte-displacement jump 0x7N
13478 to the (d)word-displacement jump 0x0f,0x8N. */
13479 opcode[1] = opcode[0] + 0x10;
13480 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13481 /* We've added an opcode byte. */
13482 fragP->fr_fix += 1 + size;
13483 fixP = fix_new (fragP, old_fr_fix + 1, size,
13484 fragP->fr_symbol,
13485 fragP->fr_offset, 1,
13486 reloc_type);
13487 break;
13488
13489 default:
13490 BAD_CASE (fragP->fr_subtype);
13491 break;
13492 }
13493
13494 /* All jumps handled here are signed, but don't unconditionally use a
13495 signed limit check for 32 and 16 bit jumps as we want to allow wrap
13496 around at 4G (outside of 64-bit mode) and 64k. */
13497 if (size == 4 && flag_code == CODE_64BIT)
13498 fixP->fx_signed = 1;
13499
13500 frag_wane (fragP);
13501 return fragP->fr_fix - old_fr_fix;
13502 }
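/* Promotion example per the COND_JUMP case above: "je" relaxes from
   the byte form 0x74 to 0x0f 0x84 (opcode[1] = 0x74 + 0x10), growing
   the frag by 1 + size bytes.  */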
13503
13504 /* Guess size depending on current relax state. Initially the relax
13505 state will correspond to a short jump and we return 1, because
13506 the variable part of the frag (the branch offset) is one byte
13507 long. However, we can relax a section more than once and in that
13508 case we must either set fr_subtype back to the unrelaxed state,
13509 or return the value for the appropriate branch. */
13510 return md_relax_table[fragP->fr_subtype].rlx_length;
13511 }
13512
13513 /* Called after relax() is finished.
13514
13515 In: Address of frag.
13516 fr_type == rs_machine_dependent.
13517 fr_subtype is what the address relaxed to.
13518
13519 Out: Any fixSs and constants are set up.
13520 Caller will turn frag into a ".space 0". */
13521
13522 void
13523 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
13524 fragS *fragP)
13525 {
13526 unsigned char *opcode;
13527 unsigned char *where_to_put_displacement = NULL;
13528 offsetT target_address;
13529 offsetT opcode_address;
13530 unsigned int extension = 0;
13531 offsetT displacement_from_opcode_start;
13532
13533 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
13534 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
13535 || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
13536 {
13537 /* Generate nop padding. */
13538 unsigned int size = fragP->tc_frag_data.length;
13539 if (size)
13540 {
13541 if (size > fragP->tc_frag_data.max_bytes)
13542 abort ();
13543
13544 if (flag_debug)
13545 {
13546 const char *msg;
13547 const char *branch = "branch";
13548 const char *prefix = "";
13549 fragS *padding_fragP;
13550 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
13551 == BRANCH_PREFIX)
13552 {
13553 padding_fragP = fragP->tc_frag_data.u.padding_fragP;
13554 switch (fragP->tc_frag_data.default_prefix)
13555 {
13556 default:
13557 abort ();
13558 break;
13559 case CS_PREFIX_OPCODE:
13560 prefix = " cs";
13561 break;
13562 case DS_PREFIX_OPCODE:
13563 prefix = " ds";
13564 break;
13565 case ES_PREFIX_OPCODE:
13566 prefix = " es";
13567 break;
13568 case FS_PREFIX_OPCODE:
13569 prefix = " fs";
13570 break;
13571 case GS_PREFIX_OPCODE:
13572 prefix = " gs";
13573 break;
13574 case SS_PREFIX_OPCODE:
13575 prefix = " ss";
13576 break;
13577 }
13578 if (padding_fragP)
13579 msg = _("%s:%u: add %d%s at 0x%llx to align "
13580 "%s within %d-byte boundary\n");
13581 else
13582 msg = _("%s:%u: add additional %d%s at 0x%llx to "
13583 "align %s within %d-byte boundary\n");
13584 }
13585 else
13586 {
13587 padding_fragP = fragP;
13588 msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
13589 "%s within %d-byte boundary\n");
13590 }
13591
13592 if (padding_fragP)
13593 switch (padding_fragP->tc_frag_data.branch_type)
13594 {
13595 case align_branch_jcc:
13596 branch = "jcc";
13597 break;
13598 case align_branch_fused:
13599 branch = "fused jcc";
13600 break;
13601 case align_branch_jmp:
13602 branch = "jmp";
13603 break;
13604 case align_branch_call:
13605 branch = "call";
13606 break;
13607 case align_branch_indirect:
13608 branch = "indirect branch";
13609 break;
13610 case align_branch_ret:
13611 branch = "ret";
13612 break;
13613 default:
13614 break;
13615 }
13616
13617 fprintf (stdout, msg,
13618 fragP->fr_file, fragP->fr_line, size, prefix,
13619 (long long) fragP->fr_address, branch,
13620 1 << align_branch_power);
13621 }
13622 if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
13623 memset (fragP->fr_opcode,
13624 fragP->tc_frag_data.default_prefix, size);
13625 else
13626 i386_generate_nops (fragP, (char *) fragP->fr_opcode,
13627 size, 0);
13628 fragP->fr_fix += size;
13629 }
13630 return;
13631 }
13632
13633 opcode = (unsigned char *) fragP->fr_opcode;
13634
13635 /* Address we want to reach in file space. */
13636 target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
13637
13638 /* Address opcode resides at in file space. */
13639 opcode_address = fragP->fr_address + fragP->fr_fix;
13640
13641 /* Displacement from opcode start to fill into instruction. */
13642 displacement_from_opcode_start = target_address - opcode_address;
13643
13644 if ((fragP->fr_subtype & BIG) == 0)
13645 {
13646 /* Don't have to change opcode. */
13647 extension = 1; /* 1 opcode + 1 displacement */
13648 where_to_put_displacement = &opcode[1];
13649 }
13650 else
13651 {
13652 if (no_cond_jump_promotion
13653 && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
13654 as_warn_where (fragP->fr_file, fragP->fr_line,
13655 _("long jump required"));
13656
13657 switch (fragP->fr_subtype)
13658 {
13659 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
13660 extension = 4; /* 1 opcode + 4 displacement */
13661 opcode[0] = 0xe9;
13662 where_to_put_displacement = &opcode[1];
13663 break;
13664
13665 case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
13666 extension = 2; /* 1 opcode + 2 displacement */
13667 opcode[0] = 0xe9;
13668 where_to_put_displacement = &opcode[1];
13669 break;
13670
13671 case ENCODE_RELAX_STATE (COND_JUMP, BIG):
13672 case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
13673 extension = 5; /* 2 opcode + 4 displacement */
13674 opcode[1] = opcode[0] + 0x10;
13675 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13676 where_to_put_displacement = &opcode[2];
13677 break;
13678
13679 case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
13680 extension = 3; /* 2 opcode + 2 displacement */
13681 opcode[1] = opcode[0] + 0x10;
13682 opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
13683 where_to_put_displacement = &opcode[2];
13684 break;
13685
13686 case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
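	  /* No Jcc with a 16-bit displacement exists on pre-386 CPUs, so
	     invert the condition (complementary Jcc opcodes differ only
	     in their low bit) and skip over an unconditional "jmp rel16".  */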
13687 extension = 4;
13688 opcode[0] ^= 1;
13689 opcode[1] = 3;
13690 opcode[2] = 0xe9;
13691 where_to_put_displacement = &opcode[3];
13692 break;
13693
13694 default:
13695 BAD_CASE (fragP->fr_subtype);
13696 break;
13697 }
13698 }
13699
13700 /* If the size is less than four, we are sure that the operand fits;
13701 if it is 4, the displacement could exceed +/- 2GB, which the
13702 biased unsigned comparison below detects. */
13703 if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
13704 && object_64bit
13705 && ((addressT) (displacement_from_opcode_start - extension
13706 + ((addressT) 1 << 31))
13707 > (((addressT) 2 << 31) - 1)))
13708 {
13709 as_bad_where (fragP->fr_file, fragP->fr_line,
13710 _("jump target out of range"));
13711 /* Make us emit 0. */
13712 displacement_from_opcode_start = extension;
13713 }
13714 /* Now put displacement after opcode. */
13715 md_number_to_chars ((char *) where_to_put_displacement,
13716 (valueT) (displacement_from_opcode_start - extension),
13717 DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
13718 fragP->fr_fix += extension;
13719 }
13720 \f
13721 /* Apply a fixup (fixP) to segment data, once it has been determined
13722 by our caller that we have all the info we need to fix it up.
13723
13724 Parameter valP is the pointer to the value of the bits.
13725
13726 On the 386, immediates, displacements, and data pointers are all in
13727 the same (little-endian) format, so we don't need to care about which
13728 we are handling. */
13729
13730 void
13731 md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
13732 {
13733 char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
13734 valueT value = *valP;
13735
13736 #if !defined (TE_Mach)
13737 if (fixP->fx_pcrel)
13738 {
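      /* A PC-relative fix carrying an absolute relocation type: switch
	 to the matching PC-relative type of the same width.  */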
13739 switch (fixP->fx_r_type)
13740 {
13741 default:
13742 break;
13743
13744 case BFD_RELOC_64:
13745 fixP->fx_r_type = BFD_RELOC_64_PCREL;
13746 break;
13747 case BFD_RELOC_32:
13748 case BFD_RELOC_X86_64_32S:
13749 fixP->fx_r_type = BFD_RELOC_32_PCREL;
13750 break;
13751 case BFD_RELOC_16:
13752 fixP->fx_r_type = BFD_RELOC_16_PCREL;
13753 break;
13754 case BFD_RELOC_8:
13755 fixP->fx_r_type = BFD_RELOC_8_PCREL;
13756 break;
13757 }
13758 }
13759
13760 if (fixP->fx_addsy != NULL
13761 && (fixP->fx_r_type == BFD_RELOC_32_PCREL
13762 || fixP->fx_r_type == BFD_RELOC_64_PCREL
13763 || fixP->fx_r_type == BFD_RELOC_16_PCREL
13764 || fixP->fx_r_type == BFD_RELOC_8_PCREL)
13765 && !use_rela_relocations)
13766 {
13767 /* This is a hack. There should be a better way to handle this.
13768 This covers for the fact that bfd_install_relocation will
13769 subtract the current location (for partial_inplace, PC relative
13770 relocations); see more below. */
13771 #ifndef OBJ_AOUT
13772 if (IS_ELF
13773 #ifdef TE_PE
13774 || OUTPUT_FLAVOR == bfd_target_coff_flavour
13775 #endif
13776 )
13777 value += fixP->fx_where + fixP->fx_frag->fr_address;
13778 #endif
13779 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13780 if (IS_ELF)
13781 {
13782 segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);
13783
13784 if ((sym_seg == seg
13785 || (symbol_section_p (fixP->fx_addsy)
13786 && sym_seg != absolute_section))
13787 && !generic_force_reloc (fixP))
13788 {
13789 /* Yes, we add the values in twice. This is because
13790 bfd_install_relocation subtracts them out again. I think
13791 bfd_install_relocation is broken, but I don't dare change
13792 it. FIXME. */
13793 value += fixP->fx_where + fixP->fx_frag->fr_address;
13794 }
13795 }
13796 #endif
13797 #if defined (OBJ_COFF) && defined (TE_PE)
13798 /* For some reason, the PE format does not store a
13799 section address offset for a PC relative symbol. */
13800 if (S_GET_SEGMENT (fixP->fx_addsy) != seg
13801 || S_IS_WEAK (fixP->fx_addsy))
13802 value += md_pcrel_from (fixP);
13803 #endif
13804 }
13805 #if defined (OBJ_COFF) && defined (TE_PE)
13806 if (fixP->fx_addsy != NULL
13807 && S_IS_WEAK (fixP->fx_addsy)
13808 /* PR 16858: Do not modify weak function references. */
13809 && ! fixP->fx_pcrel)
13810 {
13811 #if !defined (TE_PEP)
13812 /* For x86 PE, weak function symbols are neither PC-relative
13813 nor do they set S_IS_FUNCTION. So the only reliable way
13814 to detect them is to check the flags of their containing
13815 section. */
13816 if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
13817 && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
13818 ;
13819 else
13820 #endif
13821 value -= S_GET_VALUE (fixP->fx_addsy);
13822 }
13823 #endif
13824
13825 /* Fix a few things - the dynamic linker expects certain values here,
13826 and we must not disappoint it. */
13827 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
13828 if (IS_ELF && fixP->fx_addsy)
13829 switch (fixP->fx_r_type)
13830 {
13831 case BFD_RELOC_386_PLT32:
13832 case BFD_RELOC_X86_64_PLT32:
13833 /* Make the jump instruction point to the address of the operand.
13834 At runtime we merely add the offset to the actual PLT entry.
13835 NB: Subtract the offset size only for jump instructions. */
13836 if (fixP->fx_pcrel)
13837 value = -4;
13838 break;
13839
13840 case BFD_RELOC_386_TLS_GD:
13841 case BFD_RELOC_386_TLS_LDM:
13842 case BFD_RELOC_386_TLS_IE_32:
13843 case BFD_RELOC_386_TLS_IE:
13844 case BFD_RELOC_386_TLS_GOTIE:
13845 case BFD_RELOC_386_TLS_GOTDESC:
13846 case BFD_RELOC_X86_64_TLSGD:
13847 case BFD_RELOC_X86_64_TLSLD:
13848 case BFD_RELOC_X86_64_GOTTPOFF:
13849 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
13850 value = 0; /* Fully resolved at runtime. No addend. */
13851 /* Fallthrough */
13852 case BFD_RELOC_386_TLS_LE:
13853 case BFD_RELOC_386_TLS_LDO_32:
13854 case BFD_RELOC_386_TLS_LE_32:
13855 case BFD_RELOC_X86_64_DTPOFF32:
13856 case BFD_RELOC_X86_64_DTPOFF64:
13857 case BFD_RELOC_X86_64_TPOFF32:
13858 case BFD_RELOC_X86_64_TPOFF64:
13859 S_SET_THREAD_LOCAL (fixP->fx_addsy);
13860 break;
13861
13862 case BFD_RELOC_386_TLS_DESC_CALL:
13863 case BFD_RELOC_X86_64_TLSDESC_CALL:
13864 value = 0; /* Fully resolved at runtime. No addend. */
13865 S_SET_THREAD_LOCAL (fixP->fx_addsy);
13866 fixP->fx_done = 0;
13867 return;
13868
13869 case BFD_RELOC_VTABLE_INHERIT:
13870 case BFD_RELOC_VTABLE_ENTRY:
13871 fixP->fx_done = 0;
13872 return;
13873
13874 default:
13875 break;
13876 }
13877 #endif /* defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) */
13878
13879 /* If not 64bit, massage value to account for wraparound when !BFD64. */
13880 if (!object_64bit)
13881 value = extend_to_32bit_address (value);
13882
13883 *valP = value;
13884 #endif /* !defined (TE_Mach) */
13885
13886 /* Are we finished with this relocation now? */
13887 if (fixP->fx_addsy == NULL)
13888 {
13889 fixP->fx_done = 1;
13890 switch (fixP->fx_r_type)
13891 {
13892 case BFD_RELOC_X86_64_32S:
13893 fixP->fx_signed = 1;
13894 break;
13895
13896 default:
13897 break;
13898 }
13899 }
13900 #if defined (OBJ_COFF) && defined (TE_PE)
13901 else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
13902 {
13903 fixP->fx_done = 0;
13904 /* Remember value for tc_gen_reloc. */
13905 fixP->fx_addnumber = value;
13906 /* Clear out the frag for now. */
13907 value = 0;
13908 }
13909 #endif
13910 else if (use_rela_relocations)
13911 {
13912 if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
13913 fixP->fx_no_overflow = 1;
13914 /* Remember value for tc_gen_reloc. */
13915 fixP->fx_addnumber = value;
13916 value = 0;
13917 }
13918
13919 md_number_to_chars (p, value, fixP->fx_size);
13920 }
13921 \f
13922 const char *
13923 md_atof (int type, char *litP, int *sizeP)
13924 {
13925 /* This outputs the LITTLENUMs in REVERSE order;
13926 that is, in the little-endian format the 386 expects. */
13927 return ieee_md_atof (type, litP, sizeP, false);
13928 }
13929 \f
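/* Big enough for either "'c'" or "(0xXX)", including the trailing NUL.  */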
13930 static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];
13931
13932 static char *
13933 output_invalid (int c)
13934 {
13935 if (ISPRINT (c))
13936 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
13937 "'%c'", c);
13938 else
13939 snprintf (output_invalid_buf, sizeof (output_invalid_buf),
13940 "(0x%x)", (unsigned char) c);
13941 return output_invalid_buf;
13942 }
13943
13944 /* Verify that @r can be used in the current context. */
13945
13946 static bool check_register (const reg_entry *r)
13947 {
13948 if (allow_pseudo_reg)
13949 return true;
13950
13951 if (operand_type_all_zero (&r->reg_type))
13952 return false;
13953
13954 if ((r->reg_type.bitfield.dword
13955 || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
13956 || r->reg_type.bitfield.class == RegCR
13957 || r->reg_type.bitfield.class == RegDR)
13958 && !cpu_arch_flags.bitfield.cpui386)
13959 return false;
13960
13961 if (r->reg_type.bitfield.class == RegTR
13962 && (flag_code == CODE_64BIT
13963 || !cpu_arch_flags.bitfield.cpui386
13964 || cpu_arch_isa_flags.bitfield.cpui586
13965 || cpu_arch_isa_flags.bitfield.cpui686))
13966 return false;
13967
13968 if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
13969 return false;
13970
13971 if (!cpu_arch_flags.bitfield.cpuavx512f)
13972 {
13973 if (r->reg_type.bitfield.zmmword
13974 || r->reg_type.bitfield.class == RegMask)
13975 return false;
13976
13977 if (!cpu_arch_flags.bitfield.cpuavx)
13978 {
13979 if (r->reg_type.bitfield.ymmword)
13980 return false;
13981
13982 if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
13983 return false;
13984 }
13985 }
13986
13987 if (vector_size < VSZ512 && r->reg_type.bitfield.zmmword)
13988 return false;
13989
13990 if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
13991 return false;
13992
13993 if (r->reg_type.bitfield.tmmword
13994 && (!cpu_arch_flags.bitfield.cpuamx_tile
13995 || flag_code != CODE_64BIT))
13996 return false;
13997
13998 if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
13999 return false;
14000
14001 /* Don't allow the fake index register unless allow_index_reg is non-zero. */
14002 if (!allow_index_reg && r->reg_num == RegIZ)
14003 return false;
14004
14005 /* Upper 16 vector registers are only available with VREX in 64bit
14006 mode, and require EVEX encoding. */
14007 if (r->reg_flags & RegVRex)
14008 {
14009 if (!cpu_arch_flags.bitfield.cpuavx512f
14010 || flag_code != CODE_64BIT)
14011 return false;
14012
14013 if (i.vec_encoding == vex_encoding_default)
14014 i.vec_encoding = vex_encoding_evex;
14015 else if (i.vec_encoding != vex_encoding_evex)
14016 i.vec_encoding = vex_encoding_error;
14017 }
14018
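  /* Extended and 64-bit-only registers normally require 64-bit mode.
     The exception is control registers on CPUs with long mode: AMD makes
     e.g. %cr8 reachable from 32-bit code via a LOCK-prefixed alternate
     encoding.  */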
14019 if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
14020 && (!cpu_arch_flags.bitfield.cpulm
14021 || r->reg_type.bitfield.class != RegCR
14022 || dot_insn ())
14023 && flag_code != CODE_64BIT)
14024 return false;
14025
14026 if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
14027 && !intel_syntax)
14028 return false;
14029
14030 return true;
14031 }
14032
14033 /* REG_STRING starts *before* REGISTER_PREFIX. */
14034
14035 static const reg_entry *
14036 parse_real_register (const char *reg_string, char **end_op)
14037 {
14038 const char *s = reg_string;
14039 char *p;
14040 char reg_name_given[MAX_REG_NAME_SIZE + 1];
14041 const reg_entry *r;
14042
14043 /* Skip possible REGISTER_PREFIX and possible whitespace. */
14044 if (*s == REGISTER_PREFIX)
14045 ++s;
14046
14047 if (is_space_char (*s))
14048 ++s;
14049
14050 p = reg_name_given;
14051 while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
14052 {
14053 if (p >= reg_name_given + MAX_REG_NAME_SIZE)
14054 return (const reg_entry *) NULL;
14055 s++;
14056 }
14057
14058 if (is_part_of_name (*s))
14059 return (const reg_entry *) NULL;
14060
14061 *end_op = (char *) s;
14062
14063 r = (const reg_entry *) str_hash_find (reg_hash, reg_name_given);
14064
14065 /* Handle floating point regs, allowing spaces in the (i) part. */
14066 if (r == reg_st0)
14067 {
14068 if (!cpu_arch_flags.bitfield.cpu8087
14069 && !cpu_arch_flags.bitfield.cpu287
14070 && !cpu_arch_flags.bitfield.cpu387
14071 && !allow_pseudo_reg)
14072 return (const reg_entry *) NULL;
14073
14074 if (is_space_char (*s))
14075 ++s;
14076 if (*s == '(')
14077 {
14078 ++s;
14079 if (is_space_char (*s))
14080 ++s;
14081 if (*s >= '0' && *s <= '7')
14082 {
14083 int fpr = *s - '0';
14084 ++s;
14085 if (is_space_char (*s))
14086 ++s;
14087 if (*s == ')')
14088 {
14089 *end_op = (char *) s + 1;
14090 know (r[fpr].reg_num == fpr);
14091 return r + fpr;
14092 }
14093 }
14094 /* We have "%st(" then garbage. */
14095 return (const reg_entry *) NULL;
14096 }
14097 }
14098
14099 return r && check_register (r) ? r : NULL;
14100 }
14101
14102 /* REG_STRING starts *before* REGISTER_PREFIX. */
14103
14104 static const reg_entry *
14105 parse_register (const char *reg_string, char **end_op)
14106 {
14107 const reg_entry *r;
14108
14109 if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
14110 r = parse_real_register (reg_string, end_op);
14111 else
14112 r = NULL;
14113 if (!r)
14114 {
14115 char *save = input_line_pointer;
14116 char *buf = xstrdup (reg_string), *name;
14117 symbolS *symbolP;
14118
14119 input_line_pointer = buf;
14120 get_symbol_name (&name);
14121 symbolP = symbol_find (name);
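      /* Chase a chain of plain symbol equates (no offset involved), in
	 case the name was defined as an alias of a register.  */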
14122 while (symbolP && symbol_equated_p (symbolP))
14123 {
14124 const expressionS *e = symbol_get_value_expression(symbolP);
14125
14126 if (e->X_add_number)
14127 break;
14128 symbolP = e->X_add_symbol;
14129 }
14130 if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
14131 {
14132 const expressionS *e = symbol_get_value_expression (symbolP);
14133
14134 if (e->X_op == O_register)
14135 {
14136 know (e->X_add_number >= 0
14137 && (valueT) e->X_add_number < i386_regtab_size);
14138 r = i386_regtab + e->X_add_number;
14139 *end_op = (char *) reg_string + (input_line_pointer - buf);
14140 }
14141 if (r && !check_register (r))
14142 {
14143 as_bad (_("register '%s%s' cannot be used here"),
14144 register_prefix, r->reg_name);
14145 r = &bad_reg;
14146 }
14147 }
14148 input_line_pointer = save;
14149 free (buf);
14150 }
14151 return r;
14152 }
14153
14154 int
14155 i386_parse_name (char *name, expressionS *e, char *nextcharP)
14156 {
14157 const reg_entry *r = NULL;
14158 char *end = input_line_pointer;
14159
14160 /* We only know the terminating character here. If it is a double quote, it
14161 could be the closing one of a quoted symbol name, or an opening one from a
14162 following string (or another quoted symbol name). Since the latter can't
14163 be valid syntax for anything, bailing in either case is good enough. */
14164 if (*nextcharP == '"')
14165 return 0;
14166
14167 *end = *nextcharP;
14168 if (*name == REGISTER_PREFIX || allow_naked_reg)
14169 r = parse_real_register (name, &input_line_pointer);
14170 if (r && end <= input_line_pointer)
14171 {
14172 *nextcharP = *input_line_pointer;
14173 *input_line_pointer = 0;
14174 e->X_op = O_register;
14175 e->X_add_number = r - i386_regtab;
14176 return 1;
14177 }
14178 input_line_pointer = end;
14179 *end = 0;
14180 return intel_syntax ? i386_intel_parse_name (name, e) : 0;
14181 }
14182
14183 void
14184 md_operand (expressionS *e)
14185 {
14186 char *end;
14187 const reg_entry *r;
14188
14189 switch (*input_line_pointer)
14190 {
14191 case REGISTER_PREFIX:
14192 r = parse_real_register (input_line_pointer, &end);
14193 if (r)
14194 {
14195 e->X_op = O_register;
14196 e->X_add_number = r - i386_regtab;
14197 input_line_pointer = end;
14198 }
14199 break;
14200
14201 case '[':
14202 gas_assert (intel_syntax);
14203 end = input_line_pointer++;
14204 expression (e);
14205 if (*input_line_pointer == ']')
14206 {
14207 ++input_line_pointer;
14208 e->X_op_symbol = make_expr_symbol (e);
14209 e->X_add_symbol = NULL;
14210 e->X_add_number = 0;
14211 e->X_op = O_index;
14212 }
14213 else
14214 {
14215 e->X_op = O_absent;
14216 input_line_pointer = end;
14217 }
14218 break;
14219 }
14220 }
14221
14222 #ifdef BFD64
14223 /* To maintain consistency with !BFD64 builds of gas, record whether any
14224 (binary) operator was involved in an expression. As expressions are
14225 evaluated in only 32 bits when !BFD64, we use this to decide whether to
14226 truncate results. */
14227 bool i386_record_operator (operatorT op,
14228 const expressionS *left,
14229 const expressionS *right)
14230 {
14231 if (op == O_absent)
14232 return false;
14233
14234 if (!left)
14235 {
14236 /* Since the expression parser applies unary operators fine to bignum
14237 operands, we don't need to be concerned about such operands not
14238 fitting in 32 bits. */
14239 if (right->X_op == O_constant && right->X_unsigned
14240 && !fits_in_unsigned_long (right->X_add_number))
14241 return false;
14242 }
14243 /* This isn't entirely right: The pattern can also result when constant
14244 expressions are folded (e.g. 0xffffffff + 1). */
14245 else if ((left->X_op == O_constant && left->X_unsigned
14246 && !fits_in_unsigned_long (left->X_add_number))
14247 || (right->X_op == O_constant && right->X_unsigned
14248 && !fits_in_unsigned_long (right->X_add_number)))
14249 expr_mode = expr_large_value;
14250
14251 if (expr_mode != expr_large_value)
14252 expr_mode = expr_operator_present;
14253
14254 return false;
14255 }
14256 #endif
14257 \f
14258 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14259 const char *md_shortopts = "kVQ:sqnO::";
14260 #else
14261 const char *md_shortopts = "qnO::";
14262 #endif
14263
14264 #define OPTION_32 (OPTION_MD_BASE + 0)
14265 #define OPTION_64 (OPTION_MD_BASE + 1)
14266 #define OPTION_DIVIDE (OPTION_MD_BASE + 2)
14267 #define OPTION_MARCH (OPTION_MD_BASE + 3)
14268 #define OPTION_MTUNE (OPTION_MD_BASE + 4)
14269 #define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
14270 #define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
14271 #define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
14272 #define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
14273 #define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
14274 #define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
14275 #define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
14276 #define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
14277 #define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
14278 #define OPTION_X32 (OPTION_MD_BASE + 14)
14279 #define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
14280 #define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
14281 #define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
14282 #define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
14283 #define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
14284 #define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
14285 #define OPTION_MSHARED (OPTION_MD_BASE + 21)
14286 #define OPTION_MAMD64 (OPTION_MD_BASE + 22)
14287 #define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
14288 #define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
14289 #define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
14290 #define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
14291 #define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
14292 #define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
14293 #define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
14294 #define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
14295 #define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
14296 #define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
14297 #define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
14298 #define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
14299
14300 struct option md_longopts[] =
14301 {
14302 {"32", no_argument, NULL, OPTION_32},
14303 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14304 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14305 {"64", no_argument, NULL, OPTION_64},
14306 #endif
14307 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14308 {"x32", no_argument, NULL, OPTION_X32},
14309 {"mshared", no_argument, NULL, OPTION_MSHARED},
14310 {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
14311 #endif
14312 {"divide", no_argument, NULL, OPTION_DIVIDE},
14313 {"march", required_argument, NULL, OPTION_MARCH},
14314 {"mtune", required_argument, NULL, OPTION_MTUNE},
14315 {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
14316 {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
14317 {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
14318 {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
14319 {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
14320 {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
14321 {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
14322 {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
14323 {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
14324 {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
14325 {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
14326 {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
14327 {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
14328 # if defined (TE_PE) || defined (TE_PEP)
14329 {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
14330 #endif
14331 {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
14332 {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
14333 {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
14334 {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
14335 {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
14336 {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
14337 {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
14338 {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
14339 {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
14340 {"mlfence-before-indirect-branch", required_argument, NULL,
14341 OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
14342 {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
14343 {"mamd64", no_argument, NULL, OPTION_MAMD64},
14344 {"mintel64", no_argument, NULL, OPTION_MINTEL64},
14345 {NULL, no_argument, NULL, 0}
14346 };
14347 size_t md_longopts_size = sizeof (md_longopts);
14348
14349 int
14350 md_parse_option (int c, const char *arg)
14351 {
14352 unsigned int j;
14353 char *arch, *next, *saved, *type;
14354
14355 switch (c)
14356 {
14357 case 'n':
14358 optimize_align_code = 0;
14359 break;
14360
14361 case 'q':
14362 quiet_warnings = 1;
14363 break;
14364
14365 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14366 /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
14367 should be emitted or not. FIXME: Not implemented. */
14368 case 'Q':
14369 if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
14370 return 0;
14371 break;
14372
14373 /* -V: SVR4 argument to print version ID. */
14374 case 'V':
14375 print_version_id ();
14376 break;
14377
14378 /* -k: Ignore for FreeBSD compatibility. */
14379 case 'k':
14380 break;
14381
14382 case 's':
14383 /* -s: On i386 Solaris, this tells the native assembler to use
14384 .stab instead of .stab.excl. We always use .stab anyhow. */
14385 break;
14386
14387 case OPTION_MSHARED:
14388 shared = 1;
14389 break;
14390
14391 case OPTION_X86_USED_NOTE:
14392 if (strcasecmp (arg, "yes") == 0)
14393 x86_used_note = 1;
14394 else if (strcasecmp (arg, "no") == 0)
14395 x86_used_note = 0;
14396 else
14397 as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
14398 break;
14399
14400
14401 #endif
14402 #if (defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
14403 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
14404 case OPTION_64:
14405 {
14406 const char **list, **l;
14407
14408 list = bfd_target_list ();
14409 for (l = list; *l != NULL; l++)
14410 if (startswith (*l, "elf64-x86-64")
14411 || strcmp (*l, "coff-x86-64") == 0
14412 || strcmp (*l, "pe-x86-64") == 0
14413 || strcmp (*l, "pei-x86-64") == 0
14414 || strcmp (*l, "mach-o-x86-64") == 0)
14415 {
14416 default_arch = "x86_64";
14417 break;
14418 }
14419 if (*l == NULL)
14420 as_fatal (_("no compiled in support for x86_64"));
14421 free (list);
14422 }
14423 break;
14424 #endif
14425
14426 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
14427 case OPTION_X32:
14428 if (IS_ELF)
14429 {
14430 const char **list, **l;
14431
14432 list = bfd_target_list ();
14433 for (l = list; *l != NULL; l++)
14434 if (startswith (*l, "elf32-x86-64"))
14435 {
14436 default_arch = "x86_64:32";
14437 break;
14438 }
14439 if (*l == NULL)
14440 as_fatal (_("no compiled in support for 32bit x86_64"));
14441 free (list);
14442 }
14443 else
14444 as_fatal (_("32bit x86_64 is only supported for ELF"));
14445 break;
14446 #endif
14447
14448 case OPTION_32:
14449 {
14450 const char **list, **l;
14451
14452 list = bfd_target_list ();
14453 for (l = list; *l != NULL; l++)
14454 if (strstr (*l, "-i386")
14455 || strstr (*l, "-go32"))
14456 {
14457 default_arch = "i386";
14458 break;
14459 }
14460 if (*l == NULL)
14461 as_fatal (_("no compiled in support for ix86"));
14462 free (list);
14463 }
14464 break;
14465
14466 case OPTION_DIVIDE:
14467 #ifdef SVR4_COMMENT_CHARS
14468 {
14469 char *n, *t;
14470 const char *s;
14471
14472 n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
14473 t = n;
14474 for (s = i386_comment_chars; *s != '\0'; s++)
14475 if (*s != '/')
14476 *t++ = *s;
14477 *t = '\0';
14478 i386_comment_chars = n;
14479 }
14480 #endif
14481 break;
14482
14483 case OPTION_MARCH:
14484 saved = xstrdup (arg);
14485 arch = saved;
14486 /* Allow -march=+nosse. */
14487 if (*arch == '+')
14488 arch++;
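      /* The argument has the shape CPU[+EXTENSION...]; extensions may be
	 "no"-prefixed to disable them, and AVX512 ones may carry a "/size"
	 vector width, e.g. "-march=generic64+avx512f/256".  */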
14489 do
14490 {
14491 char *vsz;
14492
14493 if (*arch == '.')
14494 as_fatal (_("invalid -march= option: `%s'"), arg);
14495 next = strchr (arch, '+');
14496 if (next)
14497 *next++ = '\0';
14498 vsz = strchr (arch, '/');
14499 if (vsz)
14500 *vsz++ = '\0';
14501 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14502 {
14503 if (vsz && cpu_arch[j].vsz != vsz_set)
14504 continue;
14505
14506 if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
14507 && strcmp (arch, cpu_arch[j].name) == 0)
14508 {
14509 /* Processor. */
14510 if (! cpu_arch[j].enable.bitfield.cpui386)
14511 continue;
14512
14513 cpu_arch_name = cpu_arch[j].name;
14514 free (cpu_sub_arch_name);
14515 cpu_sub_arch_name = NULL;
14516 cpu_arch_flags = cpu_arch[j].enable;
14517 cpu_arch_isa = cpu_arch[j].type;
14518 cpu_arch_isa_flags = cpu_arch[j].enable;
14519 if (!cpu_arch_tune_set)
14520 {
14521 cpu_arch_tune = cpu_arch_isa;
14522 cpu_arch_tune_flags = cpu_arch_isa_flags;
14523 }
14524 vector_size = VSZ_DEFAULT;
14525 break;
14526 }
14527 else if (cpu_arch[j].type == PROCESSOR_NONE
14528 && strcmp (arch, cpu_arch[j].name) == 0
14529 && !cpu_flags_all_zero (&cpu_arch[j].enable))
14530 {
14531 /* ISA extension. */
14532 i386_cpu_flags flags;
14533
14534 flags = cpu_flags_or (cpu_arch_flags,
14535 cpu_arch[j].enable);
14536
14537 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
14538 {
14539 extend_cpu_sub_arch_name (arch);
14540 cpu_arch_flags = flags;
14541 cpu_arch_isa_flags = flags;
14542 }
14543 else
14544 cpu_arch_isa_flags
14545 = cpu_flags_or (cpu_arch_isa_flags,
14546 cpu_arch[j].enable);
14547
14548 switch (cpu_arch[j].vsz)
14549 {
14550 default:
14551 break;
14552
14553 case vsz_set:
14554 if (vsz)
14555 {
14556 char *end;
14557 unsigned long val = strtoul (vsz, &end, 0);
14558
14559 if (*end)
14560 val = 0;
14561 switch (val)
14562 {
14563 case 512: vector_size = VSZ512; break;
14564 case 256: vector_size = VSZ256; break;
14565 case 128: vector_size = VSZ128; break;
14566 default:
14567 as_warn (_("Unrecognized vector size specifier ignored"));
14568 break;
14569 }
14570 break;
14571 }
14572 /* Fall through. */
14573 case vsz_reset:
14574 vector_size = VSZ_DEFAULT;
14575 break;
14576 }
14577
14578 break;
14579 }
14580 }
14581
14582 if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
14583 {
14584 /* Disable an ISA extension. */
14585 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14586 if (cpu_arch[j].type == PROCESSOR_NONE
14587 && strcmp (arch + 2, cpu_arch[j].name) == 0)
14588 {
14589 i386_cpu_flags flags;
14590
14591 flags = cpu_flags_and_not (cpu_arch_flags,
14592 cpu_arch[j].disable);
14593 if (!cpu_flags_equal (&flags, &cpu_arch_flags))
14594 {
14595 extend_cpu_sub_arch_name (arch);
14596 cpu_arch_flags = flags;
14597 cpu_arch_isa_flags = flags;
14598 }
14599 if (cpu_arch[j].vsz == vsz_set)
14600 vector_size = VSZ_DEFAULT;
14601 break;
14602 }
14603 }
14604
14605 if (j >= ARRAY_SIZE (cpu_arch))
14606 as_fatal (_("invalid -march= option: `%s'"), arg);
14607
14608 arch = next;
14609 }
14610 while (next != NULL);
14611 free (saved);
14612 break;
14613
14614 case OPTION_MTUNE:
14615 if (*arg == '.')
14616 as_fatal (_("invalid -mtune= option: `%s'"), arg);
14617 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
14618 {
14619 if (cpu_arch[j].type != PROCESSOR_NONE
14620 && strcmp (arg, cpu_arch[j].name) == 0)
14621 {
14622 cpu_arch_tune_set = 1;
14623 cpu_arch_tune = cpu_arch [j].type;
14624 cpu_arch_tune_flags = cpu_arch[j].enable;
14625 break;
14626 }
14627 }
14628 if (j >= ARRAY_SIZE (cpu_arch))
14629 as_fatal (_("invalid -mtune= option: `%s'"), arg);
14630 break;
14631
14632 case OPTION_MMNEMONIC:
14633 if (strcasecmp (arg, "att") == 0)
14634 intel_mnemonic = 0;
14635 else if (strcasecmp (arg, "intel") == 0)
14636 intel_mnemonic = 1;
14637 else
14638 as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
14639 break;
14640
14641 case OPTION_MSYNTAX:
14642 if (strcasecmp (arg, "att") == 0)
14643 intel_syntax = 0;
14644 else if (strcasecmp (arg, "intel") == 0)
14645 intel_syntax = 1;
14646 else
14647 as_fatal (_("invalid -msyntax= option: `%s'"), arg);
14648 break;
14649
14650 case OPTION_MINDEX_REG:
14651 allow_index_reg = 1;
14652 break;
14653
14654 case OPTION_MNAKED_REG:
14655 allow_naked_reg = 1;
14656 break;
14657
14658 case OPTION_MSSE2AVX:
14659 sse2avx = 1;
14660 break;
14661
14662 case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
14663 use_unaligned_vector_move = 1;
14664 break;
14665
14666 case OPTION_MSSE_CHECK:
14667 if (strcasecmp (arg, "error") == 0)
14668 sse_check = check_error;
14669 else if (strcasecmp (arg, "warning") == 0)
14670 sse_check = check_warning;
14671 else if (strcasecmp (arg, "none") == 0)
14672 sse_check = check_none;
14673 else
14674 as_fatal (_("invalid -msse-check= option: `%s'"), arg);
14675 break;
14676
14677 case OPTION_MOPERAND_CHECK:
14678 if (strcasecmp (arg, "error") == 0)
14679 operand_check = check_error;
14680 else if (strcasecmp (arg, "warning") == 0)
14681 operand_check = check_warning;
14682 else if (strcasecmp (arg, "none") == 0)
14683 operand_check = check_none;
14684 else
14685 as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
14686 break;
14687
14688 case OPTION_MAVXSCALAR:
14689 if (strcasecmp (arg, "128") == 0)
14690 avxscalar = vex128;
14691 else if (strcasecmp (arg, "256") == 0)
14692 avxscalar = vex256;
14693 else
14694 as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
14695 break;
14696
14697 case OPTION_MVEXWIG:
14698 if (strcmp (arg, "0") == 0)
14699 vexwig = vexw0;
14700 else if (strcmp (arg, "1") == 0)
14701 vexwig = vexw1;
14702 else
14703 as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
14704 break;
14705
14706 case OPTION_MADD_BND_PREFIX:
14707 add_bnd_prefix = 1;
14708 break;
14709
14710 case OPTION_MEVEXLIG:
14711 if (strcmp (arg, "128") == 0)
14712 evexlig = evexl128;
14713 else if (strcmp (arg, "256") == 0)
14714 evexlig = evexl256;
14715 else if (strcmp (arg, "512") == 0)
14716 evexlig = evexl512;
14717 else
14718 as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
14719 break;
14720
14721 case OPTION_MEVEXRCIG:
14722 if (strcmp (arg, "rne") == 0)
14723 evexrcig = rne;
14724 else if (strcmp (arg, "rd") == 0)
14725 evexrcig = rd;
14726 else if (strcmp (arg, "ru") == 0)
14727 evexrcig = ru;
14728 else if (strcmp (arg, "rz") == 0)
14729 evexrcig = rz;
14730 else
14731 as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
14732 break;
14733
14734 case OPTION_MEVEXWIG:
14735 if (strcmp (arg, "0") == 0)
14736 evexwig = evexw0;
14737 else if (strcmp (arg, "1") == 0)
14738 evexwig = evexw1;
14739 else
14740 as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
14741 break;
14742
14743 # if defined (TE_PE) || defined (TE_PEP)
14744 case OPTION_MBIG_OBJ:
14745 use_big_obj = 1;
14746 break;
14747 #endif
14748
14749 case OPTION_MOMIT_LOCK_PREFIX:
14750 if (strcasecmp (arg, "yes") == 0)
14751 omit_lock_prefix = 1;
14752 else if (strcasecmp (arg, "no") == 0)
14753 omit_lock_prefix = 0;
14754 else
14755 as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
14756 break;
14757
14758 case OPTION_MFENCE_AS_LOCK_ADD:
14759 if (strcasecmp (arg, "yes") == 0)
14760 avoid_fence = 1;
14761 else if (strcasecmp (arg, "no") == 0)
14762 avoid_fence = 0;
14763 else
14764 as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
14765 break;
14766
14767 case OPTION_MLFENCE_AFTER_LOAD:
14768 if (strcasecmp (arg, "yes") == 0)
14769 lfence_after_load = 1;
14770 else if (strcasecmp (arg, "no") == 0)
14771 lfence_after_load = 0;
14772 else
14773 as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
14774 break;
14775
14776 case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
14777 if (strcasecmp (arg, "all") == 0)
14778 {
14779 lfence_before_indirect_branch = lfence_branch_all;
14780 if (lfence_before_ret == lfence_before_ret_none)
14781 lfence_before_ret = lfence_before_ret_shl;
14782 }
14783 else if (strcasecmp (arg, "memory") == 0)
14784 lfence_before_indirect_branch = lfence_branch_memory;
14785 else if (strcasecmp (arg, "register") == 0)
14786 lfence_before_indirect_branch = lfence_branch_register;
14787 else if (strcasecmp (arg, "none") == 0)
14788 lfence_before_indirect_branch = lfence_branch_none;
14789 else
14790 as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
14791 arg);
14792 break;
14793
14794 case OPTION_MLFENCE_BEFORE_RET:
14795 if (strcasecmp (arg, "or") == 0)
14796 lfence_before_ret = lfence_before_ret_or;
14797 else if (strcasecmp (arg, "not") == 0)
14798 lfence_before_ret = lfence_before_ret_not;
14799 else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
14800 lfence_before_ret = lfence_before_ret_shl;
14801 else if (strcasecmp (arg, "none") == 0)
14802 lfence_before_ret = lfence_before_ret_none;
14803 else
14804 as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
14805 arg);
14806 break;
14807
14808 case OPTION_MRELAX_RELOCATIONS:
14809 if (strcasecmp (arg, "yes") == 0)
14810 generate_relax_relocations = 1;
14811 else if (strcasecmp (arg, "no") == 0)
14812 generate_relax_relocations = 0;
14813 else
14814 as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
14815 break;
14816
14817 case OPTION_MALIGN_BRANCH_BOUNDARY:
14818 {
14819 char *end;
14820 long int align = strtoul (arg, &end, 0);
14821 if (*end == '\0')
14822 {
14823 if (align == 0)
14824 {
14825 align_branch_power = 0;
14826 break;
14827 }
14828 else if (align >= 16)
14829 {
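	    /* Accept only powers of two (>= 16), reducing the boundary
	       to its log2 for align_branch_power.  */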
14830 int align_power;
14831 for (align_power = 0;
14832 (align & 1) == 0;
14833 align >>= 1, align_power++)
14834 continue;
14835 /* Limit alignment power to 31. */
14836 if (align == 1 && align_power < 32)
14837 {
14838 align_branch_power = align_power;
14839 break;
14840 }
14841 }
14842 }
14843 as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
14844 }
14845 break;
14846
14847 case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
14848 {
14849 char *end;
14850 int align = strtoul (arg, &end, 0);
14851 /* Some processors only support 5 prefixes. */
14852 if (*end == '\0' && align >= 0 && align < 6)
14853 {
14854 align_branch_prefix_size = align;
14855 break;
14856 }
14857 as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
14858 arg);
14859 }
14860 break;
14861
14862 case OPTION_MALIGN_BRANCH:
14863 align_branch = 0;
14864 saved = xstrdup (arg);
14865 type = saved;
14866 do
14867 {
14868 next = strchr (type, '+');
14869 if (next)
14870 *next++ = '\0';
14871 if (strcasecmp (type, "jcc") == 0)
14872 align_branch |= align_branch_jcc_bit;
14873 else if (strcasecmp (type, "fused") == 0)
14874 align_branch |= align_branch_fused_bit;
14875 else if (strcasecmp (type, "jmp") == 0)
14876 align_branch |= align_branch_jmp_bit;
14877 else if (strcasecmp (type, "call") == 0)
14878 align_branch |= align_branch_call_bit;
14879 else if (strcasecmp (type, "ret") == 0)
14880 align_branch |= align_branch_ret_bit;
14881 else if (strcasecmp (type, "indirect") == 0)
14882 align_branch |= align_branch_indirect_bit;
14883 else
14884 as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
14885 type = next;
14886 }
14887 while (next != NULL);
14888 free (saved);
14889 break;
14890
14891 case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
14892 align_branch_power = 5;
14893 align_branch_prefix_size = 5;
14894 align_branch = (align_branch_jcc_bit
14895 | align_branch_fused_bit
14896 | align_branch_jmp_bit);
14897 break;
14898
14899 case OPTION_MAMD64:
14900 isa64 = amd64;
14901 break;
14902
14903 case OPTION_MINTEL64:
14904 isa64 = intel64;
14905 break;
14906
14907 case 'O':
14908 if (arg == NULL)
14909 {
14910 optimize = 1;
14911 /* Turn off -Os. */
14912 optimize_for_space = 0;
14913 }
14914 else if (*arg == 's')
14915 {
14916 optimize_for_space = 1;
14917 /* Turn on all encoding optimizations. */
14918 optimize = INT_MAX;
14919 }
14920 else
14921 {
14922 optimize = atoi (arg);
14923 /* Turn off -Os. */
14924 optimize_for_space = 0;
14925 }
14926 break;
14927
14928 default:
14929 return 0;
14930 }
14931 return 1;
14932 }
14933
14934 #define MESSAGE_TEMPLATE \
14935 " "
14936
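/* Append NAME (LEN bytes) to the list being accumulated in MESSAGE (a
   MESSAGE_TEMPLATE-sized buffer); when the line would overflow, flush it
   to STREAM and start a new one.  Returns the new write position.  */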
14937 static char *
14938 output_message (FILE *stream, char *p, char *message, char *start,
14939 int *left_p, const char *name, int len)
14940 {
14941 int size = sizeof (MESSAGE_TEMPLATE);
14942 int left = *left_p;
14943
14944 /* Reserve 2 spaces for ", " or ",\0" */
14945 left -= len + 2;
14946
14947 /* Check if there is any room. */
14948 if (left >= 0)
14949 {
14950 if (p != start)
14951 {
14952 *p++ = ',';
14953 *p++ = ' ';
14954 }
14955 p = mempcpy (p, name, len);
14956 }
14957 else
14958 {
14959 /* Output the current message now and start a new one. */
14960 *p++ = ',';
14961 *p = '\0';
14962 fprintf (stream, "%s\n", message);
14963 p = start;
14964 left = size - (start - message) - len - 2;
14965
14966 gas_assert (left >= 0);
14967
14968 p = mempcpy (p, name, len);
14969 }
14970
14971 *left_p = left;
14972 return p;
14973 }
14974
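/* Print the known processors (EXT == 0) or extensions (EXT != 0) to
   STREAM.  With CHECK set, also list the special "default"/"push"/"pop"
   arguments accepted by .arch and skip processors lacking i386 support.  */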
14975 static void
14976 show_arch (FILE *stream, int ext, int check)
14977 {
14978 static char message[] = MESSAGE_TEMPLATE;
14979 char *start = message + 27;
14980 char *p;
14981 int size = sizeof (MESSAGE_TEMPLATE);
14982 int left;
14983 const char *name;
14984 int len;
14985 unsigned int j;
14986
14987 p = start;
14988 left = size - (start - message);
14989
14990 if (!ext && check)
14991 {
14992 p = output_message (stream, p, message, start, &left,
14993 STRING_COMMA_LEN ("default"));
14994 p = output_message (stream, p, message, start, &left,
14995 STRING_COMMA_LEN ("push"));
14996 p = output_message (stream, p, message, start, &left,
14997 STRING_COMMA_LEN ("pop"));
14998 }
14999
15000 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
15001 {
15002 /* Should it be skipped? */
15003 if (cpu_arch [j].skip)
15004 continue;
15005
15006 name = cpu_arch [j].name;
15007 len = cpu_arch [j].len;
15008 if (cpu_arch[j].type == PROCESSOR_NONE)
15009 {
15010 /* It is an extension. Skip if we aren't asked to show it. */
15011 if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
15012 continue;
15013 }
15014 else if (ext)
15015 {
15016 /* It is a processor. Skip if we only show extensions. */
15017 continue;
15018 }
15019 else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
15020 {
15021 /* It is an impossible processor - skip. */
15022 continue;
15023 }
15024
15025 p = output_message (stream, p, message, start, &left, name, len);
15026 }
15027
15028 /* Display disabled extensions. */
15029 if (ext)
15030 for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
15031 {
15032 char *str;
15033
15034 if (cpu_arch[j].type != PROCESSOR_NONE
15035 || !cpu_flags_all_zero (&cpu_arch[j].enable))
15036 continue;
15037 str = xasprintf ("no%s", cpu_arch[j].name);
15038 p = output_message (stream, p, message, start, &left, str,
15039 strlen (str));
15040 free (str);
15041 }
15042
15043 *p = '\0';
15044 fprintf (stream, "%s\n", message);
15045 }
15046
15047 void
15048 md_show_usage (FILE *stream)
15049 {
15050 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15051 fprintf (stream, _("\
15052 -Qy, -Qn ignored\n\
15053 -V print assembler version number\n\
15054 -k ignored\n"));
15055 #endif
15056 fprintf (stream, _("\
15057 -n do not optimize code alignment\n\
15058 -O{012s} attempt some code optimizations\n\
15059 -q quieten some warnings\n"));
15060 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15061 fprintf (stream, _("\
15062 -s ignored\n"));
15063 #endif
15064 #ifdef BFD64
15065 # if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15066 fprintf (stream, _("\
15067 --32/--64/--x32 generate 32bit/64bit/x32 object\n"));
15068 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
15069 fprintf (stream, _("\
15070 --32/--64 generate 32bit/64bit object\n"));
15071 # endif
15072 #endif
15073 #ifdef SVR4_COMMENT_CHARS
15074 fprintf (stream, _("\
15075 --divide do not treat `/' as a comment character\n"));
15076 #else
15077 fprintf (stream, _("\
15078 --divide ignored\n"));
15079 #endif
15080 fprintf (stream, _("\
15081 -march=CPU[,+EXTENSION...]\n\
15082 generate code for CPU and EXTENSION, where CPU is one of:\n"));
15083 show_arch (stream, 0, 1);
15084 fprintf (stream, _("\
15085 EXTENSION is a combination of (possibly \"no\"-prefixed):\n"));
15086 show_arch (stream, 1, 0);
15087 fprintf (stream, _("\
15088 -mtune=CPU optimize for CPU, CPU is one of:\n"));
15089 show_arch (stream, 0, 0);
15090 fprintf (stream, _("\
15091 -msse2avx encode SSE instructions with VEX prefix\n"));
15092 fprintf (stream, _("\
15093 -muse-unaligned-vector-move\n\
15094 encode aligned vector move as unaligned vector move\n"));
15095 fprintf (stream, _("\
15096 -msse-check=[none|error|warning] (default: warning)\n\
15097 check SSE instructions\n"));
15098 fprintf (stream, _("\
15099 -moperand-check=[none|error|warning] (default: warning)\n\
15100 check operand combinations for validity\n"));
15101 fprintf (stream, _("\
15102 -mavxscalar=[128|256] (default: 128)\n\
15103 encode scalar AVX instructions with specific vector\n\
15104 length\n"));
15105 fprintf (stream, _("\
15106 -mvexwig=[0|1] (default: 0)\n\
15107 encode VEX instructions with specific VEX.W value\n\
15108 for VEX.W bit ignored instructions\n"));
15109 fprintf (stream, _("\
15110 -mevexlig=[128|256|512] (default: 128)\n\
15111 encode scalar EVEX instructions with specific vector\n\
15112 length\n"));
15113 fprintf (stream, _("\
15114 -mevexwig=[0|1] (default: 0)\n\
15115 encode EVEX instructions with specific EVEX.W value\n\
15116 for EVEX.W bit ignored instructions\n"));
15117 fprintf (stream, _("\
15118 -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
15119 encode EVEX instructions with specific EVEX.RC value\n\
15120 for SAE-only ignored instructions\n"));
15121 fprintf (stream, _("\
15122 -mmnemonic=[att|intel] "));
15123 if (SYSV386_COMPAT)
15124 fprintf (stream, _("(default: att)\n"));
15125 else
15126 fprintf (stream, _("(default: intel)\n"));
15127 fprintf (stream, _("\
15128 use AT&T/Intel mnemonic\n"));
15129 fprintf (stream, _("\
15130 -msyntax=[att|intel] (default: att)\n\
15131 use AT&T/Intel syntax\n"));
15132 fprintf (stream, _("\
15133 -mindex-reg support pseudo index registers\n"));
15134 fprintf (stream, _("\
15135 -mnaked-reg don't require `%%' prefix for registers\n"));
15136 fprintf (stream, _("\
15137 -madd-bnd-prefix add BND prefix for all valid branches\n"));
15138 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15139 fprintf (stream, _("\
15140 -mshared disable branch optimization for shared code\n"));
15141 fprintf (stream, _("\
15142 -mx86-used-note=[no|yes] "));
15143 if (DEFAULT_X86_USED_NOTE)
15144 fprintf (stream, _("(default: yes)\n"));
15145 else
15146 fprintf (stream, _("(default: no)\n"));
15147 fprintf (stream, _("\
15148 generate x86 used ISA and feature properties\n"));
15149 #endif
15150 #if defined (TE_PE) || defined (TE_PEP)
15151 fprintf (stream, _("\
15152 -mbig-obj generate big object files\n"));
15153 #endif
15154 fprintf (stream, _("\
15155 -momit-lock-prefix=[no|yes] (default: no)\n\
15156 strip all lock prefixes\n"));
15157 fprintf (stream, _("\
15158 -mfence-as-lock-add=[no|yes] (default: no)\n\
15159 encode lfence, mfence and sfence as\n\
15160 lock addl $0x0, (%%{re}sp)\n"));
15161 fprintf (stream, _("\
15162 -mrelax-relocations=[no|yes] "));
15163 if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
15164 fprintf (stream, _("(default: yes)\n"));
15165 else
15166 fprintf (stream, _("(default: no)\n"));
15167 fprintf (stream, _("\
15168 generate relax relocations\n"));
15169 fprintf (stream, _("\
15170 -malign-branch-boundary=NUM (default: 0)\n\
15171 align branches within NUM byte boundary\n"));
15172 fprintf (stream, _("\
15173 -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
15174 TYPE is combination of jcc, fused, jmp, call, ret,\n\
15175 indirect\n\
15176 specify types of branches to align\n"));
15177 fprintf (stream, _("\
15178 -malign-branch-prefix-size=NUM (default: 5)\n\
15179 align branches with NUM prefixes per instruction\n"));
15180 fprintf (stream, _("\
15181 -mbranches-within-32B-boundaries\n\
15182 align branches within 32 byte boundary\n"));
15183 fprintf (stream, _("\
15184 -mlfence-after-load=[no|yes] (default: no)\n\
15185 generate lfence after load\n"));
15186 fprintf (stream, _("\
15187 -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
15188 generate lfence before indirect near branch\n"));
15189 fprintf (stream, _("\
15190 -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
15191 generate lfence before ret\n"));
15192 fprintf (stream, _("\
15193 -mamd64 accept only AMD64 ISA [default]\n"));
15194 fprintf (stream, _("\
15195 -mintel64 accept only Intel64 ISA\n"));
15196 }
15197
15198 #if ((defined (OBJ_MAYBE_COFF) && defined (OBJ_MAYBE_AOUT)) \
15199 || defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) \
15200 || defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O))
15201
15202 /* Pick the target format to use. */
15203
15204 const char *
15205 i386_target_format (void)
15206 {
15207 if (startswith (default_arch, "x86_64"))
15208 {
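      /* Plain "x86_64" selects the 64-bit ABI; any suffix (as in
	 "x86_64:32") selects x32.  */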
15209 update_code_flag (CODE_64BIT, 1);
15210 if (default_arch[6] == '\0')
15211 x86_elf_abi = X86_64_ABI;
15212 else
15213 x86_elf_abi = X86_64_X32_ABI;
15214 }
15215 else if (!strcmp (default_arch, "i386"))
15216 update_code_flag (CODE_32BIT, 1);
15217 else if (!strcmp (default_arch, "iamcu"))
15218 {
15219 update_code_flag (CODE_32BIT, 1);
15220 if (cpu_arch_isa == PROCESSOR_UNKNOWN)
15221 {
15222 static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
15223 cpu_arch_name = "iamcu";
15224 free (cpu_sub_arch_name);
15225 cpu_sub_arch_name = NULL;
15226 cpu_arch_flags = iamcu_flags;
15227 cpu_arch_isa = PROCESSOR_IAMCU;
15228 cpu_arch_isa_flags = iamcu_flags;
15229 if (!cpu_arch_tune_set)
15230 {
15231 cpu_arch_tune = cpu_arch_isa;
15232 cpu_arch_tune_flags = cpu_arch_isa_flags;
15233 }
15234 }
15235 else if (cpu_arch_isa != PROCESSOR_IAMCU)
15236 as_fatal (_("Intel MCU doesn't support `%s' architecture"),
15237 cpu_arch_name);
15238 }
15239 else
15240 as_fatal (_("unknown architecture"));
15241
15242 if (cpu_flags_all_zero (&cpu_arch_isa_flags))
15243 cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
15244 if (cpu_flags_all_zero (&cpu_arch_tune_flags))
15245 cpu_arch_tune_flags = cpu_arch[flag_code == CODE_64BIT].enable;
15246
15247 switch (OUTPUT_FLAVOR)
15248 {
15249 #if defined (OBJ_MAYBE_AOUT) || defined (OBJ_AOUT)
15250 case bfd_target_aout_flavour:
15251 return AOUT_TARGET_FORMAT;
15252 #endif
15253 #if defined (OBJ_MAYBE_COFF) || defined (OBJ_COFF)
15254 # if defined (TE_PE) || defined (TE_PEP)
15255 case bfd_target_coff_flavour:
15256 if (flag_code == CODE_64BIT)
15257 {
15258 object_64bit = 1;
15259 return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
15260 }
15261 return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
15262 # elif defined (TE_GO32)
15263 case bfd_target_coff_flavour:
15264 return "coff-go32";
15265 # else
15266 case bfd_target_coff_flavour:
15267 return "coff-i386";
15268 # endif
15269 #endif
15270 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
15271 case bfd_target_elf_flavour:
15272 {
15273 const char *format;
15274
15275 switch (x86_elf_abi)
15276 {
15277 default:
15278 format = ELF_TARGET_FORMAT;
15279 #ifndef TE_SOLARIS
15280 tls_get_addr = "___tls_get_addr";
15281 #endif
15282 break;
15283 case X86_64_ABI:
15284 use_rela_relocations = 1;
15285 object_64bit = 1;
15286 #ifndef TE_SOLARIS
15287 tls_get_addr = "__tls_get_addr";
15288 #endif
15289 format = ELF_TARGET_FORMAT64;
15290 break;
15291 case X86_64_X32_ABI:
15292 use_rela_relocations = 1;
15293 object_64bit = 1;
15294 #ifndef TE_SOLARIS
15295 tls_get_addr = "__tls_get_addr";
15296 #endif
15297 disallow_64bit_reloc = 1;
15298 format = ELF_TARGET_FORMAT32;
15299 break;
15300 }
15301 if (cpu_arch_isa == PROCESSOR_IAMCU)
15302 {
15303 if (x86_elf_abi != I386_ABI)
15304 as_fatal (_("Intel MCU is 32bit only"));
15305 return ELF_TARGET_IAMCU_FORMAT;
15306 }
15307 else
15308 return format;
15309 }
15310 #endif
15311 #if defined (OBJ_MACH_O)
15312 case bfd_target_mach_o_flavour:
15313 if (flag_code == CODE_64BIT)
15314 {
15315 use_rela_relocations = 1;
15316 object_64bit = 1;
15317 return "mach-o-x86-64";
15318 }
15319 else
15320 return "mach-o-i386";
15321 #endif
15322 default:
15323 abort ();
15324 return NULL;
15325 }
15326 }
15327
15328 #endif /* OBJ_MAYBE_ more than one */
15329 \f
15330 symbolS *
15331 md_undefined_symbol (char *name)
15332 {
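  /* Compare the first few characters by hand before paying for a full
     strcmp() against GLOBAL_OFFSET_TABLE_NAME.  */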
15333 if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
15334 && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
15335 && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
15336 && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
15337 {
15338 if (!GOT_symbol)
15339 {
15340 if (symbol_find (name))
15341 as_bad (_("GOT already in symbol table"));
15342 GOT_symbol = symbol_new (name, undefined_section,
15343 &zero_address_frag, 0);
15344 }
15345 return GOT_symbol;
15346 }
15347 return 0;
15348 }
15349
15350 /* Round up a section size to the appropriate boundary. */
15351
15352 valueT
15353 md_section_align (segT segment ATTRIBUTE_UNUSED, valueT size)
15354 {
15355 #if (defined (OBJ_AOUT) || defined (OBJ_MAYBE_AOUT))
15356 if (OUTPUT_FLAVOR == bfd_target_aout_flavour)
15357 {
15358 /* For a.out, force the section size to be aligned. If we don't do
15359 this, BFD will align it for us, but it will not write out the
15360 final bytes of the section. This may be a bug in BFD, but it is
15361 easier to fix it here since that is how the other a.out targets
15362 work. */
15363 int align;
15364
15365 align = bfd_section_alignment (segment);
15366 size = ((size + (1 << align) - 1) & (-((valueT) 1 << align)));
15367 }
15368 #endif
15369
15370 return size;
15371 }
15372
15373 /* On the i386, PC-relative offsets are relative to the start of the
15374 next instruction. That is, the address of the offset, plus its
15375 size, since the offset is always the last part of the insn. */
15376
15377 long
15378 md_pcrel_from (fixS *fixP)
15379 {
15380 return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
15381 }
15382
15383 #ifndef I386COFF
15384
15385 static void
15386 s_bss (int ignore ATTRIBUTE_UNUSED)
15387 {
15388 int temp;
15389
15390 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15391 if (IS_ELF)
15392 obj_elf_section_change_hook ();
15393 #endif
15394 temp = get_absolute_expression ();
15395 subseg_set (bss_section, (subsegT) temp);
15396 demand_empty_rest_of_line ();
15397 }
15398
15399 #endif
15400
15401 /* Remember constant directive. */
15402
15403 void
15404 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
15405 {
15406 if (last_insn.kind != last_insn_directive
15407 && (bfd_section_flags (now_seg) & SEC_CODE))
15408 {
15409 last_insn.seg = now_seg;
15410 last_insn.kind = last_insn_directive;
15411 last_insn.name = "constant directive";
15412 last_insn.file = as_where (&last_insn.line);
15413 if (lfence_before_ret != lfence_before_ret_none)
15414 {
15415 if (lfence_before_indirect_branch != lfence_branch_none)
15416 as_warn (_("constant directive skips -mlfence-before-ret "
15417 "and -mlfence-before-indirect-branch"));
15418 else
15419 as_warn (_("constant directive skips -mlfence-before-ret"));
15420 }
15421 else if (lfence_before_indirect_branch != lfence_branch_none)
15422 as_warn (_("constant directive skips -mlfence-before-indirect-branch"));
15423 }
15424 }
15425
15426 int
15427 i386_validate_fix (fixS *fixp)
15428 {
15429 if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
15430 {
15431 reloc_howto_type *howto;
15432
15433 howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
15434 as_bad_where (fixp->fx_file, fixp->fx_line,
15435 _("invalid %s relocation against register"),
15436 howto ? howto->name : "<unknown>");
15437 return 0;
15438 }
15439
15440 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15441 if (fixp->fx_r_type == BFD_RELOC_SIZE32
15442 || fixp->fx_r_type == BFD_RELOC_SIZE64)
15443 return IS_ELF && fixp->fx_addsy
15444 && (!S_IS_DEFINED (fixp->fx_addsy)
15445 || S_IS_EXTERNAL (fixp->fx_addsy));
15446 #endif
15447
15448 if (fixp->fx_subsy)
15449 {
15450 if (fixp->fx_subsy == GOT_symbol)
15451 {
15452 if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
15453 {
15454 if (!object_64bit)
15455 abort ();
15456 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15457 if (fixp->fx_tcbit2)
15458 fixp->fx_r_type = (fixp->fx_tcbit
15459 ? BFD_RELOC_X86_64_REX_GOTPCRELX
15460 : BFD_RELOC_X86_64_GOTPCRELX);
15461 else
15462 #endif
15463 fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
15464 }
15465 else
15466 {
15467 if (!object_64bit)
15468 fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
15469 else
15470 fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
15471 }
15472 fixp->fx_subsy = 0;
15473 }
15474 }
15475 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15476 else
15477 {
15478 /* NB: Commit 292676c1 resolved PLT32 reloc aganst local symbol
15479 to section. Since PLT32 relocation must be against symbols,
15480 turn such PLT32 relocation into PC32 relocation. */
15481 if (fixp->fx_addsy
15482 && (fixp->fx_r_type == BFD_RELOC_386_PLT32
15483 || fixp->fx_r_type == BFD_RELOC_X86_64_PLT32)
15484 && symbol_section_p (fixp->fx_addsy))
15485 fixp->fx_r_type = BFD_RELOC_32_PCREL;
15486 if (!object_64bit)
15487 {
15488 if (fixp->fx_r_type == BFD_RELOC_386_GOT32
15489 && fixp->fx_tcbit2)
15490 fixp->fx_r_type = BFD_RELOC_386_GOT32X;
15491 }
15492 }
15493 #endif
15494
15495 return 1;
15496 }
15497
15498 arelent *
15499 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
15500 {
15501 arelent *rel;
15502 bfd_reloc_code_real_type code;
15503
15504 switch (fixp->fx_r_type)
15505 {
15506 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15507 symbolS *sym;
15508
15509 case BFD_RELOC_SIZE32:
15510 case BFD_RELOC_SIZE64:
15511 if (fixp->fx_addsy
15512 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
15513 && (!fixp->fx_subsy
15514 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
15515 sym = fixp->fx_addsy;
15516 else if (fixp->fx_subsy
15517 && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
15518 && (!fixp->fx_addsy
15519 || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
15520 sym = fixp->fx_subsy;
15521 else
15522 sym = NULL;
15523 if (IS_ELF && sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
15524 {
15525 /* Resolve size relocation against local symbol to size of
15526 the symbol plus addend. */
15527 valueT value = S_GET_SIZE (sym);
15528
15529 if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
15530 value = bfd_section_size (S_GET_SEGMENT (sym));
15531 if (sym == fixp->fx_subsy)
15532 {
15533 value = -value;
15534 if (fixp->fx_addsy)
15535 value += S_GET_VALUE (fixp->fx_addsy);
15536 }
15537 else if (fixp->fx_subsy)
15538 value -= S_GET_VALUE (fixp->fx_subsy);
15539 value += fixp->fx_offset;
15540 if (fixp->fx_r_type == BFD_RELOC_SIZE32
15541 && object_64bit
15542 && !fits_in_unsigned_long (value))
15543 as_bad_where (fixp->fx_file, fixp->fx_line,
15544 _("symbol size computation overflow"));
15545 fixp->fx_addsy = NULL;
15546 fixp->fx_subsy = NULL;
15547 md_apply_fix (fixp, (valueT *) &value, NULL);
15548 return NULL;
15549 }
15550 if (!fixp->fx_addsy || fixp->fx_subsy)
15551 {
15552 as_bad_where (fixp->fx_file, fixp->fx_line,
15553 "unsupported expression involving @size");
15554 return NULL;
15555 }
15556 #endif
15557 /* Fall through. */
15558
15559 case BFD_RELOC_X86_64_PLT32:
15560 case BFD_RELOC_X86_64_GOT32:
15561 case BFD_RELOC_X86_64_GOTPCREL:
15562 case BFD_RELOC_X86_64_GOTPCRELX:
15563 case BFD_RELOC_X86_64_REX_GOTPCRELX:
15564 case BFD_RELOC_386_PLT32:
15565 case BFD_RELOC_386_GOT32:
15566 case BFD_RELOC_386_GOT32X:
15567 case BFD_RELOC_386_GOTOFF:
15568 case BFD_RELOC_386_GOTPC:
15569 case BFD_RELOC_386_TLS_GD:
15570 case BFD_RELOC_386_TLS_LDM:
15571 case BFD_RELOC_386_TLS_LDO_32:
15572 case BFD_RELOC_386_TLS_IE_32:
15573 case BFD_RELOC_386_TLS_IE:
15574 case BFD_RELOC_386_TLS_GOTIE:
15575 case BFD_RELOC_386_TLS_LE_32:
15576 case BFD_RELOC_386_TLS_LE:
15577 case BFD_RELOC_386_TLS_GOTDESC:
15578 case BFD_RELOC_386_TLS_DESC_CALL:
15579 case BFD_RELOC_X86_64_TLSGD:
15580 case BFD_RELOC_X86_64_TLSLD:
15581 case BFD_RELOC_X86_64_DTPOFF32:
15582 case BFD_RELOC_X86_64_DTPOFF64:
15583 case BFD_RELOC_X86_64_GOTTPOFF:
15584 case BFD_RELOC_X86_64_TPOFF32:
15585 case BFD_RELOC_X86_64_TPOFF64:
15586 case BFD_RELOC_X86_64_GOTOFF64:
15587 case BFD_RELOC_X86_64_GOTPC32:
15588 case BFD_RELOC_X86_64_GOT64:
15589 case BFD_RELOC_X86_64_GOTPCREL64:
15590 case BFD_RELOC_X86_64_GOTPC64:
15591 case BFD_RELOC_X86_64_GOTPLT64:
15592 case BFD_RELOC_X86_64_PLTOFF64:
15593 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
15594 case BFD_RELOC_X86_64_TLSDESC_CALL:
15595 case BFD_RELOC_RVA:
15596 case BFD_RELOC_VTABLE_ENTRY:
15597 case BFD_RELOC_VTABLE_INHERIT:
15598 #ifdef TE_PE
15599 case BFD_RELOC_32_SECREL:
15600 case BFD_RELOC_16_SECIDX:
15601 #endif
15602 code = fixp->fx_r_type;
15603 break;
15604 case BFD_RELOC_X86_64_32S:
15605 if (!fixp->fx_pcrel)
15606 {
15607 /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32. */
15608 code = fixp->fx_r_type;
15609 break;
15610 }
15611 /* Fall through. */
15612 default:
15613 if (fixp->fx_pcrel)
15614 {
15615 switch (fixp->fx_size)
15616 {
15617 default:
15618 as_bad_where (fixp->fx_file, fixp->fx_line,
15619 _("can not do %d byte pc-relative relocation"),
15620 fixp->fx_size);
15621 code = BFD_RELOC_32_PCREL;
15622 break;
15623 case 1: code = BFD_RELOC_8_PCREL; break;
15624 case 2: code = BFD_RELOC_16_PCREL; break;
15625 case 4: code = BFD_RELOC_32_PCREL; break;
15626 #ifdef BFD64
15627 case 8: code = BFD_RELOC_64_PCREL; break;
15628 #endif
15629 }
15630 }
15631 else
15632 {
15633 switch (fixp->fx_size)
15634 {
15635 default:
15636 as_bad_where (fixp->fx_file, fixp->fx_line,
15637 _("can not do %d byte relocation"),
15638 fixp->fx_size);
15639 code = BFD_RELOC_32;
15640 break;
15641 case 1: code = BFD_RELOC_8; break;
15642 case 2: code = BFD_RELOC_16; break;
15643 case 4: code = BFD_RELOC_32; break;
15644 #ifdef BFD64
15645 case 8: code = BFD_RELOC_64; break;
15646 #endif
15647 }
15648 }
15649 break;
15650 }
15651
15652 if ((code == BFD_RELOC_32
15653 || code == BFD_RELOC_32_PCREL
15654 || code == BFD_RELOC_X86_64_32S)
15655 && GOT_symbol
15656 && fixp->fx_addsy == GOT_symbol)
15657 {
15658 if (!object_64bit)
15659 code = BFD_RELOC_386_GOTPC;
15660 else
15661 code = BFD_RELOC_X86_64_GOTPC32;
15662 }
15663 if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
15664 && GOT_symbol
15665 && fixp->fx_addsy == GOT_symbol)
15666 {
15667 code = BFD_RELOC_X86_64_GOTPC64;
15668 }
15669
15670 rel = XNEW (arelent);
15671 rel->sym_ptr_ptr = XNEW (asymbol *);
15672 *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
15673
15674 rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
15675
15676 if (!use_rela_relocations)
15677 {
15678 /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
15679 vtable entry to be used in the relocation's section offset. */
15680 if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
15681 rel->address = fixp->fx_offset;
15682 #if defined (OBJ_COFF) && defined (TE_PE)
15683 else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
15684 rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
15685 else
15686 #endif
15687 rel->addend = 0;
15688 }
15689 /* Use the rela in 64bit mode. */
15690 else
15691 {
15692 if (disallow_64bit_reloc)
15693 switch (code)
15694 {
15695 case BFD_RELOC_X86_64_DTPOFF64:
15696 case BFD_RELOC_X86_64_TPOFF64:
15697 case BFD_RELOC_64_PCREL:
15698 case BFD_RELOC_X86_64_GOTOFF64:
15699 case BFD_RELOC_X86_64_GOT64:
15700 case BFD_RELOC_X86_64_GOTPCREL64:
15701 case BFD_RELOC_X86_64_GOTPC64:
15702 case BFD_RELOC_X86_64_GOTPLT64:
15703 case BFD_RELOC_X86_64_PLTOFF64:
15704 as_bad_where (fixp->fx_file, fixp->fx_line,
15705 _("cannot represent relocation type %s in x32 mode"),
15706 bfd_get_reloc_code_name (code));
15707 break;
15708 default:
15709 break;
15710 }
15711
15712 if (!fixp->fx_pcrel)
15713 rel->addend = fixp->fx_offset;
15714 else
15715 switch (code)
15716 {
15717 case BFD_RELOC_X86_64_PLT32:
15718 case BFD_RELOC_X86_64_GOT32:
15719 case BFD_RELOC_X86_64_GOTPCREL:
15720 case BFD_RELOC_X86_64_GOTPCRELX:
15721 case BFD_RELOC_X86_64_REX_GOTPCRELX:
15722 case BFD_RELOC_X86_64_TLSGD:
15723 case BFD_RELOC_X86_64_TLSLD:
15724 case BFD_RELOC_X86_64_GOTTPOFF:
15725 case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
15726 case BFD_RELOC_X86_64_TLSDESC_CALL:
15727 rel->addend = fixp->fx_offset - fixp->fx_size;
15728 break;
15729 default:
15730 rel->addend = (section->vma
15731 - fixp->fx_size
15732 + fixp->fx_addnumber
15733 + md_pcrel_from (fixp));
15734 break;
15735 }
15736 }
15737
15738 rel->howto = bfd_reloc_type_lookup (stdoutput, code);
15739 if (rel->howto == NULL)
15740 {
15741 as_bad_where (fixp->fx_file, fixp->fx_line,
15742 _("cannot represent relocation type %s"),
15743 bfd_get_reloc_code_name (code));
15744 /* Set howto to a garbage value so that we can keep going. */
15745 rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
15746 gas_assert (rel->howto != NULL);
15747 }
15748
15749 return rel;
15750 }
15751
15752 #include "tc-i386-intel.c"
15753
15754 void
15755 tc_x86_parse_to_dw2regnum (expressionS *exp)
15756 {
15757 int saved_naked_reg;
15758 char saved_register_dot;
15759
15760 saved_naked_reg = allow_naked_reg;
15761 allow_naked_reg = 1;
15762 saved_register_dot = register_chars['.'];
15763 register_chars['.'] = '.';
15764 allow_pseudo_reg = 1;
15765 expression_and_evaluate (exp);
15766 allow_pseudo_reg = 0;
15767 register_chars['.'] = saved_register_dot;
15768 allow_naked_reg = saved_naked_reg;
15769
15770 if (exp->X_op == O_register && exp->X_add_number >= 0)
15771 {
15772 if ((addressT) exp->X_add_number < i386_regtab_size)
15773 {
15774 exp->X_op = O_constant;
15775 exp->X_add_number = i386_regtab[exp->X_add_number]
15776 .dw2_regnum[flag_code >> 1];
15777 }
15778 else
15779 exp->X_op = O_illegal;
15780 }
15781 }
15782
15783 void
15784 tc_x86_frame_initial_instructions (void)
15785 {
15786 static unsigned int sp_regno[2];
15787
15788 if (!sp_regno[flag_code >> 1])
15789 {
15790 char *saved_input = input_line_pointer;
15791 char sp[][4] = {"esp", "rsp"};
15792 expressionS exp;
15793
15794 input_line_pointer = sp[flag_code >> 1];
15795 tc_x86_parse_to_dw2regnum (&exp);
15796 gas_assert (exp.X_op == O_constant);
15797 sp_regno[flag_code >> 1] = exp.X_add_number;
15798 input_line_pointer = saved_input;
15799 }
15800
15801 cfi_add_CFA_def_cfa (sp_regno[flag_code >> 1], -x86_cie_data_alignment);
15802 cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
15803 }
15804
15805 int
15806 x86_dwarf2_addr_size (void)
15807 {
15808 #if defined (OBJ_MAYBE_ELF) || defined (OBJ_ELF)
15809 if (x86_elf_abi == X86_64_X32_ABI)
15810 return 4;
15811 #endif
15812 return bfd_arch_bits_per_address (stdoutput) / 8;
15813 }
15814
15815 int
15816 i386_elf_section_type (const char *str, size_t len)
15817 {
15818 if (flag_code == CODE_64BIT
15819 && len == sizeof ("unwind") - 1
15820 && startswith (str, "unwind"))
15821 return SHT_X86_64_UNWIND;
15822
15823 return -1;
15824 }
15825
15826 #ifdef TE_SOLARIS
15827 void
15828 i386_solaris_fix_up_eh_frame (segT sec)
15829 {
15830 if (flag_code == CODE_64BIT)
15831 elf_section_type (sec) = SHT_X86_64_UNWIND;
15832 }
15833 #endif
15834
15835 #ifdef TE_PE
15836 void
15837 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
15838 {
15839 expressionS exp;
15840
15841 exp.X_op = O_secrel;
15842 exp.X_add_symbol = symbol;
15843 exp.X_add_number = 0;
15844 emit_expr (&exp, size);
15845 }
15846 #endif
15847
15848 #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF)
15849 /* For ELF on x86-64, add support for SHF_X86_64_LARGE. */
15850
15851 bfd_vma
15852 x86_64_section_letter (int letter, const char **ptr_msg)
15853 {
15854 if (flag_code == CODE_64BIT)
15855 {
15856 if (letter == 'l')
15857 return SHF_X86_64_LARGE;
15858
15859 *ptr_msg = _("bad .section directive: want a,l,w,x,M,S,G,T in string");
15860 }
15861 else
15862 *ptr_msg = _("bad .section directive: want a,w,x,M,S,G,T in string");
15863 return -1;
15864 }
15865
15866 static void
15867 handle_large_common (int small ATTRIBUTE_UNUSED)
15868 {
15869 if (flag_code != CODE_64BIT)
15870 {
15871 s_comm_internal (0, elf_common_parse);
15872 as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
15873 }
15874 else
15875 {
15876 static segT lbss_section;
15877 asection *saved_com_section_ptr = elf_com_section_ptr;
15878 asection *saved_bss_section = bss_section;
15879
15880 if (lbss_section == NULL)
15881 {
15882 flagword applicable;
15883 segT seg = now_seg;
15884 subsegT subseg = now_subseg;
15885
15886 /* The .lbss section is for local .largecomm symbols. */
15887 lbss_section = subseg_new (".lbss", 0);
15888 applicable = bfd_applicable_section_flags (stdoutput);
15889 bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
15890 seg_info (lbss_section)->bss = 1;
15891
15892 subseg_set (seg, subseg);
15893 }
15894
15895 elf_com_section_ptr = &_bfd_elf_large_com_section;
15896 bss_section = lbss_section;
15897
15898 s_comm_internal (0, elf_common_parse);
15899
15900 elf_com_section_ptr = saved_com_section_ptr;
15901 bss_section = saved_bss_section;
15902 }
15903 }
15904 #endif /* OBJ_ELF || OBJ_MAYBE_ELF */