From: Hongtao Liu Date: Tue, 3 Mar 2020 14:21:37 +0000 (-0800) Subject: x86: Improve -malign-branch X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=79d72f45a7c9b8b92c7d0452caf968b6709a0d44;p=binutils-gdb.git x86: Improve -malign-branch According to the Intel SDM, not all compare flag-modifying instructions are macro-fusible with subsequent jcc instructions. For those non-fusible instructions, -malign-branch doesn't need to align them, only jcc itself needs to be aligned. Here are 2 restrictions which separate macro-fusible instructions from non-fusible ones. Restriction 1: If TEST/AND/CMP/ADD/SUB/INC/DEC is in one of the following formats: cmp m, imm add m, imm sub m, imm test m, imm and m, imm inc m dec m it is unfusible with any jcc instruction. Restriction 2: /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture Note it also works for Skylake and Cascadelake. --------------------------------------------------------------------- | JCC | ADD/SUB/CMP | INC/DEC | TEST/AND | | ------ | ----------- | ------- | -------- | | Jo | N | N | Y | | Jno | N | N | Y | | Jc/Jb | Y | N | Y | | Jae/Jnb | Y | N | Y | | Je/Jz | Y | Y | Y | | Jne/Jnz | Y | Y | Y | | Jna/Jbe | Y | N | Y | | Ja/Jnbe | Y | N | Y | | Js | N | N | Y | | Jns | N | N | Y | | Jp/Jpe | N | N | Y | | Jnp/Jpo | N | N | Y | | Jl/Jnge | Y | Y | Y | | Jge/Jnl | Y | Y | Y | | Jle/Jng | Y | Y | Y | | Jg/Jnle | Y | Y | Y | --------------------------------------------------------------------- */ Update maybe_fused_with_jcc_p to check if operands of CMP like instructions can be fused with a conditional jump. * gas/config/tc-i386.h (i386_tc_frag_data): Add member mf_type. (TC_FRAG_INIT): Init mf_type. * gas/config/tc-i386.c (enum mf_jcc_kind): New enum. (enum mf_cmp_kind): Ditto. (maybe_fused_with_jcc_p): Add argument mf_cmp_p to get mf_type of corresponding instructions, exclude unfusible instructions. (add_fused_jcc_padding_frag_p): Likewise. (add_branch_padding_frag_p): Likewise. (output_insn): Record mf_type for corresponding instructions. (i386_macro_fusible_p): New function.
(i386_next_fusible_jcc_frag): Rename from i386_next_jcc_frag, add argument maybe_cmp_fragP to return next fusible jcc frag only. (i386_classify_machine_dependent_frag): Separate macro-fusible instructions from conditional jump. * gas/testsuite/gas/i386/align-branch-9.s: New file. * gas/testsuite/gas/i386/align-branch-9.d: Ditto. * gas/testsuite/gas/i386/x86-64-align-branch-9.s: Ditto. * gas/testsuite/gas/i386/x86-64-align-branch-9.d: Ditto. * gas/testsuite/gas/i386/i386.exp: Run new tests. --- diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index 62b7cfbe6c7..dc745aa7d21 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -687,6 +687,27 @@ static unsigned int align_branch = (align_branch_jcc_bit | align_branch_fused_bit | align_branch_jmp_bit); +/* Types of conditional jump used by macro-fusion. */ +enum mf_jcc_kind + { + mf_jcc_jo = 0, /* base opcode 0x70 */ + mf_jcc_jc, /* base opcode 0x72 */ + mf_jcc_je, /* base opcode 0x74 */ + mf_jcc_jna, /* base opcode 0x76 */ + mf_jcc_js, /* base opcode 0x78 */ + mf_jcc_jp, /* base opcode 0x7a */ + mf_jcc_jl, /* base opcode 0x7c */ + mf_jcc_jle, /* base opcode 0x7e */ + }; + +/* Types of compare flag-modifying instructions used by macro-fusion. */ +enum mf_cmp_kind + { + mf_cmp_test_and, /* test/and */ + mf_cmp_alu_cmp, /* add/sub/cmp */ + mf_cmp_incdec /* inc/dec */ + }; + /* The maximum padding size for fused jcc. CMP like instruction can be 9 bytes and jcc can be 6 bytes. Leave room just in case for prefixes. */ @@ -8374,10 +8395,22 @@ encoding_length (const fragS *start_frag, offsetT start_off, } /* Return 1 for test, and, cmp, add, sub, inc and dec which may - be macro-fused with conditional jumps. */ + be macro-fused with conditional jumps. + NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address, + or is in one of the following formats: + + cmp m, imm + add m, imm + sub m, imm + test m, imm + and m, imm + inc m + dec m + + it is unfusible.
*/ static int -maybe_fused_with_jcc_p (void) +maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p) { /* No RIP address. */ if (i.base_reg && i.base_reg->reg_num == RegIP) @@ -8387,36 +8420,54 @@ maybe_fused_with_jcc_p (void) if (is_any_vex_encoding (&i.tm)) return 0; - /* and, add, sub with destination register. */ - if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25) - || i.tm.base_opcode <= 5 + /* add, sub without add/sub m, imm. */ + if (i.tm.base_opcode <= 5 || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d) || ((i.tm.base_opcode | 3) == 0x83 - && ((i.tm.extension_opcode | 1) == 0x5 + && (i.tm.extension_opcode == 0x5 || i.tm.extension_opcode == 0x0))) - return (i.types[1].bitfield.class == Reg - || i.types[1].bitfield.instance == Accum); + { + *mf_cmp_p = mf_cmp_alu_cmp; + return !(i.mem_operands && i.imm_operands); + } - /* test, cmp with any register. */ + /* and without and m, imm. */ + if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25) + || ((i.tm.base_opcode | 3) == 0x83 + && i.tm.extension_opcode == 0x4)) + { + *mf_cmp_p = mf_cmp_test_and; + return !(i.mem_operands && i.imm_operands); + } + + /* test without test m imm. */ if ((i.tm.base_opcode | 1) == 0x85 || (i.tm.base_opcode | 1) == 0xa9 || ((i.tm.base_opcode | 1) == 0xf7 - && i.tm.extension_opcode == 0) - || (i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d) + && i.tm.extension_opcode == 0)) + { + *mf_cmp_p = mf_cmp_test_and; + return !(i.mem_operands && i.imm_operands); + } + + /* cmp without cmp m, imm. */ + if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d) || ((i.tm.base_opcode | 3) == 0x83 && (i.tm.extension_opcode == 0x7))) - return (i.types[0].bitfield.class == Reg - || i.types[0].bitfield.instance == Accum - || i.types[1].bitfield.class == Reg - || i.types[1].bitfield.instance == Accum); + { + *mf_cmp_p = mf_cmp_alu_cmp; + return !(i.mem_operands && i.imm_operands); + } - /* inc, dec with any register. */ + /* inc, dec without inc/dec m. 
*/ if ((i.tm.cpu_flags.bitfield.cpuno64 && (i.tm.base_opcode | 0xf) == 0x4f) || ((i.tm.base_opcode | 1) == 0xff && i.tm.extension_opcode <= 0x1)) - return (i.types[0].bitfield.class == Reg - || i.types[0].bitfield.instance == Accum); + { + *mf_cmp_p = mf_cmp_incdec; + return !i.mem_operands; + } return 0; } @@ -8424,7 +8475,7 @@ maybe_fused_with_jcc_p (void) /* Return 1 if a FUSED_JCC_PADDING frag should be generated. */ static int -add_fused_jcc_padding_frag_p (void) +add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p) { /* NB: Don't work with COND_JUMP86 without i386. */ if (!align_branch_power @@ -8433,7 +8484,7 @@ add_fused_jcc_padding_frag_p (void) || !(align_branch & align_branch_fused_bit)) return 0; - if (maybe_fused_with_jcc_p ()) + if (maybe_fused_with_jcc_p (mf_cmp_p)) { if (last_insn.kind == last_insn_other || last_insn.seg != now_seg) @@ -8481,7 +8532,8 @@ add_branch_prefix_frag_p (void) /* Return 1 if a BRANCH_PADDING frag should be generated. */ static int -add_branch_padding_frag_p (enum align_branch_kind *branch_p) +add_branch_padding_frag_p (enum align_branch_kind *branch_p, + enum mf_jcc_kind *mf_jcc_p) { int add_padding; @@ -8503,6 +8555,9 @@ add_branch_padding_frag_p (enum align_branch_kind *branch_p) } else { + /* Because Jcc and JNcc share the same group in the macro-fusible table, + ignore the lowest bit. */ + *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1; *branch_p = align_branch_jcc; if ((align_branch & align_branch_jcc_bit)) add_padding = 1; @@ -8573,6 +8628,10 @@ output_insn (void) offsetT insn_start_off; fragS *fragP = NULL; enum align_branch_kind branch = align_branch_none; + /* The initializer is arbitrary, just to avoid an uninitialized-variable error. + It is either assigned in add_branch_padding_frag_p + or never used.
*/ + enum mf_jcc_kind mf_jcc = mf_jcc_jo; #if defined (OBJ_ELF) || defined (OBJ_MAYBE_ELF) if (IS_ELF && x86_used_note) @@ -8665,7 +8724,7 @@ output_insn (void) insn_start_frag = frag_now; insn_start_off = frag_now_fix (); - if (add_branch_padding_frag_p (&branch)) + if (add_branch_padding_frag_p (&branch, &mf_jcc)) { char *p; /* Branch can be 8 bytes. Leave some room for prefixes. */ @@ -8686,6 +8745,7 @@ output_insn (void) ENCODE_RELAX_STATE (BRANCH_PADDING, 0), NULL, 0, p); + fragP->tc_frag_data.mf_type = mf_jcc; fragP->tc_frag_data.branch_type = branch; fragP->tc_frag_data.max_bytes = max_branch_padding_size; } @@ -8705,6 +8765,7 @@ output_insn (void) unsigned char *q; unsigned int j; unsigned int prefix; + enum mf_cmp_kind mf_cmp; if (avoid_fence && (i.tm.base_opcode == 0xfaee8 @@ -8731,7 +8792,7 @@ output_insn (void) if (branch) /* Skip if this is a branch. */ ; - else if (add_fused_jcc_padding_frag_p ()) + else if (add_fused_jcc_padding_frag_p (&mf_cmp)) { /* Make room for padding. */ frag_grow (MAX_FUSED_JCC_PADDING_SIZE); @@ -8743,6 +8804,7 @@ output_insn (void) ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0), NULL, 0, p); + fragP->tc_frag_data.mf_type = mf_cmp; fragP->tc_frag_data.branch_type = align_branch_fused; fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE; } @@ -10948,6 +11010,42 @@ elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var) } #endif +/* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture + Note it also works for Skylake and Cascadelake.
+--------------------------------------------------------------------- +| JCC | ADD/SUB/CMP | INC/DEC | TEST/AND | +| ------ | ----------- | ------- | -------- | +| Jo | N | N | Y | +| Jno | N | N | Y | +| Jc/Jb | Y | N | Y | +| Jae/Jnb | Y | N | Y | +| Je/Jz | Y | Y | Y | +| Jne/Jnz | Y | Y | Y | +| Jna/Jbe | Y | N | Y | +| Ja/Jnbe | Y | N | Y | +| Js | N | N | Y | +| Jns | N | N | Y | +| Jp/Jpe | N | N | Y | +| Jnp/Jpo | N | N | Y | +| Jl/Jnge | Y | Y | Y | +| Jge/Jnl | Y | Y | Y | +| Jle/Jng | Y | Y | Y | +| Jg/Jnle | Y | Y | Y | +--------------------------------------------------------------------- */ +static int +i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc) +{ + if (mf_cmp == mf_cmp_alu_cmp) + return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna) + || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle); + if (mf_cmp == mf_cmp_incdec) + return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl + || mf_jcc == mf_jcc_jle); + if (mf_cmp == mf_cmp_test_and) + return 1; + return 0; +} + /* Return the next non-empty frag. */ static fragS * @@ -10967,20 +11065,23 @@ i386_next_non_empty_frag (fragS *fragP) /* Return the next jcc frag after BRANCH_PADDING. 
*/ static fragS * -i386_next_jcc_frag (fragS *fragP) +i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP) { - if (!fragP) + fragS *branch_fragP; + if (!pad_fragP) return NULL; - if (fragP->fr_type == rs_machine_dependent - && (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) + if (pad_fragP->fr_type == rs_machine_dependent + && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype) == BRANCH_PADDING)) { - fragP = i386_next_non_empty_frag (fragP); - if (fragP->fr_type != rs_machine_dependent) + branch_fragP = i386_next_non_empty_frag (pad_fragP); + if (branch_fragP->fr_type != rs_machine_dependent) return NULL; - if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == COND_JUMP) - return fragP; + if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP + && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type, + pad_fragP->tc_frag_data.mf_type)) + return branch_fragP; } return NULL; @@ -11025,7 +11126,7 @@ i386_classify_machine_dependent_frag (fragS *fragP) */ cmp_fragP = i386_next_non_empty_frag (next_fragP); pad_fragP = i386_next_non_empty_frag (cmp_fragP); - branch_fragP = i386_next_jcc_frag (pad_fragP); + branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP); if (branch_fragP) { /* The BRANCH_PADDING frag is merged with the diff --git a/gas/config/tc-i386.h b/gas/config/tc-i386.h index 845b3901d4c..93678c22822 100644 --- a/gas/config/tc-i386.h +++ b/gas/config/tc-i386.h @@ -273,6 +273,7 @@ struct i386_tc_frag_data unsigned char prefix_length; unsigned char default_prefix; unsigned char cmp_size; + unsigned int mf_type : 3; unsigned int classified : 1; unsigned int branch_type : 3; }; @@ -299,6 +300,7 @@ struct i386_tc_frag_data (FRAGP)->tc_frag_data.cmp_size = 0; \ (FRAGP)->tc_frag_data.classified = 0; \ (FRAGP)->tc_frag_data.branch_type = 0; \ + (FRAGP)->tc_frag_data.mf_type = 0; \ } \ while (0) diff --git a/gas/testsuite/gas/i386/align-branch-9.d b/gas/testsuite/gas/i386/align-branch-9.d new file mode 100644 index 
00000000000..6340817d048 --- /dev/null +++ b/gas/testsuite/gas/i386/align-branch-9.d @@ -0,0 +1,78 @@ +#as: -mbranches-within-32B-boundaries +#objdump: -dw + +.*: +file format .* + +Disassembly of section .text: + +0+ : + 0: 65 a3 01 00 00 00 mov %eax,%gs:0x1 + 6: 55 push %ebp + 7: 55 push %ebp + 8: 55 push %ebp + 9: 55 push %ebp + a: 89 e5 mov %esp,%ebp + c: 89 7d f8 mov %edi,-0x8\(%ebp\) + f: 89 75 f4 mov %esi,-0xc\(%ebp\) + 12: 89 75 f4 mov %esi,-0xc\(%ebp\) + 15: 89 75 f4 mov %esi,-0xc\(%ebp\) + 18: 89 75 f4 mov %esi,-0xc\(%ebp\) + 1b: 89 75 f4 mov %esi,-0xc\(%ebp\) + 1e: 39 c5 cmp %eax,%ebp + 20: 70 62 jo 84 + 22: 89 73 f4 mov %esi,-0xc\(%ebx\) + 25: 89 75 f4 mov %esi,-0xc\(%ebp\) + 28: 89 7d f8 mov %edi,-0x8\(%ebp\) + 2b: 89 75 f4 mov %esi,-0xc\(%ebp\) + 2e: 89 75 f4 mov %esi,-0xc\(%ebp\) + 31: 89 75 f4 mov %esi,-0xc\(%ebp\) + 34: 89 75 f4 mov %esi,-0xc\(%ebp\) + 37: 89 75 f4 mov %esi,-0xc\(%ebp\) + 3a: 5d pop %ebp + 3b: 5d pop %ebp + 3c: 5d pop %ebp + 3d: 74 45 je 84 + 3f: 5d pop %ebp + 40: 74 42 je 84 + 42: 89 44 24 fc mov %eax,-0x4\(%esp\) + 46: 89 75 f4 mov %esi,-0xc\(%ebp\) + 49: 89 7d f8 mov %edi,-0x8\(%ebp\) + 4c: 89 75 f4 mov %esi,-0xc\(%ebp\) + 4f: 89 75 f4 mov %esi,-0xc\(%ebp\) + 52: 89 75 f4 mov %esi,-0xc\(%ebp\) + 55: 89 75 f4 mov %esi,-0xc\(%ebp\) + 58: 89 75 f4 mov %esi,-0xc\(%ebp\) + 5b: 5d pop %ebp + 5c: eb 2c jmp 8a + 5e: 66 90 xchg %ax,%ax + 60: eb 28 jmp 8a + 62: eb 26 jmp 8a + 64: 89 45 fc mov %eax,-0x4\(%ebp\) + 67: 89 75 f4 mov %esi,-0xc\(%ebp\) + 6a: 89 7d f8 mov %edi,-0x8\(%ebp\) + 6d: 5d pop %ebp + 6e: 5d pop %ebp + 6f: 40 inc %eax + 70: 72 12 jb 84 + 72: 36 36 89 45 fc ss mov %eax,%ss:-0x4\(%ebp\) + 77: 89 75 f4 mov %esi,-0xc\(%ebp\) + 7a: 89 7d f8 mov %edi,-0x8\(%ebp\) + 7d: 89 75 f4 mov %esi,-0xc\(%ebp\) + 80: 21 c3 and %eax,%ebx + 82: 7c 06 jl 8a + 84: 8b 45 f4 mov -0xc\(%ebp\),%eax + 87: 89 45 fc mov %eax,-0x4\(%ebp\) + 8a: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + 90: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + 96: 89 b5 50 fb 
ff ff mov %esi,-0x4b0\(%ebp\) + 9c: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + a2: 89 75 0c mov %esi,0xc\(%ebp\) + a5: e9 fc ff ff ff jmp a6 + aa: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + b0: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + b6: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + bc: 89 b5 50 fb ff ff mov %esi,-0x4b0\(%ebp\) + c2: 89 75 00 mov %esi,0x0\(%ebp\) + c5: 74 c3 je 8a + c7: 74 c1 je 8a +#pass diff --git a/gas/testsuite/gas/i386/align-branch-9.s b/gas/testsuite/gas/i386/align-branch-9.s new file mode 100644 index 00000000000..357abe30f97 --- /dev/null +++ b/gas/testsuite/gas/i386/align-branch-9.s @@ -0,0 +1,74 @@ + .text + .globl foo + .p2align 4 +foo: + movl %eax, %gs:0x1 + pushl %ebp + pushl %ebp + pushl %ebp + pushl %ebp + movl %esp, %ebp + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + cmp %eax, %ebp + jo .L_2 + movl %esi, -12(%ebx) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + popl %ebp + popl %ebp + popl %ebp + je .L_2 + popl %ebp + je .L_2 + movl %eax, -4(%esp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + movl %esi, -12(%ebp) + popl %ebp + jmp .L_3 + jmp .L_3 + jmp .L_3 + movl %eax, -4(%ebp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + popl %ebp + popl %ebp + inc %eax + jc .L_2 + movl %eax, -4(%ebp) + movl %esi, -12(%ebp) + movl %edi, -8(%ebp) + movl %esi, -12(%ebp) + and %eax, %ebx + jl .L_3 +.L_2: + movl -12(%ebp), %eax + movl %eax, -4(%ebp) +.L_3: + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, 12(%ebp) + jmp bar + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, -1200(%ebp) + movl %esi, (%ebp) + je .L_3 + je .L_3 diff --git 
a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index 685e62ea729..8fc621f2bbd 100644 --- a/gas/testsuite/gas/i386/i386.exp +++ b/gas/testsuite/gas/i386/i386.exp @@ -525,6 +525,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_32_check]] run_dump_test "align-branch-6" run_dump_test "align-branch-7" run_dump_test "align-branch-8" + run_dump_test "align-branch-9" # These tests require support for 8 and 16 bit relocs, # so we only run them for ELF and COFF targets. @@ -1100,6 +1101,7 @@ if [expr ([istarget "i*86-*-*"] || [istarget "x86_64-*-*"]) && [gas_64_check]] t run_dump_test "x86-64-align-branch-6" run_dump_test "x86-64-align-branch-7" run_dump_test "x86-64-align-branch-8" + run_dump_test "x86-64-align-branch-9" if { ![istarget "*-*-aix*"] && ![istarget "*-*-beos*"] diff --git a/gas/testsuite/gas/i386/x86-64-align-branch-9.d b/gas/testsuite/gas/i386/x86-64-align-branch-9.d new file mode 100644 index 00000000000..1041fd04833 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-align-branch-9.d @@ -0,0 +1,46 @@ +#as: -mbranches-within-32B-boundaries +#objdump: -dw + +.*: +file format .* + +Disassembly of section .text: + +0+ : + 0: c1 e9 02 shr \$0x2,%ecx + 3: c1 e9 02 shr \$0x2,%ecx + 6: c1 e9 02 shr \$0x2,%ecx + 9: 89 d1 mov %edx,%ecx + b: 31 c0 xor %eax,%eax + d: c1 e9 02 shr \$0x2,%ecx + 10: c1 e9 02 shr \$0x2,%ecx + 13: c1 e9 02 shr \$0x2,%ecx + 16: c1 e9 02 shr \$0x2,%ecx + 19: c1 e9 02 shr \$0x2,%ecx + 1c: c1 e9 02 shr \$0x2,%ecx + 1f: 80 fa 02 cmp \$0x2,%dl + 22: 70 df jo 3 + 24: 2e 2e 2e 2e 31 c0 cs cs cs cs xor %eax,%eax + 2a: c1 e9 02 shr \$0x2,%ecx + 2d: c1 e9 02 shr \$0x2,%ecx + 30: c1 e9 02 shr \$0x2,%ecx + 33: 89 d1 mov %edx,%ecx + 35: 31 c0 xor %eax,%eax + 37: c1 e9 02 shr \$0x2,%ecx + 3a: c1 e9 02 shr \$0x2,%ecx + 3d: c1 e9 02 shr \$0x2,%ecx + 40: f6 c2 02 test \$0x2,%dl + 43: 75 e8 jne 2d + 45: 31 c0 xor %eax,%eax + 47: c1 e9 02 shr \$0x2,%ecx + 4a: c1 e9 02 shr \$0x2,%ecx + 4d: 89 d1 mov %edx,%ecx + 4f: c1 
e9 02 shr \$0x2,%ecx + 52: c1 e9 02 shr \$0x2,%ecx + 55: 89 d1 mov %edx,%ecx + 57: c1 e9 02 shr \$0x2,%ecx + 5a: 89 d1 mov %edx,%ecx + 5c: 31 c0 xor %eax,%eax + 5e: ff c0 inc %eax + 60: 76 cb jbe 2d + 62: 31 c0 xor %eax,%eax +#pass diff --git a/gas/testsuite/gas/i386/x86-64-align-branch-9.s b/gas/testsuite/gas/i386/x86-64-align-branch-9.s new file mode 100644 index 00000000000..917579bda40 --- /dev/null +++ b/gas/testsuite/gas/i386/x86-64-align-branch-9.s @@ -0,0 +1,43 @@ + .text + .p2align 4,,15 +foo: + shrl $2, %ecx +.L1: + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + cmpb $2, %dl + jo .L1 + xorl %eax, %eax + shrl $2, %ecx +.L2: + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + shrl $2, %ecx + shrl $2, %ecx + shrl $2, %ecx + testb $2, %dl + jne .L2 + xorl %eax, %eax +.L3: + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + shrl $2, %ecx + shrl $2, %ecx + movl %edx, %ecx + shrl $2, %ecx + movl %edx, %ecx + xorl %eax, %eax + inc %eax + jbe .L2 + xorl %eax, %eax