+2018-10-19 Fredrik Noring <noring@nocrew.org>
+
+ * testsuite/gas/mips/r5900.s: Extend the R5900 short loop fix
+ test with border cases.
+ * testsuite/gas/mips/r5900.d: Add extra expected disassembly.
+ * config/tc-mips.c (can_swap_branch_p): Clarify the R5900 short
+ loop hardware bug conditions. Correct note on the R5900
+ instruction count short loop fix.
+
2018-10-16 Matthew Malcomson <matthew.malcomson@arm.com>
* testsuite/gas/aarch64/illegal-dotproduct.d: New test.
&& insn_length (history) != 4)
return FALSE;
- /* On R5900 short loops need to be fixed by inserting a nop in
- the branch delay slots.
- A short loop can be terminated too early. */
+ /* On the R5900 short loops need to be fixed by inserting a NOP in the
+ branch delay slot.
+
+ The short loop bug under certain conditions causes loops to execute
+ only once or twice. We must ensure that the assembler never
+ generates loops that satisfy all of the following conditions:
+
+ - a loop consists of six instructions or fewer
+ (including the branch delay slot);
+ - a loop contains only one conditional branch instruction at the end
+ of the loop;
+ - a loop does not contain any other branch or jump instructions;
+ - the branch delay slot of the loop is not a NOP (EE 2.9 or later).
+
+ We need to do this because of a hardware bug in the R5900 chip. */
if (mips_opts.arch == CPU_R5900
/* Check if instruction has a parameter, ignore "j $31". */
&& (address_expr != NULL)
|| (ip->insn_opcode & 0xffff0000) == 0x04110000)) /* bgezal $0 */
{
int distance;
- /* Check if loop is shorter than 6 instructions including
- branch and delay slot. */
+ /* Check if the loop is 6 instructions or shorter,
+ including the branch and its delay slot. */
distance = frag_now_fix () - S_GET_VALUE (address_expr->X_add_symbol);
if (distance <= 20)
{
[0-9a-f]+ <[^>]*> 2403012c li \$3,300
[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
-[0-9a-f]+ <[^>]*> 1460fffd bnez \$3,[0-9a-f]+ <short_loop1>
+[0-9a-f]+ <[^>]*> 1460fffd bnez \$3,[0-9a-f]+ <short_loop3>
[0-9a-f]+ <[^>]*> 00000000 nop
+[0-9a-f]+ <[^>]*> 2403012c li \$3,300
+[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
+[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
+[0-9a-f]+ <[^>]*> 20a5ffff addi \$5,\$5,-1
+[0-9a-f]+ <[^>]*> 20c6ffff addi \$6,\$6,-1
+[0-9a-f]+ <[^>]*> 20e7ffff addi \$7,\$7,-1
+[0-9a-f]+ <[^>]*> 1460fffa bnez \$3,[0-9a-f]+ <short_loop6>
+[0-9a-f]+ <[^>]*> 00000000 nop
+[0-9a-f]+ <[^>]*> 2403012c li \$3,300
+[0-9a-f]+ <[^>]*> 2063ffff addi \$3,\$3,-1
+[0-9a-f]+ <[^>]*> 2084ffff addi \$4,\$4,-1
+[0-9a-f]+ <[^>]*> 20a5ffff addi \$5,\$5,-1
+[0-9a-f]+ <[^>]*> 20c6ffff addi \$6,\$6,-1
+[0-9a-f]+ <[^>]*> 20e7ffff addi \$7,\$7,-1
+[0-9a-f]+ <[^>]*> 1460fffa bnez \$3,[0-9a-f]+ <short_loop7>
+[0-9a-f]+ <[^>]*> 2108ffff addi \$8,\$8,-1
[0-9a-f]+ <[^>]*> 24040003 li \$4,3
\.\.\.
.set pop
.set push
.set reorder
- # Short loop fix.
+ # Test the short loop fix with 3 loop instructions.
li $3, 300
-short_loop1:
+short_loop3:
addi $3, -1
addi $4, -1
- # NOP should be inserted in branch delay.
- bne $3, $0, short_loop1
+ # A NOP will be inserted in the branch delay slot.
+ bne $3, $0, short_loop3
+
+ # Test the short loop fix with 6 loop instructions.
+ li $3, 300
+short_loop6:
+ addi $3, -1
+ addi $4, -1
+ addi $5, -1
+ addi $6, -1
+ addi $7, -1
+ # A NOP will be inserted in the branch delay slot.
+ bne $3, $0, short_loop6
+
+ # Test the short loop fix with 7 loop instructions.
+ li $3, 300
+short_loop7:
+ addi $3, -1
+ addi $4, -1
+ addi $5, -1
+ addi $6, -1
+ addi $7, -1
+ addi $8, -1
+ # The short loop fix does not apply to loops with
+ # more than 6 instructions.
+ bne $3, $0, short_loop7
li $4, 3
.set pop