From: Jackie Smith Cashion Date: Sat, 7 Sep 1996 12:45:19 +0000 (+0000) Subject: Sat Sep 7 13:25:55 1996 James G. Smith X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ff8716f58e1eeff58b0e997c980a36627a7a7c83;p=binutils-gdb.git Sat Sep 7 13:25:55 1996 James G. Smith * config/tc-mips.c (COUNT_TOP_ZEROES): Added macro to count leading zeroes. (load_register): Ensure hi32 bits are not lost during lo32bit processing. Fix shift offset that was overflowing into the next instruction field. Add code to generate shorter sequences for constants with a single contiguous sequence of ones. Fri Sep 6 18:23:54 1996 James G. Smith * gas/mips/dli.{s,d}: More test cases added. NOTE: The COUNT_TOP_ZEROES macro is a bit bulky, and the same result can be achieved by using a "standard" ffs() routine: count = ffs(~v); count = count == 0 ? 0 : 33 - count; However the following timings (VR4300 CPU clock ticks on a CMA101 board) show the performance gain. Number of ffs() for loop if/then/else conditional leading ?: zeroes ------------------------------------------------------------------------------- 0 167 179 266 251 1 1718 283 263 259 2 1670 379 287 295 3 1622 475 311 311 4 1574 571 295 287 5 1534 667 311 319 6 1478 763 307 299 7 1430 859 323 323 8 1382 962 287 295 9 1334 1051 319 311 10 1286 1154 299 307 11 1238 1250 323 331 12 1183 1346 299 307 13 1135 1442 331 323 14 1087 1546 311 319 15 1039 1642 335 343 16 991 1730 295 287 17 950 1834 311 319 18 895 1922 307 299 19 847 2026 331 323 20 799 2122 307 299 21 751 2218 323 323 22 703 2314 311 311 23 655 2417 343 335 24 599 2506 307 299 25 559 2602 331 331 26 511 2705 311 319 27 463 2801 343 335 28 407 2897 311 319 29 367 2993 343 335 30 311 3097 323 331 31 271 3185 355 355 32 215 3233 379 371 --- diff --git a/gas/config/tc-mips.c b/gas/config/tc-mips.c index 35cd23bea24..85b3f54a146 100644 --- a/gas/config/tc-mips.c +++ b/gas/config/tc-mips.c @@ -1864,6 +1864,82 @@ check_absolute_expr (ip, ex) as_warn ("Instruction %s requires 
absolute expression", ip->insn_mo->name); } +/* Count the leading zeroes by performing a binary chop. This is a + bulky bit of source, but performance is a LOT better for the + majority of values than a simple loop to count the bits: + for (lcnt = 0; (lcnt < 32); lcnt++) + if ((v) & (1 << (31 - lcnt))) + break; + However it is not code size friendly, and the gain will drop a bit + on certain cached systems. +*/ +#define COUNT_TOP_ZEROES(v) \ + (((v) & ~0xffff) == 0 \ + ? ((v) & ~0xff) == 0 \ + ? ((v) & ~0xf) == 0 \ + ? ((v) & ~0x3) == 0 \ + ? ((v) & ~0x1) == 0 \ + ? !(v) \ + ? 32 \ + : 31 \ + : 30 \ + : ((v) & ~0x7) == 0 \ + ? 29 \ + : 28 \ + : ((v) & ~0x3f) == 0 \ + ? ((v) & ~0x1f) == 0 \ + ? 27 \ + : 26 \ + : ((v) & ~0x7f) == 0 \ + ? 25 \ + : 24 \ + : ((v) & ~0xfff) == 0 \ + ? ((v) & ~0x3ff) == 0 \ + ? ((v) & ~0x1ff) == 0 \ + ? 23 \ + : 22 \ + : ((v) & ~0x7ff) == 0 \ + ? 21 \ + : 20 \ + : ((v) & ~0x3fff) == 0 \ + ? ((v) & ~0x1fff) == 0 \ + ? 19 \ + : 18 \ + : ((v) & ~0x7fff) == 0 \ + ? 17 \ + : 16 \ + : ((v) & ~0xffffff) == 0 \ + ? ((v) & ~0xfffff) == 0 \ + ? ((v) & ~0x3ffff) == 0 \ + ? ((v) & ~0x1ffff) == 0 \ + ? 15 \ + : 14 \ + : ((v) & ~0x7ffff) == 0 \ + ? 13 \ + : 12 \ + : ((v) & ~0x3fffff) == 0 \ + ? ((v) & ~0x1fffff) == 0 \ + ? 11 \ + : 10 \ + : ((v) & ~0x7fffff) == 0 \ + ? 9 \ + : 8 \ + : ((v) & ~0xfffffff) == 0 \ + ? ((v) & ~0x3ffffff) == 0 \ + ? ((v) & ~0x1ffffff) == 0 \ + ? 7 \ + : 6 \ + : ((v) & ~0x7ffffff) == 0 \ + ? 5 \ + : 4 \ + : ((v) & ~0x3fffffff) == 0 \ + ? ((v) & ~0x1fffffff) == 0 \ + ? 3 \ + : 2 \ + : ((v) & ~0x7fffffff) == 0 \ + ? 1 \ + : 0) + /* load_register() * This routine generates the least number of instructions neccessary to load * an absolute expression value into a register. @@ -1876,7 +1952,7 @@ load_register (counter, reg, ep, dbl) int dbl; { int shift, freg; - expressionS hi32, lo32; + expressionS hi32, lo32, tmp; if (ep->X_op != O_big) { @@ -1910,7 +1986,7 @@ load_register (counter, reg, ep, dbl) || ! 
ep->X_unsigned || sizeof (ep->X_add_number) > 4 || (ep->X_add_number & 0x80000000) == 0)) - || (mips_isa < 3 + || ((mips_isa < 3 || !dbl) && (ep->X_add_number &~ 0xffffffff) == 0)) { /* 32 bit values require an lui. */ @@ -1978,6 +2054,59 @@ load_register (counter, reg, ep, dbl) return; } } + + /* Check for 16bit shifted constant: */ + shift = 32; + tmp.X_add_number = hi32.X_add_number << shift | lo32.X_add_number; + /* We know that hi32 is non-zero, so start the mask on the first + bit of the hi32 value: */ + shift = 17; + do + { + if ((tmp.X_add_number & ~((offsetT)0xffff << shift)) == 0) + { + tmp.X_op = O_constant; + tmp.X_add_number >>= shift; + macro_build ((char *) NULL, counter, &tmp, "ori", "t,r,i", reg, 0, + (int) BFD_RELOC_LO16); + macro_build ((char *) NULL, counter, NULL, + (shift >= 32) ? "dsll32" : "dsll", + "d,w,<", reg, reg, (shift >= 32) ? shift - 32 : shift); + return; + } + shift++; + } while (shift <= (64 - 16)); + + freg = 0; + shift = 32; + tmp.X_add_number = hi32.X_add_number << shift | lo32.X_add_number; + while ((tmp.X_add_number & 1) == 0) + { + tmp.X_add_number >>= 1; + freg++; + } + if (((tmp.X_add_number + 1) & tmp.X_add_number) == 0) /* (power-of-2 - 1) */ + { + shift = COUNT_TOP_ZEROES((unsigned int)hi32.X_add_number); + if (shift != 0) + { + tmp.X_op = O_constant; + tmp.X_add_number = (offsetT)-1; + macro_build ((char *) NULL, counter, &tmp, "addiu", "t,r,j", reg, 0, + (int) BFD_RELOC_LO16); /* set all ones */ + if (freg != 0) + { + freg += shift; + macro_build ((char *) NULL, counter, NULL, + (freg >= 32) ? "dsll32" : "dsll", + "d,w,<", reg, reg, + (freg >= 32) ? freg - 32 : freg); + } + macro_build ((char *) NULL, counter, NULL, (shift >= 32) ? "dsrl32" : "dsrl", + "d,w,<", reg, reg, (shift >= 32) ? 
shift - 32 : shift); + return; + } + } load_register (counter, reg, &hi32, 0); freg = reg; } @@ -1999,7 +2128,7 @@ load_register (counter, reg, ep, dbl) macro_build ((char *) NULL, counter, &lo32, "lui", "t,u", reg, (int) BFD_RELOC_HI16); macro_build ((char *) NULL, counter, NULL, "dsrl32", "d,w,<", reg, - reg, 32); + reg, 0); return; }